Extract functions to ImVectorWrapper

2026-02-16 21:07:43 +01:00 · 2023-12-09 01:40:54 +09:00 · 2023-12-09 01:40:54 +09:00 · 6b65ee9940
commit 6b65ee9940
parent ca321e59e4
3 changed files with 286 additions and 174 deletions
--- a/Dalamud/Interface/Internal/ImGuiClipboardFunctionProvider.cs
+++ b/Dalamud/Interface/Internal/ImGuiClipboardFunctionProvider.cs
@ -80,9 +80,9 @@ internal sealed unsafe class ImGuiClipboardFunctionProvider : IServiceType, IDis
    private void SetClipboardTextImpl(byte* text)
    {
        var buffer = ImGuiCurrentContextClipboardHandlerData;
-        Utf8Utils.SetFromNullTerminatedBytes(ref buffer, text);
-        Utf8Utils.Normalize(ref buffer);
-        Utf8Utils.AddNullTerminatorIfMissing(ref buffer);
+        buffer.SetFromZeroTerminatedSequence(text);
+        buffer.Utf8Normalize();
+        buffer.AddZeroTerminatorIfMissing();
        this.setTextOriginal(this.clipboardUserDataOriginal, buffer.Data);
    }

@ -91,159 +91,9 @@ internal sealed unsafe class ImGuiClipboardFunctionProvider : IServiceType, IDis
        _ = this.getTextOriginal(this.clipboardUserDataOriginal);

        var buffer = ImGuiCurrentContextClipboardHandlerData;
-        Utf8Utils.TrimNullTerminator(ref buffer);
-        Utf8Utils.Normalize(ref buffer);
-        Utf8Utils.AddNullTerminatorIfMissing(ref buffer);
+        buffer.TrimZeroTerminator();
+        buffer.Utf8Normalize();
+        buffer.AddZeroTerminatorIfMissing();
        return buffer.Data;
    }
-
-    private static class Utf8Utils
-    {
-        /// <summary>
-        /// Sets <paramref name="buf"/> from <paramref name="psz"/>, a null terminated UTF-8 string.
-        /// </summary>
-        /// <param name="buf">The target buffer. It will not contain a null terminator.</param>
-        /// <param name="psz">The pointer to the null-terminated UTF-8 string.</param>
-        public static void SetFromNullTerminatedBytes(ref ImVectorWrapper<byte> buf, byte* psz)
-        {
-            var len = 0;
-            while (psz[len] != 0)
-                len++;
-
-            buf.Clear();
-            buf.AddRange(new Span<byte>(psz, len));
-        }
-
-        /// <summary>
-        /// Removes the null terminator.
-        /// </summary>
-        /// <param name="buf">The UTF-8 string buffer.</param>
-        public static void TrimNullTerminator(ref ImVectorWrapper<byte> buf)
-        {
-            while (buf.Length > 0 && buf[^1] == 0)
-                buf.LengthUnsafe--;
-        }
-        
-        /// <summary>
-        /// Adds a null terminator to the buffer.
-        /// </summary>
-        /// <param name="buf">The buffer.</param>
-        public static void AddNullTerminatorIfMissing(ref ImVectorWrapper<byte> buf)
-        {
-            if (buf.Length > 0 && buf[^1] == 0)
-                return;
-            buf.Add(0);
-        }
-
-        /// <summary>
-        /// Counts the number of bytes for the UTF-8 character.
-        /// </summary>
-        /// <param name="b">The bytes.</param>
-        /// <param name="avail">Available number of bytes.</param>
-        /// <returns>Number of bytes taken, or -1 if the byte was invalid.</returns>
-        public static int CountBytes(byte* b, int avail)
-        {
-            if (avail <= 0)
-                return 0;
-            if ((b[0] & 0x80) == 0)
-                return 1;
-            if ((b[0] & 0xE0) == 0xC0 && avail >= 2)
-                return (b[1] & 0xC0) == 0x80 ? 2 : -1;
-            if ((b[0] & 0xF0) == 0xE0 && avail >= 3)
-                return (b[1] & 0xC0) == 0x80 && (b[2] & 0xC0) == 0x80 ? 3 : -1;
-            if ((b[0] & 0xF8) == 0xF0 && avail >= 4)
-                return (b[1] & 0xC0) == 0x80 && (b[2] & 0xC0) == 0x80 && (b[3] & 0xC0) == 0x80 ? 4 : -1;
-            return -1;
-        }
-
-        /// <summary>
-        /// Gets the codepoint.
-        /// </summary>
-        /// <param name="b">The bytes.</param>
-        /// <param name="cb">The result from <see cref="CountBytes"/>.</param>
-        /// <returns>The codepoint, or \xFFFD replacement character if failed.</returns>
-        public static int GetCodepoint(byte* b, int cb) => cb switch
-        {
-            1 => b[0],
-            2 => ((b[0] & 0x1F) << 6) | (b[1] & 0x3F),
-            3 => ((b[0] & 0x0F) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F),
-            4 => ((b[0] & 0x07) << 18) | ((b[1] & 0x3F) << 12) | ((b[2] & 0x3F) << 6) | (b[3] & 0x3F),
-            _ => 0xFFFD,
-        };
-
-        /// <summary>
-        /// Replaces a sequence with another.
-        /// </summary>
-        /// <param name="buf">The buffer.</param>
-        /// <param name="offset">Offset of the sequence to be replaced.</param>
-        /// <param name="length">Length of the sequence to be replaced.</param>
-        /// <param name="replacement">The replacement sequence.</param>
-        /// <returns>The length of <paramref name="replacement"/>.</returns>
-        public static int ReplaceSequence(
-            ref ImVectorWrapper<byte> buf,
-            int offset,
-            int length,
-            ReadOnlySpan<byte> replacement)
-        {
-            var i = 0;
-            for (; i < replacement.Length; i++)
-            {
-                if (length >= i + 1)
-                    buf[offset++] = replacement[i];
-                else
-                    buf.Insert(offset++, replacement[i]);
-            }
-
-            for (; i < length; i++)
-                buf.RemoveAt(offset);
-
-            return replacement.Length;
-        }
-
-        /// <summary>
-        /// Normalize the given text for our use case.
-        /// </summary>
-        /// <param name="buf">The buffer.</param>
-        public static void Normalize(ref ImVectorWrapper<byte> buf)
-        {
-            // Ensure an implicit null after the end of the string.
-            buf.EnsureCapacity(buf.Length + 1);
-            buf.StorageSpan[buf.Length] = 0;
-
-            for (var i = 0; i < buf.Length;)
-            {
-                var cb = CountBytes(buf.Data + i, buf.Length - i);
-                var currInt = GetCodepoint(buf.Data + i, cb);
-                switch (currInt)
-                {
-                    // Note that buf.Data[i + 1] is always defined. See the beginning of the function.
-                    case '\r' when buf.Data[i + 1] == '\n': // Already CR LF?
-                        i += 2;
-                        continue;
-
-                    case 0xFFFE or 0xFFFF: // Simply invalid
-                    case > char.MaxValue: // ImWchar is same size with char; does not support
-                    case >= 0xD800 and <= 0xDBFF: // UTF-16 surrogate; does not support
-                        // Replace with \uFFFD in UTF-8: EF BF BD
-                        i += ReplaceSequence(ref buf, i, cb, "\uFFFD"u8);
-                        break;
-
-                    // See String.Manipulation.cs: IndexOfNewlineChar.
-                    case '\r': // CR; Carriage Return
-                    case '\n': // LF; Line Feed
-                    case '\f': // FF; Form Feed
-                    case '\u0085': // NEL; Next Line
-                    case '\u2028': // LS; Line Separator
-                    case '\u2029': // PS; Paragraph Separator
-                        i += ReplaceSequence(ref buf, i, cb, "\r\n"u8);
-                        break;
-
-                    default:
-                        // Not a newline char.
-                        i += cb;
-                        break;
-                }
-            }
-        }
-    }
 }
--- a/Dalamud/Interface/Utility/ImVectorWrapper.ZeroTerminatedSequence.cs
+++ b/Dalamud/Interface/Utility/ImVectorWrapper.ZeroTerminatedSequence.cs
@ -0,0 +1,207 @@
+using System.Numerics;
+using System.Text;
+
+namespace Dalamud.Interface.Utility;
+
+/// <summary>
+/// Utility methods for <see cref="ImVectorWrapper{T}"/>.
+/// </summary>
+public static partial class ImVectorWrapper
+{
+    /// <summary>
+    /// Appends the zero-terminated sequence starting at <paramref name="psz"/> to the end of
+    /// <paramref name="buf"/>. The terminating zero itself is not appended.
+    /// </summary>
+    /// <typeparam name="T">The element type.</typeparam>
+    /// <param name="buf">The buffer to append to.</param>
+    /// <param name="psz">The pointer to the first element of the zero-terminated sequence.</param>
+    public static unsafe void AppendZeroTerminatedSequence<T>(this ref ImVectorWrapper<T> buf, T* psz)
+        where T : unmanaged, INumber<T>
+    {
+        // Advance a cursor up to (but not past) the terminator to measure the sequence.
+        var end = psz;
+        while (*end != default)
+            end++;
+
+        buf.AddRange(new Span<T>(psz, (int)(end - psz)));
+    }
+
+    /// <summary>
+    /// Replaces the contents of <paramref name="buf"/> with the zero-terminated sequence starting at
+    /// <paramref name="psz"/>. The terminating zero itself is not stored.
+    /// </summary>
+    /// <typeparam name="T">The element type.</typeparam>
+    /// <param name="buf">The buffer to overwrite.</param>
+    /// <param name="psz">The pointer to the first element of the zero-terminated sequence.</param>
+    public static unsafe void SetFromZeroTerminatedSequence<T>(this ref ImVectorWrapper<T> buf, T* psz)
+        where T : unmanaged, INumber<T>
+    {
+        // Drop whatever the buffer held before copying the new contents in.
+        buf.Clear();
+
+        // Count the elements that precede the terminator.
+        var count = 0;
+        for (; psz[count] != default; count++)
+        {
+        }
+
+        buf.AddRange(new Span<T>(psz, count));
+    }
+
+    /// <summary>
+    /// Removes every trailing zero element from the end of the buffer.
+    /// </summary>
+    /// <typeparam name="T">The element type.</typeparam>
+    /// <param name="buf">The buffer to trim.</param>
+    public static void TrimZeroTerminator<T>(this ref ImVectorWrapper<T> buf)
+        where T : unmanaged, INumber<T>
+    {
+        // Only the stored length is decremented; the elements themselves are left untouched.
+        ref var remaining = ref buf.LengthUnsafe;
+        for (; remaining > 0; remaining--)
+        {
+            if (buf[remaining - 1] != default)
+                break;
+        }
+    }
+
+    /// <summary>
+    /// Appends a zero element to the buffer, unless its last element already is zero.
+    /// </summary>
+    /// <typeparam name="T">The element type.</typeparam>
+    /// <param name="buf">The buffer to terminate.</param>
+    public static void AddZeroTerminatorIfMissing<T>(this ref ImVectorWrapper<T> buf)
+        where T : unmanaged, INumber<T>
+    {
+        // An empty buffer has no last element, so it always needs the terminator.
+        var alreadyTerminated = buf.Length > 0 && buf[^1] == default;
+        if (!alreadyTerminated)
+            buf.Add(default);
+    }
+
+    /// <summary>
+    /// Decodes the UTF-8 sequence that starts at the given byte offset.
+    /// </summary>
+    /// <param name="buf">The buffer containing bytes in UTF-8.</param>
+    /// <param name="offset">The offset in bytes.</param>
+    /// <param name="numBytes">
+    /// Number of bytes consumed: the sequence length (1 to 4) when decoding succeeds, 1 when the bytes at
+    /// <paramref name="offset"/> do not form a complete sequence, and 0 when <paramref name="offset"/> is at or past
+    /// the end of the buffer.
+    /// </param>
+    /// <param name="invalid">The value to return when no complete UTF-8 sequence could be decoded.</param>
+    /// <returns>The decoded codepoint, or <paramref name="invalid"/> if it could not be decoded.</returns>
+    public static unsafe int Utf8GetCodepoint(
+        this in ImVectorWrapper<byte> buf,
+        int offset,
+        out int numBytes,
+        int invalid = 0xFFFD)
+    {
+        var remaining = buf.LengthUnsafe - offset;
+        if (remaining <= 0)
+        {
+            numBytes = 0;
+            return invalid;
+        }
+
+        // Unless a complete multi-byte sequence is decoded below, exactly one byte is consumed.
+        numBytes = 1;
+
+        var p = buf.DataUnsafe + offset;
+        int lead = p[0];
+        if ((lead & 0x80) == 0)
+            return lead;
+
+        // Sequence length announced by the lead byte; 0 means the byte cannot start a sequence.
+        int expected;
+        if ((lead & 0xE0) == 0xC0)
+            expected = 2;
+        else if ((lead & 0xF0) == 0xE0)
+            expected = 3;
+        else if ((lead & 0xF8) == 0xF0)
+            expected = 4;
+        else
+            expected = 0;
+
+        if (expected == 0 || remaining < expected)
+            return invalid;
+
+        // The payload mask of the lead byte is 0x1F, 0x0F or 0x07 for 2, 3 or 4 byte sequences.
+        var codepoint = lead & (0x7F >> expected);
+        for (var k = 1; k < expected; k++)
+        {
+            // Every trailing byte must look like 10xxxxxx and contributes its low six bits.
+            if ((p[k] & 0xC0) != 0x80)
+                return invalid;
+            codepoint = (codepoint << 6) | (p[k] & 0x3F);
+        }
+
+        numBytes = expected;
+        return codepoint;
+    }
+
+    /// <summary>
+    /// Normalizes the given UTF-8 string in place.<br />
+    /// Using the default values will ensure the best interop between the game, ImGui, and Windows.
+    /// </summary>
+    /// <param name="buf">The buffer containing bytes in UTF-8.</param>
+    /// <param name="lineEnding">The replacement line ending. If empty, CR LF will be used.</param>
+    /// <param name="invalidChar">The replacement invalid character. If empty, U+FFFD REPLACEMENT CHARACTER will be used.</param>
+    /// <param name="normalizeLineEndings">
+    /// Specify whether to normalize the line endings. A CR LF pair counts as one line ending; it is kept as is only
+    /// when <paramref name="lineEnding"/> is CR LF or this flag is off.
+    /// </param>
+    /// <param name="sanitizeInvalidCharacters">Specify whether to replace invalid characters.</param>
+    /// <param name="sanitizeNonUcs2Characters">Specify whether to replace characters that requires the use of surrogate, when encoded in UTF-16.</param>
+    /// <param name="sanitizeSurrogates">
+    /// Specify whether to make sense out of WTF-8: a high surrogate immediately followed by a low surrogate is
+    /// re-encoded as a single 4-byte sequence, and any other surrogate is replaced with
+    /// <paramref name="invalidChar"/>.
+    /// </param>
+    public static unsafe void Utf8Normalize(
+        this ref ImVectorWrapper<byte> buf,
+        ReadOnlySpan<byte> lineEnding = default,
+        ReadOnlySpan<byte> invalidChar = default,
+        bool normalizeLineEndings = true,
+        bool sanitizeInvalidCharacters = true,
+        bool sanitizeNonUcs2Characters = true,
+        bool sanitizeSurrogates = true)
+    {
+        if (lineEnding.IsEmpty)
+            lineEnding = "\r\n"u8;
+        if (invalidChar.IsEmpty)
+            invalidChar = "\uFFFD"u8;
+
+        // An existing CR LF may be left alone only when it already is the requested line ending,
+        // or when line endings are not being normalized at all.
+        var keepCrLf = !normalizeLineEndings || lineEnding.SequenceEqual("\r\n"u8);
+
+        Span<char> charsBuf = stackalloc char[2];
+        Span<byte> bytesBuf = stackalloc byte[4];
+        for (var i = 0; i < buf.Length;)
+        {
+            var c1 = buf.Utf8GetCodepoint(i, out var cb, -1);
+            switch (c1)
+            {
+                // The look-ahead is checked against the current length on every iteration: the replacements
+                // below grow and shrink the buffer, so a sentinel written once before the loop would not stay
+                // right after the last byte.
+                case '\r' when i + 1 < buf.Length && buf.Data[i + 1] == '\n':
+                    if (keepCrLf)
+                    {
+                        // Already in the desired form; it passes all filters.
+                        i += 2;
+                    }
+                    else
+                    {
+                        buf.ReplaceRange(i, 2, lineEnding);
+                        i += lineEnding.Length;
+                    }
+
+                    break;
+
+                case >= 0xD800 and <= 0xDFFF when sanitizeSurrogates:
+                {
+                    var c2 = buf.Utf8GetCodepoint(i + cb, out var cb2);
+
+                    // A pair is only valid as a high surrogate followed by a low surrogate.
+                    if (c1 is < 0xD800 or >= 0xDC00)
+                        goto case -2;
+                    if (c2 is < 0xDC00 or >= 0xE000)
+                        goto case -2;
+                    charsBuf[0] = unchecked((char)c1);
+                    charsBuf[1] = unchecked((char)c2);
+                    var bytesLen = Encoding.UTF8.GetBytes(charsBuf, bytesBuf);
+                    buf.ReplaceRange(i, cb + cb2, bytesBuf[..bytesLen]);
+                    // Do not alter i; now that the WTF-8 has been dealt with, apply other filters.
+                    break;
+                }
+
+                // -2 is never produced by the decoder; it is only reached through the gotos above.
+                case -2:
+                case -1 or 0xFFFE or 0xFFFF when sanitizeInvalidCharacters:
+                case >= 0xD800 and <= 0xDFFF when sanitizeInvalidCharacters:
+                case > char.MaxValue when sanitizeNonUcs2Characters:
+                {
+                    buf.ReplaceRange(i, cb, invalidChar);
+                    i += invalidChar.Length;
+                    break;
+                }
+
+                // See String.Manipulation.cs: IndexOfNewlineChar.
+                // CR; Carriage Return
+                // LF; Line Feed
+                // FF; Form Feed
+                // NEL; Next Line
+                // LS; Line Separator
+                // PS; Paragraph Separator
+                case '\r' or '\n' or '\f' or '\u0085' or '\u2028' or '\u2029' when normalizeLineEndings:
+                {
+                    buf.ReplaceRange(i, cb, lineEnding);
+                    i += lineEnding.Length;
+                    break;
+                }
+
+                default:
+                    i += cb;
+                    break;
+            }
+        }
+    }
+}
--- a/Dalamud/Interface/Utility/ImVectorWrapper.cs
+++ b/Dalamud/Interface/Utility/ImVectorWrapper.cs
@ -13,7 +13,7 @@ namespace Dalamud.Interface.Utility;
 /// <summary>
 /// Utility methods for <see cref="ImVectorWrapper{T}"/>.
 /// </summary>
-public static class ImVectorWrapper
+public static partial class ImVectorWrapper
 {
    /// <summary>
    /// Creates a new instance of the <see cref="ImVectorWrapper{T}"/> struct, initialized with
@ -394,7 +394,7 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
    }

    /// <inheritdoc cref="List{T}.AddRange"/>
-    public void AddRange(Span<T> items)
+    public void AddRange(ReadOnlySpan<T> items)
    {
        this.EnsureCapacityExponential(this.LengthUnsafe + items.Length);
        foreach (var item in items)
@ -466,7 +466,7 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
    /// <param name="capacity">The minimum capacity to ensure.</param>
    /// <returns>Whether the capacity has been changed.</returns>
    public bool EnsureCapacityExponential(int capacity)
-        => this.EnsureCapacity(1 << ((sizeof(int) * 8) - BitOperations.LeadingZeroCount((uint)this.LengthUnsafe)));
+        => this.EnsureCapacity(1 << ((sizeof(int) * 8) - BitOperations.LeadingZeroCount((uint)capacity)));

    /// <summary>
    /// Resizes the underlying array and fills with zeroes if grown.
@ -545,8 +545,8 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
        }
    }

-    /// <inheritdoc cref="List{T}.AddRange"/>
-    public void InsertRange(int index, Span<T> items)
+    /// <inheritdoc cref="List{T}.InsertRange"/>
+    public void InsertRange(int index, ReadOnlySpan<T> items)
    {
        this.EnsureCapacityExponential(this.LengthUnsafe + items.Length);
        var num = this.LengthUnsafe - index;
@ -561,16 +561,7 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="skipDestroyer">Whether to skip calling the destroyer function.</param>
-    public void RemoveAt(int index, bool skipDestroyer = false)
-    {
-        this.EnsureIndex(index);
-        var num = this.LengthUnsafe - index - 1;
-        if (!skipDestroyer)
-            this.destroyer?.Invoke(&this.DataUnsafe[index]);
-
-        Buffer.MemoryCopy(this.DataUnsafe + index + 1, this.DataUnsafe + index, num * sizeof(T), num * sizeof(T));
-        this.LengthUnsafe -= 1;
-    }
+    public void RemoveAt(int index, bool skipDestroyer = false) => this.RemoveRange(index, 1, skipDestroyer);

    /// <inheritdoc/>
    void IList<T>.RemoveAt(int index) => this.RemoveAt(index);
@ -578,6 +569,73 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
    /// <inheritdoc/>
    void IList.RemoveAt(int index) => this.RemoveAt(index);

+    /// <summary>
+    /// Removes <paramref name="count"/> elements at the given index, shifting the following elements down.
+    /// </summary>
+    /// <param name="index">The index of the first item to remove.</param>
+    /// <param name="count">Number of items to remove. Zero is allowed and removes nothing.</param>
+    /// <param name="skipDestroyer">Whether to skip calling the destroyer function.</param>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="count"/> is negative, or the range runs past the end of the vector.
+    /// </exception>
+    public void RemoveRange(int index, int count, bool skipDestroyer = false)
+    {
+        this.EnsureIndex(index);
+        if (count < 0)
+            throw new ArgumentOutOfRangeException(nameof(count), count, "Must be non-negative.");
+
+        // Without this check the number of items to move below would go negative, which would be handed to
+        // Buffer.MemoryCopy as a byte count and would also corrupt the stored length.
+        // Written as a subtraction so that index + count cannot overflow.
+        if (count > this.LengthUnsafe - index)
+        {
+            throw new ArgumentOutOfRangeException(
+                nameof(count),
+                count,
+                "The range must not run past the end of the vector.");
+        }
+
+        if (count == 0)
+            return;
+
+        if (!skipDestroyer && this.destroyer is { } d)
+        {
+            for (var i = 0; i < count; i++)
+                d(this.DataUnsafe + index + i);
+        }
+
+        // Close the gap by moving everything after the removed range down to index.
+        var numItemsToMove = this.LengthUnsafe - index - count;
+        var numBytesToMove = numItemsToMove * sizeof(T);
+        Buffer.MemoryCopy(this.DataUnsafe + index + count, this.DataUnsafe + index, numBytesToMove, numBytesToMove);
+        this.LengthUnsafe -= count;
+    }
+
+    /// <summary>
+    /// Replaces a sequence at given offset <paramref name="index"/> of <paramref name="count"/> items with
+    /// <paramref name="replacement"/>. The vector grows or shrinks when the two lengths differ.
+    /// </summary>
+    /// <param name="index">The index of the first item to be replaced.</param>
+    /// <param name="count">
+    /// The number of items to be replaced. Zero is allowed and inserts <paramref name="replacement"/> at
+    /// <paramref name="index"/>.
+    /// </param>
+    /// <param name="replacement">The replacement.</param>
+    /// <param name="skipDestroyer">Whether to skip calling the destroyer function on the replaced items.</param>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="count"/> is negative, or the range runs past the end of the vector.
+    /// </exception>
+    public void ReplaceRange(int index, int count, ReadOnlySpan<T> replacement, bool skipDestroyer = false)
+    {
+        this.EnsureIndex(index);
+        if (count < 0)
+            throw new ArgumentOutOfRangeException(nameof(count), count, "Must be non-negative.");
+
+        // Written as a subtraction so that index + count cannot overflow.
+        if (count > this.LengthUnsafe - index)
+        {
+            throw new ArgumentOutOfRangeException(
+                nameof(count),
+                count,
+                "The range must not run past the end of the vector.");
+        }
+
+        if (count == 0)
+        {
+            // Nothing to overwrite or destroy; replacing an empty range is a plain insertion.
+            if (!replacement.IsEmpty)
+                this.InsertRange(index, replacement);
+            return;
+        }
+
+        // Ensure the capacity first, so that a failed allocation cannot leave already-destroyed items behind.
+        this.EnsureCapacityExponential((this.LengthUnsafe + replacement.Length) - count);
+
+        if (!skipDestroyer && this.destroyer is { } d)
+        {
+            for (var i = 0; i < count; i++)
+                d(this.DataUnsafe + index + i);
+        }
+
+        if (count >= replacement.Length)
+        {
+            replacement.CopyTo(this.DataSpan[index..]);
+
+            // The surplus items were either destroyed by the loop above or must not be destroyed at all,
+            // so the destroyer is always skipped here; running it again would destroy them twice.
+            if (count > replacement.Length)
+                this.RemoveRange(index + replacement.Length, count - replacement.Length, true);
+        }
+        else
+        {
+            // Overwrite the replaced slots, then insert whatever did not fit.
+            replacement[..count].CopyTo(this.DataSpan[index..]);
+            this.InsertRange(index + count, replacement[count..]);
+        }
+    }
+
    /// <summary>
    /// Sets the capacity exactly as requested.
    /// </summary>
@ -615,9 +673,6 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi

        if (!oldSpan.IsEmpty && !newSpan.IsEmpty)
            oldSpan[..this.LengthUnsafe].CopyTo(newSpan);
-// #if DEBUG
-//         new Span<byte>(newAlloc + this.LengthUnsafe, sizeof(T) * (capacity - this.LengthUnsafe)).Fill(0xCC);
-// #endif

        if (oldAlloc != null)
            ImGuiNative.igMemFree(oldAlloc);