From 6b65ee9940fa309b0cab7953c44d71669e63470f Mon Sep 17 00:00:00 2001 From: Soreepeong Date: Sat, 9 Dec 2023 01:40:54 +0900 Subject: [PATCH] Extract functions to ImVectorWrapper --- .../ImGuiClipboardFunctionProvider.cs | 162 +------------- .../ImVectorWrapper.ZeroTerminatedSequence.cs | 207 ++++++++++++++++++ Dalamud/Interface/Utility/ImVectorWrapper.cs | 91 ++++++-- 3 files changed, 286 insertions(+), 174 deletions(-) create mode 100644 Dalamud/Interface/Utility/ImVectorWrapper.ZeroTerminatedSequence.cs diff --git a/Dalamud/Interface/Internal/ImGuiClipboardFunctionProvider.cs b/Dalamud/Interface/Internal/ImGuiClipboardFunctionProvider.cs index f14210b97..265bc5812 100644 --- a/Dalamud/Interface/Internal/ImGuiClipboardFunctionProvider.cs +++ b/Dalamud/Interface/Internal/ImGuiClipboardFunctionProvider.cs @@ -80,9 +80,9 @@ internal sealed unsafe class ImGuiClipboardFunctionProvider : IServiceType, IDis private void SetClipboardTextImpl(byte* text) { var buffer = ImGuiCurrentContextClipboardHandlerData; - Utf8Utils.SetFromNullTerminatedBytes(ref buffer, text); - Utf8Utils.Normalize(ref buffer); - Utf8Utils.AddNullTerminatorIfMissing(ref buffer); + buffer.SetFromZeroTerminatedSequence(text); + buffer.Utf8Normalize(); + buffer.AddZeroTerminatorIfMissing(); this.setTextOriginal(this.clipboardUserDataOriginal, buffer.Data); } @@ -91,159 +91,9 @@ internal sealed unsafe class ImGuiClipboardFunctionProvider : IServiceType, IDis _ = this.getTextOriginal(this.clipboardUserDataOriginal); var buffer = ImGuiCurrentContextClipboardHandlerData; - Utf8Utils.TrimNullTerminator(ref buffer); - Utf8Utils.Normalize(ref buffer); - Utf8Utils.AddNullTerminatorIfMissing(ref buffer); + buffer.TrimZeroTerminator(); + buffer.Utf8Normalize(); + buffer.AddZeroTerminatorIfMissing(); return buffer.Data; } - - private static class Utf8Utils - { - /// - /// Sets from , a null terminated UTF-8 string. - /// - /// The target buffer. It will not contain a null terminator. - /// The pointer to the null-terminated UTF-8 string. - public static void SetFromNullTerminatedBytes(ref ImVectorWrapper buf, byte* psz) - { - var len = 0; - while (psz[len] != 0) - len++; - - buf.Clear(); - buf.AddRange(new Span(psz, len)); - } - - /// - /// Removes the null terminator. - /// - /// The UTF-8 string buffer. - public static void TrimNullTerminator(ref ImVectorWrapper buf) - { - while (buf.Length > 0 && buf[^1] == 0) - buf.LengthUnsafe--; - } - - /// - /// Adds a null terminator to the buffer. - /// - /// The buffer. - public static void AddNullTerminatorIfMissing(ref ImVectorWrapper buf) - { - if (buf.Length > 0 && buf[^1] == 0) - return; - buf.Add(0); - } - - /// - /// Counts the number of bytes for the UTF-8 character. - /// - /// The bytes. - /// Available number of bytes. - /// Number of bytes taken, or -1 if the byte was invalid. - public static int CountBytes(byte* b, int avail) - { - if (avail <= 0) - return 0; - if ((b[0] & 0x80) == 0) - return 1; - if ((b[0] & 0xE0) == 0xC0 && avail >= 2) - return (b[1] & 0xC0) == 0x80 ? 2 : -1; - if ((b[0] & 0xF0) == 0xE0 && avail >= 3) - return (b[1] & 0xC0) == 0x80 && (b[2] & 0xC0) == 0x80 ? 3 : -1; - if ((b[0] & 0xF8) == 0xF0 && avail >= 4) - return (b[1] & 0xC0) == 0x80 && (b[2] & 0xC0) == 0x80 && (b[3] & 0xC0) == 0x80 ? 4 : -1; - return -1; - } - - /// - /// Gets the codepoint. - /// - /// The bytes. - /// The result from . - /// The codepoint, or \xFFFD replacement character if failed. - public static int GetCodepoint(byte* b, int cb) => cb switch - { - 1 => b[0], - 2 => ((b[0] & 0x1F) << 6) | (b[1] & 0x3F), - 3 => ((b[0] & 0x0F) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F), - 4 => ((b[0] & 0x07) << 18) | ((b[1] & 0x3F) << 12) | ((b[2] & 0x3F) << 6) | (b[3] & 0x3F), - _ => 0xFFFD, - }; - - /// - /// Replaces a sequence with another. - /// - /// The buffer. - /// Offset of the sequence to be replaced. - /// Length of the sequence to be replaced. - /// The replacement sequence. - /// The length of . - public static int ReplaceSequence( - ref ImVectorWrapper buf, - int offset, - int length, - ReadOnlySpan replacement) - { - var i = 0; - for (; i < replacement.Length; i++) - { - if (length >= i + 1) - buf[offset++] = replacement[i]; - else - buf.Insert(offset++, replacement[i]); - } - - for (; i < length; i++) - buf.RemoveAt(offset); - - return replacement.Length; - } - - /// - /// Normalize the given text for our use case. - /// - /// The buffer. - public static void Normalize(ref ImVectorWrapper buf) - { - // Ensure an implicit null after the end of the string. - buf.EnsureCapacity(buf.Length + 1); - buf.StorageSpan[buf.Length] = 0; - - for (var i = 0; i < buf.Length;) - { - var cb = CountBytes(buf.Data + i, buf.Length - i); - var currInt = GetCodepoint(buf.Data + i, cb); - switch (currInt) - { - // Note that buf.Data[i + 1] is always defined. See the beginning of the function. - case '\r' when buf.Data[i + 1] == '\n': // Already CR LF? - i += 2; - continue; - - case 0xFFFE or 0xFFFF: // Simply invalid - case > char.MaxValue: // ImWchar is same size with char; does not support - case >= 0xD800 and <= 0xDBFF: // UTF-16 surrogate; does not support - // Replace with \uFFFD in UTF-8: EF BF BD - i += ReplaceSequence(ref buf, i, cb, "\uFFFD"u8); - break; - - // See String.Manipulation.cs: IndexOfNewlineChar. - case '\r': // CR; Carriage Return - case '\n': // LF; Line Feed - case '\f': // FF; Form Feed - case '\u0085': // NEL; Next Line - case '\u2028': // LS; Line Separator - case '\u2029': // PS; Paragraph Separator - i += ReplaceSequence(ref buf, i, cb, "\r\n"u8); - break; - - default: - // Not a newline char. - i += cb; - break; - } - } - } - } } diff --git a/Dalamud/Interface/Utility/ImVectorWrapper.ZeroTerminatedSequence.cs b/Dalamud/Interface/Utility/ImVectorWrapper.ZeroTerminatedSequence.cs new file mode 100644 index 000000000..507bdce20 --- /dev/null +++ b/Dalamud/Interface/Utility/ImVectorWrapper.ZeroTerminatedSequence.cs @@ -0,0 +1,207 @@ +using System.Numerics; +using System.Text; + +namespace Dalamud.Interface.Utility; + +/// +/// Utility methods for . +/// +public static partial class ImVectorWrapper +{ + /// + /// Appends from , a zero terminated sequence. + /// + /// The element type. + /// The target buffer. + /// The pointer to the zero-terminated sequence. + public static unsafe void AppendZeroTerminatedSequence(this ref ImVectorWrapper buf, T* psz) + where T : unmanaged, INumber + { + var len = 0; + while (psz[len] != default) + len++; + + buf.AddRange(new Span(psz, len)); + } + + /// + /// Sets from , a zero terminated sequence. + /// + /// The element type. + /// The target buffer. + /// The pointer to the zero-terminated sequence. + public static unsafe void SetFromZeroTerminatedSequence(this ref ImVectorWrapper buf, T* psz) + where T : unmanaged, INumber + { + buf.Clear(); + buf.AppendZeroTerminatedSequence(psz); + } + + /// + /// Trims zero terminator(s). + /// + /// The element type. + /// The buffer. + public static void TrimZeroTerminator(this ref ImVectorWrapper buf) + where T : unmanaged, INumber + { + ref var len = ref buf.LengthUnsafe; + while (len > 0 && buf[len - 1] == default) + len--; + } + + /// + /// Adds a zero terminator to the buffer, if missing. + /// + /// The element type. + /// The buffer. + public static void AddZeroTerminatorIfMissing(this ref ImVectorWrapper buf) + where T : unmanaged, INumber + { + if (buf.Length > 0 && buf[^1] == default) + return; + buf.Add(default); + } + + /// + /// Gets the codepoint at the given offset. + /// + /// The buffer containing bytes in UTF-8. + /// The offset in bytes. + /// Number of bytes occupied by the character, invalid or not. + /// The fallback character, if no valid UTF-8 character could be found. + /// The parsed codepoint, or if it could not be parsed correctly. + public static unsafe int Utf8GetCodepoint( + this in ImVectorWrapper buf, + int offset, + out int numBytes, + int invalid = 0xFFFD) + { + var cb = buf.LengthUnsafe - offset; + if (cb <= 0) + { + numBytes = 0; + return invalid; + } + + numBytes = 1; + + var b = buf.DataUnsafe + offset; + if ((b[0] & 0x80) == 0) + return b[0]; + + if (cb < 2 || (b[1] & 0xC0) != 0x80) + return invalid; + if ((b[0] & 0xE0) == 0xC0) + { + numBytes = 2; + return ((b[0] & 0x1F) << 6) | (b[1] & 0x3F); + } + + if (cb < 3 || (b[2] & 0xC0) != 0x80) + return invalid; + if ((b[0] & 0xF0) == 0xE0) + { + numBytes = 3; + return ((b[0] & 0x0F) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F); + } + + if (cb < 4 || (b[3] & 0xC0) != 0x80) + return invalid; + if ((b[0] & 0xF8) == 0xF0) + { + numBytes = 4; + return ((b[0] & 0x07) << 18) | ((b[1] & 0x3F) << 12) | ((b[2] & 0x3F) << 6) | (b[3] & 0x3F); + } + + return invalid; + } + + /// + /// Normalizes the given UTF-8 string.
+ /// Using the default values will ensure the best interop between the game, ImGui, and Windows. + ///
+ /// The buffer containing bytes in UTF-8. + /// The replacement line ending. If empty, CR LF will be used. + /// The replacement invalid character. If empty, U+FFFD REPLACEMENT CHARACTER will be used. + /// Specify whether to normalize the line endings. + /// Specify whether to replace invalid characters. + /// Specify whether to replace characters that requires the use of surrogate, when encoded in UTF-16. + /// Specify whether to make sense out of WTF-8. + public static unsafe void Utf8Normalize( + this ref ImVectorWrapper buf, + ReadOnlySpan lineEnding = default, + ReadOnlySpan invalidChar = default, + bool normalizeLineEndings = true, + bool sanitizeInvalidCharacters = true, + bool sanitizeNonUcs2Characters = true, + bool sanitizeSurrogates = true) + { + if (lineEnding.IsEmpty) + lineEnding = "\r\n"u8; + if (invalidChar.IsEmpty) + invalidChar = "\uFFFD"u8; + + // Ensure an implicit null after the end of the string. + buf.EnsureCapacity(buf.Length + 1); + buf.StorageSpan[buf.Length] = 0; + + Span charsBuf = stackalloc char[2]; + Span bytesBuf = stackalloc byte[4]; + for (var i = 0; i < buf.Length;) + { + var c1 = buf.Utf8GetCodepoint(i, out var cb, -1); + switch (c1) + { + // Note that buf.Data[i + 1] is always defined. See the beginning of the function. + case '\r' when buf.Data[i + 1] == '\n': + // If it's already CR LF, it passes all filters. + i += 2; + break; + + case >= 0xD800 and <= 0xDFFF when sanitizeSurrogates: + { + var c2 = buf.Utf8GetCodepoint(i + cb, out var cb2); + if (c1 is < 0xD800 or >= 0xDC00) + goto case -2; + if (c2 is < 0xDC00 or >= 0xE000) + goto case -2; + charsBuf[0] = unchecked((char)c1); + charsBuf[1] = unchecked((char)c2); + var bytesLen = Encoding.UTF8.GetBytes(charsBuf, bytesBuf); + buf.ReplaceRange(i, cb + cb2, bytesBuf[..bytesLen]); + // Do not alter i; now that the WTF-8 has been dealt with, apply other filters. + break; + } + + case -2: + case -1 or 0xFFFE or 0xFFFF when sanitizeInvalidCharacters: + case >= 0xD800 and <= 0xDFFF when sanitizeInvalidCharacters: + case > char.MaxValue when sanitizeNonUcs2Characters: + { + buf.ReplaceRange(i, cb, invalidChar); + i += invalidChar.Length; + break; + } + + // See String.Manipulation.cs: IndexOfNewlineChar. + // CR; Carriage Return + // LF; Line Feed + // FF; Form Feed + // NEL; Next Line + // LS; Line Separator + // PS; Paragraph Separator + case '\r' or '\n' or '\f' or '\u0085' or '\u2028' or '\u2029' when normalizeLineEndings: + { + buf.ReplaceRange(i, cb, lineEnding); + i += lineEnding.Length; + break; + } + + default: + i += cb; + break; + } + } + } +} diff --git a/Dalamud/Interface/Utility/ImVectorWrapper.cs b/Dalamud/Interface/Utility/ImVectorWrapper.cs index d41ee0094..51524efc4 100644 --- a/Dalamud/Interface/Utility/ImVectorWrapper.cs +++ b/Dalamud/Interface/Utility/ImVectorWrapper.cs @@ -13,7 +13,7 @@ namespace Dalamud.Interface.Utility; /// /// Utility methods for . /// -public static class ImVectorWrapper +public static partial class ImVectorWrapper { /// /// Creates a new instance of the struct, initialized with @@ -394,7 +394,7 @@ public unsafe struct ImVectorWrapper : IList, IList, IReadOnlyList, IDi } /// - public void AddRange(Span items) + public void AddRange(ReadOnlySpan items) { this.EnsureCapacityExponential(this.LengthUnsafe + items.Length); foreach (var item in items) @@ -466,7 +466,7 @@ public unsafe struct ImVectorWrapper : IList, IList, IReadOnlyList, IDi /// The minimum capacity to ensure. /// Whether the capacity has been changed. public bool EnsureCapacityExponential(int capacity) - => this.EnsureCapacity(1 << ((sizeof(int) * 8) - BitOperations.LeadingZeroCount((uint)this.LengthUnsafe))); + => this.EnsureCapacity(1 << ((sizeof(int) * 8) - BitOperations.LeadingZeroCount((uint)capacity))); /// /// Resizes the underlying array and fills with zeroes if grown. @@ -545,8 +545,8 @@ public unsafe struct ImVectorWrapper : IList, IList, IReadOnlyList, IDi } } - /// - public void InsertRange(int index, Span items) + /// + public void InsertRange(int index, ReadOnlySpan items) { this.EnsureCapacityExponential(this.LengthUnsafe + items.Length); var num = this.LengthUnsafe - index; @@ -561,16 +561,7 @@ public unsafe struct ImVectorWrapper : IList, IList, IReadOnlyList, IDi /// /// The index. /// Whether to skip calling the destroyer function. - public void RemoveAt(int index, bool skipDestroyer = false) - { - this.EnsureIndex(index); - var num = this.LengthUnsafe - index - 1; - if (!skipDestroyer) - this.destroyer?.Invoke(&this.DataUnsafe[index]); - - Buffer.MemoryCopy(this.DataUnsafe + index + 1, this.DataUnsafe + index, num * sizeof(T), num * sizeof(T)); - this.LengthUnsafe -= 1; - } + public void RemoveAt(int index, bool skipDestroyer = false) => this.RemoveRange(index, 1, skipDestroyer); /// void IList.RemoveAt(int index) => this.RemoveAt(index); @@ -578,6 +569,73 @@ public unsafe struct ImVectorWrapper : IList, IList, IReadOnlyList, IDi /// void IList.RemoveAt(int index) => this.RemoveAt(index); + /// + /// Removes elements at the given index. + /// + /// The index of the first item to remove. + /// Number of items to remove. + /// Whether to skip calling the destroyer function. + public void RemoveRange(int index, int count, bool skipDestroyer = false) + { + this.EnsureIndex(index); + if (count < 0) + throw new ArgumentOutOfRangeException(nameof(count), count, "Must be positive."); + if (count == 0) + return; + + if (!skipDestroyer && this.destroyer is { } d) + { + for (var i = 0; i < count; i++) + d(this.DataUnsafe + index + i); + } + + var numItemsToMove = this.LengthUnsafe - index - count; + var numBytesToMove = numItemsToMove * sizeof(T); + Buffer.MemoryCopy(this.DataUnsafe + index + count, this.DataUnsafe + index, numBytesToMove, numBytesToMove); + this.LengthUnsafe -= count; + } + + /// + /// Replaces a sequence at given offset of items with + /// . + /// + /// The index of the first item to be replaced. + /// The number of items to be replaced. + /// The replacement. + /// Whether to skip calling the destroyer function. + public void ReplaceRange(int index, int count, ReadOnlySpan replacement, bool skipDestroyer = false) + { + this.EnsureIndex(index); + if (count < 0) + throw new ArgumentOutOfRangeException(nameof(count), count, "Must be positive."); + if (count == 0) + return; + + // Ensure the capacity first, so that we can safely destroy the items first. + this.EnsureCapacityExponential((this.LengthUnsafe + replacement.Length) - count); + + if (!skipDestroyer && this.destroyer is { } d) + { + for (var i = 0; i < count; i++) + d(this.DataUnsafe + index + i); + } + + if (count == replacement.Length) + { + replacement.CopyTo(this.DataSpan[index..]); + } + else if (count > replacement.Length) + { + replacement.CopyTo(this.DataSpan[index..]); + this.RemoveRange(index + replacement.Length, count - replacement.Length); + } + else + { + replacement[..count].CopyTo(this.DataSpan[index..]); + this.InsertRange(index + count, replacement[count..]); + } + } + /// /// Sets the capacity exactly as requested. /// @@ -615,9 +673,6 @@ public unsafe struct ImVectorWrapper : IList, IList, IReadOnlyList, IDi if (!oldSpan.IsEmpty && !newSpan.IsEmpty) oldSpan[..this.LengthUnsafe].CopyTo(newSpan); -// #if DEBUG -// new Span(newAlloc + this.LengthUnsafe, sizeof(T) * (capacity - this.LengthUnsafe)).Fill(0xCC); -// #endif if (oldAlloc != null) ImGuiNative.igMemFree(oldAlloc);