Extract functions to ImVectorWrapper

This commit is contained in:
Soreepeong 2023-12-09 01:40:54 +09:00
parent ca321e59e4
commit 6b65ee9940
3 changed files with 286 additions and 174 deletions

View file

@ -80,9 +80,9 @@ internal sealed unsafe class ImGuiClipboardFunctionProvider : IServiceType, IDis
private void SetClipboardTextImpl(byte* text)
{
// Work on the clipboard handler buffer of the current ImGui context.
var buffer = ImGuiCurrentContextClipboardHandlerData;
// NOTE(review): this listing is a diff rendered without +/- markers. The three Utf8Utils.* calls below are
// presumably the lines removed by the commit, and the three buffer.* extension calls after them their
// replacements - confirm against the repository before treating both groups as live code.
// Each group does the same three steps: load the zero-terminated input into the buffer, normalize the UTF-8
// contents, then make sure the buffer ends with a zero byte.
Utf8Utils.SetFromNullTerminatedBytes(ref buffer, text);
Utf8Utils.Normalize(ref buffer);
Utf8Utils.AddNullTerminatorIfMissing(ref buffer);
buffer.SetFromZeroTerminatedSequence(text);
buffer.Utf8Normalize();
buffer.AddZeroTerminatorIfMissing();
// Hand the processed, zero-terminated bytes to the original set-text function.
this.setTextOriginal(this.clipboardUserDataOriginal, buffer.Data);
}
@ -91,159 +91,9 @@ internal sealed unsafe class ImGuiClipboardFunctionProvider : IServiceType, IDis
_ = this.getTextOriginal(this.clipboardUserDataOriginal);
var buffer = ImGuiCurrentContextClipboardHandlerData;
Utf8Utils.TrimNullTerminator(ref buffer);
Utf8Utils.Normalize(ref buffer);
Utf8Utils.AddNullTerminatorIfMissing(ref buffer);
buffer.TrimZeroTerminator();
buffer.Utf8Normalize();
buffer.AddZeroTerminatorIfMissing();
return buffer.Data;
}
private static class Utf8Utils
{
/// <summary>
/// Replaces the contents of <paramref name="buf"/> with the bytes found at <paramref name="psz"/>, up to but
/// not including the first zero byte.
/// </summary>
/// <param name="buf">The buffer to overwrite. No null terminator is stored in it.</param>
/// <param name="psz">Pointer to a null-terminated UTF-8 string.</param>
public static void SetFromNullTerminatedBytes(ref ImVectorWrapper<byte> buf, byte* psz)
{
    // Walk forward to the terminator; the pointer distance is the string length.
    var end = psz;
    while (*end != 0)
        end++;

    buf.Clear();
    buf.AddRange(new Span<byte>(psz, (int)(end - psz)));
}
/// <summary>
/// Drops every trailing zero byte from the buffer.
/// </summary>
/// <param name="buf">The UTF-8 string buffer to shorten in place.</param>
public static void TrimNullTerminator(ref ImVectorWrapper<byte> buf)
{
    // Only the length is reduced; the underlying storage is left untouched.
    for (; buf.Length > 0; buf.LengthUnsafe--)
    {
        if (buf[^1] != 0)
            break;
    }
}
/// <summary>
/// Appends a zero byte unless the buffer already ends with one.
/// </summary>
/// <param name="buf">The buffer to terminate.</param>
public static void AddNullTerminatorIfMissing(ref ImVectorWrapper<byte> buf)
{
    var alreadyTerminated = buf.Length > 0 && buf[^1] == 0;
    if (!alreadyTerminated)
        buf.Add(0);
}
/// <summary>
/// Determines how many bytes the UTF-8 sequence starting at <paramref name="b"/> occupies.
/// </summary>
/// <param name="b">Pointer to the first byte of the sequence.</param>
/// <param name="avail">Number of readable bytes starting at <paramref name="b"/>.</param>
/// <returns>
/// 0 if <paramref name="avail"/> is not positive; otherwise the sequence length (1 to 4), or -1 if the lead
/// byte is not a valid lead byte, the sequence is truncated, or a continuation byte is malformed.
/// </returns>
public static int CountBytes(byte* b, int avail)
{
    if (avail <= 0)
        return 0;

    // The lead byte alone dictates how long the sequence has to be.
    var lead = b[0];
    var expected = lead switch
    {
        < 0x80 => 1,
        >= 0xC0 and < 0xE0 => 2,
        >= 0xE0 and < 0xF0 => 3,
        >= 0xF0 and < 0xF8 => 4,
        _ => -1,
    };

    if (expected < 0 || avail < expected)
        return -1;

    // Every byte after the lead must look like 10xxxxxx.
    for (var k = 1; k < expected; k++)
    {
        if ((b[k] & 0xC0) != 0x80)
            return -1;
    }

    return expected;
}
/// <summary>
/// Decodes the codepoint stored in the UTF-8 sequence at <paramref name="b"/>.
/// </summary>
/// <param name="b">Pointer to the first byte of the sequence.</param>
/// <param name="cb">The sequence length, as reported by <see cref="CountBytes"/>.</param>
/// <returns>The decoded codepoint, or U+FFFD if <paramref name="cb"/> is not between 1 and 4.</returns>
public static int GetCodepoint(byte* b, int cb)
{
    if (cb is < 1 or > 4)
        return 0xFFFD;

    // A single byte is returned as-is, without masking.
    if (cb == 1)
        return b[0];

    // The lead byte of an N-byte sequence carries (7 - N) payload bits: 0x1F, 0x0F, 0x07 for N = 2, 3, 4.
    var codepoint = b[0] & (0xFF >> (cb + 1));

    // Each continuation byte contributes its low six bits.
    for (var k = 1; k < cb; k++)
        codepoint = (codepoint << 6) | (b[k] & 0x3F);

    return codepoint;
}
/// <summary>
/// Overwrites <paramref name="length"/> bytes at <paramref name="offset"/> with
/// <paramref name="replacement"/>, growing or shrinking the buffer as required.
/// </summary>
/// <param name="buf">The buffer to edit.</param>
/// <param name="offset">Offset of the first byte to replace.</param>
/// <param name="length">Number of bytes to replace.</param>
/// <param name="replacement">The bytes to put in their place.</param>
/// <returns>The length of <paramref name="replacement"/>.</returns>
public static int ReplaceSequence(
    ref ImVectorWrapper<byte> buf,
    int offset,
    int length,
    ReadOnlySpan<byte> replacement)
{
    // Number of positions that can be overwritten in place.
    var overlap = length < replacement.Length ? length : replacement.Length;
    if (overlap < 0)
        overlap = 0;

    for (var k = 0; k < overlap; k++)
        buf[offset + k] = replacement[k];

    // Replacement is longer than the old sequence: insert the rest one byte at a time.
    for (var k = overlap; k < replacement.Length; k++)
        buf.Insert(offset + k, replacement[k]);

    // Replacement is shorter than the old sequence: drop the leftover bytes.
    for (var k = overlap; k < length; k++)
        buf.RemoveAt(offset + overlap);

    return replacement.Length;
}
/// <summary>
/// Normalizes the UTF-8 text in <paramref name="buf"/> in place: malformed bytes and codepoints that do not
/// fit a single UTF-16 code unit are replaced with U+FFFD, and every newline form is rewritten as CR LF.
/// </summary>
/// <param name="buf">The buffer holding UTF-8 bytes, without a null terminator.</param>
public static void Normalize(ref ImVectorWrapper<byte> buf)
{
    for (var i = 0; i < buf.Length;)
    {
        var cb = CountBytes(buf.Data + i, buf.Length - i);
        if (cb < 1)
        {
            // CountBytes reports -1 for a malformed sequence. Advancing by that value would move the cursor
            // backwards, so replace the single offending byte and resume right after the replacement.
            i += ReplaceSequence(ref buf, i, 1, "\uFFFD"u8);
            continue;
        }

        var currInt = GetCodepoint(buf.Data + i, cb);
        switch (currInt)
        {
            // Look ahead only while inside the buffer; the byte past the end is not guaranteed to stay zero
            // once ReplaceSequence has resized the buffer.
            case '\r' when i + 1 < buf.Length && buf.Data[i + 1] == '\n': // Already CR LF?
                i += 2;
                continue;

            case 0xFFFE or 0xFFFF: // Simply invalid
            case > char.MaxValue: // ImWchar is same size with char; does not support
            case >= 0xD800 and <= 0xDFFF: // UTF-16 surrogate (high or low); does not support
                // Replace with \uFFFD in UTF-8: EF BF BD
                i += ReplaceSequence(ref buf, i, cb, "\uFFFD"u8);
                break;

            // See String.Manipulation.cs: IndexOfNewlineChar.
            case '\r': // CR; Carriage Return
            case '\n': // LF; Line Feed
            case '\f': // FF; Form Feed
            case '\u0085': // NEL; Next Line
            case '\u2028': // LS; Line Separator
            case '\u2029': // PS; Paragraph Separator
                i += ReplaceSequence(ref buf, i, cb, "\r\n"u8);
                break;

            default:
                // Not a newline char.
                i += cb;
                break;
        }
    }
}
}
}

View file

@ -0,0 +1,207 @@
using System.Numerics;
using System.Text;
namespace Dalamud.Interface.Utility;
/// <summary>
/// Utility methods for <see cref="ImVectorWrapper{T}"/>.
/// </summary>
public static partial class ImVectorWrapper
{
/// <summary>
/// Appends the elements found at <paramref name="psz"/> to <paramref name="buf"/>, stopping before the first
/// element equal to the default value of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="buf">The buffer to append to. The terminator itself is not appended.</param>
/// <param name="psz">Pointer to the zero-terminated sequence.</param>
public static unsafe void AppendZeroTerminatedSequence<T>(this ref ImVectorWrapper<T> buf, T* psz)
    where T : unmanaged, INumber<T>
{
    // Walk forward to the terminator; the pointer distance is the element count.
    var end = psz;
    while (*end != default)
        end++;

    buf.AddRange(new Span<T>(psz, (int)(end - psz)));
}
/// <summary>
/// Replaces the contents of <paramref name="buf"/> with the zero-terminated sequence at
/// <paramref name="psz"/>.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="buf">The buffer to overwrite.</param>
/// <param name="psz">Pointer to the zero-terminated sequence.</param>
public static unsafe void SetFromZeroTerminatedSequence<T>(this ref ImVectorWrapper<T> buf, T* psz)
    where T : unmanaged, INumber<T>
{
    // Empty the buffer, then reuse the append routine to copy the new contents.
    buf.Clear();
    AppendZeroTerminatedSequence(ref buf, psz);
}
/// <summary>
/// Removes every trailing element equal to the default value of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="buf">The buffer to shorten in place.</param>
public static void TrimZeroTerminator<T>(this ref ImVectorWrapper<T> buf)
    where T : unmanaged, INumber<T>
{
    // The length is edited through a reference; the stored elements themselves are not touched.
    ref var remaining = ref buf.LengthUnsafe;
    while (true)
    {
        if (remaining <= 0 || buf[remaining - 1] != default)
            return;

        remaining--;
    }
}
/// <summary>
/// Appends a zero element unless the buffer already ends with one.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="buf">The buffer to terminate.</param>
public static void AddZeroTerminatorIfMissing<T>(this ref ImVectorWrapper<T> buf)
    where T : unmanaged, INumber<T>
{
    // An empty buffer counts as unterminated.
    if (buf.Length == 0 || buf[^1] != default)
        buf.Add(default);
}
/// <summary>
/// Decodes the UTF-8 sequence that starts at <paramref name="offset"/>.
/// </summary>
/// <param name="buf">The buffer containing bytes in UTF-8.</param>
/// <param name="offset">The offset in bytes. Must not be negative.</param>
/// <param name="numBytes">
/// Number of bytes to advance past this character: the sequence length when decoding succeeds, 1 when the
/// bytes at <paramref name="offset"/> are malformed, or 0 when <paramref name="offset"/> is at or past the end.
/// </param>
/// <param name="invalid">The value to return when no valid sequence could be decoded.</param>
/// <returns>The decoded codepoint, or <paramref name="invalid"/> if decoding failed.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> is negative.</exception>
/// <remarks>
/// Only the bit layout of the lead and continuation bytes is checked; surrogate codepoints are returned
/// as decoded rather than rejected.
/// </remarks>
public static unsafe int Utf8GetCodepoint(
    this in ImVectorWrapper<byte> buf,
    int offset,
    out int numBytes,
    int invalid = 0xFFFD)
{
    // A negative offset would pass the remaining-length check below and read memory before the buffer.
    if (offset < 0)
        throw new ArgumentOutOfRangeException(nameof(offset), offset, "Must not be negative.");

    var cb = buf.LengthUnsafe - offset;
    if (cb <= 0)
    {
        numBytes = 0;
        return invalid;
    }

    // Until a multi-byte sequence has been fully validated, the caller advances by a single byte.
    numBytes = 1;
    var b = buf.DataUnsafe + offset;

    // 0xxxxxxx: single byte.
    if ((b[0] & 0x80) == 0)
        return b[0];

    if (cb < 2 || (b[1] & 0xC0) != 0x80)
        return invalid;

    // 110xxxxx 10xxxxxx
    if ((b[0] & 0xE0) == 0xC0)
    {
        numBytes = 2;
        return ((b[0] & 0x1F) << 6) | (b[1] & 0x3F);
    }

    if (cb < 3 || (b[2] & 0xC0) != 0x80)
        return invalid;

    // 1110xxxx 10xxxxxx 10xxxxxx
    if ((b[0] & 0xF0) == 0xE0)
    {
        numBytes = 3;
        return ((b[0] & 0x0F) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F);
    }

    if (cb < 4 || (b[3] & 0xC0) != 0x80)
        return invalid;

    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if ((b[0] & 0xF8) == 0xF0)
    {
        numBytes = 4;
        return ((b[0] & 0x07) << 18) | ((b[1] & 0x3F) << 12) | ((b[2] & 0x3F) << 6) | (b[3] & 0x3F);
    }

    // Stray continuation byte or a lead byte of 0xF8 and above.
    return invalid;
}
/// <summary>
/// Normalizes the given UTF-8 string in place.<br />
/// Using the default values will ensure the best interop between the game, ImGui, and Windows.
/// </summary>
/// <param name="buf">The buffer containing bytes in UTF-8.</param>
/// <param name="lineEnding">The replacement line ending. If empty, CR LF will be used.</param>
/// <param name="invalidChar">The replacement invalid character. If empty, U+FFFD REPLACEMENT CHARACTER will be used.</param>
/// <param name="normalizeLineEndings">
/// Specify whether to normalize the line endings. An existing CR LF pair counts as a single line ending and
/// is rewritten too when <paramref name="lineEnding"/> is not CR LF.
/// </param>
/// <param name="sanitizeInvalidCharacters">Specify whether to replace invalid characters.</param>
/// <param name="sanitizeNonUcs2Characters">Specify whether to replace characters that require the use of surrogates when encoded in UTF-16.</param>
/// <param name="sanitizeSurrogates">Specify whether to merge surrogate pairs encoded as separate sequences (WTF-8) and replace unpaired surrogates.</param>
public static unsafe void Utf8Normalize(
    this ref ImVectorWrapper<byte> buf,
    ReadOnlySpan<byte> lineEnding = default,
    ReadOnlySpan<byte> invalidChar = default,
    bool normalizeLineEndings = true,
    bool sanitizeInvalidCharacters = true,
    bool sanitizeNonUcs2Characters = true,
    bool sanitizeSurrogates = true)
{
    ReadOnlySpan<byte> crlf = "\r\n"u8;
    if (lineEnding.IsEmpty)
        lineEnding = crlf;
    if (invalidChar.IsEmpty)
        invalidChar = "\uFFFD"u8;

    // An existing CR LF only needs rewriting when the caller asked for a different line ending.
    var rewriteCrLf = normalizeLineEndings && !lineEnding.SequenceEqual(crlf);

    Span<char> charsBuf = stackalloc char[2];
    Span<byte> bytesBuf = stackalloc byte[4];
    for (var i = 0; i < buf.Length;)
    {
        // -1 marks a malformed sequence; cb is 1 in that case, so the loop always makes progress.
        var c1 = buf.Utf8GetCodepoint(i, out var cb, -1);
        switch (c1)
        {
            // Look ahead only while inside the buffer: the byte past the end is not guaranteed to be zero
            // once ReplaceRange has grown or shrunk the buffer.
            case '\r' when i + 1 < buf.Length && buf.Data[i + 1] == '\n':
            {
                if (rewriteCrLf)
                {
                    buf.ReplaceRange(i, 2, lineEnding);
                    i += lineEnding.Length;
                }
                else
                {
                    // Already in the requested form (or line endings are left alone); skip the pair.
                    i += 2;
                }

                break;
            }

            case >= 0xD800 and <= 0xDFFF when sanitizeSurrogates:
            {
                // A high surrogate must be followed by a low surrogate; anything else is replaced.
                var c2 = buf.Utf8GetCodepoint(i + cb, out var cb2);
                if (c1 is < 0xD800 or >= 0xDC00)
                    goto case -2;
                if (c2 is < 0xDC00 or >= 0xE000)
                    goto case -2;

                // Re-encode the pair as the single UTF-8 sequence it stands for.
                charsBuf[0] = unchecked((char)c1);
                charsBuf[1] = unchecked((char)c2);
                var bytesLen = Encoding.UTF8.GetBytes(charsBuf, bytesBuf);
                buf.ReplaceRange(i, cb + cb2, bytesBuf[..bytesLen]);

                // Do not alter i; now that the WTF-8 has been dealt with, apply other filters.
                break;
            }

            case -2:
            case -1 or 0xFFFE or 0xFFFF when sanitizeInvalidCharacters:
            case >= 0xD800 and <= 0xDFFF when sanitizeInvalidCharacters:
            case > char.MaxValue when sanitizeNonUcs2Characters:
            {
                buf.ReplaceRange(i, cb, invalidChar);
                i += invalidChar.Length;
                break;
            }

            // See String.Manipulation.cs: IndexOfNewlineChar.
            // CR; Carriage Return
            // LF; Line Feed
            // FF; Form Feed
            // NEL; Next Line
            // LS; Line Separator
            // PS; Paragraph Separator
            case '\r' or '\n' or '\f' or '\u0085' or '\u2028' or '\u2029' when normalizeLineEndings:
            {
                buf.ReplaceRange(i, cb, lineEnding);
                i += lineEnding.Length;
                break;
            }

            default:
                i += cb;
                break;
        }
    }
}
}

View file

@ -13,7 +13,7 @@ namespace Dalamud.Interface.Utility;
/// <summary>
/// Utility methods for <see cref="ImVectorWrapper{T}"/>.
/// </summary>
public static class ImVectorWrapper
public static partial class ImVectorWrapper
{
/// <summary>
/// Creates a new instance of the <see cref="ImVectorWrapper{T}"/> struct, initialized with
@ -394,7 +394,7 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
}
/// <inheritdoc cref="List{T}.AddRange"/>
public void AddRange(Span<T> items)
public void AddRange(ReadOnlySpan<T> items)
{
this.EnsureCapacityExponential(this.LengthUnsafe + items.Length);
foreach (var item in items)
@ -466,7 +466,7 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
/// <param name="capacity">The minimum capacity to ensure.</param>
/// <returns>Whether the capacity has been changed.</returns>
public bool EnsureCapacityExponential(int capacity)
// NOTE(review): this listing is a diff rendered without +/- markers, so two expression bodies appear. The first
// is presumably the line removed by the commit: it sizes the request from the current length and never reads
// the capacity parameter.
=> this.EnsureCapacity(1 << ((sizeof(int) * 8) - BitOperations.LeadingZeroCount((uint)this.LengthUnsafe)));
// Presumably the replacement line: 1 << (32 - leading zero count of capacity) is the smallest power of two that
// is strictly greater than capacity (0 -> 1, 1 -> 2, 4 -> 8).
// NOTE(review): for capacity >= 0x40000000 the shift count reaches 31 and the int result is negative - confirm
// that EnsureCapacity copes with that.
=> this.EnsureCapacity(1 << ((sizeof(int) * 8) - BitOperations.LeadingZeroCount((uint)capacity)));
/// <summary>
/// Resizes the underlying array and fills with zeroes if grown.
@ -545,8 +545,8 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
}
}
/// <inheritdoc cref="List{T}.AddRange"/>
public void InsertRange(int index, Span<T> items)
/// <inheritdoc cref="List{T}.InsertRange"/>
public void InsertRange(int index, ReadOnlySpan<T> items)
{
this.EnsureCapacityExponential(this.LengthUnsafe + items.Length);
var num = this.LengthUnsafe - index;
@ -561,16 +561,7 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
/// </summary>
/// <param name="index">The index.</param>
/// <param name="skipDestroyer">Whether to skip calling the destroyer function.</param>
public void RemoveAt(int index, bool skipDestroyer = false)
{
// NOTE(review): this listing is a diff rendered without +/- markers. This block body is presumably the version
// removed by the commit, and the expression-bodied declaration after it the replacement - confirm against the
// repository.
// Presumably rejects an out-of-range index; EnsureIndex is not visible here.
this.EnsureIndex(index);
// Number of elements that follow the removed one.
var num = this.LengthUnsafe - index - 1;
// Run the destroyer, if one is set, on the element being removed unless the caller opted out.
if (!skipDestroyer)
this.destroyer?.Invoke(&this.DataUnsafe[index]);
// Shift the following elements down by one slot to close the gap.
Buffer.MemoryCopy(this.DataUnsafe + index + 1, this.DataUnsafe + index, num * sizeof(T), num * sizeof(T));
this.LengthUnsafe -= 1;
}
// Forwards to RemoveRange with a count of one.
public void RemoveAt(int index, bool skipDestroyer = false) => this.RemoveRange(index, 1, skipDestroyer);
/// <inheritdoc/>
void IList<T>.RemoveAt(int index) => this.RemoveAt(index);
@ -578,6 +569,73 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
/// <inheritdoc/>
void IList.RemoveAt(int index) => this.RemoveAt(index);
/// <summary>
/// Removes <paramref name="count"/> elements starting at the given index and shifts the following elements
/// down to close the gap.
/// </summary>
/// <param name="index">The index of the first item to remove.</param>
/// <param name="count">Number of items to remove. Zero is allowed and removes nothing.</param>
/// <param name="skipDestroyer">Whether to skip calling the destroyer function.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="count"/> is negative, or the range extends past the end of the vector.
/// </exception>
public void RemoveRange(int index, int count, bool skipDestroyer = false)
{
    this.EnsureIndex(index);
    if (count < 0)
        throw new ArgumentOutOfRangeException(nameof(count), count, "Must be positive.");

    // Without this check the number of items to move below becomes negative and the copy runs with a bogus size.
    if (count > this.LengthUnsafe - index)
    {
        throw new ArgumentOutOfRangeException(
            nameof(count),
            count,
            "The range must not extend past the end of the vector.");
    }

    if (count == 0)
        return;

    if (!skipDestroyer && this.destroyer is { } d)
    {
        for (var i = 0; i < count; i++)
            d(this.DataUnsafe + index + i);
    }

    // Compute the byte count in 64 bits so that large vectors of wide elements cannot overflow int.
    var numItemsToMove = this.LengthUnsafe - index - count;
    var numBytesToMove = (long)numItemsToMove * sizeof(T);
    Buffer.MemoryCopy(this.DataUnsafe + index + count, this.DataUnsafe + index, numBytesToMove, numBytesToMove);
    this.LengthUnsafe -= count;
}
/// <summary>
/// Replaces a sequence at given offset <paramref name="index"/> of <paramref name="count"/> items with
/// <paramref name="replacement"/>, growing or shrinking the vector as needed.
/// </summary>
/// <param name="index">The index of the first item to be replaced.</param>
/// <param name="count">
/// The number of items to be replaced. When zero, the vector is left unchanged and
/// <paramref name="replacement"/> is not inserted.
/// </param>
/// <param name="replacement">The replacement.</param>
/// <param name="skipDestroyer">Whether to skip calling the destroyer function on the replaced items.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="count"/> is negative, or the range extends past the end of the vector.
/// </exception>
public void ReplaceRange(int index, int count, ReadOnlySpan<T> replacement, bool skipDestroyer = false)
{
    this.EnsureIndex(index);
    if (count < 0)
        throw new ArgumentOutOfRangeException(nameof(count), count, "Must be positive.");

    // Reject ranges that run past the end before any element is destroyed or overwritten.
    if (count > this.LengthUnsafe - index)
    {
        throw new ArgumentOutOfRangeException(
            nameof(count),
            count,
            "The range must not extend past the end of the vector.");
    }

    if (count == 0)
        return;

    // Ensure the capacity first, so that we can safely destroy the items first.
    this.EnsureCapacityExponential((this.LengthUnsafe + replacement.Length) - count);
    if (!skipDestroyer && this.destroyer is { } d)
    {
        for (var i = 0; i < count; i++)
            d(this.DataUnsafe + index + i);
    }

    if (count == replacement.Length)
    {
        replacement.CopyTo(this.DataSpan[index..]);
    }
    else if (count > replacement.Length)
    {
        replacement.CopyTo(this.DataSpan[index..]);

        // Every replaced item has already been destroyed above (or the caller opted out), so the leftover
        // tail must be removed without invoking the destroyer a second time.
        this.RemoveRange(index + replacement.Length, count - replacement.Length, true);
    }
    else
    {
        replacement[..count].CopyTo(this.DataSpan[index..]);
        this.InsertRange(index + count, replacement[count..]);
    }
}
/// <summary>
/// Sets the capacity exactly as requested.
/// </summary>
@ -615,9 +673,6 @@ public unsafe struct ImVectorWrapper<T> : IList<T>, IList, IReadOnlyList<T>, IDi
if (!oldSpan.IsEmpty && !newSpan.IsEmpty)
oldSpan[..this.LengthUnsafe].CopyTo(newSpan);
// #if DEBUG
// new Span<byte>(newAlloc + this.LengthUnsafe, sizeof(T) * (capacity - this.LengthUnsafe)).Fill(0xCC);
// #endif
if (oldAlloc != null)
ImGuiNative.igMemFree(oldAlloc);