SeString renderer: fix colors, add link support (#1983)

* Add coloring options

* Add link support

* simplify

* fixes

* Prevent EncodeString from causing crashes

* Fix link range application and add link example

* Fix test widget

* Make DalamudLinkPayload backward compatible

* make it better to use

* make it better to use

* Mark SeString rendering functions experimental via comments

* rename

* Simplify

* Make sestring draw functions take in draw params

* Improvements
This commit is contained in:
srkizer 2024-08-02 02:36:11 +09:00 committed by GitHub
parent 5fdd88b488
commit b6eb18d550
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 2009 additions and 766 deletions

View file

@ -0,0 +1,149 @@
using System.IO;
using System.Numerics;
using System.Runtime.InteropServices;
using Lumina.Data;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal;
/// <summary>Game font data file.</summary>
/// <remarks>
/// The file consists of a fixed-size <see cref="GfdHeader"/> immediately followed by
/// <see cref="GfdHeader.Count"/> fixed-size <see cref="GfdEntry"/> records.
/// </remarks>
internal sealed unsafe class GfdFile : FileResource
{
    /// <summary>Gets or sets the file header.</summary>
    public GfdHeader Header { get; set; }

    /// <summary>Gets or sets the entries.</summary>
    public GfdEntry[] Entries { get; set; } = [];

    /// <inheritdoc/>
    /// <exception cref="InvalidDataException">The data is too short to contain the header, or too short to contain
    /// the number of entries declared by the header.</exception>
    public override void LoadFile()
    {
        var data = this.DataSpan;
        if (data.Length < sizeof(GfdHeader))
            throw new InvalidDataException($"Not enough space for a {nameof(GfdHeader)}");

        // The header has to be read BEFORE its entry count can be validated against the remaining data.
        var header = MemoryMarshal.AsRef<GfdHeader>(data);

        // Compare in 64-bit so that a bogus, huge count cannot overflow the multiplication.
        var available = (long)data.Length - sizeof(GfdHeader);
        if (header.Count < 0 || available < (long)header.Count * sizeof(GfdEntry))
            throw new InvalidDataException($"Not enough space for all the {nameof(GfdEntry)}");

        this.Header = header;

        // Only take as many entries as the header declares; any trailing bytes are not entries.
        this.Entries =
            MemoryMarshal.Cast<byte, GfdEntry>(data.Slice(sizeof(GfdHeader), header.Count * sizeof(GfdEntry)))
                         .ToArray();
    }

    /// <summary>Attempts to get an entry.</summary>
    /// <param name="iconId">The icon ID.</param>
    /// <param name="entry">The entry. May be set to a found-but-empty entry when <c>false</c> is returned.</param>
    /// <param name="followRedirect">Whether to follow redirects.</param>
    /// <returns><c>true</c> if found and the entry is not empty.</returns>
    public bool TryGetEntry(uint iconId, out GfdEntry entry, bool followRedirect = true)
    {
        entry = default;
        if (iconId == 0)
            return false;

        var entries = this.Entries;

        // Fast path: the entry for ID n is usually stored at index n - 1.
        // NOTE(review): this path does not look at followRedirect/Redirect, unlike the search below;
        // kept as-is since it is unclear whether that is intended.
        if (iconId <= entries.Length && entries[(int)(iconId - 1)].Id == iconId)
        {
            entry = entries[(int)(iconId - 1)];
            return !entry.IsEmpty;
        }

        // Slow path: binary search by ID over the inclusive index range [lo, hi].
        // A redirect chain without cycles can visit at most entries.Length entries; use that as a hop budget so
        // that a malformed file with a redirect cycle cannot make this loop forever.
        var redirectsLeft = entries.Length;
        var lo = 0;
        var hi = entries.Length - 1;
        while (lo <= hi)
        {
            var i = lo + ((hi - lo) >> 1);
            var candidate = entries[i];
            if (candidate.Id == iconId)
            {
                if (followRedirect && candidate.Redirect != 0)
                {
                    if (redirectsLeft-- <= 0)
                        break;

                    // Restart the search for the redirect target.
                    iconId = candidate.Redirect;
                    lo = 0;
                    hi = entries.Length - 1;
                    continue;
                }

                entry = candidate;
                return !entry.IsEmpty;
            }

            if (candidate.Id < iconId)
                lo = i + 1;
            else
                hi = i - 1;
        }

        entry = default;
        return false;
    }

    /// <summary>Header of a .gfd file.</summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct GfdHeader
    {
        /// <summary>Signature: "gftd0100".</summary>
        public fixed byte Signature[8];

        /// <summary>Number of entries.</summary>
        public int Count;

        /// <summary>Unused/unknown.</summary>
        public fixed byte Padding[4];
    }

    /// <summary>An entry of a .gfd file.</summary>
    [StructLayout(LayoutKind.Sequential, Size = 0x10)]
    public struct GfdEntry
    {
        /// <summary>ID of the entry.</summary>
        public ushort Id;

        /// <summary>The left offset of the entry.</summary>
        public ushort Left;

        /// <summary>The top offset of the entry.</summary>
        public ushort Top;

        /// <summary>The width of the entry.</summary>
        public ushort Width;

        /// <summary>The height of the entry.</summary>
        public ushort Height;

        /// <summary>Unknown/unused.</summary>
        public ushort Unk0A;

        /// <summary>The redirected entry, maybe.</summary>
        public ushort Redirect;

        /// <summary>Unknown/unused.</summary>
        public ushort Unk0E;

        /// <summary>Gets a value indicating whether this entry is effectively empty.</summary>
        public bool IsEmpty => this.Width == 0 || this.Height == 0;

        /// <summary>Gets or sets the size of this entry.</summary>
        /// <remarks>The setter uses checked conversions, so components that do not fit into
        /// <see cref="ushort"/> throw <see cref="OverflowException"/>.</remarks>
        public Vector2 Size
        {
            get => new(this.Width, this.Height);
            set => (this.Width, this.Height) = (checked((ushort)value.X), checked((ushort)value.Y));
        }

        /// <summary>Gets the UV0 of this entry.</summary>
        /// <remarks>Left is divided by 512 and Top by 1024.</remarks>
        public Vector2 Uv0 => new(this.Left / 512f, this.Top / 1024f);

        /// <summary>Gets the UV1 of this entry.</summary>
        public Vector2 Uv1 => new((this.Left + this.Width) / 512f, (this.Top + this.Height) / 1024f);

        /// <summary>Gets the UV0 of the HQ version of this entry.</summary>
        /// <remarks>Left is divided by 256; Top is offset by 170.5 and then divided by 512.</remarks>
        public Vector2 HqUv0 => new(this.Left / 256f, (this.Top + 170.5f) / 512f);

        /// <summary>Gets the UV1 of the HQ version of this entry.</summary>
        public Vector2 HqUv1 => new((this.Left + this.Width) / 256f, (this.Top + this.Height + 170.5f) / 512f);
    }
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,366 @@
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using static Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing.UnicodeEastAsianWidthClass;
using static Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing.UnicodeGeneralCategory;
using static Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing.UnicodeLineBreakClass;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary>Enumerates line break offsets.</summary>
/// <remarks>
/// Implements a subset of the rules of <a href="https://unicode.org/reports/tr14/#Algorithm">UAX #14</a>.
/// Every reported offset is the byte offset of the character that a line break may (or must) precede; the final
/// item is always a mandatory break at the end of the data.
/// </remarks>
internal ref struct LineBreakEnumerator
{
    private readonly UtfEnumeratorFlags enumeratorFlags;
    private readonly int dataLength;

    private UtfEnumerator enumerator;

    // Resolved entries of the previous character (class1) and the one before that (class2).
    private Entry class1;
    private Entry class2;

    // Entries of the last two characters that preceded the current run of spaces.
    private Entry space1;
    private Entry space2;

    // Whether the previous character was a space.
    private bool spaceStreak;

    // Whether the previous character was a ZWJ, recorded before LB10 rewrites it as an alphabetic.
    private bool afterZwj;

    // Number of consecutive regional indicators, including the current character.
    private int consecutiveRegionalIndicators;

    private bool finished;

    /// <summary>Initializes a new instance of the <see cref="LineBreakEnumerator"/> struct.</summary>
    /// <param name="data">UTF-N byte sequence.</param>
    /// <param name="enumeratorFlags">Flags to pass to sub-enumerator.</param>
    public LineBreakEnumerator(
        ReadOnlySpan<byte> data,
        UtfEnumeratorFlags enumeratorFlags = UtfEnumeratorFlags.Default)
    {
        this.enumerator = UtfEnumerator.From(data, enumeratorFlags);
        this.enumeratorFlags = enumeratorFlags;
        this.dataLength = data.Length;
    }

    private LineBreakEnumerator(
        int dataLength,
        UtfEnumerator enumerator,
        UtfEnumeratorFlags enumeratorFlags)
    {
        this.dataLength = dataLength;
        this.enumerator = enumerator;
        this.enumeratorFlags = enumeratorFlags;
    }

    private enum LineBreakMode : byte
    {
        Prohibited,
        Mandatory,
        Optional,
    }

    /// <inheritdoc cref="IEnumerator{T}.Current"/>
    public (int ByteOffset, bool Mandatory) Current { get; private set; }

    /// <inheritdoc cref="IEnumerator.MoveNext"/>
    [SuppressMessage("ReSharper", "ConvertIfStatementToSwitchStatement", Justification = "No")]
    public bool MoveNext()
    {
        if (this.finished)
            return false;

        while (this.enumerator.MoveNext())
        {
            // SeString payloads take part in line breaking through a stand-in character.
            // NOTE(review): RepresentativeCharFor yields char.MaxValue (not -1) when there is no stand-in, so the
            // -1 check below never skips a payload; such payloads are classified as U+FFFF. Confirm whether
            // that is intended.
            var effectiveInt =
                this.enumerator.Current.IsSeStringPayload
                    ? UtfEnumerator.RepresentativeCharFor(this.enumerator.Current.MacroCode)
                    : this.enumerator.Current.EffectiveInt;
            if (effectiveInt == -1)
                continue;

            switch (this.HandleCharacter(effectiveInt))
            {
                case LineBreakMode.Mandatory:
                    this.Current = (this.enumerator.Current.ByteOffset, true);
                    return true;
                case LineBreakMode.Optional:
                    this.Current = (this.enumerator.Current.ByteOffset, false);
                    return true;
                case LineBreakMode.Prohibited:
                default:
                    continue;
            }
        }

        // Start and end of text:
        // LB3 Always break at the end of text.
        this.Current = (this.dataLength, true);
        this.finished = true;
        return true;
    }

    /// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
    public LineBreakEnumerator GetEnumerator() =>
        new(this.dataLength, this.enumerator.GetEnumerator(), this.enumeratorFlags);

    /// <summary>Feeds one character and decides whether a line may break right before it.</summary>
    /// <param name="c">The code point of the current character.</param>
    /// <returns>The kind of break allowed between the previous character and <paramref name="c"/>.</returns>
    private LineBreakMode HandleCharacter(int c)
    {
        // https://unicode.org/reports/tr14/#Algorithm
        // 6.1 Non-tailorable Line Breaking Rules
        // Resolve line breaking classes:
        // LB1 Assign a line breaking class to each code point of the input.
        // => done inside Entry ctor
        var curr = new Entry(c);
        var (prev1, prev2) = (this.class1, this.class2);
        (this.class2, this.class1) = (this.class1, curr);

        // LB10 below may overwrite class1 with an alphabetic entry, which would hide a ZWJ from the LB8a check of
        // the next character; remember it separately.
        var prevWasZwj = this.afterZwj;
        this.afterZwj = curr.Class is ZWJ;

        if (curr.Class == RI)
            this.consecutiveRegionalIndicators++;
        else
            this.consecutiveRegionalIndicators = 0;

        // prevSpace1/prevSpace2: the two characters before the break position, looking through a run of spaces.
        var (prevSpaceStreak, prevSpace1, prevSpace2) = (this.spaceStreak, this.space1, this.space2);
        this.spaceStreak = curr.Class == SP;
        if (this.spaceStreak && !prevSpaceStreak)
            (this.space1, this.space2) = (prev1, prev2);

        if (!prevSpaceStreak)
            (prevSpace1, prevSpace2) = (prev1, prev2);

        // Start and end of text:
        // LB2 Never break at the start of text.
        if (prev1.Class is sot)
            return LineBreakMode.Prohibited;

        // Mandatory breaks:
        // LB4 Always break after hard line breaks.
        if (prev1.Class is BK)
            return LineBreakMode.Mandatory;

        // LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
        if (prev2.Class is CR && prev1.Class is LF)
            return LineBreakMode.Mandatory;
        if (prev1.Class is CR && curr.Class is LF)
            return LineBreakMode.Prohibited;
        if (prev1.Class is CR or LF or NL)
            return LineBreakMode.Mandatory;

        // LB6 Do not break before hard line breaks.
        if (curr.Class is BK or CR or LF or NL)
            return LineBreakMode.Prohibited;

        // Explicit breaks and non-breaks:
        // LB7 Do not break before spaces or zero width space.
        if (curr.Class is SP or ZW)
            return LineBreakMode.Prohibited;

        // LB8 Break before any character following a zero-width space, even if one or more spaces intervene.
        if (prev1.Class is ZW)
            return LineBreakMode.Optional;
        if (prevSpaceStreak && prevSpace1.Class is ZW)
            return LineBreakMode.Optional;

        // LB8a Do not break after a zero width joiner.
        if (prevWasZwj)
            return LineBreakMode.Prohibited;

        // Combining marks:
        // LB9 Do not break a combining character sequence; treat it as if it has the line breaking class of the base character in all of the following rules. Treat ZWJ as if it were CM.
        // ?
        // LB10 Treat any remaining combining mark or ZWJ as AL.
        if (curr.Class is CM or ZWJ)
            this.class1 = curr = new('A');

        // Word joiner:
        // LB11 Do not break before or after Word joiner and related characters.
        if (prev1.Class is WJ || curr.Class is WJ)
            return LineBreakMode.Prohibited;

        // Non-breaking characters:
        // LB12 Do not break after NBSP and related characters.
        if (prev1.Class is GL)
            return LineBreakMode.Prohibited;

        // 6.2 Tailorable Line Breaking Rules
        // Non-breaking characters:
        // LB12a Do not break before NBSP and related characters, except after spaces and hyphens.
        if (prev1.Class is not SP and not BA and not HY &&
            curr.Class is GL)
            return LineBreakMode.Prohibited;

        // Opening and closing:
        // LB13 Do not break before ] or ! or ; or /, even after spaces.
        if (curr.Class is CL or CP or EX or IS or SY)
            return LineBreakMode.Prohibited;

        // LB14 Do not break after [, even after spaces.
        if (prevSpace1.Class is OP)
            return LineBreakMode.Prohibited;

        // LB15a Do not break after an unresolved initial punctuation that lies at the start of the line, after a space, after opening punctuation, or after an unresolved quotation mark, even after spaces.
        if (prevSpace2.Class is sot or BK or CR or LF or NL or OP or QU or GL or SP or ZW &&
            prevSpace1.Class is QU &&
            prevSpace1.GeneralCategory is Pi)
            return LineBreakMode.Prohibited;

        // Some rules need one character of lookahead past the current character.
        var next = this.enumerator.TryPeekNext(out var nextSubsequence, out _)
                       ? new Entry(nextSubsequence.EffectiveChar)
                       : new(eot);

        // LB15b Do not break before an unresolved final punctuation that lies at the end of the line, before a space, before a prohibited break, or before an unresolved quotation mark, even after spaces.
        if (curr.Class is QU && curr.GeneralCategory is Pf &&
            next.Class is SP or GL or WJ or CL or QU or CP or EX or IS or SY or BK or CR or LF or NL or ZW or eot)
            return LineBreakMode.Prohibited;

        // LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.
        // (CL | CP) SP* × NS: the nonstarter is the character right after the break position, i.e. curr.
        if (prevSpace1.Class is CL or CP && curr.Class is NS)
            return LineBreakMode.Prohibited;

        // LB17 Do not break within ‘——’, even with intervening spaces.
        // B2 SP* × B2: the second dash is the character right after the break position, i.e. curr.
        if (prevSpace1.Class is B2 && curr.Class is B2)
            return LineBreakMode.Prohibited;

        // Spaces:
        // LB18 Break after spaces.
        if (prev1.Class is SP)
            return LineBreakMode.Optional;

        // Special case rules:
        // LB19 Do not break before or after quotation marks, such as ‘ ” ’.
        if (prev1.Class is QU || curr.Class is QU)
            return LineBreakMode.Prohibited;

        // LB20 Break before and after unresolved CB.
        if (prev1.Class is CB || curr.Class is CB)
            return LineBreakMode.Optional;

        // LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents.
        if (curr.Class is BA or HY or NS || prev1.Class is BB)
            return LineBreakMode.Prohibited;

        // LB21a Don't break after Hebrew + Hyphen.
        if (prev2.Class is HL && prev1.Class is HY or BA)
            return LineBreakMode.Prohibited;

        // LB21b Don't break between Solidus and Hebrew letters.
        if (prev1.Class is SY && curr.Class is HL)
            return LineBreakMode.Prohibited;

        // LB22 Do not break before ellipses.
        if (curr.Class is IN)
            return LineBreakMode.Prohibited;

        // Numbers:
        // LB23 Do not break between digits and letters.
        if (prev1.Class is AL or HL && curr.Class is NU)
            return LineBreakMode.Prohibited;
        if (prev1.Class is NU && curr.Class is AL or HL)
            return LineBreakMode.Prohibited;

        // LB23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
        if (prev1.Class is PR && curr.Class is ID or EB or EM)
            return LineBreakMode.Prohibited;
        if (prev1.Class is ID or EB or EM && curr.Class is PR)
            return LineBreakMode.Prohibited;

        // LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.
        if (prev1.Class is PR or PO && curr.Class is AL or HL)
            return LineBreakMode.Prohibited;
        if (prev1.Class is AL or HL && curr.Class is PR or PO)
            return LineBreakMode.Prohibited;

        // LB25 Do not break between the following pairs of classes relevant to numbers:
        if ((prev1.Class, curr.Class) is (CL, PO) or (CP, PO) or (CL, PR) or (CP, PR) or (NU, PO) or (NU, PR)
            or (PO, OP) or (PO, NU) or (PR, OP) or (PR, NU) or (HY, NU) or (IS, NU) or (NU, NU) or (SY, NU))
            return LineBreakMode.Prohibited;

        // Korean syllable blocks:
        // LB26 Do not break a Korean syllable.
        if (prev1.Class is JL && curr.Class is JL or JV or H2 or H3)
            return LineBreakMode.Prohibited;
        if (prev1.Class is JV or H2 && curr.Class is JV or JT)
            return LineBreakMode.Prohibited;

        // LB27 Treat a Korean Syllable Block the same as ID.
        if (prev1.Class is JL or JV or JT or H2 or H3 && curr.Class is PO)
            return LineBreakMode.Prohibited;
        if (prev1.Class is PR && curr.Class is JL or JV or JT or H2 or H3)
            return LineBreakMode.Prohibited;

        // Finally, join alphabetic letters into words and break everything else.
        // LB28 Do not break between alphabetics (“at”).
        if (prev1.Class is AL or HL && curr.Class is AL or HL)
            return LineBreakMode.Prohibited;

        // LB28a Do not break inside the orthographic syllables of Brahmic scripts.
        // TODO: what's "◌"?
        if (prev1.Class is AP && curr.Class is AK or AS)
            return LineBreakMode.Prohibited;
        if (prev1.Class is AK or AS && curr.Class is VF or VI)
            return LineBreakMode.Prohibited;
        if (prev2.Class is AK or AS && prev1.Class is VI && curr.Class is AK)
            return LineBreakMode.Prohibited;
        if (prev1.Class is AK or AS && curr.Class is AK or AS && next.Class is VF)
            return LineBreakMode.Prohibited;

        // LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
        if (prev1.Class is IS && curr.Class is AL or HL)
            return LineBreakMode.Prohibited;

        // LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
        if (prev1.Class is AL or HL or NU &&
            curr.Class is OP && curr.EastAsianWidth is not F and not W and not H)
            return LineBreakMode.Prohibited;
        if (prev1.Class is CP && prev1.EastAsianWidth is not F and not W and not H &&
            curr.Class is AL or HL or NU)
            return LineBreakMode.Prohibited;

        // LB30a Break between two regional indicator symbols if and only if there are an even number of regional indicators preceding the position of the break.
        // The counter includes curr, so an even counter means an odd number of indicators precede the break
        // position, i.e. curr completes a pair and must stay attached to prev1.
        if (prev1.Class is RI && curr.Class is RI && this.consecutiveRegionalIndicators % 2 == 0)
            return LineBreakMode.Prohibited;

        // LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
        if (prev1.Class is EB && curr.Class is EM)
            return LineBreakMode.Prohibited;
        if (prev1.GeneralCategory is Cn &&
            (prev1.EmojiProperty & UnicodeEmojiProperty.Extended_Pictographic) != 0 &&
            curr.Class is EM)
            return LineBreakMode.Prohibited;

        // LB31 Break everywhere else.
        return LineBreakMode.Optional;
    }

    /// <summary>Unicode properties of a single character that the line breaking rules consult.</summary>
    /// <remarks>The default value has <see cref="Class"/> equal to <c>sot</c> (start of text).</remarks>
    private readonly struct Entry
    {
        public readonly UnicodeLineBreakClass Class;
        public readonly UnicodeGeneralCategory GeneralCategory;
        public readonly UnicodeEastAsianWidthClass EastAsianWidth;
        public readonly UnicodeEmojiProperty EmojiProperty;

        /// <summary>Initializes a new instance of the <see cref="Entry"/> struct from a code point.</summary>
        /// <param name="c">The code point; used as an index into the <see cref="UnicodeData"/> tables.</param>
        public Entry(int c)
        {
            // LB1: resolve the classes that have no rules of their own.
            this.Class = UnicodeData.LineBreak[c] switch
            {
                AI or SG or XX => AL,
                SA when UnicodeData.GeneralCategory[c] is Mn or Mc => CM,
                SA => AL,
                CJ => NS,
                var x => x,
            };
            this.GeneralCategory = UnicodeData.GeneralCategory[c];
            this.EastAsianWidth = UnicodeData.EastAsianWidth[c];
            this.EmojiProperty = UnicodeData.EmojiProperty[c];
        }

        /// <summary>Initializes a new instance of the <see cref="Entry"/> struct from explicit properties.</summary>
        /// <param name="lineBreakClass">The line break class, used as-is.</param>
        /// <param name="generalCategory">The general category.</param>
        /// <param name="eastAsianWidth">The east asian width class.</param>
        /// <param name="emojiProperty">The emoji properties.</param>
        public Entry(
            UnicodeLineBreakClass lineBreakClass,
            UnicodeGeneralCategory generalCategory = Cn,
            UnicodeEastAsianWidthClass eastAsianWidth = N,
            UnicodeEmojiProperty emojiProperty = 0)
        {
            this.Class = lineBreakClass;
            this.GeneralCategory = generalCategory;
            this.EastAsianWidth = eastAsianWidth;
            this.EmojiProperty = emojiProperty;
        }
    }
}

View file

@ -0,0 +1,118 @@
using System.Globalization;
using System.IO;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary>Stores unicode data.</summary>
/// <remarks>
/// Every table has 0x110000 elements and is indexed by code point. The tables are parsed from text files embedded
/// in this assembly when the type is first used.
/// </remarks>
internal static class UnicodeData
{
    /// <summary>Line break classes.</summary>
    public static readonly UnicodeLineBreakClass[] LineBreak;

    /// <summary>East asian width classes.</summary>
    public static readonly UnicodeEastAsianWidthClass[] EastAsianWidth;

    /// <summary>General categories.</summary>
    public static readonly UnicodeGeneralCategory[] GeneralCategory;

    /// <summary>Emoji properties.</summary>
    public static readonly UnicodeEmojiProperty[] EmojiProperty;

    static UnicodeData()
    {
        // File is from https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt
        LineBreak =
            Parse(
                typeof(UnicodeData).Assembly.GetManifestResourceStream("LineBreak.txt")!,
                UnicodeLineBreakClass.XX);

        // https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
        EastAsianWidth =
            Parse(
                typeof(UnicodeData).Assembly.GetManifestResourceStream("EastAsianWidth.txt")!,
                UnicodeEastAsianWidthClass.N);

        // https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt
        GeneralCategory =
            Parse(
                typeof(UnicodeData).Assembly.GetManifestResourceStream("DerivedGeneralCategory.txt")!,
                UnicodeGeneralCategory.Cn);

        // https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
        EmojiProperty =
            Parse(
                typeof(UnicodeData).Assembly.GetManifestResourceStream("emoji-data.txt")!,
                default(UnicodeEmojiProperty));
    }

    /// <summary>Parses a Unicode Character Database style property file into a per-code-point table.</summary>
    /// <param name="stream">The file contents. Disposed before returning.</param>
    /// <param name="defaultValue">The value for code points that the file does not mention.</param>
    /// <typeparam name="T">A byte-sized enum whose member names match the property values in the file.</typeparam>
    /// <returns>A table with 0x110000 elements, indexed by code point.</returns>
    /// <exception cref="InvalidOperationException"><typeparamref name="T"/> is not one byte in size.</exception>
    /// <exception cref="InvalidDataException">A data line does not contain the <c>;</c> delimiter.</exception>
    private static T[] Parse<T>(Stream stream, T defaultValue)
        where T : unmanaged, Enum
    {
        if (Unsafe.SizeOf<T>() != 1)
            throw new InvalidOperationException("Enum must be of size 1 byte");

        // Flag enums accumulate: a code point may appear on several lines, once per property.
        var isFlag = typeof(T).GetCustomAttribute<FlagsAttribute>() is not null;
        using var sr = new StreamReader(stream);
        var res = new T[0x110000];
        res.AsSpan().Fill(defaultValue);
        for (string? line; (line = sr.ReadLine()) != null;)
        {
            var span = line.AsSpan();

            // strip comment
            var i = span.IndexOf('#');
            if (i != -1)
                span = span[..i];

            span = span.Trim();
            if (span.IsEmpty)
                continue;

            // find delimiter
            i = span.IndexOf(';');
            if (i == -1)
                throw new InvalidDataException();

            var range = span[..i].Trim();
            var entry = Enum.Parse<T>(span[(i + 1)..].Trim());

            // The range is either a single code point ("XXXX") or an inclusive range ("XXXX..YYYY"), in hex.
            // [from, to) below is half-open.
            i = range.IndexOf("..");
            int from, to;
            if (i == -1)
            {
                from = int.Parse(range, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                to = from + 1;
            }
            else
            {
                from = int.Parse(range[..i], NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                to = int.Parse(range[(i + 2)..], NumberStyles.HexNumber, CultureInfo.InvariantCulture) + 1;
            }

            // The table covers every code point, so supplementary-plane ranges (above U+FFFF) are kept; only
            // ranges that lie entirely outside of the table are ignored.
            if (from < 0 || from >= res.Length)
                continue;

            to = Math.Min(to, res.Length);
            if (isFlag)
            {
                // T is exactly one byte (checked above), so it can be reinterpreted as a byte to OR the flags.
                var entryBits = Unsafe.As<T, byte>(ref entry);
                foreach (ref var v in res.AsSpan()[from..to])
                    Unsafe.As<T, byte>(ref v) |= entryBits;
            }
            else
            {
                res.AsSpan()[from..to].Fill(entry);
            }
        }

        return res;
    }
}

View file

@ -0,0 +1,26 @@
using System.Diagnostics.CodeAnalysis;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary><a href="https://www.unicode.org/reports/tr11/">Unicode east asian width</a>.</summary>
/// <remarks>Member names are the property values used in <c>EastAsianWidth.txt</c>.</remarks>
[SuppressMessage("ReSharper", "InconsistentNaming", Justification = "Unicode Data")]
internal enum UnicodeEastAsianWidthClass : byte
{
    /// <summary>Ambiguous (East Asian).</summary>
    A = 0,

    /// <summary>Fullwidth (East Asian).</summary>
    F = 1,

    /// <summary>Halfwidth (East Asian).</summary>
    H = 2,

    /// <summary>Neutral.</summary>
    N = 3,

    /// <summary>Narrow (East Asian).</summary>
    Na = 4,

    /// <summary>Wide (East Asian).</summary>
    W = 5,
}

View file

@ -0,0 +1,28 @@
using System.Diagnostics.CodeAnalysis;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary><a href="https://www.unicode.org/reports/tr51/#Emoji_Characters">Unicode emoji property</a>.</summary>
/// <remarks>Member names are the property names used in <c>emoji-data.txt</c>; a character may have several.
/// </remarks>
[SuppressMessage("ReSharper", "InconsistentNaming", Justification = "Unicode Data")]
[Flags]
internal enum UnicodeEmojiProperty : byte
{
    /// <summary>The character is an emoji.</summary>
    Emoji = 0x01,

    /// <summary>The character has emoji presentation by default.</summary>
    Emoji_Presentation = 0x02,

    /// <summary>The character is an emoji modifier.</summary>
    Emoji_Modifier = 0x04,

    /// <summary>The character can serve as a base for emoji modifiers.</summary>
    Emoji_Modifier_Base = 0x08,

    /// <summary>The character is used in emoji sequences but normally does not appear on emoji keyboards as
    /// a separate choice, such as keycap base characters or Regional_Indicator characters.</summary>
    Emoji_Component = 0x10,

    /// <summary>The character is used to future-proof segmentation.</summary>
    Extended_Pictographic = 0x20,
}

View file

@ -0,0 +1,99 @@
using System.Diagnostics.CodeAnalysis;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary><a href="https://www.unicode.org/reports/tr44/#General_Category_Values">Unicode general category.</a>.
/// </summary>
/// <remarks>Member names are the two-letter abbreviations used in <c>DerivedGeneralCategory.txt</c>.</remarks>
[SuppressMessage("ReSharper", "InconsistentNaming", Justification = "Unicode Data")]
internal enum UnicodeGeneralCategory : byte
{
    /// <summary>Uppercase_Letter: an uppercase letter.</summary>
    Lu = 0,

    /// <summary>Lowercase_Letter: a lowercase letter.</summary>
    Ll = 1,

    /// <summary>Titlecase_Letter: a digraph encoded as a single character, with first part uppercase.</summary>
    Lt = 2,

    /// <summary>Modifier_Letter: a modifier letter.</summary>
    Lm = 3,

    /// <summary>Other_Letter: other letters, including syllables and ideographs.</summary>
    Lo = 4,

    /// <summary>Nonspacing_Mark: a nonspacing combining mark (zero advance width).</summary>
    Mn = 5,

    /// <summary>Spacing_Mark: a spacing combining mark (positive advance width).</summary>
    Mc = 6,

    /// <summary>Enclosing_Mark: an enclosing combining mark.</summary>
    Me = 7,

    /// <summary>Decimal_Number: a decimal digit.</summary>
    Nd = 8,

    /// <summary>Letter_Number: a letterlike numeric character.</summary>
    Nl = 9,

    /// <summary>Other_Number: a numeric character of other type.</summary>
    No = 10,

    /// <summary>Connector_Punctuation: a connecting punctuation mark, like a tie.</summary>
    Pc = 11,

    /// <summary>Dash_Punctuation: a dash or hyphen punctuation mark.</summary>
    Pd = 12,

    /// <summary>Open_Punctuation: an opening punctuation mark (of a pair).</summary>
    Ps = 13,

    /// <summary>Close_Punctuation: a closing punctuation mark (of a pair).</summary>
    Pe = 14,

    /// <summary>Initial_Punctuation: an initial quotation mark.</summary>
    Pi = 15,

    /// <summary>Final_Punctuation: a final quotation mark.</summary>
    Pf = 16,

    /// <summary>Other_Punctuation: a punctuation mark of other type.</summary>
    Po = 17,

    /// <summary>Math_Symbol: a symbol of mathematical use.</summary>
    Sm = 18,

    /// <summary>Currency_Symbol: a currency sign.</summary>
    Sc = 19,

    /// <summary>Modifier_Symbol: a non-letterlike modifier symbol.</summary>
    Sk = 20,

    /// <summary>Other_Symbol: a symbol of other type.</summary>
    So = 21,

    /// <summary>Space_Separator: a space character (of various non-zero widths).</summary>
    Zs = 22,

    /// <summary>Line_Separator: U+2028 LINE SEPARATOR only.</summary>
    Zl = 23,

    /// <summary>Paragraph_Separator: U+2029 PARAGRAPH SEPARATOR only.</summary>
    Zp = 24,

    /// <summary>Control: a C0 or C1 control code.</summary>
    Cc = 25,

    /// <summary>Format: a format control character.</summary>
    Cf = 26,

    /// <summary>Surrogate: a surrogate code point.</summary>
    Cs = 27,

    /// <summary>Private_Use: a private-use character.</summary>
    Co = 28,

    /// <summary>Unassigned: a reserved unassigned code point or a noncharacter.</summary>
    Cn = 29,
}

View file

@ -0,0 +1,162 @@
using System.Diagnostics.CodeAnalysis;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary><a href="https://unicode.org/reports/tr14/#Table1">Unicode line break class</a>.</summary>
/// <remarks>
/// Member names other than <see cref="sot"/> and <see cref="eot"/> are the abbreviations used in
/// <c>LineBreak.txt</c>. <see cref="sot"/> is the zero value, and therefore the class of a default-initialized
/// value of this type.
/// </remarks>
[SuppressMessage("ReSharper", "InconsistentNaming", Justification = "Unicode Data")]
[SuppressMessage(
    "StyleCop.CSharp.DocumentationRules",
    "SA1300:Element should begin with an uppercase letter",
    Justification = "Unicode Data")]
internal enum UnicodeLineBreakClass : byte
{
    /// <summary>Start of text.</summary>
    sot = 0,

    /// <summary>End of text.</summary>
    eot = 1,

    /// <summary>Mandatory Break; NL, PARAGRAPH SEPARATOR; Cause a line break (after).</summary>
    BK = 2,

    /// <summary>Carriage Return; CR; Cause a line break (after), except between CR and LF.</summary>
    CR = 3,

    /// <summary>Line Feed; LF; Cause a line break (after).</summary>
    LF = 4,

    /// <summary>Combining Mark; Combining marks, control codes; Prohibit a line break between the character and the preceding character.</summary>
    CM = 5,

    /// <summary>Next Line; NEL; Cause a line break (after).</summary>
    NL = 6,

    /// <summary>Surrogate; Surrogates; Do not occur in well-formed text.</summary>
    SG = 7,

    /// <summary>Word Joiner; WJ; Prohibit line breaks before and after.</summary>
    WJ = 8,

    /// <summary>Zero Width Space; ZWSP; Provide a break opportunity.</summary>
    ZW = 9,

    /// <summary>Non-breaking (“Glue”); CGJ, NBSP, ZWNBSP; Prohibit line breaks before and after.</summary>
    GL = 10,

    /// <summary>Space; SPACE; Enable indirect line breaks.</summary>
    SP = 11,

    /// <summary>Zero Width Joiner; Zero Width Joiner; Prohibit line breaks within joiner sequences.</summary>
    ZWJ = 12,

    /// <summary>Break Opportunity Before and After; Em dash; Provide a line break opportunity before and after the character.</summary>
    B2 = 13,

    /// <summary>Break After; Spaces, hyphens; Generally provide a line break opportunity after the character.</summary>
    BA = 14,

    /// <summary>Break Before; Punctuation used in dictionaries; Generally provide a line break opportunity before the character.</summary>
    BB = 15,

    /// <summary>Hyphen; HYPHEN-MINUS; Provide a line break opportunity after the character, except in numeric context.</summary>
    HY = 16,

    /// <summary>Contingent Break Opportunity; Inline objects; Provide a line break opportunity contingent on additional information.</summary>
    CB = 17,

    /// <summary>Close Punctuation; “}”, “❳”, “⟫” etc.; Prohibit line breaks before.</summary>
    CL = 18,

    /// <summary>Close Parenthesis; “)”, “]”; Prohibit line breaks before.</summary>
    CP = 19,

    /// <summary>Exclamation/Interrogation; “!”, “?”, etc.; Prohibit line breaks before.</summary>
    EX = 20,

    /// <summary>Inseparable; Leaders; Allow only indirect line breaks between pairs.</summary>
    IN = 21,

    /// <summary>Nonstarter; “‼”, “‽”, “⁇”, “⁉”, etc.; Allow only indirect line breaks before.</summary>
    NS = 22,

    /// <summary>Open Punctuation; “(“, “[“, “{“, etc.; Prohibit line breaks after.</summary>
    OP = 23,

    /// <summary>Quotation; Quotation marks; Act like they are both opening and closing.</summary>
    QU = 24,

    /// <summary>Infix Numeric Separator; . ,; Prevent breaks after any and before numeric.</summary>
    IS = 25,

    /// <summary>Numeric; Digits; Form numeric expressions for line breaking purposes.</summary>
    NU = 26,

    /// <summary>Postfix Numeric; %, ¢; Do not break following a numeric expression.</summary>
    PO = 27,

    /// <summary>Prefix Numeric; $, £, ¥, etc.; Do not break in front of a numeric expression.</summary>
    PR = 28,

    /// <summary>Symbols Allowing Break After; /; Prevent a break before, and allow a break after.</summary>
    SY = 29,

    /// <summary>Ambiguous (Alphabetic or Ideographic); Characters with Ambiguous East Asian Width; Act like AL when the resolved EAW is N; otherwise, act as ID.</summary>
    AI = 30,

    /// <summary>Aksara; Consonants; Form orthographic syllables in Brahmic scripts.</summary>
    AK = 31,

    /// <summary>Alphabetic; Alphabets and regular symbols; Are alphabetic characters or symbols that are used with alphabetic characters.</summary>
    AL = 32,

    /// <summary>Aksara Pre-Base; Pre-base repha; Form orthographic syllables in Brahmic scripts.</summary>
    AP = 33,

    /// <summary>Aksara Start; Independent vowels; Form orthographic syllables in Brahmic scripts.</summary>
    AS = 34,

    /// <summary>Conditional Japanese Starter; Small kana; Treat as NS or ID for strict or normal breaking.</summary>
    CJ = 35,

    /// <summary>Emoji Base; All emoji allowing modifiers; Do not break from following Emoji Modifier.</summary>
    EB = 36,

    /// <summary>Emoji Modifier; Skin tone modifiers; Do not break from preceding Emoji Base.</summary>
    EM = 37,

    /// <summary>Hangul LV Syllable; Hangul; Form Korean syllable blocks.</summary>
    H2 = 38,

    /// <summary>Hangul LVT Syllable; Hangul; Form Korean syllable blocks.</summary>
    H3 = 39,

    /// <summary>Hebrew Letter; Hebrew; Do not break around a following hyphen; otherwise act as Alphabetic.</summary>
    HL = 40,

    /// <summary>Ideographic; Ideographs; Break before or after, except in some numeric context.</summary>
    ID = 41,

    /// <summary>Hangul L Jamo; Conjoining jamo; Form Korean syllable blocks.</summary>
    JL = 42,

    /// <summary>Hangul V Jamo; Conjoining jamo; Form Korean syllable blocks.</summary>
    JV = 43,

    /// <summary>Hangul T Jamo; Conjoining jamo; Form Korean syllable blocks.</summary>
    JT = 44,

    /// <summary>Regional Indicator; REGIONAL INDICATOR SYMBOL LETTER A .. Z; Keep pairs together. For pairs, break before and after other classes.</summary>
    RI = 45,

    /// <summary>Complex Context Dependent (South East Asian); South East Asian: Thai, Lao, Khmer; Provide a line break opportunity contingent on additional, language-specific context analysis.</summary>
    SA = 46,

    /// <summary>Virama Final; Viramas for final consonants; Form orthographic syllables in Brahmic scripts.</summary>
    VF = 47,

    /// <summary>Virama; Conjoining viramas; Form orthographic syllables in Brahmic scripts.</summary>
    VI = 48,

    /// <summary>Unknown; Most unassigned, private-use; Have as yet unknown line breaking behavior or unassigned code positions.</summary>
    XX = 49,
}

View file

@ -0,0 +1,325 @@
using System.Collections;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using Lumina.Text;
using Lumina.Text.Payloads;
using Lumina.Text.ReadOnly;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary>Enumerates a UTF-N byte sequence by codepoint.</summary>
[DebuggerDisplay("{Current}/{data.Length} ({flags}, BE={isBigEndian})")]
internal ref struct UtfEnumerator
{
private readonly ReadOnlySpan<byte> data;
private readonly UtfEnumeratorFlags flags;
private readonly byte numBytesPerUnit;
private bool isBigEndian;
/// <summary>Initializes a new instance of the <see cref="UtfEnumerator"/> struct.</summary>
/// <param name="data">UTF-N byte sequence.</param>
/// <param name="flags">Enumeration flags.</param>
public UtfEnumerator(ReadOnlySpan<byte> data, UtfEnumeratorFlags flags)
{
this.data = data;
this.flags = flags;
this.numBytesPerUnit = (this.flags & UtfEnumeratorFlags.UtfMask) switch
{
UtfEnumeratorFlags.Utf8 or UtfEnumeratorFlags.Utf8SeString => 1,
UtfEnumeratorFlags.Utf16 => 2,
UtfEnumeratorFlags.Utf32 => 4,
_ => throw new ArgumentOutOfRangeException(nameof(this.flags), this.flags, "Multiple UTF flag specified."),
};
this.isBigEndian = (flags & UtfEnumeratorFlags.EndiannessMask) switch
{
UtfEnumeratorFlags.NativeEndian => !BitConverter.IsLittleEndian,
UtfEnumeratorFlags.LittleEndian => false,
UtfEnumeratorFlags.BigEndian => true,
_ => throw new ArgumentOutOfRangeException(nameof(flags), flags, "Multiple endianness flag specified."),
};
}
/// <summary>Gets the item most recently stored by <see cref="MoveNext"/>; <c>default</c> until the first
/// successful call.</summary>
public Subsequence Current { get; private set; } = default;
/// <summary>Builds a <see cref="UtfEnumerator"/> over an encoded byte sequence.</summary>
/// <param name="data">The bytes to enumerate.</param>
/// <param name="flags">Enumeration flags, passed to the constructor unchanged.</param>
/// <returns>A new enumerator.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static UtfEnumerator From(ReadOnlySpan<byte> data, UtfEnumeratorFlags flags)
{
    return new UtfEnumerator(data, flags);
}
/// <summary>Builds a <see cref="UtfEnumerator"/> over the in-memory bytes of a <c>char</c> span.</summary>
/// <param name="data">The characters whose underlying bytes are enumerated.</param>
/// <param name="flags">Enumeration flags, passed to the constructor unchanged; this method does not select an
/// encoding on the caller's behalf.</param>
/// <returns>A new enumerator.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static UtfEnumerator From(ReadOnlySpan<char> data, UtfEnumeratorFlags flags)
{
    var bytes = MemoryMarshal.Cast<char, byte>(data);
    return new UtfEnumerator(bytes, flags);
}
/// <summary>Maps a SeString macro code to the single <c>char</c> that stands in for it.</summary>
/// <param name="macroCode">The macro code.</param>
/// <returns>The stand-in <c>char</c>, or <see cref="char.MaxValue"/> when the macro code has none.</returns>
public static char RepresentativeCharFor(MacroCode macroCode)
{
    switch (macroCode)
    {
        case MacroCode.NewLine:
            return '\u0085';
        case MacroCode.SoftHyphen:
            return '\u00AD';
        case MacroCode.NonBreakingSpace:
            return '\u00A0';
        case MacroCode.Hyphen:
            return '-';
        case MacroCode.Icon or MacroCode.Icon2:
            return '\uFFFC';
        default:
            return char.MaxValue;
    }
}
/// <summary>Attempts to peek the next item.</summary>
/// <param name="nextSubsequence">Retrieved next item.</param>
/// <param name="isStillBigEndian">Whether it still should be parsed in big endian.</param>
/// <returns><c>true</c> if anything is retrieved.</returns>
/// <exception cref="EncoderFallbackException">The sequence is not a fully valid Unicode sequence, and
/// <see cref="UtfEnumeratorFlags.ThrowOnFirstError"/> is set.</exception>
/// <remarks>This method is <c>readonly</c>: it reports the next item and the resulting endianness through its
/// <c>out</c> parameters and leaves it to the caller to store them.</remarks>
public readonly bool TryPeekNext(out Subsequence nextSubsequence, out bool isStillBigEndian)
{
// Decoding resumes right after the bytes covered by the current item.
var offset = this.Current.ByteOffset + this.Current.ByteLength;
isStillBigEndian = this.isBigEndian;
// Each iteration decodes one candidate item; `continue` skips items that are not to be yielded.
while (true)
{
var subspan = this.data[offset..];
if (subspan.IsEmpty)
{
nextSubsequence = default;
return false;
}
UtfValue value;
int length;
var isBroken =
this.numBytesPerUnit switch
{
1 => !UtfValue.TryDecode8(subspan, out value, out length),
2 => !UtfValue.TryDecode16(subspan, isStillBigEndian, out value, out length),
4 => !UtfValue.TryDecode32(subspan, isStillBigEndian, out value, out length),
_ => throw new InvalidOperationException(),
};
// 0xFFFE is what U+FEFF (byte order mark) decodes to when read with the opposite byte order.
// NOTE(review): a U+FEFF decoded with the matching byte order is not special-cased here; it is yielded like
// any other codepoint regardless of YieldByteOrderMask - confirm this is intended.
if (!isBroken && value.IntValue == 0xFFFE)
{
if ((this.flags & UtfEnumeratorFlags.DisrespectByteOrderMask) == 0)
{
// Switch byte order for the rest of the sequence and report the mark as U+FEFF.
isStillBigEndian = !isStillBigEndian;
value = 0xFEFF;
}
if ((this.flags & UtfEnumeratorFlags.YieldByteOrderMask) == 0)
{
// The mark itself is not yielded; move past it and decode the next item.
offset += length;
continue;
}
}
if (isBroken || !Rune.IsValid(value))
{
switch (this.flags & UtfEnumeratorFlags.ErrorHandlingMask)
{
case UtfEnumeratorFlags.ReplaceErrors:
// Fall through to the code below, which yields the item (flagged as broken if it failed to decode).
break;
case UtfEnumeratorFlags.IgnoreErrors:
// Skip a single code unit and retry from there.
offset = Math.Min(offset + this.numBytesPerUnit, this.data.Length);
continue;
case UtfEnumeratorFlags.ThrowOnFirstError:
if (isBroken)
throw new EncoderFallbackException($"0x{subspan[0]:X02} is not a valid sequence.");
throw new EncoderFallbackException(
$"U+{value.UIntValue:X08} is not a valid unicode codepoint.");
case UtfEnumeratorFlags.TerminateOnFirstError:
default:
nextSubsequence = default;
return false;
}
}
// For an undecodable sequence, carry the first raw byte as the value; the item is marked broken below.
if (isBroken)
value = subspan[0];
// In SeString mode, a value equal to SeString.StartByte is handed to the SeString parser as a payload start.
if (value == SeString.StartByte && (this.flags & UtfEnumeratorFlags.Utf8SeString) != 0)
{
var e = new ReadOnlySeStringSpan(subspan).GetEnumerator();
// NOTE(review): the result of MoveNext is not checked; presumably a non-empty span always yields a
// payload - verify against ReadOnlySeStringSpan.
e.MoveNext();
switch (this.flags & UtfEnumeratorFlags.ErrorHandlingMask)
{
// Matches under every error handling mode: a macro payload is yielded as a single item.
case var _ when e.Current.Type is ReadOnlySePayloadType.Macro:
nextSubsequence = Subsequence.FromPayload(
e.Current.MacroCode,
offset,
e.Current.EnvelopeByteLength);
return true;
case UtfEnumeratorFlags.ReplaceErrors:
// NOTE(review): the stored value is U+FFFE rather than U+FFFD; the item is also marked broken, which is
// what Subsequence.EffectiveInt checks first - confirm whether U+FFFE is deliberate.
value = '\uFFFE';
length = e.Current.EnvelopeByteLength;
isBroken = true;
break;
case UtfEnumeratorFlags.IgnoreErrors:
// Skip the whole payload envelope.
offset = Math.Min(offset + e.Current.EnvelopeByteLength, this.data.Length);
continue;
case UtfEnumeratorFlags.ThrowOnFirstError:
throw new EncoderFallbackException("Invalid SeString payload.");
case UtfEnumeratorFlags.TerminateOnFirstError:
default:
nextSubsequence = default;
return false;
}
}
nextSubsequence = Subsequence.FromUnicode(value, offset, length, isBroken);
return true;
}
}
/// <summary>Advances to the next item, if there is one.</summary>
/// <returns><c>true</c> if <see cref="Current"/> now holds a new item; <c>false</c> if nothing more could be
/// retrieved, in which case the enumerator state is left unchanged.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool MoveNext()
{
    var advanced = this.TryPeekNext(out var upcoming, out var bigEndianAfter);
    if (advanced)
    {
        // Commit what the peek reported.
        this.Current = upcoming;
        this.isBigEndian = bigEndianAfter;
    }

    return advanced;
}
/// <summary>Gets a fresh enumerator over the same data, constructed from the same flags.</summary>
/// <returns>A new <see cref="UtfEnumerator"/>; the position of this instance is not carried over.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public UtfEnumerator GetEnumerator()
{
    return new UtfEnumerator(this.data, this.flags);
}
/// <summary>One item of a UTF-N sequence: either a single codepoint or, for SeStrings, a single payload.</summary>
/// <remarks>The layout is explicit so that the whole value can be compared through two overlapping
/// <c>ulong</c> fields.</remarks>
[StructLayout(LayoutKind.Explicit, Size = 16)]
[DebuggerDisplay("[{ByteOffset}, {ByteLength}] {Value}")]
public readonly struct Subsequence : IEquatable<Subsequence>
{
    /// <summary>The codepoint. Meaningful when <see cref="IsSeStringPayload"/> is <c>false</c>.</summary>
    [FieldOffset(0)]
    public readonly UtfValue Value;

    /// <summary>The macro code. Meaningful when <see cref="IsSeStringPayload"/> is <c>true</c>.</summary>
    [FieldOffset(0)]
    public readonly MacroCode MacroCode;

    /// <summary>The byte offset of this item within the UTF-N sequence.</summary>
    [FieldOffset(4)]
    public readonly int ByteOffset;

    /// <summary>The number of bytes this item occupies within the UTF-N sequence.</summary>
    /// <remarks>This may differ from <see cref="UtfValue.Length8"/> when <see cref="BrokenSequence"/> is
    /// <c>true</c>.</remarks>
    [FieldOffset(8)]
    public readonly int ByteLength;

    /// <summary>Whether this item was produced from a sequence that could not be decoded.</summary>
    [FieldOffset(12)]
    public readonly bool BrokenSequence;

    /// <summary>Whether this item is a SeString payload rather than a codepoint.</summary>
    [FieldOffset(13)]
    public readonly bool IsSeStringPayload;

    /// <summary>Overlay of bytes 0 to 7, used by <see cref="Equals(Subsequence)"/> and
    /// <see cref="GetHashCode"/>.</summary>
    [FieldOffset(0)]
    private readonly ulong storage0;

    /// <summary>Overlay of bytes 8 to 15, used by <see cref="Equals(Subsequence)"/> and
    /// <see cref="GetHashCode"/>.</summary>
    [FieldOffset(8)]
    private readonly ulong storage1;

    /// <summary>Initializes a new instance of the <see cref="Subsequence"/> struct.</summary>
    /// <param name="value">The raw value: a codepoint, or a macro code converted to <c>uint</c>.</param>
    /// <param name="byteOffset">The byte offset of this item within the UTF-N sequence.</param>
    /// <param name="byteLength">The byte length of this item within the UTF-N sequence.</param>
    /// <param name="brokenSequence">Whether the item comes from an undecodable sequence.</param>
    /// <param name="isSeStringPayload">Whether the item is a SeString payload.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private Subsequence(uint value, int byteOffset, int byteLength, bool brokenSequence, bool isSeStringPayload)
    {
        this.Value = new UtfValue(value);
        this.ByteOffset = byteOffset;
        this.ByteLength = byteLength;
        this.BrokenSequence = brokenSequence;
        this.IsSeStringPayload = isSeStringPayload;
    }

    /// <summary>Gets <see cref="EffectiveInt"/> narrowed to a <c>char</c>.</summary>
    /// <value>The effective value when it lies in <c>[0, char.MaxValue)</c>; otherwise
    /// <see cref="char.MaxValue"/>.</value>
    public char EffectiveChar
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            var effective = this.EffectiveInt;
            return effective >= 0 && effective < char.MaxValue ? (char)effective : char.MaxValue;
        }
    }

    /// <summary>Gets the effective <c>int</c> value, with invalid codepoints replaced by U+FFFD.</summary>
    /// <value>For payloads, the result of <see cref="RepresentativeCharFor"/>, which is
    /// <see cref="char.MaxValue"/> when there is no representative character.</value>
    public int EffectiveInt
    {
        get
        {
            if (this.IsSeStringPayload)
                return RepresentativeCharFor(this.MacroCode);

            if (this.BrokenSequence)
                return 0xFFFD;

            return this.Value.TryGetRune(out var rune) ? rune.Value : 0xFFFD;
        }
    }

    /// <summary>Gets <see cref="EffectiveInt"/> wrapped in a <see cref="Rune"/>.</summary>
    public Rune EffectiveRune
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return new Rune(this.EffectiveInt);
        }
    }

    /// <summary>Tests two subsequences for equality.</summary>
    /// <param name="left">The first operand.</param>
    /// <param name="right">The second operand.</param>
    /// <returns><c>true</c> if both operands are equal.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool operator ==(Subsequence left, Subsequence right)
    {
        return left.Equals(right);
    }

    /// <summary>Tests two subsequences for inequality.</summary>
    /// <param name="left">The first operand.</param>
    /// <param name="right">The second operand.</param>
    /// <returns><c>true</c> if the operands differ.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool operator !=(Subsequence left, Subsequence right)
    {
        return !left.Equals(right);
    }

    /// <summary>Creates a <see cref="Subsequence"/> holding a Unicode value.</summary>
    /// <param name="codepoint">The codepoint.</param>
    /// <param name="byteOffset">The byte offset of this item within the UTF-N sequence.</param>
    /// <param name="byteLength">The byte length of this item within the UTF-N sequence.</param>
    /// <param name="brokenSequence">Whether the item comes from an undecodable sequence.</param>
    /// <returns>A new instance.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Subsequence FromUnicode(uint codepoint, int byteOffset, int byteLength, bool brokenSequence)
    {
        return new Subsequence(codepoint, byteOffset, byteLength, brokenSequence, false);
    }

    /// <summary>Creates a <see cref="Subsequence"/> holding a SeString payload.</summary>
    /// <param name="macroCode">The macro code.</param>
    /// <param name="byteOffset">The byte offset of this item within the UTF-N sequence.</param>
    /// <param name="byteLength">The byte length of this item within the UTF-N sequence.</param>
    /// <returns>A new instance.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Subsequence FromPayload(MacroCode macroCode, int byteOffset, int byteLength)
    {
        return new Subsequence((uint)macroCode, byteOffset, byteLength, false, true);
    }

    /// <summary>Tests whether this subsequence holds a valid Unicode codepoint.</summary>
    /// <returns><c>true</c> if the sequence is not broken and <see cref="Value"/> passes
    /// <c>Rune.IsValid</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool IsValid()
    {
        if (this.BrokenSequence)
            return false;

        return Rune.IsValid(this.Value);
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool Equals(Subsequence other)
    {
        if (this.storage0 != other.storage0)
            return false;

        return this.storage1 == other.storage1;
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override bool Equals(object? obj)
    {
        return obj is Subsequence other && this.Equals(other);
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override int GetHashCode()
    {
        return HashCode.Combine(this.storage0, this.storage1);
    }
}
}

View file

@ -0,0 +1,58 @@
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary>Options controlling how a Unicode sequence is enumerated.</summary>
/// <remarks>The flags fall into groups (encoding, error handling, endianness), each with its own mask and with
/// a zero-valued default member.</remarks>
[Flags]
internal enum UtfEnumeratorFlags
{
    /// <summary>The default configuration; equal to <see cref="Utf8"/>, <see cref="ReplaceErrors"/>, and
    /// <see cref="NativeEndian"/>.</summary>
    Default = 0,

    /// <summary>Treat the data as UTF-8 (the default).</summary>
    Utf8 = Default,

    /// <summary>Treat the data as UTF-8 embedded in a SeString.</summary>
    Utf8SeString = 0x002,

    /// <summary>Treat the data as UTF-16.</summary>
    Utf16 = 0x004,

    /// <summary>Treat the data as UTF-32.</summary>
    Utf32 = 0x008,

    /// <summary>Mask covering the encoding flags.</summary>
    UtfMask = Utf8 | Utf8SeString | Utf16 | Utf32,

    /// <summary>On error, substitute U+FFFD (REPLACEMENT CHARACTER; the default).</summary>
    ReplaceErrors = Default,

    /// <summary>On error, skip the offending data and carry on.</summary>
    IgnoreErrors = 0x010,

    /// <summary>On error, stop enumerating.</summary>
    TerminateOnFirstError = 0x020,

    /// <summary>On error, throw an exception.</summary>
    ThrowOnFirstError = 0x040,

    /// <summary>Mask covering the error handling flags.</summary>
    ErrorHandlingMask = ReplaceErrors | IgnoreErrors | TerminateOnFirstError | ThrowOnFirstError,

    /// <summary>Use the platform byte order reported by <see cref="BitConverter.IsLittleEndian"/>
    /// (the default).</summary>
    NativeEndian = Default,

    /// <summary>Use little endian byte order.</summary>
    LittleEndian = 0x080,

    /// <summary>Use big endian byte order.</summary>
    BigEndian = 0x100,

    /// <summary>Mask covering the endianness flags.</summary>
    EndiannessMask = NativeEndian | LittleEndian | BigEndian,

    /// <summary>Do not act on byte order marks found in the data.</summary>
    DisrespectByteOrderMask = 0x200,

    /// <summary>Yield byte order marks as items when they show up.</summary>
    YieldByteOrderMask = 0x400,
}

View file

@ -0,0 +1,665 @@
using System.Buffers.Binary;
using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
namespace Dalamud.Interface.ImGuiSeStringRenderer.Internal.TextProcessing;
/// <summary>Represents a single value to be used in a UTF-N byte sequence.</summary>
[StructLayout(LayoutKind.Explicit, Size = 4)]
[DebuggerDisplay("0x{IntValue,h} ({CharValue})")]
internal readonly struct UtfValue : IEquatable<UtfValue>, IComparable<UtfValue>
{
/// <summary>The unicode codepoint in <c>int</c>, that may not be in a valid range.</summary>
/// <remarks>Shares its storage with <see cref="UIntValue"/> and <see cref="CharValue"/>.</remarks>
[FieldOffset(0)]
public readonly int IntValue;
/// <summary>The unicode codepoint in <c>uint</c>, that may not be in a valid range.</summary>
/// <remarks>Same four bytes as <see cref="IntValue"/>, reinterpreted as unsigned.</remarks>
[FieldOffset(0)]
public readonly uint UIntValue;
/// <summary>The first two bytes of the value viewed as a <c>char</c>; the rest is cut off, so values outside the
/// BMP are truncated.</summary>
/// <remarks>Because this overlays offset 0, it is the low 16 bits of <see cref="IntValue"/> on little-endian
/// platforms.</remarks>
[FieldOffset(0)]
public readonly char CharValue;
/// <summary>Initializes a new instance of the <see cref="UtfValue"/> struct from an unsigned value.</summary>
/// <param name="value">The raw codepoint value; it is stored without validation.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public UtfValue(uint value)
{
    this.UIntValue = value;
}

/// <summary>Initializes a new instance of the <see cref="UtfValue"/> struct from a signed value.</summary>
/// <param name="value">The raw codepoint value; it is stored without validation.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public UtfValue(int value)
{
    this.IntValue = value;
}
/// <summary>Gets the number of bytes this value takes when encoded in UTF-8.</summary>
/// <value>The result of <see cref="GetEncodedLength8"/> for <see cref="IntValue"/>.</value>
public int Length8
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return GetEncodedLength8(this.IntValue);
    }
}
/// <summary>Gets the number of bytes this value takes when encoded in UTF-16.</summary>
/// <value>The result of <see cref="GetEncodedLength16"/> for <see cref="IntValue"/>.</value>
public int Length16
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return GetEncodedLength16(this.IntValue);
    }
}
/// <summary>Gets the short name of this value, when one is defined.</summary>
/// <value>The result of <see cref="GetShortName"/> for <see cref="IntValue"/>; empty when there is no short
/// name.</value>
public ReadOnlySpan<char> ShortName
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return GetShortName(this.IntValue);
    }
}
/// <summary>Converts to the unsigned raw value.</summary>
public static implicit operator uint(UtfValue c) => c.UIntValue;

/// <summary>Converts to the signed raw value.</summary>
public static implicit operator int(UtfValue c) => c.IntValue;

/// <summary>Wraps a <c>byte</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(byte c) => new UtfValue(c);

/// <summary>Wraps an <c>sbyte</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(sbyte c) => new UtfValue(c);

/// <summary>Wraps a <c>ushort</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(ushort c) => new UtfValue(c);

/// <summary>Wraps a <c>short</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(short c) => new UtfValue(c);

/// <summary>Wraps a <c>uint</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(uint c) => new UtfValue(c);

/// <summary>Wraps an <c>int</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(int c) => new UtfValue(c);

/// <summary>Wraps a <c>char</c> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(char c) => new UtfValue(c);

/// <summary>Wraps the scalar value of a <see cref="Rune"/> as a <see cref="UtfValue"/>.</summary>
public static implicit operator UtfValue(Rune c) => new UtfValue(c.Value);
/// <summary>Tests two values for equality, as decided by <c>Equals</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator ==(UtfValue left, UtfValue right)
{
    return left.Equals(right);
}

/// <summary>Tests two values for inequality, as decided by <c>Equals</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator !=(UtfValue left, UtfValue right)
{
    return !left.Equals(right);
}

/// <summary>Tests whether <paramref name="left"/> orders before <paramref name="right"/>, as decided by
/// <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator <(UtfValue left, UtfValue right)
{
    return left.CompareTo(right) < 0;
}

/// <summary>Tests whether <paramref name="left"/> orders after <paramref name="right"/>, as decided by
/// <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator >(UtfValue left, UtfValue right)
{
    return left.CompareTo(right) > 0;
}

/// <summary>Tests whether <paramref name="left"/> orders before or equal to <paramref name="right"/>, as
/// decided by <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator <=(UtfValue left, UtfValue right)
{
    return left.CompareTo(right) <= 0;
}

/// <summary>Tests whether <paramref name="left"/> orders after or equal to <paramref name="right"/>, as
/// decided by <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator >=(UtfValue left, UtfValue right)
{
    return left.CompareTo(right) >= 0;
}
/// <summary>Gets the short name of the codepoint, for some select codepoints.</summary>
/// <param name="codepoint">The codepoint.</param>
/// <returns>The abbreviated name, or an empty span if the codepoint has no entry here.</returns>
/// <remarks>Covers 0x00 to 0x1F, 0x80 to 0x9F, U+00A0 (NBSP), and U+00AD (SHY). 0x7F has no entry.</remarks>
public static ReadOnlySpan<char> GetShortName(int codepoint) =>
    codepoint switch
    {
        0x00 => "NUL",
        0x01 => "SOH",
        0x02 => "STX",
        0x03 => "ETX",
        0x04 => "EOT",
        0x05 => "ENQ",
        0x06 => "ACK",
        0x07 => "BEL",
        0x08 => "BS",
        0x09 => "HT",
        0x0a => "LF",
        0x0b => "VT",
        0x0c => "FF",
        0x0d => "CR",
        0x0e => "SO",
        0x0f => "SI",
        0x10 => "DLE",
        0x11 => "DC1",
        0x12 => "DC2",
        0x13 => "DC3",
        0x14 => "DC4",
        0x15 => "NAK",
        0x16 => "SYN",

        // U+0017 is END OF TRANSMISSION BLOCK; "SOH" belongs to U+0001 only.
        0x17 => "ETB",
        0x18 => "CAN",
        0x19 => "EOM",
        0x1a => "SUB",
        0x1b => "ESC",
        0x1c => "FS",
        0x1d => "GS",
        0x1e => "RS",
        0x1f => "US",
        0x80 => "PAD",
        0x81 => "HOP",
        0x82 => "BPH",
        0x83 => "NBH",
        0x84 => "IND",
        0x85 => "NEL",
        0x86 => "SSA",
        0x87 => "ESA",
        0x88 => "HTS",
        0x89 => "HTJ",
        0x8a => "VTS",
        0x8b => "PLD",
        0x8c => "PLU",
        0x8d => "RI",
        0x8e => "SS2",
        0x8f => "SS3",
        0x90 => "DCS",
        0x91 => "PU1",
        0x92 => "PU2",
        0x93 => "STS",
        0x94 => "CCH",
        0x95 => "MW",
        0x96 => "SPA",
        0x97 => "EPA",
        0x98 => "SOS",
        0x99 => "SGC",
        0x9a => "SCI",
        0x9b => "CSI",
        0x9c => "ST",
        0x9d => "OSC",
        0x9e => "PM",
        0x9f => "APC",
        0xa0 => "NBSP",
        0xad => "SHY",
        _ => default,
    };
/// <summary>Computes how many bytes a codepoint takes when encoded in UTF-8.</summary>
/// <param name="codepoint">The codepoint to encode; it is treated as an unsigned 32-bit value.</param>
/// <returns>A length from 1 to 7. Lengths above 4 only occur for values that are not valid Unicode
/// codepoints.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int GetEncodedLength8(int codepoint)
{
    var bits = (uint)codepoint;
    if (bits < 0x80u)
        return 1;
    if (bits < 0x800u)
        return 2;
    if (bits < 0x10000u)
        return 3;
    if (bits < 0x200000u)
        return 4;

    // Not a valid Unicode codepoint anymore below.
    if (bits < 0x4000000u)
        return 5;
    if (bits < 0x80000000u)
        return 6;
    return 7;
}
/// <summary>Computes how many bytes a codepoint takes when encoded in UTF-16.</summary>
/// <param name="codepoint">The codepoint to encode; it is treated as an unsigned 32-bit value.</param>
/// <returns>2, 4, 6, or 8. Lengths above 4 only occur for values that are not valid Unicode
/// codepoints.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int GetEncodedLength16(int codepoint)
{
    var bits = (uint)codepoint;
    if (bits < 0x10000u)
        return 2;
    if (bits < 0x110000u)
        return 4;

    // Not a valid Unicode codepoint anymore below.
    if (bits < 0x40010000u)
        return 6;
    return 8;
}
/// <summary>Attempts to decode a value from the front of a UTF-8 byte sequence, then advances past it.</summary>
/// <param name="source">The span to decode from; on return it is trimmed at the beginning by
/// <paramref name="length"/> bytes, whether or not decoding succeeded.</param>
/// <param name="value">The decoded value.</param>
/// <param name="length">The number of bytes consumed.</param>
/// <returns><c>true</c> if a value was decoded.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryDecode8(ref ReadOnlySpan<byte> source, out UtfValue value, out int length)
{
    var decoded = TryDecode8(source, out value, out length);
    source = source.Slice(length);
    return decoded;
}
/// <summary>Attempts to decode a value from a UTF-8 byte sequence.</summary>
/// <param name="source">The span to decode from.</param>
/// <param name="value">The decoded value; <c>default</c> on failure.</param>
/// <param name="length">The number of bytes consumed: <c>0</c> if <paramref name="source"/> is empty,
/// <c>1</c> if the sequence is broken.</param>
/// <returns><c>true</c> if <paramref name="source"/> is successfully decoded.</returns>
/// <remarks>Only the byte structure is checked (lead byte, enough bytes, continuation markers). Values for which
/// <see cref="Rune.IsValid(int)"/> is <c>false</c>, such as unpaired surrogates or values above U+10FFFF, can
/// still be returned, and sequences of up to 7 bytes are accepted so that any 32-bit value can be
/// represented.</remarks>
public static bool TryDecode8(ReadOnlySpan<byte> source, out UtfValue value, out int length)
{
    if (source.IsEmpty)
    {
        value = default;
        length = 0;
        return false;
    }

    uint lead = source[0];
    if (lead < 0x80)
    {
        value = lead;
        length = 1;
        return true;
    }

    // Total sequence length announced by the lead byte; 0 marks a byte that cannot start a sequence.
    var expected = lead switch
    {
        >= 0xC0 and <= 0xDF => 2,
        >= 0xE0 and <= 0xEF => 3,
        >= 0xF0 and <= 0xF7 => 4,
        >= 0xF8 and <= 0xFB => 5,
        0xFC or 0xFD => 6,
        0xFE => 7,
        _ => 0,
    };

    var wellFormed = expected != 0 && source.Length >= expected;

    // The lead byte contributes (7 - expected) payload bits; none at all for the 7-byte form.
    var bits = lead & (0x7Fu >> expected);
    for (var i = 1; wellFormed && i < expected; i++)
    {
        uint next = source[i];

        // Every trailing byte must look like 10xxxxxx. In the 7-byte form the first trailing byte only has
        // room for two payload bits, so it must look like 100000xx.
        var marker = expected == 7 && i == 1 ? 0xFCu : 0xC0u;
        wellFormed = (next & marker) == 0x80;
        bits = (bits << 6) | (next & 0x3F);
    }

    if (!wellFormed)
    {
        value = default;
        length = 1;
        return false;
    }

    value = bits;
    length = expected;
    return true;
}
/// <summary>Attempts to decode a value from the front of a UTF-16 byte sequence, then advances past it.</summary>
/// <param name="source">The span to decode from; on return it is trimmed at the beginning by
/// <paramref name="length"/> bytes, whether or not decoding succeeded.</param>
/// <param name="be">Whether to use big endian.</param>
/// <param name="value">The decoded value.</param>
/// <param name="length">The number of bytes consumed.</param>
/// <returns><c>true</c> if a value was decoded.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryDecode16(ref ReadOnlySpan<byte> source, bool be, out UtfValue value, out int length)
{
    var decoded = TryDecode16(source, be, out value, out length);
    source = source.Slice(length);
    return decoded;
}
/// <summary>Attempts to decode a value from a UTF-16 byte sequence.</summary>
/// <param name="source">The span to decode from.</param>
/// <param name="be">Whether to use big endian.</param>
/// <param name="value">The decoded value; <c>default</c> if <paramref name="source"/> is too short.</param>
/// <param name="length">The number of bytes consumed: <c>0</c> or <c>1</c> if <paramref name="source"/> holds
/// less than one code unit; otherwise <c>2</c>, <c>4</c>, <c>6</c>, or <c>8</c>.</param>
/// <returns><c>true</c> if <paramref name="source"/> is successfully decoded.</returns>
/// <remarks>Codepoints that results in <c>false</c> from <see cref="Rune.IsValid(int)"/> can still be returned,
/// including unpaired surrogate characters, or codepoints above U+10FFFF. A high surrogate followed by two or
/// three low surrogates is decoded as a 6 or 8 byte extended form that is not valid Unicode. This function
/// returns a value only indicating whether the sequence could be decoded into a number, without being too
/// short.</remarks>
public static bool TryDecode16(ReadOnlySpan<byte> source, bool be, out UtfValue value, out int length)
{
    if (source.Length < 2)
    {
        value = default;
        length = source.Length;
        return false;
    }

    // Every code unit, including the look-ahead ones, must be read with the requested byte order.
    static int ReadUnit(ReadOnlySpan<byte> bytes, bool bigEndian) =>
        bigEndian ? BinaryPrimitives.ReadUInt16BigEndian(bytes) : BinaryPrimitives.ReadUInt16LittleEndian(bytes);

    var val = ReadUnit(source, be);
    if (char.IsHighSurrogate((char)val))
    {
        // Units past the end of the data read as 0, which is not a low surrogate.
        var lookahead1 = source.Length >= 4 ? ReadUnit(source[2..], be) : 0;
        var lookahead2 = source.Length >= 6 ? ReadUnit(source[4..], be) : 0;
        var lookahead3 = source.Length >= 8 ? ReadUnit(source[6..], be) : 0;
        if (char.IsLowSurrogate((char)lookahead1))
        {
            // Not a valid Unicode codepoint anymore inside the block below.
            if (char.IsLowSurrogate((char)lookahead2))
            {
                if (char.IsLowSurrogate((char)lookahead3))
                {
                    value = 0x10000
                            + (((val & 0x3) << 30) |
                               ((lookahead1 & 0x3FF) << 20) |
                               ((lookahead2 & 0x3FF) << 10) |
                               ((lookahead3 & 0x3FF) << 0));
                    length = 8;
                    return true;
                }

                value = 0x10000
                        + (((val & 0x3FF) << 20) |
                           ((lookahead1 & 0x3FF) << 10) |
                           ((lookahead2 & 0x3FF) << 0));
                length = 6;
                return true;
            }

            value = 0x10000 +
                    (((val & 0x3FF) << 10) |
                     ((lookahead1 & 0x3FF) << 0));
            length = 4;
            return true;
        }
    }

    // Callers are supposed to handle unpaired surrogates.
    value = val;
    length = 2;
    return true;
}
/// <summary>Attempts to decode a value from the front of a UTF-32 byte sequence, then advances past it.</summary>
/// <param name="source">The span to decode from; on return it is trimmed at the beginning by
/// <paramref name="length"/> bytes, whether or not decoding succeeded.</param>
/// <param name="be">Whether to use big endian.</param>
/// <param name="value">The decoded value.</param>
/// <param name="length">The number of bytes consumed.</param>
/// <returns><c>true</c> if a value was decoded.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryDecode32(ref ReadOnlySpan<byte> source, bool be, out UtfValue value, out int length)
{
    var decoded = TryDecode32(source, be, out value, out length);
    source = source.Slice(length);
    return decoded;
}
/// <summary>Attempts to decode a value from a UTF-32 byte sequence.</summary>
/// <param name="source">The span to decode from.</param>
/// <param name="be">Whether to use big endian.</param>
/// <param name="value">The decoded value; <c>default</c> if <paramref name="source"/> is too short.</param>
/// <param name="length">The number of bytes consumed: <c>0</c> to <c>3</c> if <paramref name="source"/> is cut
/// short, otherwise <c>4</c>.</param>
/// <returns><c>true</c> if <paramref name="source"/> holds at least four bytes.</returns>
/// <remarks>The value is returned as read; values for which <see cref="Rune.IsValid(int)"/> is <c>false</c>,
/// such as surrogates or values above U+10FFFF, can still be returned.</remarks>
public static bool TryDecode32(ReadOnlySpan<byte> source, bool be, out UtfValue value, out int length)
{
    if (source.Length < 4)
    {
        value = default;
        length = source.Length;
        return false;
    }

    // Four bytes are available, so the read cannot fail.
    length = 4;
    value = be
                ? BinaryPrimitives.ReadInt32BigEndian(source)
                : BinaryPrimitives.ReadInt32LittleEndian(source);
    return true;
}
/// <summary>Encodes the codepoint in UTF-8 and writes the result to a stream.</summary>
/// <param name="target">The stream to write to.</param>
/// <param name="codepoint">The codepoint to encode.</param>
/// <returns>The number of bytes written.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Encode8(Stream target, int codepoint)
{
    // Seven bytes is the longest form the encoder produces.
    Span<byte> scratch = stackalloc byte[7];
    Encode8(scratch, codepoint, out var written);
    target.Write(scratch.Slice(0, written));
    return written;
}
/// <summary>Encodes the codepoint in UTF-8 into a span, then advances the span past the written bytes.</summary>
/// <param name="target">The span to write to; on return it refers to the region left over after the encoded
/// data.</param>
/// <param name="codepoint">The codepoint to encode.</param>
/// <returns>The length of the encoded data.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Encode8(ref Span<byte> target, int codepoint)
{
    var remaining = Encode8(target, codepoint, out var written);
    target = remaining;
    return written;
}
/// <summary>Encodes the codepoint in UTF-8 into a span.</summary>
/// <param name="target">The span to write to. If it is empty, nothing is written and only
/// <paramref name="length"/> is computed.</param>
/// <param name="codepoint">The codepoint to encode; it is treated as an unsigned 32-bit value.</param>
/// <param name="length">The length of the encoded data, from 1 to 7 bytes.</param>
/// <returns>The region of <paramref name="target"/> that follows the written bytes.</returns>
public static Span<byte> Encode8(Span<byte> target, int codepoint, out int length)
{
    length = GetEncodedLength8(codepoint);
    if (target.IsEmpty)
        return target;

    if (length is < 1 or > 7)
    {
        Debug.Assert(false, $"{nameof(Length8)} property should have produced all possible cases.");
        return target;
    }

    var bits = (uint)codepoint;
    if (length == 1)
    {
        target[0] = (byte)bits;
        return target[1..];
    }

    // Lead byte: `length` one-bits, a zero bit (absent in the 7-byte form), then the topmost payload bits.
    // 0xFF00 >> length keeps exactly `length` one-bits at the top of the low byte.
    var leadMarker = (0xFF00u >> length) & 0xFFu;
    var leadPayload = length == 7 ? 0u : (bits >> (6 * (length - 1))) & (0x7Fu >> length);
    target[0] = (byte)(leadMarker | leadPayload);

    // Trailing bytes: 10xxxxxx, six payload bits each, most significant group first.
    for (var i = 1; i < length; i++)
        target[i] = (byte)(0x80 | ((bits >> (6 * (length - 1 - i))) & 0x3F));

    return target[length..];
}
/// <summary>Encodes the codepoint in UTF-16 and writes the result to a stream.</summary>
/// <param name="target">The stream to write to.</param>
/// <param name="codepoint">The codepoint to encode.</param>
/// <param name="be">Whether to use big endian.</param>
/// <returns>The number of bytes written.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Encode16(Stream target, int codepoint, bool be)
{
    // Eight bytes is the longest form the encoder produces.
    Span<byte> scratch = stackalloc byte[8];
    Encode16(scratch, codepoint, be, out var written);
    target.Write(scratch.Slice(0, written));
    return written;
}
/// <summary>Encodes the codepoint in UTF-16 into a span, then advances the span past the written bytes.</summary>
/// <param name="target">The span to write to; on return it refers to the region left over after the encoded
/// data.</param>
/// <param name="codepoint">The codepoint to encode.</param>
/// <param name="be">Whether to use big endian.</param>
/// <returns>The length of the encoded data.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Encode16(ref Span<byte> target, int codepoint, bool be)
{
    var remaining = Encode16(target, codepoint, be, out var written);
    target = remaining;
    return written;
}
/// <summary>Encodes the codepoint to the target in UTF-16.</summary>
/// <param name="target">The target byte span. If it is empty, nothing is written and only
/// <paramref name="length"/> is calculated.</param>
/// <param name="codepoint">The codepoint to encode.</param>
/// <param name="be">Whether to use big endian.</param>
/// <param name="length">The length of the encoded data, in bytes.</param>
/// <returns>The remaining region of <paramref name="target"/>.</returns>
public static Span<byte> Encode16(Span<byte> target, int codepoint, bool be, out int length)
{
    length = GetEncodedLength16(codepoint);
    if (target.IsEmpty)
        return target;

    var bits = (uint)codepoint;
    switch (length)
    {
        case 2:
            // A single code unit stores the value verbatim.
            PutUnit(target, (ushort)bits, be);
            return target[2..];

        case 4:
        case 6:
        case 8:
            // Multi-unit forms are handled below.
            break;

        default:
            Debug.Assert(false, $"{nameof(Length16)} property should have produced all possible cases.");
            return target;
    }

    // Multi-unit forms: one 0xD800-tagged lead unit followed by 0xDC00-tagged units, each carrying
    // 10 bits of the value (minus 0x10000), most significant bits first.
    bits -= 0x10000;
    var unitCount = length / 2;
    var leadShift = 10 * (unitCount - 1);

    // In the 8-byte form only the top 2 bits of the 32-bit value remain for the lead unit.
    var leadMask = length == 8 ? 0x3u : 0x3FFu;
    PutUnit(target, (ushort)(0xD800 | ((bits >> leadShift) & leadMask)), be);

    for (var i = 1; i < unitCount; i++)
    {
        var shift = leadShift - (10 * i);
        PutUnit(target[(2 * i)..], (ushort)(0xDC00 | ((bits >> shift) & 0x3FF)), be);
    }

    return target[length..];

    // Writes one 16-bit code unit at the beginning of the destination in the requested byte order.
    static void PutUnit(Span<byte> destination, ushort unit, bool bigEndian)
    {
        if (bigEndian)
            BinaryPrimitives.WriteUInt16BigEndian(destination, unit);
        else
            BinaryPrimitives.WriteUInt16LittleEndian(destination, unit);
    }
}
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompareTo(UtfValue other)
{
    // Ordering is defined solely by the numeric value.
    var mine = this.IntValue;
    return mine.CompareTo(other.IntValue);
}
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool Equals(UtfValue other)
{
    // Two values are equal exactly when their numeric values match.
    return other.IntValue == this.IntValue;
}
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override bool Equals(object? obj)
{
    // Anything that is not a UtfValue (including null) is never equal.
    if (obj is not UtfValue other)
        return false;

    return this.Equals(other);
}
/// <inheritdoc/>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override int GetHashCode()
{
    // The numeric value itself is the hash, consistent with Equals comparing that same value.
    return this.IntValue;
}
/// <summary>Attempts to get the corresponding rune.</summary>
/// <param name="rune">The retrieved rune, or <c>default</c> if the value does not form a valid rune.</param>
/// <returns><c>true</c> if retrieved.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetRune(out Rune rune) =>
    // Rune.TryCreate performs the validity check and leaves the default rune on failure.
    Rune.TryCreate(this.IntValue, out rune);
/// <summary>Encodes this value to the target using the static <c>Encode8</c> overload.</summary>
/// <param name="target">The target byte span.</param>
/// <returns>The remaining region of <paramref name="target"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Span<byte> Encode8(Span<byte> target)
{
    // The encoded length is not needed here, so it is discarded.
    return Encode8(target, this, out _);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,168 @@
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
using System.Runtime.InteropServices;
using ImGuiNET;
using Lumina.Text.Payloads;
namespace Dalamud.Interface.ImGuiSeStringRenderer;
/// <summary>Render styles for a SeString.</summary>
/// <remarks>Nullable properties left as <c>null</c> are replaced with defaults, mostly taken from the current ImGui
/// state, when a <see cref="Resolved"/> is constructed from this value.</remarks>
public record struct SeStringDrawParams
{
/// <summary>Gets or sets the target draw list.</summary>
/// <value>Target draw list, <c>default(ImDrawListPtr)</c> to not draw, or <c>null</c> to use
/// <see cref="ImGui.GetWindowDrawList"/> (the default).</value>
/// <remarks>If this value is set, <see cref="ImGui.Dummy"/> will not be called, and ImGui ID will be ignored.
/// </remarks>
public ImDrawListPtr? TargetDrawList { get; set; }
/// <summary>Gets or sets the font to use.</summary>
/// <value>Font to use, or <c>null</c> to use <see cref="ImGui.GetFont"/> (the default).</value>
public ImFontPtr? Font { get; set; }
/// <summary>Gets or sets the screen offset of the left top corner.</summary>
/// <value>Screen offset to draw at, or <c>null</c> to use <see cref="ImGui.GetCursorScreenPos"/>.</value>
public Vector2? ScreenOffset { get; set; }
/// <summary>Gets or sets the font size.</summary>
/// <value>Font size in pixels, or <c>null</c> to use the current ImGui font size from
/// <see cref="ImGui.GetFontSize"/> (the default).</value>
public float? FontSize { get; set; }
/// <summary>Gets or sets the line height ratio.</summary>
/// <value><c>1</c> or <c>null</c> (the default) will use <see cref="FontSize"/> as the line height.
/// <c>2</c> will make line height twice the <see cref="FontSize"/>.</value>
/// <remarks>The resulting line height is rounded with <see cref="MathF.Round(float)"/> when resolved.</remarks>
public float? LineHeight { get; set; }
/// <summary>Gets or sets the wrapping width.</summary>
/// <value>Width in pixels, or <c>null</c> to wrap at the end of available content region from
/// <see cref="ImGui.GetContentRegionAvail"/> (the default).</value>
public float? WrapWidth { get; set; }
/// <summary>Gets or sets the thickness of underline under links.</summary>
/// <value>Thickness of the underline, or <c>null</c> to use <c>0</c> (the default).</value>
public float? LinkUnderlineThickness { get; set; }
/// <summary>Gets or sets the opacity, commonly called &quot;alpha&quot;.</summary>
/// <value>Opacity value ranging from 0(invisible) to 1(fully visible), or <c>null</c> to use the current ImGui
/// opacity from <see cref="ImGuiStyle.Alpha"/> accessed using <see cref="ImGui.GetStyle"/>.</value>
public float? Opacity { get; set; }
/// <summary>Gets or sets the strength of the edge, which will have effects on the edge opacity.</summary>
/// <value>Strength value ranging from 0(invisible) to 1(fully visible), or <c>null</c> to use the default value
/// of <c>0.25f</c> that might be subject to change in the future.</value>
public float? EdgeStrength { get; set; }
/// <summary>Gets or sets the color of the rendered text.</summary>
/// <value>Color in RGBA, or <c>null</c> to use <see cref="ImGuiCol.Text"/> (the default).</value>
public uint? Color { get; set; }
/// <summary>Gets or sets the color of the rendered text edge.</summary>
/// <value>Color in RGBA, or <c>null</c> to use opaque black (the default).</value>
public uint? EdgeColor { get; set; }
/// <summary>Gets or sets the color of the rendered text shadow.</summary>
/// <value>Color in RGBA, or <c>null</c> to use opaque black (the default).</value>
public uint? ShadowColor { get; set; }
/// <summary>Gets or sets the background color of a link when hovered.</summary>
/// <value>Color in RGBA, or <c>null</c> to use <see cref="ImGuiCol.ButtonHovered"/> (the default).</value>
public uint? LinkHoverBackColor { get; set; }
/// <summary>Gets or sets the background color of a link when active.</summary>
/// <value>Color in RGBA, or <c>null</c> to use <see cref="ImGuiCol.ButtonActive"/> (the default).</value>
public uint? LinkActiveBackColor { get; set; }
/// <summary>Gets or sets a value indicating whether to force the color of the rendered text edge.</summary>
/// <remarks>If set, then <see cref="MacroCode.EdgeColor"/> and <see cref="MacroCode.EdgeColorType"/> will be
/// ignored.</remarks>
public bool ForceEdgeColor { get; set; }
/// <summary>Gets or sets a value indicating whether the text is rendered bold.</summary>
public bool Bold { get; set; }
/// <summary>Gets or sets a value indicating whether the text is rendered italic.</summary>
public bool Italic { get; set; }
/// <summary>Gets or sets a value indicating whether the text is rendered with edge.</summary>
public bool Edge { get; set; }
/// <summary>Gets or sets a value indicating whether the text is rendered with shadow.</summary>
public bool Shadow { get; set; }
/// <summary>Gets the native pointer of the font to use: <see cref="Font"/> if set, otherwise
/// <see cref="ImGui.GetFont"/>.</summary>
/// <exception cref="ArgumentException">The selected font has a null native pointer.</exception>
private readonly unsafe ImFont* EffectiveFont =>
(this.Font ?? ImGui.GetFont()) is var f && f.NativePtr is not null
? f.NativePtr
: throw new ArgumentException("Specified font is empty.");
/// <summary>Gets the unrounded line height: the font size (<see cref="FontSize"/>, or
/// <see cref="ImGui.GetFontSize"/> when <c>null</c>) multiplied by <see cref="LineHeight"/> (<c>1</c> when
/// <c>null</c>).</summary>
private readonly float EffectiveLineHeight => (this.FontSize ?? ImGui.GetFontSize()) * (this.LineHeight ?? 1f);
/// <summary>Gets the opacity to use: <see cref="Opacity"/> if set, otherwise the alpha of the current ImGui
/// style.</summary>
private readonly float EffectiveOpacity => this.Opacity ?? ImGui.GetStyle().Alpha;
/// <summary>Calculated values from <see cref="SeStringDrawParams"/> using ImGui styles.</summary>
/// <remarks>Every field is initialized from the given parameters, with <c>null</c> values replaced by defaults.
/// Fields not marked <c>readonly</c> can be changed after construction; presumably the renderer updates them
/// while processing styling payloads (to be confirmed against the renderer).</remarks>
[SuppressMessage(
"StyleCop.CSharp.OrderingRules",
"SA1214:Readonly fields should appear before non-readonly fields",
Justification = "Matching the above order.")]
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct Resolved(in SeStringDrawParams ssdp)
{
/// <inheritdoc cref="SeStringDrawParams.TargetDrawList"/>
public readonly ImDrawList* DrawList = ssdp.TargetDrawList ?? ImGui.GetWindowDrawList();
/// <inheritdoc cref="SeStringDrawParams.Font"/>
public readonly ImFont* Font = ssdp.EffectiveFont;
/// <inheritdoc cref="SeStringDrawParams.ScreenOffset"/>
public readonly Vector2 ScreenOffset = ssdp.ScreenOffset ?? ImGui.GetCursorScreenPos();
/// <inheritdoc cref="SeStringDrawParams.FontSize"/>
public readonly float FontSize = ssdp.FontSize ?? ImGui.GetFontSize();
/// <summary>The line height, calculated as the font size multiplied by the line height ratio and then rounded.
/// </summary>
public readonly float LineHeight = MathF.Round(ssdp.EffectiveLineHeight);
/// <inheritdoc cref="SeStringDrawParams.WrapWidth"/>
public readonly float WrapWidth = ssdp.WrapWidth ?? ImGui.GetContentRegionAvail().X;
/// <inheritdoc cref="SeStringDrawParams.LinkUnderlineThickness"/>
public readonly float LinkUnderlineThickness = ssdp.LinkUnderlineThickness ?? 0f;
/// <inheritdoc cref="SeStringDrawParams.Opacity"/>
public readonly float Opacity = ssdp.EffectiveOpacity;
/// <summary>The opacity of the edge: <see cref="SeStringDrawParams.EdgeStrength"/> (<c>0.25f</c> when
/// <c>null</c>) multiplied by the effective opacity.</summary>
public readonly float EdgeOpacity = (ssdp.EdgeStrength ?? 0.25f) * ssdp.EffectiveOpacity;
/// <inheritdoc cref="SeStringDrawParams.Color"/>
public uint Color = ssdp.Color ?? ImGui.GetColorU32(ImGuiCol.Text);
/// <inheritdoc cref="SeStringDrawParams.EdgeColor"/>
public uint EdgeColor = ssdp.EdgeColor ?? 0xFF000000;
/// <inheritdoc cref="SeStringDrawParams.ShadowColor"/>
public uint ShadowColor = ssdp.ShadowColor ?? 0xFF000000;
/// <inheritdoc cref="SeStringDrawParams.LinkHoverBackColor"/>
public readonly uint LinkHoverBackColor = ssdp.LinkHoverBackColor ?? ImGui.GetColorU32(ImGuiCol.ButtonHovered);
/// <inheritdoc cref="SeStringDrawParams.LinkActiveBackColor"/>
public readonly uint LinkActiveBackColor = ssdp.LinkActiveBackColor ?? ImGui.GetColorU32(ImGuiCol.ButtonActive);
/// <inheritdoc cref="SeStringDrawParams.ForceEdgeColor"/>
public readonly bool ForceEdgeColor = ssdp.ForceEdgeColor;
/// <inheritdoc cref="SeStringDrawParams.Bold"/>
public bool Bold = ssdp.Bold;
/// <inheritdoc cref="SeStringDrawParams.Italic"/>
public bool Italic = ssdp.Italic;
/// <inheritdoc cref="SeStringDrawParams.Edge"/>
public bool Edge = ssdp.Edge;
/// <inheritdoc cref="SeStringDrawParams.Shadow"/>
public bool Shadow = ssdp.Shadow;
}
}

View file

@ -0,0 +1,31 @@
using System.Linq;
using System.Numerics;
using Dalamud.Game.Text.SeStringHandling;
namespace Dalamud.Interface.ImGuiSeStringRenderer;
/// <summary>Represents the result of a rendered interactable SeString.</summary>
public ref struct SeStringDrawResult
{
    /// <summary>Cache for <see cref="InteractedPayload"/>; <c>null</c> until a payload has been parsed.</summary>
    private Payload? lazyPayload;

    /// <summary>Gets the visible size of the text rendered/to be rendered.</summary>
    public Vector2 Size { get; init; }

    /// <summary>Gets a value indicating whether a payload or the whole text has been clicked.</summary>
    public bool Clicked { get; init; }

    /// <summary>Gets the offset of the interacted payload, or <c>-1</c> if none.</summary>
    public int InteractedPayloadOffset { get; init; }

    /// <summary>Gets the interacted payload envelope, or <see cref="ReadOnlySpan{T}.Empty"/> if none.</summary>
    public ReadOnlySpan<byte> InteractedPayloadEnvelope { get; init; }

    /// <summary>Gets the interacted payload, or <c>null</c> if none.</summary>
    /// <remarks>The payload is parsed from <see cref="InteractedPayloadEnvelope"/> on access, and a non-null result
    /// is cached for subsequent accesses.</remarks>
    public Payload? InteractedPayload
    {
        get
        {
            // Reuse the payload parsed by an earlier access.
            if (this.lazyPayload is { } cached)
                return cached;

            // No envelope means nothing was interacted with.
            if (this.InteractedPayloadEnvelope.IsEmpty)
                return null;

            var parsed = SeString.Parse(this.InteractedPayloadEnvelope).Payloads.FirstOrDefault();
            this.lazyPayload = parsed;
            return parsed;
        }
    }
}