SeStringEvaluator: Fix HeadAll not capitalizing correctly (#2240)

* Fix obsoletes

* Fix HeadAll not capitalizing correctly

* Fix incorrect denoun cases in SeString Creator

* Implement Utf8String.ToUpper in C#

* Handle characters with accents too

* Add remarks to ToUpper functions
This commit is contained in:
Haselnussbomber 2025-04-09 22:13:11 +02:00 committed by GitHub
parent f96e2ae37c
commit 499952b3d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 138 additions and 9 deletions

View file

@ -1,5 +1,8 @@
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Text;
using Dalamud.Game;
using FFXIVClientStructs.FFXIV.Client.UI;
@ -10,6 +13,9 @@ namespace Dalamud.Utility;
/// </summary>
public static class StringExtensions
{
private static readonly string[] CommonExcludedWords = ["sas", "zos", "van", "nan", "tol", "deus", "mal", "de", "rem", "out", "yae", "bas", "cen", "quo", "viator", "la"];
private static readonly string[] EnglishExcludedWords = ["of", "the", "to", "and", "a", "an", "or", "at", "by", "for", "in", "on", "with", "from", .. CommonExcludedWords];
/// <summary>
/// An extension method to chain usage of string.Format.
/// </summary>
@ -77,7 +83,7 @@ public static class StringExtensions
public static string StripSoftHyphen(this string input) => input.Replace("\u00AD", string.Empty);
/// <summary>
/// Truncates the given string to the specified maximum number of characters,
/// Truncates the given string to the specified maximum number of characters,
/// appending an ellipsis if truncation occurs.
/// </summary>
/// <param name="input">The string to truncate.</param>
@ -88,4 +94,128 @@ public static class StringExtensions
{
return string.IsNullOrEmpty(input) || input.Length <= maxChars ? input : input[..maxChars] + ellipses;
}
/// <summary>
/// Converts the input string to uppercase based on specified options like capitalizing the first character,
/// normalizing vowels, and excluding certain words based on the selected language.
/// </summary>
/// <param name="input">The input string to be converted to uppercase.</param>
/// <param name="firstCharOnly">Whether to capitalize only the first character of the string.</param>
/// <param name="everyWord">Whether to capitalize the first letter of each word.</param>
/// <param name="normalizeVowels">Whether to normalize vowels to uppercase if they appear at the beginning of a word.</param>
/// <param name="language">The language context used to determine which words to exclude from capitalization.</param>
/// <returns>A new string with the appropriate characters converted to uppercase.</returns>
/// <remarks>This is a C# implementation of Client::System::String::Utf8String.ToUpper with word exclusion lists as used by the HeadAll macro.</remarks>
public static string ToUpper(this string input, bool firstCharOnly, bool everyWord, bool normalizeVowels, ClientLanguage language)
{
return ToUpper(input, firstCharOnly, everyWord, normalizeVowels, language switch
{
ClientLanguage.Japanese => [],
ClientLanguage.English => EnglishExcludedWords,
ClientLanguage.German => CommonExcludedWords,
ClientLanguage.French => CommonExcludedWords,
_ => [],
});
}
/// <summary>
/// Converts the input string to uppercase based on specified options like capitalizing the first character,
/// normalizing vowels, and excluding certain words based on the selected language.
/// </summary>
/// <param name="input">The input string to be converted to uppercase.</param>
/// <param name="firstCharOnly">Whether to capitalize only the first character of the string.</param>
/// <param name="everyWord">Whether to capitalize the first letter of each word.</param>
/// <param name="normalizeVowels">Whether to normalize vowels to uppercase if they appear at the beginning of a word.</param>
/// <param name="excludedWords">A list of words to exclude from being capitalized. Words in this list will remain lowercase.</param>
/// <returns>A new string with the appropriate characters converted to uppercase.</returns>
/// <remarks>This is a C# implementation of Client::System::String::Utf8String.ToUpper.</remarks>
public static string ToUpper(this string input, bool firstCharOnly, bool everyWord, bool normalizeVowels, ReadOnlySpan<string> excludedWords)
{
if (string.IsNullOrEmpty(input))
return input;
var builder = new StringBuilder(input);
var isWordBeginning = true;
var length = firstCharOnly && !everyWord ? 1 : builder.Length;
for (var i = 0; i < length; i++)
{
var ch = builder[i];
if (ch == ' ')
{
isWordBeginning = true;
continue;
}
if (firstCharOnly && !isWordBeginning)
continue;
// Basic ASCII a-z
if (ch >= 'a' && ch <= 'z')
{
var substr = builder.ToString(i, builder.Length - i);
var isExcluded = false;
// Do not exclude words at the beginning
if (i > 0)
{
foreach (var excludedWord in excludedWords)
{
if (substr.StartsWith(excludedWord + " ", StringComparison.OrdinalIgnoreCase))
{
isExcluded = true;
break;
}
}
}
if (!isExcluded)
{
builder[i] = char.ToUpperInvariant(ch);
}
}
// Special œ → Œ
else if (ch == 'œ')
{
builder[i] = 'Œ';
}
// Characters with accents
else if (ch >= 'à' && ch <= 'ý' && ch != '÷')
{
builder[i] = char.ToUpperInvariant(ch);
}
// Normalize vowels with accents
else if (normalizeVowels && isWordBeginning)
{
if ("àáâãäå".Contains(ch))
{
builder[i] = 'A';
}
else if ("èéêë".Contains(ch))
{
builder[i] = 'E';
}
else if ("ìíîï".Contains(ch))
{
builder[i] = 'I';
}
else if ("òóôõö".Contains(ch))
{
builder[i] = 'O';
}
else if ("ùúûü".Contains(ch))
{
builder[i] = 'U';
}
}
isWordBeginning = false;
}
return builder.ToString();
}
}