refactor: improve sanitizer

This commit is contained in:
kalilistic 2021-04-04 13:09:54 -04:00
parent e5524539a3
commit 20dfe5cbc9
2 changed files with 81 additions and 98 deletions

View file

@ -1,67 +1,32 @@
// ReSharper disable StringLiteralTypo // ReSharper disable StringLiteralTypo
using System.Collections.Generic;
using System.Linq; using System.Linq;
using Xunit; using Xunit;
namespace Dalamud.Test.Plugin.Sanitizer { namespace Dalamud.Test.Plugin.Sanitizer {
public class SanitizerTests { public class SanitizerTests {
global::Dalamud.Plugin.Sanitizer.Sanitizer sanitizer; private global::Dalamud.Plugin.Sanitizer.Sanitizer sanitizer;
[Theory]
[InlineData( ClientLanguage.English, "Pixie Cotton Hood of Healing", "Pixie Cotton Hood of Healing" )]
[InlineData( ClientLanguage.Japanese, "アラガントームストーン:真理", "アラガントームストーン:真理" )]
[InlineData( ClientLanguage.German, "Anemos-Pan\x02\x16\x01\x03zer\x02\x16\x01\x03hand\x02\x16\x01\x03schu\x02\x16\x01\x03he des Drachenbluts", "Anemos-Panzerhandschuhe des Drachenbluts" )]
[InlineData( ClientLanguage.German, "Bienen-Spatha †", "Bienen-Spatha" )]
[InlineData( ClientLanguage.French, "Le Diademe\x02\x1D\x01\x03: terrains de chasse|Le Diademe\x02\x1D\x01\x03: terrains de chasse", "Le Diademe: terrains de chasse|Le Diademe: terrains de chasse" )]
[InlineData( ClientLanguage.French, "Cuir de bœuf", "Cuir de boeuf" )]
public void StringsAreSanitizedCorrectly(
ClientLanguage clientLanguage, string unsanitizedString, string sanitizedString)
{
var sanitizedStrings = new List<string> {unsanitizedString};
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(clientLanguage);
Assert.Equal(sanitizedString, sanitizer.Sanitize(unsanitizedString));
Assert.Equal(sanitizedString, sanitizer.Sanitize(sanitizedStrings).First());
[Fact]
public void Sanitize_NormalString_NoChange() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.English); sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.English);
const string str = "Pixie Cotton Hood of Healing"; Assert.Equal(sanitizedString, sanitizer.Sanitize(unsanitizedString, clientLanguage));
var sanitizedString = sanitizer.Sanitize(str); Assert.Equal(sanitizedString, sanitizer.Sanitize(sanitizedStrings, clientLanguage).First());
Assert.Equal(str, sanitizedString);
}
[Fact]
public void Sanitize_DESpecialCharacters_Sanitized() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.German);
const string str = @"Anemos-Panzerhandschuhe des Drachenbluts";
var sanitizedString = sanitizer.Sanitize(str);
Assert.Equal(@"Anemos-Panzerhandschuhe des Drachenbluts", sanitizedString);
}
[Fact]
public void Sanitize_FRSpecialCharacters_Sanitized() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.French);
const string str = @"Le Diademe: terrains de chasse|Le Diademe: terrains de chasse";
var sanitizedString = sanitizer.Sanitize(str);
Assert.Equal(@"Le Diademe: terrains de chasse|Le Diademe: terrains de chasse", sanitizedString);
}
[Fact]
public void Sanitize_SpecifyLanguage_Sanitized() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.French);
const string str = @"Le Diademe: terrains de chasse|Le Diademe: terrains de chasse";
var sanitizedString = sanitizer.Sanitize(str, ClientLanguage.French);
Assert.Equal(@"Le Diademe: terrains de chasse|Le Diademe: terrains de chasse", sanitizedString);
}
[Fact]
public void Sanitize_List_Sanitized() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.German);
const string str = @"Anemos-Panzerhandschuhe des Drachenbluts";
var sanitizedStrings = sanitizer.Sanitize(new[] {str});
Assert.Equal(@"Anemos-Panzerhandschuhe des Drachenbluts", sanitizedStrings.First());
}
[Fact]
public void Sanitize_SpecifyLanguageList_Sanitized() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.German);
const string str = @"Anemos-Panzerhandschuhe des Drachenbluts";
var sanitizedStrings = sanitizer.Sanitize(new[] {str}, ClientLanguage.German);
Assert.Equal(@"Anemos-Panzerhandschuhe des Drachenbluts", sanitizedStrings.First());
}
[Fact]
public void Sanitize_UseAlternateLanguage_Sanitized() {
sanitizer = new global::Dalamud.Plugin.Sanitizer.Sanitizer(ClientLanguage.English);
const string str = @"Anemos-Panzerhandschuhe des Drachenbluts";
var sanitizedStrings = sanitizer.Sanitize(new[] {str}, ClientLanguage.German);
Assert.Equal(@"Anemos-Panzerhandschuhe des Drachenbluts", sanitizedStrings.First());
} }
} }
} }

View file

@ -1,7 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text;
namespace Dalamud.Plugin.Sanitizer namespace Dalamud.Plugin.Sanitizer
{ {
@ -10,21 +9,25 @@ namespace Dalamud.Plugin.Sanitizer
/// </summary> /// </summary>
public class Sanitizer : ISanitizer public class Sanitizer : ISanitizer
{ {
private readonly KeyValuePair<string, string> softHyphen = new KeyValuePair<string, string>("\u0002\u0016\u0001\u0003", string.Empty); private static readonly Dictionary<string, string> DESanitizationDict = new Dictionary<string, string>
private readonly KeyValuePair<string, string> emphasisOpen = new KeyValuePair<string, string>("\u0002\u001A\u0003", string.Empty); {
private readonly KeyValuePair<string, string> emphasisClose = new KeyValuePair<string, string>("\u0002\u001A\u0001\u0003", string.Empty); { "\u0020\u2020", string.Empty }, // dagger
private readonly KeyValuePair<string, string> indent = new KeyValuePair<string, string>("\u0002\u001D\u0001\u0003", string.Empty); };
private readonly KeyValuePair<string, string> dagger = new KeyValuePair<string, string>("\u0020\u2020", string.Empty);
private readonly KeyValuePair<string, string> ligatureOE = new KeyValuePair<string, string>("\u0153", "\u006F\u0065"); private static readonly Dictionary<string, string> FRSanitizationDict = new Dictionary<string, string>
private readonly List<KeyValuePair<string, string>> defaultSanitizationList; {
{ "\u0153", "\u006F\u0065" }, // ligature oe
};
private readonly ClientLanguage defaultClientLanguage;
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="Sanitizer"/> class. /// Initializes a new instance of the <see cref="Sanitizer"/> class.
/// </summary> /// </summary>
/// <param name="clientLanguage">Default clientLanguage for sanitizing strings.</param> /// <param name="defaultClientLanguage">Default clientLanguage for sanitizing strings.</param>
public Sanitizer(ClientLanguage clientLanguage) public Sanitizer(ClientLanguage defaultClientLanguage)
{ {
this.defaultSanitizationList = this.BuildSanitizationList(clientLanguage); this.defaultClientLanguage = defaultClientLanguage;
} }
/// <summary> /// <summary>
@ -34,7 +37,7 @@ namespace Dalamud.Plugin.Sanitizer
/// <returns>A sanitized string.</returns> /// <returns>A sanitized string.</returns>
public string Sanitize(string unsanitizedString) public string Sanitize(string unsanitizedString)
{ {
return this.defaultSanitizationList == null ? unsanitizedString : ApplySanitizationList(unsanitizedString, this.defaultSanitizationList); return SanitizeByLanguage(unsanitizedString, this.defaultClientLanguage);
} }
/// <summary> /// <summary>
@ -45,8 +48,7 @@ namespace Dalamud.Plugin.Sanitizer
/// <returns>A sanitized string.</returns> /// <returns>A sanitized string.</returns>
public string Sanitize(string unsanitizedString, ClientLanguage clientLanguage) public string Sanitize(string unsanitizedString, ClientLanguage clientLanguage)
{ {
var newSanitizationList = this.BuildSanitizationList(clientLanguage); return SanitizeByLanguage(unsanitizedString, clientLanguage);
return newSanitizationList == null ? unsanitizedString : ApplySanitizationList(unsanitizedString, newSanitizationList);
} }
/// <summary> /// <summary>
@ -56,8 +58,7 @@ namespace Dalamud.Plugin.Sanitizer
/// <returns>A list of sanitized strings.</returns> /// <returns>A list of sanitized strings.</returns>
public IEnumerable<string> Sanitize(IEnumerable<string> unsanitizedStrings) public IEnumerable<string> Sanitize(IEnumerable<string> unsanitizedStrings)
{ {
return this.defaultSanitizationList == null ? unsanitizedStrings.Select(unsanitizedString => unsanitizedString) : return SanitizeByLanguage(unsanitizedStrings, this.defaultClientLanguage);
unsanitizedStrings.Select(unsanitizedString => ApplySanitizationList(unsanitizedString, this.defaultSanitizationList));
} }
/// <summary> /// <summary>
@ -68,46 +69,63 @@ namespace Dalamud.Plugin.Sanitizer
/// <returns>A list of sanitized strings.</returns> /// <returns>A list of sanitized strings.</returns>
public IEnumerable<string> Sanitize(IEnumerable<string> unsanitizedStrings, ClientLanguage clientLanguage) public IEnumerable<string> Sanitize(IEnumerable<string> unsanitizedStrings, ClientLanguage clientLanguage)
{ {
var newSanitizationList = this.BuildSanitizationList(clientLanguage); return SanitizeByLanguage(unsanitizedStrings, clientLanguage);
return newSanitizationList == null ? unsanitizedStrings.Select(unsanitizedString => unsanitizedString) :
unsanitizedStrings.Select(unsanitizedString => ApplySanitizationList(unsanitizedString, newSanitizationList));
} }
private static string ApplySanitizationList(string unsanitizedString, IEnumerable<KeyValuePair<string, string>> sanitizationList) private static string SanitizeByLanguage(string unsanitizedString, ClientLanguage clientLanguage)
{
var sanitizedValue = new StringBuilder(unsanitizedString);
foreach (var item in sanitizationList) sanitizedValue.Replace(item.Key, item.Value);
return sanitizedValue.ToString();
}
private List<KeyValuePair<string, string>> BuildSanitizationList(ClientLanguage clientLanguage)
{ {
var sanitizedString = FilterUnprintableCharacters(unsanitizedString);
switch (clientLanguage) switch (clientLanguage)
{ {
case ClientLanguage.Japanese: case ClientLanguage.Japanese:
break;
case ClientLanguage.English: case ClientLanguage.English:
break; return sanitizedString;
case ClientLanguage.German: case ClientLanguage.German:
return new List<KeyValuePair<string, string>> return FilterByDict(sanitizedString, DESanitizationDict);
{
this.softHyphen,
this.emphasisOpen,
this.emphasisClose,
this.dagger,
};
case ClientLanguage.French: case ClientLanguage.French:
return new List<KeyValuePair<string, string>> return FilterByDict(sanitizedString, FRSanitizationDict);
{
this.softHyphen,
this.indent,
this.ligatureOE,
};
default: default:
throw new ArgumentOutOfRangeException(); throw new ArgumentOutOfRangeException(nameof(clientLanguage), clientLanguage, null);
}
} }
return null; private static IEnumerable<string> SanitizeByLanguage(
IEnumerable<string> unsanitizedStrings, ClientLanguage clientLanguage)
{
// Sanitizes every string in unsanitizedStrings for the given client language and
// returns the results in a newly allocated list, in input order.
// The work is done eagerly: AddRange enumerates the Select projection right away,
// so the returned list no longer depends on the input sequence.
var sanitizedStrings = new List<string>();
switch (clientLanguage)
{
// Japanese and English only get characters below 0x20 stripped.
case ClientLanguage.Japanese:
case ClientLanguage.English:
sanitizedStrings.AddRange(unsanitizedStrings.Select(FilterUnprintableCharacters));
return sanitizedStrings;
// German: strip characters below 0x20 first, then apply the DE replacement dictionary.
case ClientLanguage.German:
sanitizedStrings.AddRange(
unsanitizedStrings.Select(
unsanitizedString =>
FilterByDict(FilterUnprintableCharacters(unsanitizedString), DESanitizationDict)));
return sanitizedStrings;
// French: strip characters below 0x20 first, then apply the FR replacement dictionary.
case ClientLanguage.French:
sanitizedStrings.AddRange(
unsanitizedStrings.Select(
unsanitizedString =>
FilterByDict(FilterUnprintableCharacters(unsanitizedString), FRSanitizationDict)));
return sanitizedStrings;
// Any other language value is rejected, even when the input sequence is empty,
// because the switch runs before anything is enumerated.
default:
throw new ArgumentOutOfRangeException(nameof(clientLanguage), clientLanguage, null);
}
}
/// <summary>
/// Removes every character whose code unit is below 0x20 from the given string.
/// </summary>
/// <param name="str">The string to filter; may be null.</param>
/// <returns>
/// The input with all characters below 0x20 dropped, or an empty string when
/// <paramref name="str"/> is null.
/// </returns>
private static string FilterUnprintableCharacters(string str)
{
    // A null input yields an empty string rather than null or an exception.
    if (str == null)
    {
        return string.Empty;
    }

    // The result can never be longer than the input, so one buffer of that size suffices.
    var kept = new char[str.Length];
    var keptCount = 0;
    foreach (var current in str)
    {
        if (current < 0x20)
        {
            continue;
        }

        kept[keptCount] = current;
        keptCount++;
    }

    return new string(kept, 0, keptCount);
}
/// <summary>
/// Applies every replacement in <paramref name="dict"/> to <paramref name="str"/>.
/// </summary>
/// <param name="str">The string to rewrite.</param>
/// <param name="dict">Maps each substring to search for (key) to its replacement text (value).</param>
/// <returns>The string after all replacements have been applied.</returns>
private static string FilterByDict(string str, Dictionary<string, string> dict)
{
// Fold over the dictionary entries in enumeration order, starting from str:
// each step runs string.Replace on the output of the previous step.
return dict.Aggregate(
str, (current, kvp) =>
current.Replace(kvp.Key, kvp.Value));
} }
} }
} }