Add Byte String stuff, remove Services, cleanup and refactor interop stuff, disable path resolver for the moment

This commit is contained in:
Ottermandias 2022-03-06 00:40:42 +01:00
parent 0e8f839471
commit c3454f1d16
65 changed files with 4707 additions and 3371 deletions

View file

@ -0,0 +1,94 @@
using System.Linq;
using System.Runtime.InteropServices;
using Penumbra.GameData.Util;
namespace Penumbra.GameData.ByteString;
public static unsafe partial class ByteStringFunctions
{
    // Lookup table mapping every byte value to its ASCII-lowercase form.
    // Only 'A'..'Z' are folded; every other byte maps to itself.
    // char.ToLowerInvariant must not be used here: it would also fold Latin-1 code points
    // (0xC0-0xDE), and those byte values are UTF-8 lead bytes, so folding them
    // would corrupt multi-byte sequences.
    private static readonly byte[] AsciiLowerCaseBytes = Enumerable.Range( 0, 256 )
        .Select( i => ( byte )( i >= 'A' && i <= 'Z' ? i + ( 'a' - 'A' ) : i ) )
        .ToArray();

    // Convert a byte to its ASCII-lowercase version.
    public static byte AsciiToLower( byte b )
        => AsciiLowerCaseBytes[ b ];

    // Check if a byte is ASCII-lowercase, i.e. unchanged by AsciiToLower.
    // Note that this is also true for all bytes that are not letters.
    public static bool AsciiIsLower( byte b )
        => AsciiToLower( b ) == b;

    // Check if a byte array of given length is ASCII-lowercase,
    // i.e. no byte in it would be changed by AsciiToLower.
    public static bool IsAsciiLowerCase( byte* path, int length )
    {
        var end = path + length;
        for( ; path < end; ++path )
        {
            if( *path != AsciiLowerCaseBytes[ *path ] )
            {
                return false;
            }
        }

        return true;
    }

    // Compare two byte arrays of given lengths ASCII-case-insensitive.
    // If one array is a prefix of the other, the shorter one sorts first.
    // The byte-wise comparison is delegated to Functions.MemCmpCaseInsensitiveUnchecked,
    // which is defined elsewhere (presumably memcmp-like, folding ASCII case).
    public static int AsciiCaselessCompare( byte* lhs, int lhsLength, byte* rhs, int rhsLength )
    {
        if( lhsLength == rhsLength )
        {
            // Same pointer and same length is trivially equal.
            return lhs == rhs ? 0 : Functions.MemCmpCaseInsensitiveUnchecked( lhs, rhs, rhsLength );
        }

        if( lhsLength < rhsLength )
        {
            var cmp = Functions.MemCmpCaseInsensitiveUnchecked( lhs, rhs, lhsLength );
            return cmp != 0 ? cmp : -1;
        }

        var cmp2 = Functions.MemCmpCaseInsensitiveUnchecked( lhs, rhs, rhsLength );
        return cmp2 != 0 ? cmp2 : 1;
    }

    // Check two byte arrays of given lengths for ASCII-case-insensitive equality.
    public static bool AsciiCaselessEquals( byte* lhs, int lhsLength, byte* rhs, int rhsLength )
    {
        if( lhsLength != rhsLength )
        {
            return false;
        }

        if( lhs == rhs || lhsLength == 0 )
        {
            return true;
        }

        return Functions.MemCmpCaseInsensitiveUnchecked( lhs, rhs, lhsLength ) == 0;
    }

    // Check if a byte array of given length consists purely of ASCII characters,
    // i.e. no byte has its highest bit set.
    public static bool IsAscii( byte* path, int length )
    {
        // Test eight bytes at once as long as full 8-byte words are available.
        var length8 = length / 8;
        var end8    = ( ulong* )path + length8;
        for( var ptr8 = ( ulong* )path; ptr8 < end8; ++ptr8 )
        {
            if( ( *ptr8 & 0x8080808080808080ul ) != 0 )
            {
                return false;
            }
        }

        // Test the remaining (at most seven) bytes one by one.
        var end = path + length;
        for( path += length8 * 8; path < end; ++path )
        {
            if( *path > 127 )
            {
                return false;
            }
        }

        return true;
    }
}

View file

@ -0,0 +1,95 @@
using Penumbra.GameData.Util;
namespace Penumbra.GameData.ByteString;
public static unsafe partial class ByteStringFunctions
{
    // Lexicographically compare two byte arrays of given length.
    // If one array is a prefix of the other, the shorter one sorts first.
    // The byte-wise comparison is delegated to Functions.MemCmpUnchecked, which is defined elsewhere.
    public static int Compare( byte* lhs, int lhsLength, byte* rhs, int rhsLength )
    {
        if( lhsLength == rhsLength )
        {
            // Same pointer and same length is trivially equal.
            return lhs == rhs ? 0 : Functions.MemCmpUnchecked( lhs, rhs, rhsLength );
        }

        if( lhsLength < rhsLength )
        {
            var cmp = Functions.MemCmpUnchecked( lhs, rhs, lhsLength );
            return cmp != 0 ? cmp : -1;
        }

        var cmp2 = Functions.MemCmpUnchecked( lhs, rhs, rhsLength );
        return cmp2 != 0 ? cmp2 : 1;
    }

    // Lexicographically compare one byte array of given length with a null-terminated byte array of unknown length.
    // Returns a negative value if lhs sorts first, a positive value if rhs sorts first and 0 if both are equal.
    public static int Compare( byte* lhs, int lhsLength, byte* rhs )
    {
        var end = lhs + lhsLength;
        for( var tmp = lhs; tmp < end; ++tmp, ++rhs )
        {
            // rhs ended first, so it is a proper prefix of lhs.
            if( *rhs == 0 )
            {
                return 1;
            }

            var diff = *tmp - *rhs;
            if( diff != 0 )
            {
                return diff;
            }
        }

        // lhs is exhausted: the strings are only equal if rhs ends here, too.
        // Otherwise lhs is a proper prefix of rhs and sorts first.
        return *rhs == 0 ? 0 : -1;
    }

    // Lexicographically compare two null-terminated byte arrays of unknown length not larger than maxLength.
    // At most maxLength bytes are inspected; if no difference and no terminator is found in them, 0 is returned.
    public static int Compare( byte* lhs, byte* rhs, int maxLength = int.MaxValue )
    {
        // Use an index instead of an end pointer so that lhs + int.MaxValue can never wrap around.
        for( var i = 0; i < maxLength; ++i )
        {
            var l = lhs[ i ];
            var r = rhs[ i ];
            if( l == 0 )
            {
                return r == 0 ? 0 : -1;
            }

            if( r == 0 )
            {
                return 1;
            }

            var diff = l - r;
            if( diff != 0 )
            {
                return diff;
            }
        }

        return 0;
    }

    // Check two byte arrays of given length for equality.
    public static bool Equals( byte* lhs, int lhsLength, byte* rhs, int rhsLength )
    {
        if( lhsLength != rhsLength )
        {
            return false;
        }

        if( lhs == rhs || lhsLength == 0 )
        {
            return true;
        }

        return Functions.MemCmpUnchecked( lhs, rhs, lhsLength ) == 0;
    }

    // Check one byte array of given length for equality against a null-terminated byte array of unknown length.
    private static bool Equal( byte* lhs, int lhsLength, byte* rhs )
        => Compare( lhs, lhsLength, rhs ) == 0;

    // Check two null-terminated byte arrays of unknown length not larger than maxLength for equality.
    private static bool Equal( byte* lhs, byte* rhs, int maxLength = int.MaxValue )
        => Compare( lhs, rhs, maxLength ) == 0;
}

View file

@ -0,0 +1,68 @@
using System;
using System.Runtime.InteropServices;
using System.Text;
using Penumbra.GameData.Util;
namespace Penumbra.GameData.ByteString;
public static unsafe partial class ByteStringFunctions
{
    // Used for static null-terminators.
    // Allocates a single unmanaged zero byte on construction and frees it in the finalizer.
    public class NullTerminator
    {
        public readonly byte* NullBytePtr;

        public NullTerminator()
        {
            NullBytePtr  = ( byte* )Marshal.AllocHGlobal( 1 );
            *NullBytePtr = 0;
        }

        ~NullTerminator()
            => Marshal.FreeHGlobal( ( IntPtr )NullBytePtr );
    }

    // Convert a C# unicode-string to an unmanaged, null-terminated UTF8-byte array and return the pointer.
    // length receives the number of UTF8 bytes, not counting the terminator.
    // If that length is not smaller than the given maxLength, nothing is allocated and a nullpointer is returned instead.
    // The returned memory is allocated with Marshal.AllocHGlobal and has to be freed by the caller.
    public static byte* Utf8FromString( string s, out int length, int maxLength = int.MaxValue )
    {
        length = Encoding.UTF8.GetByteCount( s );
        if( length >= maxLength )
        {
            return null;
        }

        var path = ( byte* )Marshal.AllocHGlobal( length + 1 );
        fixed( char* ptr = s )
        {
            // The second argument is the number of chars to encode, not the number of bytes.
            // Passing the byte count would read beyond the end of any string containing non-ASCII characters.
            Encoding.UTF8.GetBytes( ptr, s.Length, path, length + 1 );
        }

        path[ length ] = 0;
        return path;
    }

    // Create a null-terminated copy of a given string of given length and return the pointer.
    // The returned memory is allocated with Marshal.AllocHGlobal and has to be freed by the caller.
    public static byte* CopyString( byte* path, int length )
    {
        var ret = ( byte* )Marshal.AllocHGlobal( length + 1 );
        Functions.MemCpyUnchecked( ret, path, length );
        ret[ length ] = 0;
        return ret;
    }

    // Check the length of a null-terminated byte array no longer than the given maxLength.
    // Throws ArgumentOutOfRangeException if no null-terminator is found in the first maxLength bytes.
    public static int CheckLength( byte* path, int maxLength = int.MaxValue )
    {
        // Use an index instead of an end pointer so that path + int.MaxValue can never wrap around.
        for( var i = 0; i < maxLength; ++i )
        {
            if( path[ i ] == 0 )
            {
                return i;
            }
        }

        // The single-string constructor takes a parameter name, so pass name and message separately.
        throw new ArgumentOutOfRangeException( nameof( path ), "Null-terminated path too long" );
    }
}

View file

@ -0,0 +1,45 @@
using System.Runtime.InteropServices;
namespace Penumbra.GameData.ByteString;
public static unsafe partial class ByteStringFunctions
{
    // Overwrite every byte equal to from with to in a byte array of known length.
    // The array is modified in place; the return value is the number of bytes changed.
    public static int Replace( byte* ptr, int length, byte from, byte to )
    {
        var count = 0;
        for( var i = 0; i < length; ++i )
        {
            if( ptr[ i ] != from )
            {
                continue;
            }

            ptr[ i ] = to;
            ++count;
        }

        return count;
    }

    // Map every byte of a byte array of given length through the ASCII-lowercase table, in place.
    public static void AsciiToLowerInPlace( byte* path, int length )
    {
        var end = path + length;
        for( var cur = path; cur < end; ++cur )
        {
            *cur = AsciiLowerCaseBytes[ *cur ];
        }
    }

    // Allocate a null-terminated copy of a byte array in which every byte was mapped through the ASCII-lowercase table.
    // The copy is allocated with Marshal.AllocHGlobal and has to be freed by the caller.
    public static byte* AsciiToLower( byte* path, int length )
    {
        var copy = ( byte* )Marshal.AllocHGlobal( length + 1 );
        var idx  = 0;
        while( idx < length )
        {
            copy[ idx ] = AsciiLowerCaseBytes[ path[ idx ] ];
            ++idx;
        }

        copy[ length ] = 0;
        return copy;
    }
}

View file

@ -0,0 +1,96 @@
using System;
using System.IO;
using Penumbra.GameData.Util;
namespace Penumbra.GameData.ByteString;
// A full file system path, stored as the original C# string as well as an
// ASCII-lowercased UTF8 byte string (InternalName) and the Crc64 of that byte string.
public readonly struct FullPath : IComparable, IEquatable< FullPath >
{
    public readonly string     FullName;
    public readonly Utf8String InternalName;
    public readonly ulong      Crc64;

    // Combine a base directory and a relative path to a full path.
    public FullPath( DirectoryInfo baseDir, NewRelPath relPath )
        : this( Path.Combine( baseDir.FullName, relPath.ToString() ) )
    { }

    public FullPath( FileInfo file )
        : this( file.FullName )
    { }

    // InternalName falls back to the empty string if the conversion fails.
    public FullPath( string s )
    {
        FullName     = s;
        InternalName = Utf8String.FromString( FullName, out var name, true ) ? name : Utf8String.Empty;
        Crc64        = Functions.ComputeCrc64( InternalName.Span );
    }

    public bool Exists
        => File.Exists( FullName );

    public string Extension
        => Path.GetExtension( FullName );

    public string Name
        => Path.GetFileName( FullName );

    // Number of bytes the full name of dir takes up in UTF8.
    // InternalName is a UTF8 byte string, so offsets into it have to be byte counts, not UTF16 char counts.
    private static int PrefixByteLength( DirectoryInfo dir )
        => System.Text.Encoding.UTF8.GetByteCount( dir.FullName );

    // Convert the part of the path below dir to a game path with forward slashes.
    // Fails if the path is not pure ASCII or does not start with the full name of dir.
    public bool ToGamePath( DirectoryInfo dir, out NewGamePath path )
    {
        path = NewGamePath.Empty;
        // Paths are identifiers, not text, so the prefix check is ordinal instead of culture-sensitive.
        if( !InternalName.IsAscii || !FullName.StartsWith( dir.FullName, StringComparison.Ordinal ) )
        {
            return false;
        }

        // + 1 skips the directory separator following the base directory.
        var substring = InternalName.Substring( PrefixByteLength( dir ) + 1 );

        path = new NewGamePath( substring.Replace( ( byte )'\\', ( byte )'/' ) );
        return true;
    }

    // Convert the part of the path below dir to a relative path.
    // Fails if the path does not start with the full name of dir.
    // The resulting path refers to a non-owning substring of InternalName.
    public bool ToRelPath( DirectoryInfo dir, out NewRelPath path )
    {
        path = NewRelPath.Empty;
        if( !FullName.StartsWith( dir.FullName, StringComparison.Ordinal ) )
        {
            return false;
        }

        // The path may contain non-ASCII characters here, so the char count of dir.FullName
        // would be a wrong offset into the UTF8 bytes.
        var substring = InternalName.Substring( PrefixByteLength( dir ) + 1 );

        path = new NewRelPath( substring );
        return true;
    }

    // Compare against other paths, file infos, byte strings or strings.
    // Objects of any other type (including null) compare as larger than this.
    public int CompareTo( object? obj )
        => obj switch
        {
            FullPath p   => InternalName.CompareTo( p.InternalName ),
            FileInfo f   => string.Compare( FullName, f.FullName, StringComparison.InvariantCultureIgnoreCase ),
            Utf8String u => InternalName.CompareTo( u ),
            string s     => string.Compare( FullName, s, StringComparison.InvariantCultureIgnoreCase ),
            _            => -1,
        };

    // Two paths are equal if their Crc64 and their internal names agree.
    public bool Equals( FullPath other )
    {
        if( Crc64 != other.Crc64 )
        {
            return false;
        }

        if( FullName.Length == 0 || other.FullName.Length == 0 )
        {
            return true;
        }

        return InternalName.Equals( other.InternalName );
    }

    // Keep object equality consistent with IEquatable and GetHashCode.
    public override bool Equals( object? obj )
        => obj is FullPath other && Equals( other );

    public override int GetHashCode()
        => InternalName.Crc32;

    public override string ToString()
        => FullName;
}

View file

@ -0,0 +1,159 @@
using System;
using System.IO;
using Dalamud.Utility;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace Penumbra.GameData.ByteString;
// NewGamePath wraps some additional validity checking around Utf8String,
// provides some filesystem helpers, and conversion to Json.
[JsonConverter( typeof( NewGamePathConverter ) )]
public readonly struct NewGamePath : IEquatable< NewGamePath >, IComparable< NewGamePath >, IDisposable
{
    public const int MaxGamePathLength = 256;

    public readonly        Utf8String  Path;
    public static readonly NewGamePath Empty = new(Utf8String.Empty);

    // Wrap a byte string without any validation.
    internal NewGamePath( Utf8String s )
        => Path = s;

    public int Length
        => Path.Length;

    public bool IsEmpty
        => Path.IsEmpty;

    public NewGamePath ToLower()
        => new(Path.AsciiToLower());

    // Create a game path from a null-terminated byte pointer, optionally converted to ASCII-lowercase.
    public static unsafe bool FromPointer( byte* ptr, out NewGamePath path, bool lower = false )
    {
        var utf = new Utf8String( ptr );
        return ReturnChecked( utf, out path, lower );
    }

    // Create a game path from a span of bytes, optionally converted to ASCII-lowercase.
    public static bool FromSpan( ReadOnlySpan< byte > data, out NewGamePath path, bool lower = false )
    {
        var utf = Utf8String.FromSpanUnsafe( data, false, null, null );
        return ReturnChecked( utf, out path, lower );
    }

    // Does not check for Forward/Backslashes due to assuming that SE-strings use the correct one.
    // Does not check for initial slashes either, since they are assumed to be by choice.
    // Checks for maxlength and ASCII, and converts to lowercase if requested.
    private static bool ReturnChecked( Utf8String utf, out NewGamePath path, bool lower = false )
    {
        path = Empty;
        if( !utf.IsAscii || utf.Length > MaxGamePathLength )
        {
            return false;
        }

        path = new NewGamePath( lower ? utf.AsciiToLower() : utf );
        return true;
    }

    public NewGamePath Clone()
        => new(Path.Clone());

    // Create a game path from a C# string.
    // Backslashes are converted to forward slashes and leading slashes are removed.
    // Null or empty input yields the empty path and succeeds.
    // Fails if the result is longer than MaxGamePathLength or not pure ASCII.
    public static bool FromString( string? s, out NewGamePath path, bool toLower = false )
    {
        path = Empty;
        if( s.IsNullOrEmpty() )
        {
            return true;
        }

        // Strings are immutable: TrimStart returns the trimmed string, so its result has to be used.
        var substring = s!.Replace( '\\', '/' ).TrimStart( '/' );
        if( substring.Length > MaxGamePathLength )
        {
            return false;
        }

        if( substring.Length == 0 )
        {
            return true;
        }

        if( !Utf8String.FromString( substring, out var ascii, toLower ) || !ascii.IsAscii )
        {
            return false;
        }

        path = new NewGamePath( ascii );
        return true;
    }

    // Create a game path from the part of a file's full name below baseDir.
    // Fails if the file's full name does not start with the full name of baseDir.
    public static bool FromFile( FileInfo file, DirectoryInfo baseDir, out NewGamePath path, bool toLower = false )
    {
        path = Empty;
        // Paths are identifiers, not text, so the prefix check is ordinal instead of culture-sensitive.
        if( !file.FullName.StartsWith( baseDir.FullName, StringComparison.Ordinal ) )
        {
            return false;
        }

        var substring = file.FullName[ baseDir.FullName.Length.. ];
        return FromString( substring, out path, toLower );
    }

    // Everything behind the last forward slash, or the whole path if there is none.
    public Utf8String Filename()
    {
        var idx = Path.LastIndexOf( ( byte )'/' );
        return idx == -1 ? Path : Path.Substring( idx + 1 );
    }

    // Everything from the last dot on (including the dot), or the empty string if there is none.
    public Utf8String Extension()
    {
        var idx = Path.LastIndexOf( ( byte )'.' );
        return idx == -1 ? Utf8String.Empty : Path.Substring( idx );
    }

    public bool Equals( NewGamePath other )
        => Path.Equals( other.Path );

    // Keep object equality consistent with IEquatable and GetHashCode.
    public override bool Equals( object? obj )
        => obj is NewGamePath other && Equals( other );

    public override int GetHashCode()
        => Path.GetHashCode();

    public int CompareTo( NewGamePath other )
        => Path.CompareTo( other.Path );

    public override string ToString()
        => Path.ToString();

    public void Dispose()
        => Path.Dispose();

    // A path counts as rooted if it starts with a slash or backslash,
    // or with an ASCII letter followed by a colon (drive letter).
    public bool IsRooted()
        => Path.Length >= 1 && ( Path[ 0 ] == '/' || Path[ 0 ] == '\\' )
         || Path.Length >= 2
         && ( Path[ 0 ] >= 'A' && Path[ 0 ] <= 'Z' || Path[ 0 ] >= 'a' && Path[ 0 ] <= 'z' )
         && Path[ 1 ] == ':';

    // Serializes game paths as plain Json strings and reads them back as lowercased paths.
    private class NewGamePathConverter : JsonConverter
    {
        public override bool CanConvert( Type objectType )
            => objectType == typeof( NewGamePath );

        public override object ReadJson( JsonReader reader, Type objectType, object? existingValue, JsonSerializer serializer )
        {
            var token = JToken.Load( reader ).ToString();
            return FromString( token, out var p, true )
                ? p
                : throw new JsonException( $"Could not convert \"{token}\" to {nameof( NewGamePath )}." );
        }

        public override bool CanWrite
            => true;

        // Values of any other type are silently ignored.
        public override void WriteJson( JsonWriter writer, object? value, JsonSerializer serializer )
        {
            if( value is NewGamePath p )
            {
                serializer.Serialize( writer, p.ToString() );
            }
        }
    }
}

View file

@ -0,0 +1,133 @@
using System;
using System.IO;
using Dalamud.Utility;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace Penumbra.GameData.ByteString;
// NewRelPath wraps a Utf8String that holds a path relative to some base directory,
// using backslashes as separators, and provides conversion to game paths and Json.
[JsonConverter( typeof( NewRelPathConverter ) )]
public readonly struct NewRelPath : IEquatable< NewRelPath >, IComparable< NewRelPath >, IDisposable
{
    public const int MaxRelPathLength = 250;

    public readonly        Utf8String Path;
    public static readonly NewRelPath Empty = new(Utf8String.Empty);

    // Wrap a byte string without any validation.
    internal NewRelPath( Utf8String path )
        => Path = path;

    // Create a relative path from a C# string.
    // Forward slashes are converted to backslashes and leading backslashes are removed.
    // Null or empty input yields the empty path and succeeds.
    // Fails if the result is longer than MaxRelPathLength or not pure ASCII.
    public static bool FromString( string? s, out NewRelPath path )
    {
        path = Empty;
        if( s.IsNullOrEmpty() )
        {
            return true;
        }

        // Strings are immutable: TrimStart returns the trimmed string, so its result has to be used.
        var substring = s!.Replace( '/', '\\' ).TrimStart( '\\' );
        if( substring.Length > MaxRelPathLength )
        {
            return false;
        }

        if( substring.Length == 0 )
        {
            return true;
        }

        if( !Utf8String.FromString( substring, out var ascii ) || !ascii.IsAscii )
        {
            return false;
        }

        path = new NewRelPath( ascii );
        return true;
    }

    // Create a relative path from the part of a file's full name below baseDir.
    // Fails if the file's full name does not start with the full name of baseDir.
    public static bool FromFile( FileInfo file, DirectoryInfo baseDir, out NewRelPath path )
    {
        path = Empty;
        // Paths are identifiers, not text, so the prefix check is ordinal instead of culture-sensitive.
        if( !file.FullName.StartsWith( baseDir.FullName, StringComparison.Ordinal ) )
        {
            return false;
        }

        var substring = file.FullName[ baseDir.FullName.Length.. ];
        return FromString( substring, out path );
    }

    // Same as above, but starting from a FullPath.
    public static bool FromFile( FullPath file, DirectoryInfo baseDir, out NewRelPath path )
    {
        path = Empty;
        if( !file.FullName.StartsWith( baseDir.FullName, StringComparison.Ordinal ) )
        {
            return false;
        }

        var substring = file.FullName[ baseDir.FullName.Length.. ];
        return FromString( substring, out path );
    }

    // Create a relative path from a game path by replacing forward slashes with backslashes.
    public NewRelPath( NewGamePath gamePath )
        => Path = gamePath.Path.Replace( ( byte )'/', ( byte )'\\' );

    // Convert to an owned, lowercased game path with forward slashes,
    // optionally dropping the first skipFolders folders.
    // Returns the empty game path if the path has fewer folders than are to be skipped.
    public unsafe NewGamePath ToGamePath( int skipFolders = 0 )
    {
        var idx = 0;
        while( skipFolders > 0 )
        {
            // IndexOf returns -1 if no further separator exists, which makes idx 0 here.
            idx = Path.IndexOf( ( byte )'\\', idx ) + 1;
            --skipFolders;
            if( idx <= 0 )
            {
                return NewGamePath.Empty;
            }
        }

        var length = Path.Length - idx;
        var ptr    = ByteStringFunctions.CopyString( Path.Path + idx, length );
        ByteStringFunctions.Replace( ptr, length, ( byte )'\\', ( byte )'/' );
        ByteStringFunctions.AsciiToLowerInPlace( ptr, length );
        // The copy is null-terminated and owned, and is marked as lowercase and ASCII.
        var utf = new Utf8String().Setup( ptr, length, null, true, true, true, true );
        return new NewGamePath( utf );
    }

    public int CompareTo( NewRelPath rhs )
        => Path.CompareTo( rhs.Path );

    public bool Equals( NewRelPath other )
        => Path.Equals( other.Path );

    // Keep object equality and hashing consistent with IEquatable.
    public override bool Equals( object? obj )
        => obj is NewRelPath other && Equals( other );

    public override int GetHashCode()
        => Path.GetHashCode();

    public override string ToString()
        => Path.ToString();

    public void Dispose()
        => Path.Dispose();

    // Serializes relative paths as plain Json strings.
    private class NewRelPathConverter : JsonConverter
    {
        public override bool CanConvert( Type objectType )
            => objectType == typeof( NewRelPath );

        public override object ReadJson( JsonReader reader, Type objectType, object? existingValue, JsonSerializer serializer )
        {
            var token = JToken.Load( reader ).ToString();
            return FromString( token, out var p )
                ? p
                : throw new JsonException( $"Could not convert \"{token}\" to {nameof( NewRelPath )}." );
        }

        public override bool CanWrite
            => true;

        // Values of any other type are silently ignored.
        public override void WriteJson( JsonWriter writer, object? value, JsonSerializer serializer )
        {
            if( value is NewRelPath p )
            {
                serializer.Serialize( writer, p.ToString() );
            }
        }
    }
}

View file

@ -0,0 +1,75 @@
using System;
using System.Collections;
using System.Collections.Generic;
namespace Penumbra.GameData.ByteString;
// Utf8String wraps an unsafe byte string.
// It can hold an owned string living in unmanaged memory,
// or merely point to a string it does not own.
// The contents of an unowned string may change behind its back and corrupt it,
// so unowned strings should only be used locally and never be stored without great care.
// Besides ownership, the string can remember further facts about itself:
// whether it is pure ASCII, ASCII-lowercase or null-terminated.
// Owned strings are always null-terminated.
// Any constructed string will compute its own CRC32-value (as long as the string itself is not changed).
public sealed unsafe partial class Utf8String : IEnumerable< byte >
{
    // State is packed into the upper bits of the length field.
    // That reduces the maximum representable length, which does not matter for our purposes.
    // Apart from destruction/dispose, or the contents behind a non-owned pointer changing,
    // only the checked-flags, AsciiLowerFlag and AsciiFlag are ever mutated.
    private const uint NullTerminatedFlag    = 0x80000000;
    private const uint OwnedFlag             = 0x40000000;
    private const uint AsciiLowerFlag        = 0x20000000;
    private const uint AsciiLowerCheckedFlag = 0x10000000;
    private const uint AsciiFlag             = 0x08000000;
    private const uint AsciiCheckedFlag      = 0x04000000;

    // The remaining low bits carry the actual length.
    private const uint FlagMask = 0x03FFFFFF;

    public bool IsNullTerminated
        => 0 != ( _length & NullTerminatedFlag );

    public bool IsOwned
        => 0 != ( _length & OwnedFlag );

    // Determined on first access if not yet known.
    public bool IsAscii
    {
        get { return CheckAscii(); }
    }

    // Determined on first access if not yet known.
    public bool IsAsciiLowerCase
    {
        get { return CheckAsciiLower(); }
    }

    public byte* Path
    {
        get { return _path; }
    }

    public int Crc32
    {
        get { return _crc32; }
    }

    // The length without the state bits.
    public int Length
    {
        get
        {
            var masked = _length & FlagMask;
            return ( int )masked;
        }
    }

    public bool IsEmpty
        => 0 == Length;

    public ReadOnlySpan< byte > Span
        => new ReadOnlySpan< byte >( _path, Length );

    // Bounds-checked access to a single byte.
    public byte this[ int idx ]
    {
        get
        {
            // The unsigned cast also rejects negative indices.
            if( ( uint )idx >= Length )
            {
                throw new IndexOutOfRangeException();
            }

            return _path[ idx ];
        }
    }

    public IEnumerator< byte > GetEnumerator()
    {
        var pos = 0;
        while( pos < Length )
        {
            yield return Span[ pos ];
            ++pos;
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    // The CRC32 field is only not readonly due to dispose.
    // ReSharper disable once NonReadonlyMemberInGetHashCode
    public override int GetHashCode()
    {
        return _crc32;
    }
}

View file

@ -0,0 +1,127 @@
using System;
using System.Linq;
namespace Penumbra.GameData.ByteString;
public sealed unsafe partial class Utf8String : IEquatable< Utf8String >, IComparable< Utf8String >
{
    // Byte-wise equality. The stored CRC32 values are compared first as a cheap early-out.
    public bool Equals( Utf8String? other )
    {
        if( ReferenceEquals( null, other ) )
        {
            return false;
        }

        if( ReferenceEquals( this, other ) )
        {
            return true;
        }

        return _crc32 == other._crc32 && ByteStringFunctions.Equals( _path, Length, other._path, other.Length );
    }

    // ASCII-case-insensitive equality.
    public bool EqualsCi( Utf8String? other )
    {
        if( ReferenceEquals( null, other ) )
        {
            return false;
        }

        if( ReferenceEquals( this, other ) )
        {
            return true;
        }

        // If both strings are already known to be lowercase, the exact comparison suffices.
        if( ( IsAsciiLowerInternal ?? false ) && ( other.IsAsciiLowerInternal ?? false ) )
        {
            return _crc32 == other._crc32 && ByteStringFunctions.Equals( _path, Length, other._path, other.Length );
        }

        return ByteStringFunctions.AsciiCaselessEquals( _path, Length, other._path, other.Length );
    }

    // Lexicographical byte-wise comparison. Any string sorts after null.
    public int CompareTo( Utf8String? other )
    {
        if( ReferenceEquals( this, other ) )
        {
            return 0;
        }

        if( ReferenceEquals( null, other ) )
        {
            return 1;
        }

        return ByteStringFunctions.Compare( _path, Length, other._path, other.Length );
    }

    // Lexicographical ASCII-case-insensitive comparison. Any string sorts after null,
    // and a string is equal to itself, same as in CompareTo.
    public int CompareToCi( Utf8String? other )
    {
        if( ReferenceEquals( this, other ) )
        {
            return 0;
        }

        if( ReferenceEquals( null, other ) )
        {
            return 1;
        }

        // If both strings are already known to be lowercase, the exact comparison suffices.
        if( ( IsAsciiLowerInternal ?? false ) && ( other.IsAsciiLowerInternal ?? false ) )
        {
            return ByteStringFunctions.Compare( _path, Length, other._path, other.Length );
        }

        return ByteStringFunctions.AsciiCaselessCompare( _path, Length, other._path, other.Length );
    }

    // Check whether the string starts with the given characters.
    // Each character is truncated to a single byte for the comparison.
    public bool StartsWith( params char[] chars )
    {
        if( chars.Length > Length )
        {
            return false;
        }

        var ptr = _path;
        return chars.All( t => *ptr++ == ( byte )t );
    }

    // Check whether the string ends with the given characters.
    // Each character is truncated to a single byte for the comparison.
    public bool EndsWith( params char[] chars )
    {
        if( chars.Length > Length )
        {
            return false;
        }

        var ptr = _path + Length - chars.Length;
        return chars.All( c => *ptr++ == ( byte )c );
    }

    // Find the first occurrence of b at or after position from, or -1 if there is none.
    public int IndexOf( byte b, int from = 0 )
    {
        var end = _path + Length;
        for( var tmp = _path + from; tmp < end; ++tmp )
        {
            if( *tmp == b )
            {
                return ( int )( tmp - _path );
            }
        }

        return -1;
    }

    // Find the last occurrence of b at or after position to, or -1 if there is none.
    public int LastIndexOf( byte b, int to = 0 )
    {
        var end = _path + to;
        for( var tmp = _path + Length - 1; tmp >= end; --tmp )
        {
            if( *tmp == b )
            {
                return ( int )( tmp - _path );
            }
        }

        return -1;
    }
}

View file

@ -0,0 +1,214 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Penumbra.GameData.Util;
namespace Penumbra.GameData.ByteString;
public sealed unsafe partial class Utf8String : IDisposable
{
    // statically allocated null-terminator for empty strings to point to.
    // Has to be declared before Empty, whose constructor reads it.
    private static readonly ByteStringFunctions.NullTerminator Null = new();

    public static readonly Utf8String Empty = new();

    // actual data members.
    private byte* _path;
    private uint  _length;
    private int   _crc32;

    // Create an empty string.
    // It points to the static null-terminator and is known to be ASCII, lowercase and null-terminated.
    public Utf8String()
    {
        _path   = Null.NullBytePtr;
        _length = AsciiCheckedFlag | AsciiFlag | AsciiLowerCheckedFlag | AsciiLowerFlag | NullTerminatedFlag;
        _crc32  = 0;
    }

    // Create a temporary Utf8String from a byte pointer.
    // This computes CRC, checks for ASCII and AsciiLower and assumes Null-Termination.
    public Utf8String( byte* path )
    {
        var length = Functions.ComputeCrc32AsciiLowerAndSize( path, out var crc32, out var lower, out var ascii );
        Setup( path, length, crc32, true, false, lower, ascii );
    }

    // Construct a temporary Utf8String from a given byte string of known size.
    // Other known attributes can also be provided and are not computed.
    // Can throw ArgumentOutOfRange if length is higher than max length.
    // The Crc32 will be computed.
    // NOTE(review): isAscii defaults to false, i.e. 'known to be not ASCII', while isLower defaults to unknown - confirm this is intended.
    public static Utf8String FromByteStringUnsafe( byte* path, int length, bool isNullTerminated, bool? isLower = null, bool? isAscii = false )
        => FromSpanUnsafe( new ReadOnlySpan< byte >( path, length ), isNullTerminated, isLower, isAscii );

    // Same as above, just with a span.
    // The returned string keeps the pointer beyond the fixed-statement, so the memory behind the span must not move.
    public static Utf8String FromSpanUnsafe( ReadOnlySpan< byte > path, bool isNullTerminated, bool? isLower = null, bool? isAscii = false )
    {
        fixed( byte* ptr = path )
        {
            return new Utf8String().Setup( ptr, path.Length, null, isNullTerminated, false, isLower, isAscii );
        }
    }

    // Construct an owned Utf8String from a given unicode string, possibly converted to ascii lowercase.
    // Only returns false if the length exceeds the max length.
    public static bool FromString( string? path, out Utf8String ret, bool toAsciiLower = false )
    {
        if( string.IsNullOrEmpty( path ) )
        {
            ret = Empty;
            return true;
        }

        var p = ByteStringFunctions.Utf8FromString( path, out var l, ( int )FlagMask );
        if( p == null )
        {
            ret = Empty;
            return false;
        }

        if( toAsciiLower )
        {
            ByteStringFunctions.AsciiToLowerInPlace( p, l );
        }

        // A string is pure ASCII exactly if it has as many UTF8 bytes as it has chars.
        ret = new Utf8String().Setup( p, l, null, true, true, toAsciiLower ? true : null, l == path.Length );
        return true;
    }

    // Does not check for length and just assumes the isLower state from the second argument.
    public static Utf8String FromStringUnsafe( string? path, bool? isLower )
    {
        if( string.IsNullOrEmpty( path ) )
        {
            return Empty;
        }

        var p   = ByteStringFunctions.Utf8FromString( path, out var l );
        var ret = new Utf8String().Setup( p, l, null, true, true, isLower, l == path.Length );
        return ret;
    }

    // Free memory if the string is owned and reset it to the empty string.
    // No memory pressure is removed here: nothing visible in this class ever adds any,
    // and GC.RemoveMemoryPressure throws for a size of 0, which would
    // crash the finalizer for owned strings of length 0.
    private void ReleaseUnmanagedResources()
    {
        if( !IsOwned )
        {
            return;
        }

        Marshal.FreeHGlobal( ( IntPtr )_path );
        _length = AsciiCheckedFlag | AsciiFlag | AsciiLowerCheckedFlag | AsciiLowerFlag | NullTerminatedFlag;
        _path   = Null.NullBytePtr;
        _crc32  = 0;
    }

    // Manually free memory. Sets the string to an empty string.
    public void Dispose()
    {
        ReleaseUnmanagedResources();
        GC.SuppressFinalize( this );
    }

    ~Utf8String()
    {
        ReleaseUnmanagedResources();
    }

    // Setup from all given values.
    // Only called from constructors or factory functions in this library.
    // The Crc32 is computed if it is not provided.
    // isLower and isAscii are only stored as checked if they are not null.
    [MethodImpl( MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization )]
    internal Utf8String Setup( byte* path, int length, int? crc32, bool isNullTerminated, bool isOwned,
        bool? isLower = null, bool? isAscii = null )
    {
        // The unsigned comparison also rejects negative lengths, which would otherwise set flag bits.
        if( ( uint )length > FlagMask )
        {
            throw new ArgumentOutOfRangeException( nameof( length ) );
        }

        _path   = path;
        _length = ( uint )length;
        _crc32  = crc32 ?? ( int )~Lumina.Misc.Crc32.Get( new ReadOnlySpan< byte >( path, length ) );
        if( isNullTerminated )
        {
            _length |= NullTerminatedFlag;
        }

        if( isOwned )
        {
            _length |= OwnedFlag;
        }

        if( isLower != null )
        {
            _length |= AsciiLowerCheckedFlag;
            if( isLower.Value )
            {
                _length |= AsciiLowerFlag;
            }
        }

        if( isAscii != null )
        {
            _length |= AsciiCheckedFlag;
            if( isAscii.Value )
            {
                _length |= AsciiFlag;
            }
        }

        return this;
    }

    // Return whether the string is pure ASCII, computing and caching the result if it is not yet known.
    private bool CheckAscii()
    {
        switch( _length & ( AsciiCheckedFlag | AsciiFlag ) )
        {
            case AsciiCheckedFlag:             return false;
            case AsciiCheckedFlag | AsciiFlag: return true;
            default:
                _length |= AsciiCheckedFlag;
                var isAscii = ByteStringFunctions.IsAscii( _path, Length );
                if( isAscii )
                {
                    _length |= AsciiFlag;
                }

                return isAscii;
        }
    }

    // Return whether the string is ASCII-lowercase, computing and caching the result if it is not yet known.
    private bool CheckAsciiLower()
    {
        switch( _length & ( AsciiLowerCheckedFlag | AsciiLowerFlag ) )
        {
            case AsciiLowerCheckedFlag:                  return false;
            case AsciiLowerCheckedFlag | AsciiLowerFlag: return true;
            default:
                _length |= AsciiLowerCheckedFlag;
                var isAsciiLower = ByteStringFunctions.IsAsciiLowerCase( _path, Length );
                if( isAsciiLower )
                {
                    _length |= AsciiLowerFlag;
                }

                return isAsciiLower;
        }
    }

    // The currently known ASCII state without computing anything: null means unknown.
    // A known positive state has both the checked and the value flag set,
    // so both have to be matched, same as in CheckAscii.
    private bool? IsAsciiInternal
        => ( _length & ( AsciiCheckedFlag | AsciiFlag ) ) switch
        {
            AsciiCheckedFlag             => false,
            AsciiCheckedFlag | AsciiFlag => true,
            _                            => null,
        };

    // The currently known lowercase state without computing anything: null means unknown.
    // A known positive state has both the checked and the value flag set,
    // so both have to be matched, same as in CheckAsciiLower.
    private bool? IsAsciiLowerInternal
        => ( _length & ( AsciiLowerCheckedFlag | AsciiLowerFlag ) ) switch
        {
            AsciiLowerCheckedFlag                  => false,
            AsciiLowerCheckedFlag | AsciiLowerFlag => true,
            _                                      => null,
        };
}

View file

@ -0,0 +1,142 @@
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using Penumbra.GameData.Util;
namespace Penumbra.GameData.ByteString;
public sealed unsafe partial class Utf8String
{
    // Create a C# Unicode string from this string.
    // If the string is known to be pure ASCII, use that encoding, otherwise UTF8.
    public override string ToString()
        => Length == 0
            ? string.Empty
            : ( _length & AsciiFlag ) != 0
                ? Encoding.ASCII.GetString( _path, Length )
                : Encoding.UTF8.GetString( _path, Length );

    // Convert the ascii portion of the string to lowercase.
    // Only creates a new string and copy if the string is not already known to be lowercase.
    public Utf8String AsciiToLower()
        => ( _length & AsciiLowerFlag ) == 0
            ? new Utf8String().Setup( ByteStringFunctions.AsciiToLower( _path, Length ), Length, null, true, true, true, IsAsciiInternal )
            : this;

    // Convert the ascii portion of the string to lowercase.
    // Guaranteed to create an owned copy.
    public Utf8String AsciiToLowerClone()
        => ( _length & AsciiLowerFlag ) == 0
            ? new Utf8String().Setup( ByteStringFunctions.AsciiToLower( _path, Length ), Length, null, true, true, true, IsAsciiInternal )
            : Clone();

    // Create an owned, null-terminated copy of the given string.
    // Length, Crc32 and all other known state are taken over.
    public Utf8String Clone()
    {
        var ret = new Utf8String();
        ret._length = _length | OwnedFlag | NullTerminatedFlag;
        ret._path   = ByteStringFunctions.CopyString( Path, Length );
        ret._crc32  = Crc32;
        return ret;
    }

    // Create a non-owning substring from the given position.
    // If from is negative or too large, the returned string will be the empty string.
    public Utf8String Substring( int from )
        => ( uint )from < Length
            ? FromByteStringUnsafe( _path + from, Length - from, IsNullTerminated, IsAsciiLowerInternal, IsAsciiInternal )
            : Empty;

    // Create a non-owning substring from the given position of the given length.
    // If from is negative or too large, the returned string will be the empty string.
    // If from + length is too large, it will be the same as if length was not specified.
    public Utf8String Substring( int from, int length )
    {
        // A negative from becomes a huge unsigned value and thus a negative maxLength.
        var maxLength = Length - ( uint )from;
        if( maxLength <= 0 )
        {
            return Empty;
        }

        return length < maxLength
            ? FromByteStringUnsafe( _path + from, length, false, IsAsciiLowerInternal, IsAsciiInternal )
            : Substring( from );
    }

    // Create a owned copy of the string and replace all occurrences of from with to in it.
    public Utf8String Replace( byte from, byte to )
    {
        var length      = Length;
        var newPtr      = ByteStringFunctions.CopyString( _path, length );
        var numReplaced = ByteStringFunctions.Replace( newPtr, length, from, to );
        if( numReplaced == 0 )
        {
            // Nothing changed, so the Crc32 and all known state remain valid.
            return new Utf8String().Setup( newPtr, length, _crc32, true, true, IsAsciiLowerInternal, IsAsciiInternal );
        }

        // The contents changed: the Crc32 has to be recomputed, and the known state only
        // survives if it was positive and the inserted byte does not violate it.
        // Everything else has to be checked anew on demand.
        bool? isLower = IsAsciiLowerInternal == true && ByteStringFunctions.AsciiIsLower( to ) ? true : null;
        bool? isAscii = IsAsciiInternal == true && to < 128 ? true : null;
        return new Utf8String().Setup( newPtr, length, null, true, true, isLower, isAscii );
    }

    // Join a number of strings with a given byte between them.
    // The result is an owned, null-terminated string; joining no strings yields the empty string.
    public static Utf8String Join( byte splitter, params Utf8String[] strings )
    {
        // Without this, the terminator below would be written in front of the allocation.
        if( strings.Length == 0 )
        {
            return Empty;
        }

        // One additional byte per string: a splitter, or the null-terminator for the last one.
        var length = strings.Sum( s => s.Length ) + strings.Length;
        var data   = ( byte* )Marshal.AllocHGlobal( length );

        var   ptr     = data;
        bool? isLower = ByteStringFunctions.AsciiIsLower( splitter );
        bool? isAscii = splitter < 128;
        foreach( var s in strings )
        {
            Functions.MemCpyUnchecked( ptr, s.Path, s.Length );
            ptr     += s.Length;
            *ptr++  =  splitter;
            isLower =  Combine( isLower, s.IsAsciiLowerInternal );
            isAscii &= s.IsAscii;
        }

        // Overwrite the trailing splitter with the null-terminator.
        --length;
        data[ length ] = 0;
        var ret = FromByteStringUnsafe( data, length, true, isLower, isAscii );
        ret._length |= OwnedFlag;
        return ret;
    }

    // Split a string and return a list of the non-owning substrings delimited by b.
    // You can specify the maximum number of splits (if the maximum is reached, the last substring may contain delimiters).
    // You can also specify to ignore empty substrings inside delimiters. Those are also not counted for max splits.
    // The remainder behind the last handled delimiter is always added, even if it is empty.
    public List< Utf8String > Split( byte b, int maxSplits = int.MaxValue, bool removeEmpty = true )
    {
        var ret   = new List< Utf8String >();
        var start = 0;
        for( var idx = IndexOf( b, start ); idx >= 0; idx = IndexOf( b, start ) )
        {
            // A substring is empty if the delimiter is found right at its start.
            if( idx != start || !removeEmpty )
            {
                ret.Add( Substring( start, idx - start ) );
            }

            start = idx + 1;
            if( ret.Count == maxSplits - 1 )
            {
                break;
            }
        }

        ret.Add( Substring( start ) );
        return ret;
    }

    // Combine two possibly unknown states:
    // false if either is known to be false, true if both are known to be true, unknown otherwise.
    private static bool? Combine( bool? val1, bool? val2 )
    {
        return ( val1, val2 ) switch
        {
            (null, null)   => null,
            (null, true)   => null,
            (null, false)  => false,
            (true, null)   => null,
            (true, true)   => true,
            (true, false)  => false,
            (false, null)  => false,
            (false, true)  => false,
            (false, false) => false,
        };
    }
}