Implement xiv fixes into Dalamud.Boot (#857)

This commit is contained in:
kizer 2022-05-29 02:11:03 +09:00 committed by GitHub
parent 02dd1eddec
commit 75de126c9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
40 changed files with 41576 additions and 196 deletions

3
.gitmodules vendored
View file

@ -4,3 +4,6 @@
[submodule "lib/FFXIVClientStructs"]
path = lib/FFXIVClientStructs
url = https://github.com/goatcorp/FFXIVClientStructs.git
[submodule "lib/Nomade040-nmd"]
path = lib/Nomade040-nmd
url = https://github.com/Nomade040/nmd

View file

@ -58,6 +58,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>false</IntrinsicFunctions>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Use</PrecompiledHeader>
</ClCompile>
<Link>
<EnableCOMDATFolding>false</EnableCOMDATFolding>
@ -69,6 +70,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Use</PrecompiledHeader>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -91,22 +93,24 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="logging.cpp" />
<ClCompile Include="unicode.cpp" />
<ClCompile Include="xivfixes.cpp" />
<ClCompile Include="utils.cpp" />
<ClCompile Include="pch_nmd_assembly_impl.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="pch.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
</ClCompile>
<ClCompile Include="dllmain.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Use</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Use</PrecompiledHeader>
</ClCompile>
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="rewrite_entrypoint.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Use</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Use</PrecompiledHeader>
</ClCompile>
<ClCompile Include="veh.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Use</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Use</PrecompiledHeader>
</ClCompile>
<ClCompile Include="veh.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\lib\CoreCLR\boot.h" />
@ -114,8 +118,14 @@
<ClInclude Include="..\lib\CoreCLR\core\coreclr_delegates.h" />
<ClInclude Include="..\lib\CoreCLR\core\hostfxr.h" />
<ClInclude Include="..\lib\CoreCLR\nethost\nethost.h" />
<ClInclude Include="bootconfig.h" />
<ClInclude Include="hooks.h" />
<ClInclude Include="logging.h" />
<ClInclude Include="unicode.h" />
<ClInclude Include="utils.h" />
<ClInclude Include="pch.h" />
<ClInclude Include="veh.h" />
<ClInclude Include="xivfixes.h" />
</ItemGroup>
<Target Name="RemoveExtraFiles" AfterTargets="PostBuildEvent">
<Delete Files="$(OutDir)$(TargetName).lib" />

View file

@ -8,14 +8,14 @@
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Project Files">
<UniqueIdentifier>{0c915688-91ea-431f-8b68-845cad422a50}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="dllmain.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
<ClCompile Include="pch.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
<ClCompile Include="veh.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
@ -28,6 +28,24 @@
<ClCompile Include="..\lib\CoreCLR\boot.cpp">
<Filter>CoreCLR</Filter>
</ClCompile>
<ClCompile Include="pch_nmd_assembly_impl.cpp">
<Filter>Project Files</Filter>
</ClCompile>
<ClCompile Include="pch.cpp">
<Filter>Project Files</Filter>
</ClCompile>
<ClCompile Include="utils.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
<ClCompile Include="xivfixes.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
<ClCompile Include="logging.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
<ClCompile Include="unicode.cpp">
<Filter>Dalamud.Boot DLL</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\lib\CoreCLR\CoreCLR.h">
@ -49,6 +67,24 @@
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
<ClInclude Include="pch.h">
<Filter>Project Files</Filter>
</ClInclude>
<ClInclude Include="utils.h">
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
<ClInclude Include="hooks.h">
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
<ClInclude Include="bootconfig.h">
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
<ClInclude Include="xivfixes.h">
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
<ClInclude Include="logging.h">
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
<ClInclude Include="unicode.h">
<Filter>Dalamud.Boot DLL</Filter>
</ClInclude>
</ItemGroup>

25
Dalamud.Boot/bootconfig.h Normal file
View file

@ -0,0 +1,25 @@
#pragma once
#include "utils.h"
// Boot-time switches, each read from an environment variable through
// utils::get_env<bool>. How a value is interpreted as a boolean is decided by
// utils::get_env (declared in utils.h, not shown here).
namespace bootconfig {
    // DALAMUD_WAIT_MESSAGEBOX: show a blocking message box before booting.
    inline bool is_wait_messagebox() {
        return utils::get_env<bool>(L"DALAMUD_WAIT_MESSAGEBOX");
    }

    // DALAMUD_SHOW_CONSOLE: open a console window for boot output.
    inline bool is_show_console() {
        return utils::get_env<bool>(L"DALAMUD_SHOW_CONSOLE");
    }

    // DALAMUD_WAIT_DEBUGGER: spin until a debugger is attached.
    inline bool is_wait_debugger() {
        return utils::get_env<bool>(L"DALAMUD_WAIT_DEBUGGER");
    }

    // DALAMUD_IS_VEH: install the vectored exception handler.
    inline bool is_veh_enabled() {
        return utils::get_env<bool>(L"DALAMUD_IS_VEH");
    }

    // DALAMUD_IS_VEH_FULL: forwarded to veh::add_handler as its "full" flag.
    // Uses a wide literal like every other switch in this namespace.
    inline bool is_veh_full() {
        return utils::get_env<bool>(L"DALAMUD_IS_VEH_FULL");
    }
}

View file

@ -1,86 +1,40 @@
#include "pch.h"
#include "bootconfig.h"
#include "logging.h"
#include "veh.h"
#include "xivfixes.h"
HMODULE g_hModule;
HINSTANCE g_hGameInstance = GetModuleHandleW(nullptr);
// Returns true only when the environment variable `name` is set and its value
// equals "true" (case-insensitive). Any lookup or allocation failure yields false.
bool check_env_var(std::string name)
{
    // First call only asks for the required buffer size; zero means "not set".
    size_t value_size;
    getenv_s(&value_size, nullptr, 0, name.c_str());
    if (value_size == 0)
        return false;

    char* const value = static_cast<char*>(malloc(value_size * sizeof(char)));
    if (value == nullptr)
        return false;

    // Second call copies the value into the buffer we just sized.
    getenv_s(&value_size, value, value_size, name.c_str());
    const bool is_true = _stricmp(value, "true") == 0;
    free(value);
    return is_true;
}
bool is_running_on_linux()
{
size_t required_size;
getenv_s(&required_size, nullptr, 0, "XL_WINEONLINUX");
if (required_size > 0)
{
if (char* is_wine_on_linux = static_cast<char*>(malloc(required_size * sizeof(char))))
{
getenv_s(&required_size, is_wine_on_linux, required_size, "XL_WINEONLINUX");
auto result = _stricmp(is_wine_on_linux, "true");
free(is_wine_on_linux);
if (result == 0)
return true;
}
}
HMODULE hntdll = GetModuleHandleW(L"ntdll.dll");
if (!hntdll) // not running on NT
return true;
FARPROC pwine_get_version = GetProcAddress(hntdll, "wine_get_version");
FARPROC pwine_get_host_version = GetProcAddress(hntdll, "wine_get_host_version");
return pwine_get_version != nullptr || pwine_get_host_version != nullptr;
}
// True when the DALAMUD_IS_VEH environment variable equals "true".
bool is_veh_enabled()
{
    const bool enabled = check_env_var("DALAMUD_IS_VEH");
    return enabled;
}
// True when the DALAMUD_IS_VEH_FULL environment variable equals "true".
bool is_full_dumps()
{
    const bool full = check_env_var("DALAMUD_IS_VEH_FULL");
    return full;
}
DllExport DWORD WINAPI Initialize(LPVOID lpParam, HANDLE hMainThreadContinue)
{
#ifndef NDEBUG
DllExport DWORD WINAPI Initialize(LPVOID lpParam, HANDLE hMainThreadContinue) {
if (bootconfig::is_show_console())
ConsoleSetup(L"Dalamud Boot");
#endif
printf("Dalamud.Boot Injectable, (c) 2021 XIVLauncher Contributors\nBuilt at: %s@%s\n\n", __DATE__, __TIME__);
if (bootconfig::is_wait_messagebox())
MessageBoxW(nullptr, L"Press OK to continue", L"Dalamud Boot", MB_OK);
if (check_env_var("DALAMUD_WAIT_DEBUGGER"))
{
printf("Waiting for debugger to attach...\n");
try {
xivfixes::apply_all(true);
} catch (const std::exception& e) {
logging::print<logging::W>("Failed to do general fixups. Some things might not work.");
logging::print<logging::W>("Error: {}", e.what());
}
logging::print<logging::I>("Dalamud.Boot Injectable, (c) 2021 XIVLauncher Contributors");
logging::print<logging::I>("Built at : " __DATE__ "@" __TIME__);
if (bootconfig::is_wait_debugger()) {
logging::print<logging::I>("Waiting for debugger to attach...");
while (!IsDebuggerPresent())
Sleep(100);
printf("Debugger attached.\n");
logging::print<logging::I>("Debugger attached.");
}
wchar_t _module_path[MAX_PATH];
GetModuleFileNameW(g_hModule, _module_path, sizeof _module_path / 2);
std::filesystem::path fs_module_path(_module_path);
std::wstring runtimeconfig_path = _wcsdup(fs_module_path.replace_filename(L"Dalamud.runtimeconfig.json").c_str());
std::wstring module_path = _wcsdup(fs_module_path.replace_filename(L"Dalamud.dll").c_str());
const auto fs_module_path = utils::get_module_path(g_hModule);
const auto runtimeconfig_path = std::filesystem::path(fs_module_path).replace_filename(L"Dalamud.runtimeconfig.json").wstring();
const auto module_path = std::filesystem::path(fs_module_path).replace_filename(L"Dalamud.dll").wstring();
// ============================== CLR ========================================= //
@ -97,39 +51,28 @@ DllExport DWORD WINAPI Initialize(LPVOID lpParam, HANDLE hMainThreadContinue)
if (result != 0)
return result;
typedef void (CORECLR_DELEGATE_CALLTYPE* custom_component_entry_point_fn)(LPVOID, HANDLE);
custom_component_entry_point_fn entrypoint_fn = reinterpret_cast<custom_component_entry_point_fn>(entrypoint_vfn);
using custom_component_entry_point_fn = void (CORECLR_DELEGATE_CALLTYPE*)(LPVOID, HANDLE);
const auto entrypoint_fn = reinterpret_cast<custom_component_entry_point_fn>(entrypoint_vfn);
// ============================== VEH ======================================== //
printf("Initializing VEH... ");
if(is_running_on_linux())
{
printf("VEH was disabled, running on linux\n");
}
else if (is_veh_enabled())
{
if (veh::add_handler(is_full_dumps()))
printf("Done!\n");
else printf("Failed!\n");
}
logging::print<logging::I>("Initializing VEH...");
if (utils::is_running_on_linux()) {
logging::print<logging::I>("=> VEH was disabled, running on linux");
} else if (bootconfig::is_veh_enabled()) {
if (veh::add_handler(bootconfig::is_veh_full()))
logging::print<logging::I>("=> Done!");
else
{
printf("VEH was disabled manually\n");
logging::print<logging::I>("=> Failed!");
} else {
logging::print<logging::I>("VEH was disabled manually");
}
// ============================== Dalamud ==================================== //
printf("Initializing Dalamud... ");
logging::print<logging::I>("Initializing Dalamud...");
entrypoint_fn(lpParam, hMainThreadContinue);
printf("Done!\n");
#ifndef NDEBUG
fclose(stdin);
fclose(stdout);
fclose(stderr);
FreeConsole();
#endif
logging::print<logging::I>("Done!");
return 0;
}
@ -137,12 +80,12 @@ DllExport DWORD WINAPI Initialize(LPVOID lpParam, HANDLE hMainThreadContinue)
BOOL APIENTRY DllMain(const HMODULE hModule, const DWORD dwReason, LPVOID lpReserved) {
DisableThreadLibraryCalls(hModule);
switch (dwReason)
{
switch (dwReason) {
case DLL_PROCESS_ATTACH:
g_hModule = hModule;
break;
case DLL_PROCESS_DETACH:
xivfixes::apply_all(false);
veh::remove_handler();
break;
}

136
Dalamud.Boot/hooks.h Normal file
View file

@ -0,0 +1,136 @@
#pragma once
#include <limits>
#include "utils.h"
namespace hooks {
// Primary template is only declared; hooks are always instantiated through the
// function-type specialization below.
template<typename>
class base_hook;
// Common state for every hook flavour: the original function pointer plus a
// utils::thunk whose address derived classes install in place of the original.
// NOTE(review): utils::thunk is defined in utils.h (not shown); it is assumed to
// expose a callable entry point that forwards to whatever set_target() was given.
template<typename TReturn, typename ... TArgs>
class base_hook<TReturn(TArgs...)> {
using TFn = TReturn(TArgs...);
private:
// Function that was in place before the hook; never changes after construction.
TFn* const m_pfnOriginal;
// Initialised from m_pfnOriginal, so it must stay declared after it.
utils::thunk<TReturn(TArgs...)> m_thunk;
public:
// Starts with the thunk targeting the original function.
base_hook(TFn* pfnOriginal)
: m_pfnOriginal(pfnOriginal)
, m_thunk(m_pfnOriginal) {
}
virtual ~base_hook() = default;
// Points the thunk at `fn`; passing an empty std::function points it back at
// the original function.
virtual void set_detour(std::function<TFn> fn) {
if (!fn)
m_thunk.set_target(m_pfnOriginal);
else
m_thunk.set_target(std::move(fn));
}
// Calls the original function directly through its stored pointer.
virtual TReturn call_original(TArgs... args) {
return m_pfnOriginal(std::forward<TArgs>(args)...);
}
protected:
// Original function pointer, for derived classes that need to restore it.
TFn* get_original() const {
return m_pfnOriginal;
}
// Address derived classes write into the hooked location.
TFn* get_thunk() const {
return m_thunk.get_thunk();
}
};
// Hook installed by overwriting one import-table slot with the thunk address.
// The slot is restored to the original pointer on destruction.
template<typename TFn>
class import_hook : public base_hook<TFn> {
using Base = base_hook<TFn>;
// Address of the import-table entry that was patched.
TFn** const m_ppfnImportTableItem;
public:
// Reads the current slot value as the original, then replaces it with the thunk.
import_hook(TFn** ppfnImportTableItem)
: Base(*ppfnImportTableItem)
, m_ppfnImportTableItem(ppfnImportTableItem) {
// NOTE(review): memory_tenderizer (utils.h, not shown) presumably makes the range
// writable for the lifetime of this local - confirm.
const utils::memory_tenderizer tenderizer(ppfnImportTableItem, sizeof * ppfnImportTableItem, PAGE_READWRITE);
*ppfnImportTableItem = Base::get_thunk();
}
// Convenience overload: resolves the slot in the main executable's module
// (GetModuleHandleW(nullptr)) via utils::get_imported_function_pointer.
import_hook(const char* pcszDllName, const char* pcszFunctionName, int hintOrOrdinal)
: import_hook(utils::get_imported_function_pointer<TFn>(GetModuleHandleW(nullptr), pcszDllName, pcszFunctionName, hintOrOrdinal)) {
}
// Writes the original pointer back into the import-table slot.
~import_hook() override {
const utils::memory_tenderizer tenderizer(m_ppfnImportTableItem, sizeof * m_ppfnImportTableItem, PAGE_READWRITE);
*m_ppfnImportTableItem = Base::get_original();
}
};
// Hook installed by overwriting the first 12 bytes at an exported function's
// address with an absolute jump to the thunk. The overwritten bytes are kept
// and written back on destruction.
template<typename TFn>
class export_hook : public base_hook<TFn> {
using Base = base_hook<TFn>;
// x64 machine code; the eight zero bytes at offset 2 are patched with the target.
static constexpr uint8_t DetouringThunkTemplate[12]{
0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // movabs rax, 0x0000000000000000
0xFF, 0xE0, // jmp rax
};
// Address whose leading bytes were overwritten.
TFn* const m_pfnExportThunk;
// Per-instance backup of the overwritten bytes (despite the s_ prefix this is
// a non-static member).
uint8_t s_originalThunk[sizeof DetouringThunkTemplate]{};
public:
// The "original" handed to the base is whatever
// utils::resolve_unconditional_jump_target() returns for pfnExportThunk.
// NOTE(review): that helper is not shown; presumably it follows a leading jmp
// to the real function body - confirm.
export_hook(TFn* pfnExportThunk)
: Base(reinterpret_cast<TFn*>(utils::resolve_unconditional_jump_target(pfnExportThunk)))
, m_pfnExportThunk(pfnExportThunk) {
auto pExportThunk = reinterpret_cast<uint8_t*>(pfnExportThunk);
// Make it writeable.
const utils::memory_tenderizer tenderizer(pfnExportThunk, sizeof DetouringThunkTemplate, PAGE_EXECUTE_READWRITE);
// Back up original thunk bytes.
memcpy(s_originalThunk, pExportThunk, sizeof s_originalThunk);
// Write thunk template.
memcpy(pExportThunk, DetouringThunkTemplate, sizeof DetouringThunkTemplate);
// Write target address.
// Offset 2 skips the two opcode bytes of movabs; the store is unaligned.
*reinterpret_cast<TFn**>(&pExportThunk[2]) = Base::get_thunk();
}
~export_hook() override {
const utils::memory_tenderizer tenderizer(m_pfnExportThunk, sizeof DetouringThunkTemplate, PAGE_EXECUTE_READWRITE);
// Restore original thunk bytes.
memcpy(m_pfnExportThunk, s_originalThunk, sizeof s_originalThunk);
// Clear state.
memset(s_originalThunk, 0, sizeof s_originalThunk);
}
};
// Hook that replaces a window's procedure (GWLP_WNDPROC) with the thunk and
// puts the previous procedure back on destruction.
class wndproc_hook : public base_hook<std::remove_pointer_t<WNDPROC>> {
using Base = base_hook<std::remove_pointer_t<WNDPROC>>;
// Window whose procedure was replaced (a non-static member despite the s_ prefix).
const HWND s_hwnd;
public:
// Captures the window's current procedure as the original, then installs the thunk.
wndproc_hook(HWND hwnd)
: Base(reinterpret_cast<WNDPROC>(GetWindowLongPtrW(hwnd, GWLP_WNDPROC)))
, s_hwnd(hwnd) {
SetWindowLongPtrW(hwnd, GWLP_WNDPROC, reinterpret_cast<LONG_PTR>(Base::get_thunk()));
}
// Reinstalls the procedure captured at construction.
~wndproc_hook() override {
SetWindowLongPtrW(s_hwnd, GWLP_WNDPROC, reinterpret_cast<LONG_PTR>(Base::get_original()));
}
// Routes through CallWindowProcW rather than calling the stored pointer directly.
LRESULT call_original(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) override {
return CallWindowProcW(Base::get_original(), hwnd, msg, wParam, lParam);
}
};
}

35
Dalamud.Boot/logging.cpp Normal file
View file

@ -0,0 +1,35 @@
#include "pch.h"
#include "logging.h"
// Writes one log line to the process's standard error handle.
//
// The line has the form "[HH:MM:SS CPP/<TAG>] <message>\n", where the time is
// the current local time and <TAG> is a three-letter code for `level`
// ("???" for values outside the Level enum).
//
// @param level  Severity used to pick the tag.
// @param s      Null-terminated message text; written as-is.
//
// The result of WriteFile is not checked, so a failed write is silently dropped.
void logging::print(Level level, const char* s) {
    // Only the tag differs between levels, so pick it once and format once.
    const char* tag;
    switch (level) {
        case Verbose: tag = "VRB"; break;
        case Debug:   tag = "DBG"; break;
        case Info:    tag = "INF"; break;
        case Warning: tag = "WRN"; break;
        case Error:   tag = "ERR"; break;
        case Fatal:   tag = "FTL"; break;
        default:      tag = "???"; break;
    }

    SYSTEMTIME st;
    GetLocalTime(&st);

    const auto line = std::format("[{:02}:{:02}:{:02} CPP/{}] {}\n", st.wHour, st.wMinute, st.wSecond, tag, s);

    DWORD wr;
    WriteFile(GetStdHandle(STD_ERROR_HANDLE), line.data(), static_cast<DWORD>(line.size()), &wr, nullptr);
}

59
Dalamud.Boot/logging.h Normal file
View file

@ -0,0 +1,59 @@
#pragma once
#include <format>
#include <numeric>
#include <string>
#include "unicode.h"
namespace logging {
enum Level : int {
Verbose = 0,
V = 0,
Debug = 1,
D = 1,
Info = 2,
I = 2,
Warning = 3,
W = 3,
Error = 4,
E = 4,
Fatal = 5,
F = 5,
};
void print(Level level, const char* s);
inline void print(Level level, const wchar_t* s) {
const auto cs = unicode::convert<std::string>(s);
print(level, cs.c_str());
}
inline void print(Level level, const std::string& s) {
print(level, s.c_str());
}
inline void print(Level level, const std::wstring& s) {
print(level, s.c_str());
}
template<Level level, typename T>
inline void print(const T* s) {
print(level, s);
}
template<typename Arg, typename...Args>
inline void print(Level level, const char* pcszFormat, Arg arg1, Args...args) {
print(level, std::format(pcszFormat, std::forward<Arg>(arg1), std::forward<Args>(args)...));
}
template<typename Arg, typename...Args>
inline void print(Level level, const wchar_t* pcszFormat, Arg arg1, Args...args) {
print(level, std::format(pcszFormat, std::forward<Arg>(arg1), std::forward<Args>(args)...));
}
template<Level level, typename T, typename Arg, typename...Args, typename = std::enable_if_t<std::is_integral_v<T>>>
inline void print(const T* pcszFormat, Arg arg1, Args...args) {
print(level, std::format(pcszFormat, std::forward<Arg>(arg1), std::forward<Args>(args)...));
}
};

View file

@ -9,6 +9,7 @@
// Exclude rarely-used stuff from Windows headers
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
// Windows Header Files
#include <windows.h>
@ -17,6 +18,10 @@
#include <Psapi.h>
#include <Shlobj.h>
#include <TlHelp32.h>
#include <Dbt.h>
// MSVC Compiler Intrinsic
#include <intrin.h>
// C++ Standard Libraries
#include <cassert>
@ -24,18 +29,32 @@
#include <filesystem>
#include <format>
#include <fstream>
#include <functional>
#include <iostream>
#include <ranges>
#include <span>
#include <string>
#include <mutex>
#include <type_traits>
// https://www.akenotsuki.com/misc/srell/en/
#include "../lib/srell3_009/single-header/srell.hpp"
// https://github.com/Nomade040/nmd
#include "../lib/Nomade040-nmd/nmd_assembly.h"
// https://github.com/dotnet/coreclr
#include "..\lib\CoreCLR\CoreCLR.h"
#include "..\lib\CoreCLR\boot.h"
#include "../lib/CoreCLR/CoreCLR.h"
#include "../lib/CoreCLR/boot.h"
#include "unicode.h"
// Commonly used macros
#define DllExport extern "C" __declspec(dllexport)
// Global variables
extern HMODULE g_hModule;
extern HINSTANCE g_hGameInstance;
extern std::optional<CoreCLR> g_clr;
#endif //PCH_H

View file

@ -0,0 +1,2 @@
#define NMD_ASSEMBLY_IMPLEMENTATION
#include "../lib/Nomade040-nmd/nmd_assembly.h"

View file

@ -1,5 +1,7 @@
#include "pch.h"
#include "logging.h"
DllExport DWORD WINAPI Initialize(LPVOID lpParam, HANDLE hMainThreadContinue);
struct RewrittenEntryPointParameters {
@ -231,8 +233,8 @@ void* get_mapped_image_base_address(HANDLE hProcess, const std::filesystem::path
return mbi.AllocationBase;
} catch (const std::filesystem::filesystem_error& e) {
printf("%s", e.what());
} catch (const std::exception& e) {
logging::print<logging::W>("Failed to check memory block 0x{:X}(len=0x{:X}): {}", mbi.BaseAddress, mbi.RegionSize, e.what());
continue;
}
}

235
Dalamud.Boot/unicode.cpp Normal file
View file

@ -0,0 +1,235 @@
#include "pch.h"
#include "unicode.h"
// Decodes one code point from a UTF-8 sequence.
//
// @param out              Receives the decoded code point, 0 when the input is
//                         empty, or UReplacement when the sequence is invalid.
// @param in               First code unit of the sequence.
// @param nRemainingBytes  Number of code units readable from `in`.
// @param strict           When false, the obsolete 5- and 6-byte forms
//                         (lead bytes 0xF8-0xFD) are also accepted.
// @return Number of code units consumed: 0 for empty input, 1 for an invalid
//         sequence (so the caller can resynchronise on the next unit),
//         otherwise the full sequence length.
//
// Overlong encodings and out-of-range values are not rejected.
size_t unicode::decode(EncodingTag<char8_t>, char32_t& out, const char8_t* in, size_t nRemainingBytes, bool strict) {
    if (nRemainingBytes == 0) {
        out = 0;
        return 0;
    }

    const char32_t lead = *in;
    if (0 == (lead & 0x80)) {
        out = lead;
        return 1;
    }

    // Classify the lead byte: total sequence length and the payload bits it carries.
    size_t length = 0;
    char32_t value = 0;
    if (0xC0 == (lead & 0xE0)) {
        length = 2;
        value = lead & 0x1F;
    } else if (0xE0 == (lead & 0xF0)) {
        length = 3;
        value = lead & 0x0F;
    } else if (0xF0 == (lead & 0xF8)) {
        length = 4;
        value = lead & 0x07;
    } else if (!strict && 0xF8 == (lead & 0xFC)) {
        length = 5;
        value = lead & 0x03;
    } else if (!strict && 0xFC == (lead & 0xFE)) {
        length = 6;
        value = lead & 0x01;
    }

    // length stays 0 for a stray continuation byte or an unsupported lead byte.
    if (length == 0 || nRemainingBytes < length) {
        out = UReplacement;
        return 1;
    }

    // Every trailing unit must be 10xxxxxx and contributes six payload bits.
    for (size_t i = 1; i < length; i++) {
        if (0x80 != (in[i] & 0xC0)) {
            out = UReplacement;
            return 1;
        }
        value = (value << 6) | (static_cast<char32_t>(in[i]) & 0x3F);
    }

    out = value;
    return length;
}
// Decodes one code point from a UTF-16 sequence.
//
// Returns the number of code units consumed (0 for empty input, 2 for a
// surrogate pair, otherwise 1). A high surrogate without a following low
// surrogate yields UReplacement; a lone low surrogate yields UReplacement only
// when `strict` is set and is passed through unchanged otherwise.
size_t unicode::decode(EncodingTag<char16_t>, char32_t& out, const char16_t* in, size_t nRemainingBytes, bool strict) {
    if (nRemainingBytes == 0) {
        out = 0;
        return 0;
    }

    const char32_t first = in[0];
    const bool isHighSurrogate = (first & 0xFC00) == 0xD800;
    if (isHighSurrogate) {
        const bool hasLowSurrogate = nRemainingBytes >= 2 && (in[1] & 0xFC00) == 0xDC00;
        if (!hasLowSurrogate) {
            out = UReplacement;
            return 1;
        }

        const char32_t highBits = (first & 0x03FF) << 10;
        const char32_t lowBits = (static_cast<char32_t>(in[1]) & 0x03FF) << 0;
        out = 0x10000 + (highBits | lowBits);
        return 2;
    }

    // Only unpaired low surrogates can still fall inside the surrogate range here.
    const bool isSurrogate = 0xD800 <= first && first <= 0xDFFF;
    if (isSurrogate && strict)
        out = UReplacement;
    else
        out = first;
    return 1;
}
// UTF-32 "decode": copies one code unit through unchanged. Returns 1, or 0
// (with out = 0) when the input is empty. `strict` is not consulted.
size_t unicode::decode(EncodingTag<char32_t>, char32_t& out, const char32_t* in, size_t nRemainingBytes, bool strict) {
    if (nRemainingBytes != 0) {
        out = *in;
        return 1;
    }

    out = 0;
    return 0;
}
// Narrow strings are treated as UTF-8: forwards to the char8_t decoder.
size_t unicode::decode(EncodingTag<char>, char32_t& out, const char* in, size_t nRemainingBytes, bool strict) {
    const auto pUtf8 = reinterpret_cast<const char8_t*>(in);
    return decode(EncodingTag<char8_t>(), out, pUtf8, nRemainingBytes, strict);
}
// Wide strings are treated as UTF-16: forwards to the char16_t decoder.
size_t unicode::decode(EncodingTag<wchar_t>, char32_t& out, const wchar_t* in, size_t nRemainingBytes, bool strict) {
    const auto pUtf16 = reinterpret_cast<const char16_t*>(in);
    return decode(EncodingTag<char16_t>(), out, pUtf16, nRemainingBytes, strict);
}
// Encodes one code point as UTF-8.
//
// @param ptr     Destination buffer, or nullptr to only measure the length.
// @param c       Code point to encode.
// @param strict  When true, values that do not fit in four bytes are replaced
//                by U+FFFD. When false, the obsolete 5- and 6-byte forms are
//                produced for values below 2^26 and 2^31 respectively.
// @return Number of code units the encoding occupies (1-6).
//
// Values of 2^31 and above cannot be represented even by the 6-byte form and
// are always replaced by U+FFFD.
size_t unicode::encode(EncodingTag<char8_t>, char8_t* ptr, char32_t c, bool strict) {
    // Emits U+FFFD (EF BF BD) and reports its length.
    const auto encodeReplacement = [&ptr]() -> size_t {
        if (ptr) {
            *(ptr++) = 0xEF;
            *(ptr++) = 0xBF;
            *(ptr++) = 0xBD;
        }
        return 3;
    };

    // Each threshold is 2^(payload bits of the lead byte + 6 per trailing byte).
    if (c < (1 << 7)) {
        if (ptr)
            *(ptr++) = static_cast<char8_t>(c);
        return 1;
    }

    if (c < (1 << (5 + 6))) {
        if (ptr) {
            *(ptr++) = 0xC0 | static_cast<char8_t>(c >> 6);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 0) & 0x3F);
        }
        return 2;
    }

    if (c < (1 << (4 + 6 + 6))) {
        if (ptr) {
            *(ptr++) = 0xE0 | static_cast<char8_t>(c >> 12);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 6) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 0) & 0x3F);
        }
        return 3;
    }

    if (c < (1 << (3 + 6 + 6 + 6))) {
        if (ptr) {
            *(ptr++) = 0xF0 | static_cast<char8_t>(c >> 18);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 12) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 6) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 0) & 0x3F);
        }
        return 4;
    }

    if (strict)
        return encodeReplacement();

    // 5-byte form: lead 111110xx carries only two payload bits.
    if (c < (1 << (2 + 6 + 6 + 6 + 6))) {
        if (ptr) {
            *(ptr++) = 0xF8 | static_cast<char8_t>(c >> 24);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 18) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 12) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 6) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 0) & 0x3F);
        }
        return 5;
    }

    // 6-byte form: lead 1111110x carries a single payload bit.
    if (c < (1u << (1 + 6 + 6 + 6 + 6 + 6))) {
        if (ptr) {
            *(ptr++) = 0xFC | static_cast<char8_t>(c >> 30);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 24) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 18) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 12) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 6) & 0x3F);
            *(ptr++) = 0x80 | static_cast<char8_t>((c >> 0) & 0x3F);
        }
        return 6;
    }

    return encodeReplacement();
}
// Encodes one code point as UTF-16.
//
// `ptr` may be nullptr to only measure. Returns 2 for a surrogate pair and 1
// otherwise. Values above U+10FFFF are written as U+FFFD; values inside the
// surrogate range are written as U+FFFD only when `strict` is set.
size_t unicode::encode(EncodingTag<char16_t>, char16_t* ptr, char32_t c, bool strict) {
    if (c >= 0x10000) {
        const char32_t offset = c - 0x10000;

        // More than 20 bits left over: not representable as a surrogate pair.
        if (offset >= (1 << 20)) {
            if (ptr)
                *ptr = 0xFFFD;
            return 1;
        }

        if (ptr) {
            ptr[0] = 0xD800 | static_cast<char16_t>((offset >> 10) & 0x3FF);
            ptr[1] = 0xDC00 | static_cast<char16_t>((offset >> 0) & 0x3FF);
        }
        return 2;
    }

    if (ptr) {
        const bool isSurrogate = 0xD800 <= c && c <= 0xDFFF;
        if (isSurrogate && strict)
            *ptr = 0xFFFD;
        else
            *ptr = static_cast<char16_t>(c);
    }
    return 1;
}
// UTF-32 "encode": stores the code point unchanged when a buffer is given.
// Always occupies one code unit; `strict` is not consulted.
size_t unicode::encode(EncodingTag<char32_t>, char32_t* ptr, char32_t c, bool strict) {
    if (ptr != nullptr) {
        *ptr = c;
    }
    return 1;
}
// Narrow strings are treated as UTF-8: forwards to the char8_t encoder.
size_t unicode::encode(EncodingTag<char>, char* ptr, char32_t c, bool strict) {
    const auto pUtf8 = reinterpret_cast<char8_t*>(ptr);
    return encode(EncodingTag<char8_t>(), pUtf8, c, strict);
}
// Wide strings are treated as UTF-16: forwards to the char16_t encoder.
size_t unicode::encode(EncodingTag<wchar_t>, wchar_t* ptr, char32_t c, bool strict) {
    const auto pUtf16 = reinterpret_cast<char16_t*>(ptr);
    return encode(EncodingTag<char16_t>(), pUtf16, c, strict);
}

92
Dalamud.Boot/unicode.h Normal file
View file

@ -0,0 +1,92 @@
#pragma once
#include <array>
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>
#include <type_traits>
// Conversion between UTF-8, UTF-16 and UTF-32 strings.
//
// decode() reads one code point from a sequence of code units and returns how
// many units it consumed; encode() writes one code point and returns how many
// units it occupies. convert() chains the two to transcode whole strings.
namespace unicode {
    // U+FFFD, substituted for input that cannot be decoded or encoded.
    constexpr char32_t UReplacement = U'\uFFFD';
    constexpr char32_t UInvalid = U'\uFFFF';

    // Empty tag used to select the decode/encode overload for a code unit type.
    template<typename T> struct EncodingTag {};

    // Per-encoding decoders. `nRemainingBytes` counts code units of the
    // respective type (convert() passes the remaining element count).
    // Returns the number of units consumed; 0 means the input was empty.
    size_t decode(EncodingTag<char8_t>, char32_t& out, const char8_t* in, size_t nRemainingBytes, bool strict);
    size_t decode(EncodingTag<char16_t>, char32_t& out, const char16_t* in, size_t nRemainingBytes, bool strict);
    size_t decode(EncodingTag<char32_t>, char32_t& out, const char32_t* in, size_t nRemainingBytes, bool strict);
    size_t decode(EncodingTag<char>, char32_t& out, const char* in, size_t nRemainingBytes, bool strict);
    size_t decode(EncodingTag<wchar_t>, char32_t& out, const wchar_t* in, size_t nRemainingBytes, bool strict);

    // Picks the decoder from the pointer's element type.
    template<typename T>
    inline size_t decode(char32_t& out, const T* in, size_t nRemainingBytes, bool strict = true) {
        return decode(EncodingTag<T>(), out, in, nRemainingBytes, strict);
    }

    // Per-encoding encoders. Pass ptr == nullptr to only obtain the length.
    size_t encode(EncodingTag<char8_t>, char8_t* ptr, char32_t c, bool strict);
    size_t encode(EncodingTag<char16_t>, char16_t* ptr, char32_t c, bool strict);
    size_t encode(EncodingTag<char32_t>, char32_t* ptr, char32_t c, bool strict);
    size_t encode(EncodingTag<char>, char* ptr, char32_t c, bool strict);
    size_t encode(EncodingTag<wchar_t>, wchar_t* ptr, char32_t c, bool strict);

    // Picks the encoder from the pointer's element type.
    template<typename T>
    inline size_t encode(T* ptr, char32_t c, bool strict = true) {
        return encode(EncodingTag<T>(), ptr, c, strict);
    }

    // Appends `in`, transcoded to TTo's element type, to `out` and returns `out`.
    template<class TTo, class TFromElem, class TFromTraits = std::char_traits<TFromElem>>
    inline TTo& convert(TTo& out, const std::basic_string_view<TFromElem, TFromTraits>& in, bool strict = true) {
        // Capacity hint only; the loop below resizes as needed.
        out.reserve(out.size() + in.size() * 4 / sizeof(in[0]) / sizeof(out[0]));

        char32_t c{};
        // Stops at the end of input or as soon as decode() consumes nothing.
        for (size_t decLen = 0, decIdx = 0; decIdx < in.size() && (decLen = unicode::decode(c, &in[decIdx], in.size() - decIdx, strict)); decIdx += decLen) {
            // Measure first, grow the output, then encode into the new tail.
            const auto encIdx = out.size();
            const auto encLen = unicode::encode<typename TTo::value_type>(nullptr, c, strict);
            out.resize(encIdx + encLen);
            unicode::encode(&out[encIdx], c, strict);
        }

        return out;
    }

    // Same as above for an owning string.
    template<class TTo, class TFromElem, class TFromTraits = std::char_traits<TFromElem>, class TFromAlloc = std::allocator<TFromElem>>
    inline TTo& convert(TTo& out, const std::basic_string<TFromElem, TFromTraits, TFromAlloc>& in, bool strict = true) {
        return convert(out, std::basic_string_view<TFromElem, TFromTraits>(in), strict);
    }

    // Same as above for a raw pointer. When `length` is left at its default
    // (size_t max) the input is taken to be null-terminated.
    template<class TTo, class TFromElem, typename = std::enable_if_t<std::is_integral_v<TFromElem>>>
    inline TTo& convert(TTo& out, const TFromElem* in, size_t length = (std::numeric_limits<size_t>::max)(), bool strict = true) {
        if (length == (std::numeric_limits<size_t>::max)())
            length = std::char_traits<TFromElem>::length(in);

        return convert(out, std::basic_string_view<TFromElem>(in, length), strict);
    }

    // Value-returning variants: build a fresh TTo and return it. The local is
    // returned by name so it is moved or elided rather than copied.
    template<class TTo, class TFromElem, class TFromTraits = std::char_traits<TFromElem>>
    inline TTo convert(const std::basic_string_view<TFromElem, TFromTraits>& in, bool strict = true) {
        TTo out{};
        convert(out, in, strict);
        return out;
    }

    template<class TTo, class TFromElem, class TFromTraits = std::char_traits<TFromElem>, class TFromAlloc = std::allocator<TFromElem>>
    inline TTo convert(const std::basic_string<TFromElem, TFromTraits, TFromAlloc>& in, bool strict = true) {
        TTo out{};
        convert(out, std::basic_string_view<TFromElem, TFromTraits>(in), strict);
        return out;
    }

    template<class TTo, class TFromElem, typename = std::enable_if_t<std::is_integral_v<TFromElem>>>
    inline TTo convert(const TFromElem* in, size_t length = (std::numeric_limits<size_t>::max)(), bool strict = true) {
        if (length == (std::numeric_limits<size_t>::max)())
            length = std::char_traits<TFromElem>::length(in);

        TTo out{};
        convert(out, std::basic_string_view<TFromElem>(in, length), strict);
        return out;
    }
}

434
Dalamud.Boot/utils.cpp Normal file
View file

@ -0,0 +1,434 @@
#include "pch.h"
#include "utils.h"
// Adds the byte range [pFirst, pFirst + length) to the ranges that find()
// will scan. An empty range is ignored. Returns *this for chaining.
utils::signature_finder& utils::signature_finder::look_in(const void* pFirst, size_t length) {
    if (length == 0)
        return *this;

    const auto pcBegin = reinterpret_cast<const char*>(pFirst);
    m_ranges.emplace_back(std::span(pcBegin, length));
    return *this;
}
// Adds the byte range [pFirst, pLast) by converting it to pointer + length.
utils::signature_finder& utils::signature_finder::look_in(const void* pFirst, const void* pLast) {
    const auto pcBegin = reinterpret_cast<const char*>(pFirst);
    const auto pcEnd = reinterpret_cast<const char*>(pLast);
    return look_in(pFirst, pcEnd - pcBegin);
}
// Adds every section of the loaded image `hModule` whose name equals
// `sectionName` (compared over at most IMAGE_SIZEOF_SHORT_NAME characters) to
// the ranges that find() will scan. Returns *this for chaining.
utils::signature_finder& utils::signature_finder::look_in(HMODULE hModule, const char* sectionName) {
    const auto pcImage = reinterpret_cast<char*>(hModule);
    const auto pDosHeader = reinterpret_cast<const IMAGE_DOS_HEADER*>(&pcImage[0]);
    const auto pNtHeaders = reinterpret_cast<const IMAGE_NT_HEADERS32*>(&pcImage[pDosHeader->e_lfanew]);

    // Only the offset of the optional header is used to reach the section
    // table, so the 32-bit header layout works for either bitness.
    const auto pFirstSection = IMAGE_FIRST_SECTION(pNtHeaders);
    const size_t sectionCount = pNtHeaders->FileHeader.NumberOfSections;

    for (size_t i = 0; i < sectionCount; i++) {
        const auto& header = pFirstSection[i];
        if (strncmp(reinterpret_cast<const char*>(header.Name), sectionName, IMAGE_SIZEOF_SHORT_NAME) != 0)
            continue;

        look_in(pcImage + header.VirtualAddress, header.Misc.VirtualSize);
    }

    return *this;
}
namespace {
    // Appends the regex escape "\xHH" for one byte. The byte is widened as
    // unsigned so values of 0x80 and above yield their real hex digits instead
    // of the sign-extended garbage a plain `char >> 4` produces.
    void append_hex_escape(std::string& buf, char c) {
        static constexpr char HexDigits[] = "0123456789ABCDEF";

        const auto byte = static_cast<unsigned char>(c);
        buf.push_back('\\');
        buf.push_back('x');
        buf.push_back(HexDigits[byte >> 4]);
        buf.push_back(HexDigits[byte & 15]);
    }
}

// Registers a byte pattern with a separate mask string.
//
// @param pattern      Raw bytes to look for.
// @param mask         One character per pattern byte: `cExactMatch` means the
//                     byte must match, `cWildcard` means any byte. Positions
//                     holding any other character are dropped from the pattern.
// @throws std::runtime_error when pattern and mask differ in length.
//
// NOTE(review): wildcards are emitted as the regex '.', which under default
// ECMAScript rules does not match line terminators (e.g. bytes 0x0A / 0x0D)
// unless the regex is built in dot-all mode - confirm how m_patterns compiles it.
utils::signature_finder& utils::signature_finder::look_for(std::string_view pattern, std::string_view mask, char cExactMatch, char cWildcard) {
    if (pattern.size() != mask.size())
        throw std::runtime_error("Length of pattern does not match the length of mask.");

    std::string buf;
    buf.reserve(pattern.size() * 4);
    for (size_t i = 0; i < pattern.size(); i++) {
        if (mask[i] == cWildcard)
            buf.push_back('.');
        else if (mask[i] == cExactMatch)
            append_hex_escape(buf, pattern[i]);
    }
    m_patterns.emplace_back(buf);
    return *this;
}

// Registers a byte pattern in which every byte equal to `wildcardMask` stands
// for "any byte" and every other byte must match exactly.
utils::signature_finder& utils::signature_finder::look_for(std::string_view pattern, char wildcardMask) {
    std::string buf;
    buf.reserve(pattern.size() * 4);
    for (const auto c : pattern) {
        if (c == wildcardMask)
            buf.push_back('.');
        else
            append_hex_escape(buf, c);
    }
    m_patterns.emplace_back(buf);
    return *this;
}

// Registers a byte pattern in which every byte must match exactly.
utils::signature_finder& utils::signature_finder::look_for(std::string_view pattern) {
    std::string buf;
    buf.reserve(pattern.size() * 4);
    for (const auto c : pattern)
        append_hex_escape(buf, c);
    m_patterns.emplace_back(buf);
    return *this;
}
// Registers a pattern written as hex text, e.g. "48 8B ?? 0F".
//
// Hex digits (either case) are paired into bytes, "?" or "??" stands for one
// arbitrary byte, and every other character (spaces, separators) is ignored.
// A single hex digit followed by a wildcard or by the end of the pattern is
// treated as the low nibble of a byte whose high nibble is zero.
utils::signature_finder& utils::signature_finder::look_for_hex(std::string_view pattern) {
    std::string buf;
    // Two input digits become the four characters "\xHH".
    buf.reserve(pattern.size() * 2);

    // True while the next hex digit starts a new byte.
    bool bHighByte = true;

    // Turns a dangling "\xH" at the end of buf into "\x0H".
    const auto padDanglingNibble = [&buf, &bHighByte]() {
        if (!bHighByte) {
            buf.insert(buf.end() - 1, '0');
            bHighByte = true;
        }
    };

    for (size_t i = 0; i < pattern.size(); i++) {
        const char ch = pattern[i];

        if (ch == '?') {
            // "??" and "?" both denote exactly one wildcard byte.
            if (i + 1 < pattern.size() && pattern[i + 1] == '?')
                i++;
            padDanglingNibble();
            buf.push_back('.');
            continue;
        }

        const bool bHexDigit =
            ('0' <= ch && ch <= '9') ||
            ('a' <= ch && ch <= 'f') ||
            ('A' <= ch && ch <= 'F');
        if (!bHexDigit)
            continue;

        if (bHighByte) {
            buf.push_back('\\');
            buf.push_back('x');
        }
        buf.push_back(ch);
        bHighByte = !bHighByte;
    }

    // An odd number of trailing digits would otherwise leave a malformed escape.
    padDanglingNibble();

    m_patterns.emplace_back(buf);
    return *this;
}
// Runs every registered pattern over every registered range and collects the matches.
//
// For each successful search, one result is appended per entry of the match results
// (entry 0 first), tagged with the pattern index, the running match index for that
// pattern/range pair, and the entry index. The next search starts one byte after the
// beginning of the previous match.
//
// minCount                 Throws std::runtime_error if fewer results were collected.
// maxCount                 Upper bound on the number of results.
// bErrorOnMoreThanMaximum  true: throw std::runtime_error once the result count exceeds maxCount.
//                          false: return as soon as the result count equals maxCount.
std::vector<utils::signature_finder::result> utils::signature_finder::find(size_t minCount, size_t maxCount, bool bErrorOnMoreThanMaximum) const {
    std::vector<result> found;

    for (const auto& haystack : m_ranges) {
        for (size_t patternIndex = 0; patternIndex < m_patterns.size(); patternIndex++) {
            const auto& needle = m_patterns[patternIndex];
            srell::match_results<std::span<const char>::iterator> matches;

            auto searchFrom = haystack.begin();
            size_t matchIndex = 0;
            while (needle.search(searchFrom, haystack.end(), haystack.begin(), matches, srell::regex_constants::match_flag_type::match_default)) {
                for (size_t captureIndex = 0; captureIndex < matches.size(); captureIndex++) {
                    const auto& capture = matches[captureIndex];
                    found.emplace_back(
                        std::span(capture.first, capture.second),
                        patternIndex,
                        matchIndex,
                        captureIndex);

                    if (!bErrorOnMoreThanMaximum) {
                        if (found.size() == maxCount)
                            return found;
                    } else if (found.size() > maxCount) {
                        throw std::runtime_error(std::format("Found {} result(s), wanted at most {} results", found.size(), maxCount));
                    }
                }

                // Resume right after the start of this match so overlapping hits are seen too.
                searchFrom = matches[0].first + 1;
                matchIndex++;
            }
        }
    }

    if (found.size() < minCount)
        throw std::runtime_error(std::format("Found {} result(s), wanted at least {} results", found.size(), minCount));
    return found;
}
// Returns the first match found. find() throws std::runtime_error when there is none,
// and stops searching as soon as one result has been collected.
std::span<const char> utils::signature_finder::find_one() const {
    const auto results = find(1, 1, false);
    return results.front().Match;
}
// Changes the page protection of every memory region overlapping [pAddress, pAddress + length)
// to dwNewProtect, remembering each region together with its previous protection so that the
// destructor can put it back.
//
// Throws std::runtime_error if VirtualQuery or VirtualProtect fails; regions already changed
// are restored first. If that restoration itself fails, the process is terminated via __fastfail.
utils::memory_tenderizer::memory_tenderizer(const void* pAddress, size_t length, DWORD dwNewProtect) : m_data(reinterpret_cast<char*>(const_cast<void*>(pAddress)), length) {
    try {
        const auto pEnd = &m_data[0] + m_data.size();
        auto pCursor = &m_data[0];
        while (pCursor < pEnd) {
            MEMORY_BASIC_INFORMATION mbi{};
            if (!VirtualQuery(pCursor, &mbi, sizeof mbi)) {
                throw std::runtime_error(std::format(
                    "VirtualQuery(addr=0x{:X}, ..., cb={}) failed with Win32 code 0x{:X}",
                    reinterpret_cast<size_t>(pCursor),
                    sizeof mbi,
                    GetLastError()));
            }

            // The previous protection is written back into mbi.Protect, which is what the
            // restore paths later read.
            if (!VirtualProtect(mbi.BaseAddress, mbi.RegionSize, dwNewProtect, &mbi.Protect)) {
                throw std::runtime_error(std::format(
                    "(Change)VirtualProtect(addr=0x{:X}, size=0x{:X}, ..., ...) failed with Win32 code 0x{:X}",
                    reinterpret_cast<size_t>(mbi.BaseAddress),
                    mbi.RegionSize,
                    GetLastError()));
            }

            m_regions.emplace_back(mbi);

            // Continue with whatever follows the region just processed.
            pCursor = reinterpret_cast<char*>(mbi.BaseAddress) + mbi.RegionSize;
        }
    } catch (...) {
        // Undo in reverse order of application, then let the caller see the original error.
        for (auto it = m_regions.rbegin(); it != m_regions.rend(); ++it) {
            if (!VirtualProtect(it->BaseAddress, it->RegionSize, it->Protect, &it->Protect)) {
                // Could not restore; fast fail
                __fastfail(GetLastError());
            }
        }
        throw;
    }
}
// Restores the saved protection of every region touched by the constructor, newest first.
// A failure to restore terminates the process via __fastfail.
utils::memory_tenderizer::~memory_tenderizer() {
    for (auto it = m_regions.rbegin(); it != m_regions.rend(); ++it) {
        if (VirtualProtect(it->BaseAddress, it->RegionSize, it->Protect, &it->Protect))
            continue;

        // Could not restore; fast fail
        __fastfail(GetLastError());
    }
}
// Allocates len bytes from a process-wide heap created with HEAP_CREATE_ENABLE_EXECUTE.
//
// The heap is created on first use and is tracked through a weak_ptr; each returned
// allocation holds a shared_ptr to it, so the heap is destroyed (HeapDestroy) once the last
// allocation has been released, and re-created on the next call.
//
// Returns a shared_ptr that calls HeapFree on release.
// Throws std::runtime_error if the heap cannot be created or the allocation fails.
std::shared_ptr<void> utils::allocate_executable_heap(size_t len) {
    static std::mutex s_mtx;
    static std::weak_ptr<void> s_hHeap;

    std::shared_ptr<void> hHeap;
    {
        // s_hHeap is both read (lock()) and written (assignment) here. std::weak_ptr gives no
        // guarantee for a read that runs concurrently with a write to the same object, so
        // every access, including the first lookup, is made under the mutex.
        const auto lock = std::lock_guard(s_mtx);
        hHeap = s_hHeap.lock();
        if (!hHeap) {
            const auto hHeapRaw = HeapCreate(HEAP_CREATE_ENABLE_EXECUTE, 0, 0);
            if (!hHeapRaw)
                throw std::runtime_error("Failed to create heap.");
            hHeap = std::shared_ptr<void>(hHeapRaw, HeapDestroy);
            s_hHeap = hHeap;
        }
    }

    const auto pAllocRaw = HeapAlloc(hHeap.get(), 0, len);
    if (!pAllocRaw)
        throw std::runtime_error("Failed to allocate memory.");

    // The deleter owns a reference to the heap, keeping it alive as long as the block exists.
    return {
        pAllocRaw,
        [hHeap = std::move(hHeap)](void* pAddress) { HeapFree(hHeap.get(), 0, pAddress); },
    };
}
// Follows a jump thunk of the form
//     48 FF 25 <int32 disp>    JMP QWORD PTR [RIP + disp]
// and returns the pointer stored at the RIP-relative slot it jumps through.
// Throws std::runtime_error if pfn does not start with those three opcode bytes.
void* utils::resolve_unconditional_jump_target(void* pfn) {
    const auto pCode = reinterpret_cast<uint8_t*>(pfn);

    const auto bIsRipRelativeJump = pCode[0] == 0x48 && pCode[1] == 0xFF && pCode[2] == 0x25;
    if (!bIsRipRelativeJump)
        throw std::runtime_error("Unexpected thunk bytes.");

    // The displacement is relative to the end of the 7-byte instruction.
    const auto displacement = *reinterpret_cast<int*>(&pCode[3]);
    return *reinterpret_cast<void**>(&pCode[7 + displacement]);
}
template<typename TEntryType>
static bool find_imported_function_pointer_helper(const char* pcBaseAddress, const IMAGE_IMPORT_DESCRIPTOR& desc, const IMAGE_DATA_DIRECTORY& dir, std::string_view reqFunc, uint32_t hintOrOrdinal, void*& ppFunctionAddress) {
const auto importLookupsOversizedSpan = std::span(reinterpret_cast<const TEntryType*>(&pcBaseAddress[desc.OriginalFirstThunk]), (dir.Size - desc.OriginalFirstThunk) / sizeof TEntryType);
const auto importAddressesOversizedSpan = std::span(reinterpret_cast<const TEntryType*>(&pcBaseAddress[desc.FirstThunk]), (dir.Size - desc.FirstThunk) / sizeof TEntryType);
for (size_t i = 0, i_ = (std::min)(importLookupsOversizedSpan.size(), importAddressesOversizedSpan.size()); i < i_ && importLookupsOversizedSpan[i] && importAddressesOversizedSpan[i]; i++) {
const auto& importLookup = importLookupsOversizedSpan[i];
const auto& importAddress = importAddressesOversizedSpan[i];
const auto& importByName = *reinterpret_cast<const IMAGE_IMPORT_BY_NAME*>(&pcBaseAddress[importLookup]);
// Is this entry importing by ordinals? A lot of socket functions are the case.
if (IMAGE_SNAP_BY_ORDINAL32(importLookup)) {
// Is this the entry?
if (!hintOrOrdinal || IMAGE_ORDINAL32(importLookup) != hintOrOrdinal)
continue;
// Is this entry not importing by ordinals, and are we using hint exclusively to find the entry?
} else if (reqFunc.empty()) {
// Is this the entry?
if (importByName.Hint != hintOrOrdinal)
continue;
} else {
// Name must be contained in this directory.
auto currFunc = std::string_view(importByName.Name, (std::min<size_t>)(&pcBaseAddress[dir.Size] - importByName.Name, reqFunc.size()));
currFunc = currFunc.substr(0, strnlen(currFunc.data(), currFunc.size()));
// Is this the entry? (Case sensitive)
if (reqFunc != currFunc)
continue;
}
// Found the entry; return the address of the pointer to the target function.
ppFunctionAddress = const_cast<void*>(reinterpret_cast<const void*>(&importAddress));
return true;
}
return false;
}
// Locates the import address table slot through which hModule calls a function imported
// from pcszDllName.
//
// hModule            Module whose import table is inspected; its handle is used as the image base.
// pcszDllName        Name of the imported DLL (compared case-insensitively). Must not be null.
// pcszFunctionName   Name of the imported function (compared case-sensitively), or null to
//                    match by hint/ordinal only.
// hintOrOrdinal      Hint or ordinal forwarded to the per-descriptor search.
// ppFunctionAddress  Set to nullptr on entry; receives the address of the slot on success.
//
// Returns true if the slot was found, false otherwise.
bool utils::find_imported_function_pointer(HMODULE hModule, const char* pcszDllName, const char* pcszFunctionName, uint32_t hintOrOrdinal, void*& ppFunctionAddress) {
const auto requestedDllName = std::string_view(pcszDllName, strlen(pcszDllName));
const auto requestedFunctionName = pcszFunctionName ? std::string_view(pcszFunctionName, strlen(pcszFunctionName)) : std::string_view();
ppFunctionAddress = nullptr;
const auto pcBaseAddress = reinterpret_cast<char*>(hModule);
const auto& dosHeader = *reinterpret_cast<const IMAGE_DOS_HEADER*>(&pcBaseAddress[0]);
// Both header views alias the same bytes; OptionalHeader.Magic decides which one is valid.
const auto& ntHeader32 = *reinterpret_cast<const IMAGE_NT_HEADERS32*>(&pcBaseAddress[dosHeader.e_lfanew]);
const auto& ntHeader64 = *reinterpret_cast<const IMAGE_NT_HEADERS64*>(&pcBaseAddress[dosHeader.e_lfanew]);
const auto bPE32 = ntHeader32.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC;
const auto pDirectory = bPE32
? &ntHeader32.OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
: &ntHeader64.OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
// There should always be an import directory, but the world may break down anytime nowadays.
// NOTE(review): pDirectory is the address of an array element, so this check looks like it can
// never fire; the intent may have been to test VirtualAddress/Size instead - confirm.
if (!pDirectory)
return false;
// This span might be too long in terms of meaningful data; it only serves to prevent accessing memory outside its boundaries.
const auto importDescriptorsOversizedSpan = std::span(reinterpret_cast<const IMAGE_IMPORT_DESCRIPTOR*>(&pcBaseAddress[pDirectory->VirtualAddress]), pDirectory->Size / sizeof IMAGE_IMPORT_DESCRIPTOR);
for (const auto& importDescriptor : importDescriptorsOversizedSpan) {
// Having all zero values signals the end of the table. We didn't find anything.
if (!importDescriptor.OriginalFirstThunk && !importDescriptor.TimeDateStamp && !importDescriptor.ForwarderChain && !importDescriptor.FirstThunk)
return false;
// Skip invalid entries, just in case.
if (!importDescriptor.Name || !importDescriptor.OriginalFirstThunk)
continue;
// Name must be contained in this directory.
if (importDescriptor.Name < pDirectory->VirtualAddress)
continue;
// At most requestedDllName.size() characters are viewed here; the full, null-terminated
// name is still what _strcmpi compares below.
auto currentDllName = std::string_view(&pcBaseAddress[importDescriptor.Name], (std::min<size_t>)(pDirectory->Size - importDescriptor.Name, requestedDllName.size()));
currentDllName = currentDllName.substr(0, strnlen(currentDllName.data(), currentDllName.size()));
// Is this entry about the DLL that we're looking for? (Case insensitive)
if (requestedDllName.size() != currentDllName.size() || _strcmpi(requestedDllName.data(), currentDllName.data()))
continue;
// Thunk entries are 32 bits wide in PE32 images and 64 bits wide otherwise.
if (bPE32 && find_imported_function_pointer_helper<uint32_t>(pcBaseAddress, importDescriptor, *pDirectory, requestedFunctionName, hintOrOrdinal, ppFunctionAddress))
return true;
else if (!bPE32 && find_imported_function_pointer_helper<uint64_t>(pcBaseAddress, importDescriptor, *pDirectory, requestedFunctionName, hintOrOrdinal, ppFunctionAddress))
return true;
}
// Found nothing.
return false;
}
// Throwing variant of find_imported_function_pointer.
//
// hModule           Module whose import table is searched; when null, the main executable
//                   of the process is used.
// pcszDllName       Name of the imported DLL. Must not be null.
// pcszFunctionName  Name of the imported function, or null to match by hint/ordinal only.
// hintOrOrdinal     Hint or ordinal of the imported function.
//
// Returns the address of the import address table slot.
// Throws std::runtime_error naming the requested import if it cannot be found.
void* utils::get_imported_function_pointer(HMODULE hModule, const char* pcszDllName, const char* pcszFunctionName, uint32_t hintOrOrdinal) {
    // Honor the caller-supplied module; previously it was ignored in favor of the main module.
    const auto hTargetModule = hModule ? hModule : GetModuleHandleW(nullptr);

    if (void* ppImportTableItem{}; find_imported_function_pointer(hTargetModule, pcszDllName, pcszFunctionName, hintOrOrdinal, ppImportTableItem))
        return ppImportTableItem;

    throw std::runtime_error(std::format(
        "Failed to find import for {}!{} (hint/ordinal {}).",
        pcszDllName,
        pcszFunctionName ? pcszFunctionName : "<by hint/ordinal>",
        hintOrOrdinal));
}
// Builds a patched copy of the machine code of pfnFunction in executable memory.
//
// The code is decoded instruction by instruction up to and including the first jump or
// return. Along the way, an instruction with opcode 0xB8 carrying a 64-bit immediate equal
// to placeholderValue has that immediate replaced with pThis.
//
// pfnFunction       Start of the function whose code is copied.
// pThis             Value written over the placeholder immediate.
// placeholderValue  64-bit immediate to look for.
//
// Returns the executable copy.
// Throws std::runtime_error if decoding fails, an instruction with opcode 0xCC is encountered,
// or the placeholder is never seen.
std::shared_ptr<void> utils::create_thunk(void* pfnFunction, void* pThis, uint64_t placeholderValue) {
const auto pcBaseFn = reinterpret_cast<const uint8_t*>(pfnFunction);
// Start with the first 256 bytes of the function; more is pulled in below when needed.
auto sourceCode = std::vector<uint8_t>(pcBaseFn, pcBaseFn + 256);
size_t i = 0;
auto placeholderFound = false;
for (nmd_x86_instruction instruction{}; ; i += instruction.length) {
// Ran out of copied bytes (or the instruction is cut off at the end of the buffer):
// append another 512 bytes from the source and retry once.
if (i == sourceCode.size() || !nmd_x86_decode(&sourceCode[i], sourceCode.size() - i, &instruction, NMD_X86_MODE_64, NMD_X86_DECODER_FLAGS_ALL)) {
sourceCode.insert(sourceCode.end(), &pcBaseFn[sourceCode.size()], &pcBaseFn[sourceCode.size() + 512]);
if (!nmd_x86_decode(&sourceCode[i], sourceCode.size() - i, &instruction, NMD_X86_MODE_64, NMD_X86_DECODER_FLAGS_ALL))
throw std::runtime_error("Failed to find detour function");
}
// 0xCC is INT3; presumably padding past the end of the function or a breakpoint.
if (instruction.opcode == 0xCC)
throw std::runtime_error("Failed to find detour function");
// msvc debugger related
// Calls with an immediate operand are overwritten with NOPs (0x90) in the copy.
if ((instruction.group & NMD_GROUP_CALL) && (instruction.imm_mask & NMD_X86_IMM_ANY))
std::fill_n(&sourceCode[i], instruction.length, 0x90);
// The first jump or return ends the copied code; drop everything after it.
if ((instruction.group & NMD_GROUP_JUMP) || (instruction.group & NMD_GROUP_RET)) {
sourceCode.resize(i + instruction.length);
break;
}
if (instruction.opcode == 0xB8 // mov <register>, <thunk placeholder 64bit value>
&& (instruction.imm_mask & NMD_X86_IMM64)
&& instruction.immediate == placeholderValue) {
// The 64-bit immediate occupies the last 8 bytes of the instruction.
*reinterpret_cast<void**>(&sourceCode[i + instruction.length - 8]) = pThis;
placeholderFound = true;
}
}
if (!placeholderFound)
throw std::runtime_error("Failed to find detour function");
return allocate_executable_heap(std::span(sourceCode));
}
// Reads an environment variable as a wide string.
// Returns an empty string when GetEnvironmentVariableW reports 0 characters (e.g. the
// variable is not set).
template<>
std::wstring utils::get_env(const wchar_t* pcwzName) {
    // First call with no buffer: the return value is the buffer size to allocate.
    const auto cchRequired = GetEnvironmentVariableW(pcwzName, nullptr, 0);
    std::wstring value(cchRequired + 1, L'\0');

    // Second call fills the buffer and returns the number of characters written.
    const auto cchWritten = GetEnvironmentVariableW(pcwzName, &value[0], static_cast<DWORD>(value.size()));
    value.resize(cchWritten);
    return value;
}
// Reads an environment variable as a narrow string: fetches it as a wide string and converts
// the result with unicode::convert<std::string>.
template<>
std::string utils::get_env(const wchar_t* pcwzName) {
return unicode::convert<std::string>(get_env<std::wstring>(pcwzName));
}
// Reads an environment variable as a boolean flag.
// Returns true when the value, with surrounding whitespace removed and compared without
// regard to case, is one of "1", "true", "t", "yes" or "y"; false otherwise.
template<>
bool utils::get_env(const wchar_t* pcwzName) {
    auto raw = get_env<std::wstring>(pcwzName);

    // The view aliases raw's buffer, so the in-place lowercasing below is visible through it.
    const auto value = trim(std::wstring_view(raw));
    for (auto& ch : raw) {
        if (ch < 255)
            ch = std::tolower(ch);
    }

    for (const wchar_t* truthy : { L"1", L"true", L"t", L"yes", L"y" }) {
        if (value == truthy)
            return true;
    }
    return false;
}
bool utils::is_running_on_linux() {
if (get_env<bool>(L"XL_WINEONLINUX"))
return true;
HMODULE hntdll = GetModuleHandleW(L"ntdll.dll");
if (!hntdll)
return true;
if (GetProcAddress(hntdll, "wine_get_version"))
return true;
if (GetProcAddress(hntdll, "wine_get_host_version"))
return true;
return false;
}
// Returns the file path of a loaded module.
// Starts with a MAX_PATH-sized buffer and doubles it until the path fits.
// Throws std::runtime_error if GetModuleFileNameW fails.
std::filesystem::path utils::get_module_path(HMODULE hModule) {
    for (std::wstring buffer(MAX_PATH, L'\0');; buffer.resize(buffer.size() * 2)) {
        const auto cch = GetModuleFileNameW(hModule, &buffer[0], static_cast<int>(buffer.size()));
        if (!cch)
            throw std::runtime_error(std::format("GetModuleFileName failure: 0x{:X}", GetLastError()));

        // A result equal to the buffer size means the path was truncated; grow and retry.
        if (cch < buffer.size()) {
            buffer.resize(cch);
            return buffer;
        }
    }
}

155
Dalamud.Boot/utils.h Normal file
View file

@ -0,0 +1,155 @@
#pragma once
#include <filesystem>
#include <functional>
#include <span>
#include <string>
#include <memory>
#include <vector>
#include "unicode.h"
namespace utils {
// Searches memory ranges for byte signatures.
// Usage is a chain: look_in(...) to add ranges, look_for*(...) to add patterns, then
// find()/find_one(). Patterns are stored as compiled regular expressions.
class signature_finder {
// Memory ranges to search.
std::vector<std::span<const char>> m_ranges;
// Compiled patterns, in registration order.
std::vector<srell::regex> m_patterns;
public:
// Range registration: by start + length, by [first, last) pointers, or by the named
// section of a loaded module.
signature_finder& look_in(const void* pFirst, size_t length);
signature_finder& look_in(const void* pFirst, const void* pLast);
signature_finder& look_in(HMODULE hModule, const char* sectionName);
// Pattern given as raw bytes plus a mask string of the same length.
signature_finder& look_for(std::string_view pattern, std::string_view mask, char cExactMatch = 'x', char cWildcard = '.');
// Pattern given as raw bytes in which wildcardMask stands for "any byte".
signature_finder& look_for(std::string_view pattern, char wildcardMask);
// Pattern given as raw bytes, all matched exactly.
signature_finder& look_for(std::string_view pattern);
// Pattern given as hex text such as "41 81 fe ?? ?? 00"; '?' / '??' is a wildcard byte.
signature_finder& look_for_hex(std::string_view pattern);
// NOTE(review): `len` cannot be deduced from this parameter (an array parameter decays to a
// pointer) and the body has no return statement; this overload looks like a leftover -
// confirm whether it can be removed.
template<size_t len>
signature_finder& look_for(char pattern[len]) {
static_assert(len == 5);
}
// One hit reported by find().
struct result {
// Matched bytes inside the searched range.
std::span<const char> Match;
// Index of the pattern (registration order) that produced the hit.
size_t PatternIndex;
// Running index of the match for that pattern within the range being searched.
size_t MatchIndex;
// Index into the regex match results; 0 is the first entry.
size_t CaptureIndex;
};
// Collects hits; throws std::runtime_error when fewer than minCount are found, and either
// throws (bErrorOnMoreThanMaximum) or stops early when maxCount is reached.
std::vector<result> find(size_t minCount, size_t maxCount, bool bErrorOnMoreThanMaximum) const;
// Returns the first hit; throws std::runtime_error when there is none.
std::span<const char> find_one() const;
};
// Scoped page-protection change: the constructor applies dwNewProtect to every memory region
// overlapping the given bytes, and the destructor restores the protections that were in
// effect before.
class memory_tenderizer {
    // The bytes whose protection was requested to change.
    std::span<char> m_data;
    // Regions that were changed; each entry's Protect field holds the protection to restore.
    std::vector<MEMORY_BASIC_INFORMATION> m_regions;

public:
    // pAddress/length describe the target bytes; length is in bytes.
    memory_tenderizer(const void* pAddress, size_t length, DWORD dwNewProtect);

    // Covers the storage of a single trivial, standard-layout object.
    template<typename T, typename = std::enable_if_t<std::is_trivial_v<T>&& std::is_standard_layout_v<T>>>
    memory_tenderizer(const T& object, DWORD dwNewProtect) : memory_tenderizer(&object, sizeof(T), dwNewProtect) {}

    // Covers every element of the span. The delegated constructor expects a byte length, so
    // size_bytes() is passed rather than the element count; data() is used instead of &s[0]
    // so that an empty span is not indexed here.
    template<typename T>
    memory_tenderizer(std::span<const T> s, DWORD dwNewProtect) : memory_tenderizer(s.data(), s.size_bytes(), dwNewProtect) {}

    ~memory_tenderizer();
};
// Follows a `JMP QWORD PTR [RIP + disp32]` thunk at pfn and returns the pointer it jumps
// through. Throws std::runtime_error if pfn does not start with that instruction.
void* resolve_unconditional_jump_target(void* pfn);
// Looks up the import table slot for pcszDllName!pcszFunctionName (or hint/ordinal when the
// name is null) in hModule. Returns false and leaves ppFunctionAddress null when not found.
bool find_imported_function_pointer(HMODULE hModule, const char* pcszDllName, const char* pcszFunctionName, uint32_t hintOrOrdinal, void*& ppFunctionAddress);
// Same lookup, but returns the slot address and throws std::runtime_error when not found.
void* get_imported_function_pointer(HMODULE hModule, const char* pcszDllName, const char* pcszFunctionName, uint32_t hintOrOrdinal);
// Typed convenience wrapper: the slot is returned as a pointer to a TFn function pointer.
template<typename TFn>
TFn** get_imported_function_pointer(HMODULE hModule, const char* pcszDllName, const char* pcszFunctionName, uint32_t hintOrOrdinal) {
return reinterpret_cast<TFn**>(get_imported_function_pointer(hModule, pcszDllName, pcszFunctionName, hintOrOrdinal));
}
// Allocates len bytes from a heap created with HEAP_CREATE_ENABLE_EXECUTE. The memory is
// freed when the returned pointer is released.
std::shared_ptr<void> allocate_executable_heap(size_t len);
// Allocates executable memory of data.size_bytes() bytes and copies data into it.
template<typename T>
std::shared_ptr<void> allocate_executable_heap(std::span<T> data) {
auto res = allocate_executable_heap(data.size_bytes());
memcpy(res.get(), data.data(), data.size_bytes());
return res;
}
// Copies the code of pfnFunction into executable memory, replacing the 64-bit immediate
// equal to placeholderValue with pThis. Throws std::runtime_error on failure.
std::shared_ptr<void> create_thunk(void* pfnFunction, void* pThis, uint64_t placeholderValue);
// Primary template; only the function-type specialization below is defined.
template<typename>
class thunk;
// Wraps a std::function in a plain function pointer of type TReturn(*)(TArgs...).
//
// A copy of detour_static's code is generated for each instance by create_thunk, with the
// Placeholder constant in that code replaced by this instance's address.
// NOTE(review): the generated code embeds `this`, so the object presumably must not be moved
// or copied after construction - confirm.
template<typename TReturn, typename ... TArgs>
class thunk<TReturn(TArgs...)> {
using TFn = TReturn(TArgs...);
// Marker value that create_thunk searches for and overwrites with the instance address.
static constexpr uint64_t Placeholder = 0xCC90CC90CC90CC90ULL;
// Executable copy of detour_static bound to this instance.
const std::shared_ptr<void> m_pThunk;
// The callable invoked when the thunk is called.
std::function<TFn> m_fnTarget;
public:
// Generates the thunk code and stores the initial target.
thunk(std::function<TFn> target)
: m_pThunk(utils::create_thunk(&detour_static, this, Placeholder))
, m_fnTarget(std::move(target)) {
}
// Replaces the callable that the thunk forwards to.
void set_target(std::function<TFn> detour) {
m_fnTarget = std::move(detour);
}
// Returns the generated code as a callable function pointer.
TFn* get_thunk() const {
return reinterpret_cast<TFn*>(m_pThunk.get());
}
private:
// mark it as virtual to prevent compiler from inlining
virtual TReturn detour(TArgs... args) {
return m_fnTarget(std::forward<TArgs>(args)...);
}
// Template for the generated code: loads the (patched) instance pointer and forwards the
// call to detour(). Never meant to be called directly with the unpatched Placeholder.
static TReturn detour_static(TArgs... args) {
const volatile auto pThis = reinterpret_cast<thunk<TFn>*>(Placeholder);
return pThis->detour(args...);
}
};
// Returns view with whitespace removed from its ends.
//
// view   Text to trim; the result refers to the same underlying characters.
// left   Remove leading whitespace.
// right  Remove trailing whitespace.
//
// Only characters whose unsigned code is below 255 and for which std::isspace is true are
// treated as whitespace.
template<class TElem, class TTraits>
std::basic_string_view<TElem, TTraits> trim(std::basic_string_view<TElem, TTraits> view, bool left = true, bool right = true) {
    // std::isspace requires a value representable as unsigned char (or EOF). Converting to
    // the unsigned counterpart first keeps negative `char` values (bytes >= 0x80) from being
    // passed in, which is undefined behaviour and asserts in debug CRTs.
    const auto is_space = [](TElem ch) -> bool {
        const auto code = static_cast<std::make_unsigned_t<TElem>>(ch);
        return code < 255 && std::isspace(static_cast<int>(code));
    };

    if (left) {
        while (!view.empty() && is_space(view.front()))
            view.remove_prefix(1);
    }
    if (right) {
        while (!view.empty() && is_space(view.back()))
            view.remove_suffix(1);
    }
    return view;
}
// Reads an environment variable and converts it to T.
// Only the explicit specializations declared below are usable; instantiating the primary
// template for any other T trips the static_assert.
template<typename T>
T get_env(const wchar_t* pcwzName) {
static_assert(false);
}
// Value as a wide string; empty when the variable yields no characters.
template<>
std::wstring get_env(const wchar_t* pcwzName);
// Value converted to a narrow string via unicode::convert.
template<>
std::string get_env(const wchar_t* pcwzName);
// True when the trimmed, lowercased value is "1", "true", "t", "yes" or "y".
template<>
bool get_env(const wchar_t* pcwzName);
// Narrow-name overload: converts the name to a wide string and forwards to the overloads above.
template<typename T>
T get_env(const char* pcszName) {
return get_env<T>(unicode::convert<std::wstring>(pcszName).c_str());
}
// True when the process appears to run under Wine (environment flag or Wine exports in ntdll).
bool is_running_on_linux();
// File path of the given loaded module; throws std::runtime_error on failure.
std::filesystem::path get_module_path(HMODULE hModule);
}

132
Dalamud.Boot/xivfixes.cpp Normal file
View file

@ -0,0 +1,132 @@
#include "pch.h"
#include "xivfixes.h"
#include "hooks.h"
#include "logging.h"
#include "utils.h"
// Signature of the game's function that returns its input device manager instance.
using TFnGetInputDeviceManager = void* ();
// Locates the game's "get input device manager" function by signature-scanning the .text
// section of the game module, and caches the result for subsequent calls.
// Throws (via signature_finder) when the signature is not found.
static TFnGetInputDeviceManager* GetGetInputDeviceManager(HWND hwnd) {
static TFnGetInputDeviceManager* pCached = nullptr;
if (pCached)
return pCached;
// NOTE(review): szClassName and wcx are filled in but not used by anything below.
char szClassName[256];
GetClassNameA(hwnd, szClassName, static_cast<int>(sizeof szClassName));
WNDCLASSEXA wcx{};
GetClassInfoExA(g_hGameInstance, szClassName, &wcx);
const auto match = utils::signature_finder()
.look_in(g_hGameInstance, ".text")
.look_for_hex("41 81 fe 19 02 00 00 0f 87 ?? ?? 00 00 0f 84 ?? ?? 00 00")
.find_one();
// The match ends with the 32-bit displacement of the final jump (0f 84); adding it to the
// address right after the match gives the jump target.
auto ptr = match.data() + match.size() + *reinterpret_cast<const int*>(match.data() + match.size() - 4);
// Step over the instructions at the jump target until just past the CALL.
ptr += 4; // CMP RBX, 0x7
ptr += 2; // JNZ <giveup>
ptr += 7; // MOV RCX, <Framework::Instance>
ptr += 3; // TEST RCX, RCX
ptr += 2; // JZ <giveup>
ptr += 5; // CALL <GetInputDeviceManagerInstance()>
// The CALL's 32-bit displacement is relative to the address after the instruction.
ptr += *reinterpret_cast<const int*>(ptr - 4);
return pCached = reinterpret_cast<TFnGetInputDeviceManager*>(ptr);
}
// Swallows WM_DEVICECHANGE / DBT_DEVNODES_CHANGED messages that reach the game window while
// the game's input device manager is still null.
//
// bApply == true:  hooks the imported CreateWindowExA; once the game window (class
//                  "FFXIVGAME", created with the game's instance handle) has been created, that
//                  hook is removed and the window procedure of the new window is hooked.
// bApply == false: removes both hooks.
void xivfixes::prevent_devicechange_crashes(bool bApply) {
static const char* LogTag = "[xivfixes:prevent_devicechange_crashes]";
static std::optional<hooks::import_hook<decltype(CreateWindowExA)>> s_hookCreateWindowExA;
static std::optional<hooks::wndproc_hook> s_hookWndProc;
if (bApply) {
s_hookCreateWindowExA.emplace("user32.dll", "CreateWindowExA", 0);
s_hookCreateWindowExA->set_detour([](DWORD dwExStyle, LPCSTR lpClassName, LPCSTR lpWindowName, DWORD dwStyle, int X, int Y, int nWidth, int nHeight, HWND hWndParent, HMENU hMenu, HINSTANCE hInstance, LPVOID lpParam)->HWND {
const auto hWnd = s_hookCreateWindowExA->call_original(dwExStyle, lpClassName, lpWindowName, dwStyle, X, Y, nWidth, nHeight, hWndParent, hMenu, hInstance, lpParam);
// Only the successfully created main game window is of interest; pass everything else through.
// NOTE(review): lpClassName is treated as a string here; confirm the game never passes a class atom.
if (!hWnd
|| hInstance != g_hGameInstance
|| 0 != strcmp(lpClassName, "FFXIVGAME"))
return hWnd;
logging::print<logging::I>("{} CreateWindow(0x{:08X}, \"{}\", \"{}\", 0x{:08X}, {}, {}, {}, {}, 0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}) called; unhooking CreateWindowExA and hooking WndProc.",
LogTag, dwExStyle, lpClassName, lpWindowName, dwStyle, X, Y, nWidth, nHeight, reinterpret_cast<size_t>(hWndParent), reinterpret_cast<size_t>(hMenu), reinterpret_cast<size_t>(hInstance), reinterpret_cast<size_t>(lpParam));
// The CreateWindowExA hook has served its purpose; this runs from inside its own detour.
s_hookCreateWindowExA.reset();
s_hookWndProc.emplace(hWnd);
s_hookWndProc->set_detour([](HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam) -> LRESULT {
if (uMsg == WM_DEVICECHANGE && wParam == DBT_DEVNODES_CHANGED) {
// A null input device manager means the game is not ready for this message; drop it.
if (!GetGetInputDeviceManager(hWnd)()) {
logging::print<logging::I>("{} WndProc(0x{:X}, WM_DEVICECHANGE, DBT_DEVNODES_CHANGED, {}) called but the game does not have InputDeviceManager initialized; doing nothing.", LogTag, reinterpret_cast<size_t>(hWnd), lParam);
return 0;
}
}
return s_hookWndProc->call_original(hWnd, uMsg, wParam, lParam);
});
return hWnd;
});
} else {
logging::print<logging::I>("{} Disable", LogTag);
s_hookCreateWindowExA.reset();
// This will effectively revert any other WndProc alterations, including Dalamud.
s_hookWndProc.reset();
}
}
// Makes the game's own OpenProcess calls that target the current process with
// PROCESS_VM_WRITE fail with ERROR_ACCESS_DENIED.
//
// bApply == true:  hooks the kernel32.dll!OpenProcess import.
// bApply == false: removes the hook.
void xivfixes::disable_game_openprocess_access_check(bool bApply) {
    static const char* LogTag = "[xivfixes:disable_game_openprocess_access_check]";
    static std::optional<hooks::import_hook<decltype(OpenProcess)>> hook;

    if (!bApply) {
        logging::print<logging::I>("{} Disable", LogTag);
        hook.reset();
        return;
    }

    hook.emplace("kernel32.dll", "OpenProcess", 0);
    hook->set_detour([](DWORD dwDesiredAccess, BOOL bInheritHandle, DWORD dwProcessId)->HANDLE {
        if (dwProcessId == GetCurrentProcessId()) {
            // The access mask is logged as hex with a proper "0x" prefix.
            logging::print<logging::I>("{} OpenProcess(0x{:08X}, {}, {}) was invoked by thread {}.", LogTag, dwDesiredAccess, bInheritHandle, dwProcessId, GetCurrentThreadId());

            // Prevent game from feeling unsafe that it restarts
            if (dwDesiredAccess & PROCESS_VM_WRITE) {
                logging::print<logging::I>("{} Returning failure with last error code set to ERROR_ACCESS_DENIED(5).", LogTag);
                SetLastError(ERROR_ACCESS_DENIED);
                return {};
            }
        }

        // Any other request is passed through untouched.
        return hook->call_original(dwDesiredAccess, bInheritHandle, dwProcessId);
    });
}
// Redirects OpenProcess calls that target the current process to DuplicateHandle on the
// current-process pseudo handle.
//
// bApply == true:  hooks OpenProcess through an export hook.
// bApply == false: removes the hook.
void xivfixes::redirect_openprocess(bool bApply) {
    static const char* LogTag = "[xivfixes:redirect_openprocess]";
    static std::optional<hooks::export_hook<decltype(OpenProcess)>> hook;

    if (!bApply) {
        logging::print<logging::I>("{} Disable", LogTag);
        hook.reset();
        return;
    }

    logging::print<logging::I>("{} Enable", LogTag);
    hook.emplace(::OpenProcess);
    hook->set_detour([](DWORD dwDesiredAccess, BOOL bInheritHandle, DWORD dwProcessId)->HANDLE {
        if (dwProcessId == GetCurrentProcessId()) {
            // The access mask is logged as hex with a proper "0x" prefix.
            logging::print<logging::I>("{} OpenProcess(0x{:08X}, {}, {}) was invoked by thread {}. Redirecting to DuplicateHandle.", LogTag, dwDesiredAccess, bInheritHandle, dwProcessId, GetCurrentThreadId());

            // On failure a null handle is returned, as OpenProcess itself would do.
            if (HANDLE res; DuplicateHandle(GetCurrentProcess(), GetCurrentProcess(), GetCurrentProcess(), &res, dwDesiredAccess, bInheritHandle, 0))
                return res;
            return {};
        }

        // Requests for other processes go to the real OpenProcess.
        return hook->call_original(dwDesiredAccess, bInheritHandle, dwProcessId);
    });
}

13
Dalamud.Boot/xivfixes.h Normal file
View file

@ -0,0 +1,13 @@
#pragma once
// Runtime fixes applied to the game process. Each function installs its hook(s) when called
// with bApply == true and removes them when called with bApply == false.
namespace xivfixes {
// Drops WM_DEVICECHANGE/DBT_DEVNODES_CHANGED while the game's input device manager is null.
void prevent_devicechange_crashes(bool bApply);
// Fails the game's OpenProcess(PROCESS_VM_WRITE) calls on its own process with ERROR_ACCESS_DENIED.
void disable_game_openprocess_access_check(bool bApply);
// Serves OpenProcess calls on the current process through DuplicateHandle.
void redirect_openprocess(bool bApply);
// Applies or removes every fix above, in declaration order.
inline void apply_all(bool bApply) {
prevent_devicechange_crashes(bApply);
disable_game_openprocess_access_check(bApply);
redirect_openprocess(bApply);
}
}

View file

@ -88,16 +88,20 @@
<ResourceCompile Include="resources.rc" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Dalamud.Boot\logging.cpp" />
<ClCompile Include="..\Dalamud.Boot\unicode.cpp" />
<ClCompile Include="..\lib\CoreCLR\boot.cpp" />
<ClCompile Include="..\lib\CoreCLR\CoreCLR.cpp" />
<ClCompile Include="main.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Dalamud.Boot\logging.h" />
<ClInclude Include="..\Dalamud.Boot\unicode.h" />
<ClInclude Include="..\lib\CoreCLR\CoreCLR.h" />
<ClInclude Include="..\lib\CoreCLR\core\coreclr_delegates.h" />
<ClInclude Include="..\lib\CoreCLR\core\hostfxr.h" />
<ClInclude Include="..\lib\CoreCLR\nethost\nethost.h" />
<ClInclude Include="..\lib\CoreCLR\pch.h" />
<ClInclude Include="pch.h" />
</ItemGroup>
<Target Name="RemoveExtraFiles" AfterTargets="PostBuildEvent">
<Delete Files="$(OutDir)$(TargetName).lib" />

View file

@ -34,11 +34,14 @@
<ClCompile Include="..\lib\CoreCLR\CoreCLR.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Dalamud.Boot\logging.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Dalamud.Boot\unicode.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\lib\CoreCLR\pch.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\lib\CoreCLR\CoreCLR.h">
<Filter>Header Files</Filter>
</ClInclude>
@ -51,5 +54,14 @@
<ClInclude Include="..\lib\CoreCLR\core\coreclr_delegates.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Dalamud.Boot\logging.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="pch.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Dalamud.Boot\unicode.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

View file

@ -3,12 +3,14 @@
#include <filesystem>
#include <Windows.h>
#include <shellapi.h>
#include "..\Dalamud.Boot\logging.h"
#include "..\lib\CoreCLR\CoreCLR.h"
#include "..\lib\CoreCLR\boot.h"
int wmain(int argc, wchar_t** argv)
{
printf("Dalamud.Injector, (c) 2021 XIVLauncher Contributors\nBuilt at: %s@%s\n\n", __DATE__, __TIME__);
logging::print<logging::I>("Dalamud Injector, (c) 2021 XIVLauncher Contributors");
logging::print<logging::I>("Built at : " __DATE__ "@" __TIME__);
wchar_t _module_path[MAX_PATH];
GetModuleFileNameW(NULL, _module_path, sizeof _module_path / 2);
@ -35,9 +37,9 @@ int wmain(int argc, wchar_t** argv)
typedef void (CORECLR_DELEGATE_CALLTYPE* custom_component_entry_point_fn)(int, wchar_t**);
custom_component_entry_point_fn entrypoint_fn = reinterpret_cast<custom_component_entry_point_fn>(entrypoint_vfn);
printf("Running Dalamud Injector...\n");
logging::print<logging::I>("Running Dalamud Injector...");
entrypoint_fn(argc, argv);
printf("Done!\n");
logging::print<logging::I>("Done!");
return 0;
}

View file

@ -0,0 +1 @@
#pragma once

View file

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.IO;
using System.Linq;
@ -324,7 +325,7 @@ namespace Dalamud.Injector
Console.WriteLine("{0} help [command]", exeName);
if (particularCommand is null or "inject")
Console.WriteLine("{0} inject [-h/--help] [-a/--all] [--warn] [pid1] [pid2] [pid3] ...", exeName);
Console.WriteLine("{0} inject [-h/--help] [-a/--all] [--warn] [--fix-acl] [--se-debug-privilege] [pid1] [pid2] [pid3] ...", exeName);
if (particularCommand is null or "launch")
{
@ -332,7 +333,7 @@ namespace Dalamud.Injector
Console.WriteLine("{0} [-g path/to/ffxiv_dx11.exe] [--game=path/to/ffxiv_dx11.exe]", exeSpaces);
Console.WriteLine("{0} [-m entrypoint|inject] [--mode=entrypoint|inject]", exeSpaces);
Console.WriteLine("{0} [--handle-owner=inherited-handle-value]", exeSpaces);
Console.WriteLine("{0} [--without-dalamud]", exeSpaces);
Console.WriteLine("{0} [--without-dalamud] [--no-fix-acl]", exeSpaces);
Console.WriteLine("{0} [-- game_arg1=value1 game_arg2=value2 ...]", exeSpaces);
}
@ -351,6 +352,8 @@ namespace Dalamud.Injector
var targetProcessSpecified = false;
var warnManualInjection = false;
var showHelp = args.Count <= 2;
var tryFixAcl = false;
var tryClaimSeDebugPrivilege = false;
for (var i = 2; i < args.Count; i++)
{
@ -378,6 +381,14 @@ namespace Dalamud.Injector
targetProcessSpecified = true;
processes.AddRange(Process.GetProcessesByName("ffxiv_dx11"));
}
else if (args[i] == "--fix-acl" || args[i] == "--acl-fix")
{
tryFixAcl = true;
}
else if (args[i] == "--se-debug-privilege")
{
tryClaimSeDebugPrivilege = true;
}
else if (args[i] == "--warn")
{
warnManualInjection = true;
@ -416,8 +427,21 @@ namespace Dalamud.Injector
}
}
if (tryClaimSeDebugPrivilege)
{
try
{
NativeAclFix.ClaimSeDebug();
Log.Information("SeDebugPrivilege claimed.");
}
catch (Win32Exception e2)
{
Log.Warning(e2, "Failed to claim SeDebugPrivilege");
}
}
foreach (var process in processes)
Inject(process, AdjustStartInfo(dalamudStartInfo, process.MainModule.FileName));
Inject(process, AdjustStartInfo(dalamudStartInfo, process.MainModule.FileName), tryFixAcl);
return 0;
}
@ -431,6 +455,7 @@ namespace Dalamud.Injector
var showHelp = args.Count <= 2;
var handleOwner = IntPtr.Zero;
var withoutDalamud = false;
var noFixAcl = false;
var parsingGameArgument = false;
for (var i = 2; i < args.Count; i++)
@ -447,6 +472,8 @@ namespace Dalamud.Injector
useFakeArguments = true;
else if (args[i] == "--without-dalamud")
withoutDalamud = true;
else if (args[i] == "--no-fix-acl" || args[i] == "--no-acl-fix")
noFixAcl = true;
else if (args[i] == "-g")
gamePath = args[++i];
else if (args[i].StartsWith("--game="))
@ -547,7 +574,7 @@ namespace Dalamud.Injector
}
var gameArgumentString = string.Join(" ", gameArguments.Select(x => EncodeParameterArgument(x)));
var process = NativeAclFix.LaunchGame(Path.GetDirectoryName(gamePath), gamePath, gameArgumentString, (Process p) =>
var process = NativeAclFix.LaunchGame(Path.GetDirectoryName(gamePath), gamePath, gameArgumentString, noFixAcl, (Process p) =>
{
if (!withoutDalamud && mode == "entrypoint")
{
@ -565,7 +592,7 @@ namespace Dalamud.Injector
{
var startInfo = AdjustStartInfo(dalamudStartInfo, gamePath);
Log.Information("Using start info: {0}", JsonConvert.SerializeObject(startInfo));
Inject(process, startInfo);
Inject(process, startInfo, false);
}
var processHandleForOwner = IntPtr.Zero;
@ -647,8 +674,20 @@ namespace Dalamud.Injector
};
}
private static void Inject(Process process, DalamudStartInfo startInfo)
private static void Inject(Process process, DalamudStartInfo startInfo, bool tryFixAcl = false)
{
if (tryFixAcl)
{
try
{
NativeAclFix.CopyAclFromSelfToTargetProcess(process.SafeHandle.DangerousGetHandle());
}
catch (Win32Exception e1)
{
Log.Warning(e1, "Failed to copy ACL");
}
}
var bootName = "Dalamud.Boot.dll";
var bootPath = Path.GetFullPath(bootName);

View file

@ -22,14 +22,18 @@ namespace Dalamud.Injector
/// <param name="workingDir">The working directory.</param>
/// <param name="exePath">The path to the executable file.</param>
/// <param name="arguments">Arguments to pass to the executable file.</param>
/// <param name="dontFixAcl">Don't actually fix the ACL.</param>
/// <param name="beforeResume">Action to execute before the process is started.</param>
/// <returns>The started process.</returns>
/// <exception cref="Win32Exception">Thrown when a win32 error occurs.</exception>
/// <exception cref="GameExitedException">Thrown when the process did not start correctly.</exception>
public static Process LaunchGame(string workingDir, string exePath, string arguments, Action<Process> beforeResume)
public static Process LaunchGame(string workingDir, string exePath, string arguments, bool dontFixAcl, Action<Process> beforeResume)
{
Process process = null;
var psecDesc = IntPtr.Zero;
if (!dontFixAcl)
{
var userName = Environment.UserName;
var pExplicitAccess = default(PInvoke.EXPLICIT_ACCESS);
@ -55,8 +59,9 @@ namespace Dalamud.Injector
throw new Win32Exception(Marshal.GetLastWin32Error());
}
var psecDesc = Marshal.AllocHGlobal(Marshal.SizeOf<PInvoke.SECURITY_DESCRIPTOR>());
psecDesc = Marshal.AllocHGlobal(Marshal.SizeOf<PInvoke.SECURITY_DESCRIPTOR>());
Marshal.StructureToPtr(secDesc, psecDesc, true);
}
var lpProcessInformation = default(PInvoke.PROCESS_INFORMATION);
try
@ -109,6 +114,7 @@ namespace Dalamud.Injector
Environment.SetEnvironmentVariable("__COMPAT_LAYER", compatLayerPrev);
}
if (!dontFixAcl)
DisableSeDebug(lpProcessInformation.hProcess);
process = new ExistingProcess(lpProcessInformation.hProcess);
@ -133,30 +139,8 @@ namespace Dalamud.Injector
throw new GameExitedException();
}
if (PInvoke.GetSecurityInfo(
PInvoke.GetCurrentProcess(),
PInvoke.SE_OBJECT_TYPE.SE_KERNEL_OBJECT,
PInvoke.SECURITY_INFORMATION.DACL_SECURITY_INFORMATION,
IntPtr.Zero,
IntPtr.Zero,
out var pACL,
IntPtr.Zero,
IntPtr.Zero) != 0)
{
throw new Win32Exception(Marshal.GetLastWin32Error());
}
if (PInvoke.SetSecurityInfo(
lpProcessInformation.hProcess,
PInvoke.SE_OBJECT_TYPE.SE_KERNEL_OBJECT,
PInvoke.SECURITY_INFORMATION.DACL_SECURITY_INFORMATION | PInvoke.SECURITY_INFORMATION.UNPROTECTED_DACL_SECURITY_INFORMATION,
IntPtr.Zero,
IntPtr.Zero,
pACL,
IntPtr.Zero) != 0)
{
throw new Win32Exception(Marshal.GetLastWin32Error());
}
if (!dontFixAcl)
CopyAclFromSelfToTargetProcess(lpProcessInformation.hProcess);
}
catch (Exception ex)
{
@ -175,6 +159,7 @@ namespace Dalamud.Injector
}
finally
{
if (psecDesc != IntPtr.Zero)
Marshal.FreeHGlobal(psecDesc);
PInvoke.CloseHandle(lpProcessInformation.hThread);
}
@ -182,6 +167,83 @@ namespace Dalamud.Injector
return process;
}
/// <summary>
/// Copies the DACL of the current process onto the target process.
/// </summary>
/// <param name="hProcess">Native handle to the target process.</param>
/// <exception cref="Win32Exception">Thrown when reading the own DACL or applying it to the target fails.</exception>
public static void CopyAclFromSelfToTargetProcess(IntPtr hProcess)
{
    // GetSecurityInfo and SetSecurityInfo report failure through their return value
    // (a Win32 error code) rather than through the thread's last-error slot, so the
    // returned code is what gets wrapped into the exception.
    var readResult = PInvoke.GetSecurityInfo(
        PInvoke.GetCurrentProcess(),
        PInvoke.SE_OBJECT_TYPE.SE_KERNEL_OBJECT,
        PInvoke.SECURITY_INFORMATION.DACL_SECURITY_INFORMATION,
        IntPtr.Zero,
        IntPtr.Zero,
        out var pACL,
        IntPtr.Zero,
        IntPtr.Zero);
    if (readResult != 0)
    {
        throw new Win32Exception(unchecked((int)readResult));
    }

    // Replace the target's DACL with our own; UNPROTECTED_DACL lets inherited ACEs apply.
    var writeResult = PInvoke.SetSecurityInfo(
        hProcess,
        PInvoke.SE_OBJECT_TYPE.SE_KERNEL_OBJECT,
        PInvoke.SECURITY_INFORMATION.DACL_SECURITY_INFORMATION | PInvoke.SECURITY_INFORMATION.UNPROTECTED_DACL_SECURITY_INFORMATION,
        IntPtr.Zero,
        IntPtr.Zero,
        pACL,
        IntPtr.Zero);
    if (writeResult != 0)
    {
        throw new Win32Exception(unchecked((int)writeResult));
    }
}
/// <summary>
/// Enables the SeDebugPrivilege privilege on the token of the current thread.
/// </summary>
/// <remarks>
/// When the current thread has no token of its own (ERROR_NO_TOKEN), <c>ImpersonateSelf</c> is
/// called and the thread token is opened a second time. This method does not revert that impersonation.
/// </remarks>
/// <exception cref="Exception">
/// Thrown when a native call fails. The message names the failing call and the inner exception is
/// the <see cref="Win32Exception"/> built from the last Win32 error.
/// </exception>
public static void ClaimSeDebug()
{
    var hToken = PInvoke.INVALID_HANDLE_VALUE;
    try
    {
        if (!PInvoke.OpenThreadToken(PInvoke.GetCurrentThread(), PInvoke.TOKEN_QUERY | PInvoke.TOKEN_ADJUST_PRIVILEGES, false, out hToken))
        {
            // Read the error once so the check and the exception agree on the same value.
            var openError = Marshal.GetLastWin32Error();
            if (openError != PInvoke.ERROR_NO_TOKEN)
                throw new Exception("ClaimSeDebug.OpenThreadToken#1", new Win32Exception(openError));

            // No thread token yet: give the thread one by impersonating ourselves, then retry.
            if (!PInvoke.ImpersonateSelf(PInvoke.SECURITY_IMPERSONATION_LEVEL.SecurityImpersonation))
                throw new Exception("ClaimSeDebug.ImpersonateSelf", new Win32Exception(Marshal.GetLastWin32Error()));

            if (!PInvoke.OpenThreadToken(PInvoke.GetCurrentThread(), PInvoke.TOKEN_QUERY | PInvoke.TOKEN_ADJUST_PRIVILEGES, false, out hToken))
                throw new Exception("ClaimSeDebug.OpenThreadToken#2", new Win32Exception(Marshal.GetLastWin32Error()));
        }

        var luidDebugPrivilege = default(PInvoke.LUID);
        if (!PInvoke.LookupPrivilegeValue(null, PInvoke.SE_DEBUG_NAME, ref luidDebugPrivilege))
            throw new Exception("ClaimSeDebug.LookupPrivilegeValue", new Win32Exception(Marshal.GetLastWin32Error()));

        // Single-entry privilege set asking for SeDebugPrivilege to be enabled.
        var tpLookup = new PInvoke.TOKEN_PRIVILEGES()
        {
            PrivilegeCount = 1,
            Privileges = new PInvoke.LUID_AND_ATTRIBUTES[1]
            {
                new PInvoke.LUID_AND_ATTRIBUTES()
                {
                    Luid = luidDebugPrivilege,
                    Attributes = PInvoke.SE_PRIVILEGE_ENABLED,
                },
            },
        };

        if (!PInvoke.AdjustTokenPrivileges(hToken, false, ref tpLookup, 0, IntPtr.Zero, IntPtr.Zero))
            throw new Exception("ClaimSeDebug.AdjustTokenPrivileges", new Win32Exception(Marshal.GetLastWin32Error()));
    }
    finally
    {
        // Only close a handle that was actually opened.
        if (hToken != PInvoke.INVALID_HANDLE_VALUE && hToken != IntPtr.Zero)
            PInvoke.CloseHandle(hToken);
    }
}
private static void DisableSeDebug(IntPtr processHandle)
{
if (!PInvoke.OpenProcessToken(processHandle, PInvoke.TOKEN_QUERY | PInvoke.TOKEN_ADJUST_PRIVILEGES, out var tokenHandle))
@ -190,7 +252,7 @@ namespace Dalamud.Injector
}
var luidDebugPrivilege = default(PInvoke.LUID);
if (!PInvoke.LookupPrivilegeValue(null, "SeDebugPrivilege", ref luidDebugPrivilege))
if (!PInvoke.LookupPrivilegeValue(null, PInvoke.SE_DEBUG_NAME, ref luidDebugPrivilege))
{
throw new Win32Exception(Marshal.GetLastWin32Error());
}
@ -222,7 +284,7 @@ namespace Dalamud.Injector
tokenPrivileges.Privileges[0].Luid = luidDebugPrivilege;
tokenPrivileges.Privileges[0].Attributes = PInvoke.SE_PRIVILEGE_REMOVED;
if (!PInvoke.AdjustTokenPrivileges(tokenHandle, false, ref tokenPrivileges, 0, IntPtr.Zero, 0))
if (!PInvoke.AdjustTokenPrivileges(tokenHandle, false, ref tokenPrivileges, 0, IntPtr.Zero, IntPtr.Zero))
{
throw new Win32Exception(Marshal.GetLastWin32Error());
}
@ -271,6 +333,10 @@ namespace Dalamud.Injector
private static class PInvoke
{
#region Constants
public static readonly IntPtr INVALID_HANDLE_VALUE = new(-1);
public const string SE_DEBUG_NAME = "SeDebugPrivilege";
public const UInt32 STANDARD_RIGHTS_ALL = 0x001F0000;
public const UInt32 SPECIFIC_RIGHTS_ALL = 0x0000FFFF;
public const UInt32 PROCESS_VM_WRITE = 0x0020;
@ -289,6 +355,8 @@ namespace Dalamud.Injector
public const UInt32 SE_PRIVILEGE_ENABLED = 0x00000002;
public const UInt32 SE_PRIVILEGE_REMOVED = 0x00000004;
public const UInt32 ERROR_NO_TOKEN = 0x000003F0;
public enum MULTIPLE_TRUSTEE_OPERATION
{
NO_MULTIPLE_TRUSTEE,
@ -345,6 +413,14 @@ namespace Dalamud.Injector
UNPROTECTED_DACL_SECURITY_INFORMATION = 0x20000000,
PROTECTED_SACL_SECURITY_INFORMATION = 0x40000000,
}
public enum SECURITY_IMPERSONATION_LEVEL
{
SecurityAnonymous,
SecurityIdentification,
SecurityImpersonation,
SecurityDelegation
}
#endregion
#region Methods
@ -395,12 +471,24 @@ namespace Dalamud.Injector
[DllImport("kernel32.dll", SetLastError = true)]
public static extern uint ResumeThread(IntPtr hThread);
[DllImport("advapi32.dll", SetLastError = true)]
public static extern bool ImpersonateSelf(
SECURITY_IMPERSONATION_LEVEL impersonationLevel
);
[DllImport("advapi32.dll", SetLastError = true)]
public static extern bool OpenProcessToken(
IntPtr processHandle,
UInt32 desiredAccess,
out IntPtr tokenHandle);
[DllImport("advapi32.dll", SetLastError = true)]
public static extern bool OpenThreadToken(
IntPtr ThreadHandle,
uint DesiredAccess,
bool OpenAsSelf,
out IntPtr TokenHandle);
[DllImport("advapi32.dll", SetLastError = true)]
public static extern bool LookupPrivilegeValue(string lpSystemName, string lpName, ref LUID lpLuid);
@ -415,9 +503,9 @@ namespace Dalamud.Injector
IntPtr tokenHandle,
bool disableAllPrivileges,
ref TOKEN_PRIVILEGES newState,
UInt32 bufferLengthInBytes,
int cbPreviousState,
IntPtr previousState,
UInt32 returnLengthInBytes);
IntPtr cbOutPreviousState);
[DllImport("advapi32.dll", SetLastError = true)]
public static extern uint GetSecurityInfo(
@ -443,6 +531,9 @@ namespace Dalamud.Injector
[DllImport("kernel32.dll", SetLastError = true)]
public static extern IntPtr GetCurrentProcess();
[DllImport("kernel32.dll", SetLastError = true)]
public static extern IntPtr GetCurrentThread();
[DllImport("user32.dll", SetLastError = true)]
public static extern IntPtr FindWindowEx(IntPtr parentHandle, IntPtr hWndChildAfter, string className, IntPtr windowTitle);

View file

@ -5,6 +5,7 @@
#include <filesystem>
#include <iostream>
#include "nethost/nethost.h"
#include "..\..\Dalamud.Boot\logging.h"
CoreCLR::CoreCLR(void* calling_module)
: m_calling_module(calling_module)
@ -82,7 +83,7 @@ int CoreCLR::load_runtime(const std::wstring& runtime_config_path, const struct
// Success_HostAlreadyInitialized
if (result == 1)
{
printf("Success_HostAlreadyInitialized (0x1) ");
logging::print<logging::I>("Success_HostAlreadyInitialized (0x1)");
result = 0;
}

View file

@ -5,6 +5,7 @@
#include <Windows.h>
#include <Shlobj.h>
#include "CoreCLR.h"
#include "..\..\Dalamud.Boot\logging.h"
FILE* g_CmdStream;
void ConsoleSetup(const std::wstring console_name)
@ -16,6 +17,7 @@ void ConsoleSetup(const std::wstring console_name)
freopen_s(&g_CmdStream, "CONOUT$", "w", stdout);
freopen_s(&g_CmdStream, "CONOUT$", "w", stderr);
freopen_s(&g_CmdStream, "CONIN$", "r", stdin);
SetConsoleOutputCP(CP_UTF8);
}
void ConsoleTeardown()
@ -61,7 +63,7 @@ int InitializeClrAndGetEntryPoint(
if (result != 0)
{
printf("Error: Unable to get RoamingAppData path (err=%d)\n", result);
logging::print<logging::E>("Unable to get RoamingAppData path (err={})", result);
return result;
}
@ -71,13 +73,13 @@ int InitializeClrAndGetEntryPoint(
// =========================================================================== //
wprintf(L"with dotnet_path: %s\n", dotnet_path);
wprintf(L"with config_path: %s\n", runtimeconfig_path.c_str());
wprintf(L"with module_path: %s\n", module_path.c_str());
logging::print<logging::I>(L"with dotnet_path: %s", dotnet_path);
logging::print<logging::I>(L"with config_path: %s", runtimeconfig_path.c_str());
logging::print<logging::I>(L"with module_path: %s", module_path.c_str());
if (!std::filesystem::exists(dotnet_path))
{
printf("Error: Unable to find .NET runtime path\n");
logging::print<logging::E>("Error: Unable to find .NET runtime path");
return 1;
}
@ -88,13 +90,13 @@ int InitializeClrAndGetEntryPoint(
dotnet_path,
};
printf("Loading hostfxr... ");
logging::print<logging::I>("Loading hostfxr...");
if ((result = g_clr->load_hostfxr(&init_parameters)) != 0)
{
printf("\nError: Failed to load the `hostfxr` library (err=0x%08x)\n", result);
logging::print<logging::E>("Failed to load the `hostfxr` library (err=0x{:08x})", result);
return result;
}
printf("Done!\n");
logging::print<logging::I>("Done!");
// =========================================================================== //
@ -105,17 +107,17 @@ int InitializeClrAndGetEntryPoint(
dotnet_path,
};
printf("Loading coreclr... ");
logging::print<logging::I>("Loading coreclr... ");
if ((result = g_clr->load_runtime(runtimeconfig_path, &runtime_parameters)) != 0)
{
printf("\nError: Failed to load coreclr (err=%d)\n", result);
logging::print<logging::E>("Failed to load coreclr (err={})", result);
return result;
}
printf("Done!\n");
logging::print<logging::I>("Done!");
// =========================================================================== //
printf("Loading module... ");
logging::print<logging::I>("Loading module...");
if ((result = g_clr->load_assembly_and_get_function_pointer(
module_path.c_str(),
entrypoint_assembly_name.c_str(),
@ -123,10 +125,10 @@ int InitializeClrAndGetEntryPoint(
entrypoint_delegate_type_name.c_str(),
nullptr, entrypoint_fn)) != 0)
{
printf("\nError: Failed to load module (err=%d)\n", result);
logging::print<logging::E>("Failed to load module (err={})", result);
return result;
}
printf("Done!\n");
logging::print<logging::I>("Done!");
// =========================================================================== //

1
lib/Nomade040-nmd Submodule

@ -0,0 +1 @@
Subproject commit 33ac3b62c7d1eb28ae6b71d4dd78aa133ef96488

View file

@ -0,0 +1,484 @@
20220511; version 3.009:
* Fixed an optimisation bug that caused /abcd|ab/ not to match "abc".
20220504; version 3.008:
* Fixed the behaviour of [^\P{...}] when the icase flag is set, as it
behaved similarly to the one in v-mode that has been proposed in
TC39.
20220429; version 3.007:
* Further modification to the counter mechanism.
20220428; version 3.006:
* Modified the mechanism of the counter used for repetition.
* Re-removed the implementation of linear search for small character
classes.
20220424; version 3.005:
* Fixed a bug that caused /(?<=$.*)/ not to match the end of "a" when
the multiline flag is set
* Preparations for \A, \z, (?m:) that have been proposed in TC39.
20220420; version 3.004:
* Added a new optimisation for /A*B/ and /A+B/ where a character class
A overlaps a character or character class B, such as /[A-Za-z]+ing/,
/".*"/.
20220416; version 3.003:
* Combined two optimisation functions into one.
* Reduced the amount of code for lookaround (lookahead and lookbehind)
assertions.
20220416; version 3.002:
* Fixed a bug that caused regex_match or regex_search with the
match_continuous flag being set to fail when the entry point
selector introduced in version 3.000 was used internally.
20211025; version 3.001:
* Removed the code for splitting the counter, as it seemed to have no
effect or to make performance a bit worse.
* Fixed potential bugs.
* Minor improvements.
20211023; version 3.000:
* Updated srell_ucfdata2.hpp and srell_updata.hpp to support Unicode
14.0.0.
* Updated unicode/updataout.cpp to support Unicode 14. (Support in
advance new script names that are expected to be available in RegExp
of ECMAScript 2022).
* Changed the type used to store a Unicode value when char32_t is not
available, from an "unsigned integer type with width of at least 21
bits" to a "one of at least 32 bits".
* Changed the type used to store a repetition count or character class
number when char32_t is not available, from "unsigned int" to
"unsigned integer type of at least 32-bit width".
* Added overflow check in the function that translates digits into a
numeric value. For example, while up to the previous version
/a{0,4294967297}/ was treated as /a{0,1}/ because of overflow when
the unsigned int type is 32-bit width, SRELL now throws error_brace
in cases like this.
* Fixed a bug that caused /[^;]*^;?/ not to match the beginning of an
input string when the multiline flag is not set.
* Implemented a very simple and limited entry point selector.
20211004; version 2.930:
* Added new typedefs whose prefix is u1632w- and support UTF-16 or
UTF-32 depending on the value of WCHAR_MAX. (When 0xFFFF <=
WCHAR_MAX < 0x10FFFF, u1632w- types are aliases of u16w- types.
When 0x10FFFF <= WCHAR_MAX, u1632w- types are aliases of u32w-
types).
* Reduced the amount of memory used for Eytzinger layout search.
* Various improvements. (Some of them are based on suggestions to NIRE
by Marko Njezic).
20210624; version 2.920:
* Added a new optimisation for the quantifier '?' (I.e., {0,1}).
* Changed the version number of the ECMAScript specification
referenced in misc/sample01.cpp to 2021.
20210429; version 2.912:
* Fixed another bug in the optimisation introduced in version 2.900,
which caused /aa|a|aa/ not to match "a" (Thanks to Jan Schrötter for
the report).
Incidentally, this optimisation can be disabled by defining
SRELLDBG_NO_BRANCH_OPT2 prior to including srell.hpp.
20210424; version 2.911:
* Fixed a bug in the optimisation introduced in version 2.900, which
caused /abc|ab|ac/ not to match "ac". (Thanks for the bug report [As
my email to the reporter was rejected by the email server and
returned, it is unclear whether mentioning the name here is okay
with the reporter. So, I refrain]).
20210407; version 2.910:
* Fixed a potential memory leak in move assignment operators used by
the pattern compiler since 2.900. (Thanks to Michal Švec for the
report).
20210214; version 2.901:
* Removed redundant template specialisations.
20210214; version 2.900:
* Added a new optimisation for the alternative expression that consists
of string literals, such as /abc|abd|acde/.
* Fixed the problem that brought u(8|16)[cs]regex_(token_)?iterator
(i.e., regex (token) iterators specialised for char8_t or char16_t)
to a compile error.
* Minor improvements.
20210131; version 2.810:
* Improved internal UTF-8 iterators.
20200724; version 2.800:
* Introduced the Eytzinger layout for binary search in the character
class.
* Reimplemented linear search for small character classes.
* Modified handling of the property data used for parsing the name for
a named capturing group. Now they are loaded only when needed
instead of being loaded into an instance of basic_regex always.
20200714; version 2.730:
* Added code to prevent redundant save and restore operations when
nested capturing round brackets are processed.
* Improved regex_iterator.
20200703; version 2.720:
* Improved case-insensitive (icase) search using the
Boyer-Moore-Horspool algorithm for UTF-8 string that includes
non-ASCII characters or UTF-16 string that includes non-BMP
characters.
* Fixed a bug that caused regex_iterator->prefix().first to point to
the beginning of the subject string instead of the end of the
previous match (regression introduced in version 2.650, when
three-iterators overloads were added to regex_search()).
* In accordance with the fix above, when a three-iterators version of
regex_search() is called, now match_results.position() returns a
distance from the position passed to as the lookbehind limit (3rd
param of regex_search) and match_results.prefix().first points to
the position passed to as the beginning of the subject string (1st
param of regex_search).
* Fixed a bug that could cause a valid UTF-8 sequence being adjacent
to an invalid UTF-8 sequence to be skipped when the BMH algorithm
was used (regression introduced in version 2.630, when UTF-8
handling was modified).
20200701; version 2.710:
* Minor modifications to Boyer-Moore-Horspool search.
20200630; version 2.700:
* Optimisation adjustments.
20200620: version 2.651:
* Move the group name validity check to after parsing the \u escape.
* Updated misc/sample01.cpp to version 1.103. Changed the version
number of the ECMAScript specification it references to 2020 (ES11).
20200618: version 2.650:
* To element access functions in match_results, added overload
functions for specifying the group name by a pointer.
* When a three-iterators version of regex_search() is used, SRELL now
sets match_results::prefix::first to the position passed to as the
lookbehind limit (third param) instead of the position passed to as
the beginning of the subject (first param).
* Removed some operations that seem to be redundant.
20200601: version 2.643:
* Added "inline" to operators in syntax_option_type and
match_flag_type types, based on a report that it is needed not to
cause the multiple definition error.
* Minor improvements.
20200530: version 2.642:
* Reduced the size of memory allocated by the basic_regex instance.
20200528: version 2.641:
* The fix in 2.640 was incomplete. Fixed the optimisation bug 1 again.
* Optimisation adjustments.
20200516: version 2.640:
* Fixed an optimisation bug 1: It was possible for regex_match to pass
the end of a subject string under certain conditions.
* Fixed an optimisation bug 2: ^ and $ were not given a chance to
match an appropriate position in some cases when the multiline flag
is set to true.
* Updated srell_ucfdata2.hpp and srell_updata.hpp.
20200509: version 2.630:
* SRELL's pattern compiler no longer permits invalid UTF-8 sequences
in regular expressions. It throws regex_utf8. (Invalid UTF-8
sequences in the subject string are not treated as an error.)
* Fixed BMH search functions not to include extra (invalid) UTF-8
trailing bytes following the real matched substring, in a returned
result.
* Fixed minor issues: 1) basic_regex.flags() did not return the
correct value in some cases, 2) match_results.format() did not
replace $<NAME> with an empty string when any capturing group whose
name is NAME did not exist.
20200502: version 2.620:
* Removed methods used for match_continuous and regex_match in the
class for the Boyer-Moore-Horspool algorithm. Now SRELL always uses
the automaton like earlier versions when they are processed.
* Some clean-ups.
20200428: version 2.611:
* Fixed a bug that caused /\d*/ not to match the head of "abc" but to
match the end of it. (regression introduced in version 2.210.)
20200426: version 2.610:
* Fixed a bug that caused case-insensitive (icase) BMH search to skip
a matched sequence at the beginning of the entire text, when 1)
search is done against UTF-8 or UTF-16 text, and 2) the searched
pattern ends with a character that consists of multiple code units
in that encoding.
* Now SRELL parses a capturing group name according to the ECMA
specification and strictly checks its validity. Group names like
/(?<,>...)/ cause regex_error.
20200418: version 2.600:
* To pass to regex_search() directly the limit of a sequence until
where the automaton can lookbehind, added three-iterators versions
of regex_search().
* [Breaking Change] Removed the match_lblim_avail flag from
match_flag_type and the lookbehind_limit member from match_results
which were added in version 2.300.
* Updated srell_ucfdata2.hpp and srell_updata.hpp to support Unicode
13.0.0.
* Updated unicode/updataout.cpp to support Unicode 13. (Support in
advance new script names that will be available in RegExp of
ECMAScript 2020).
20191118: version 2.500:
* Modified basic_regex to hold precomputed tables for icase matching,
instead of creating them from case folding data when its instance is
first created.
* In accordance with the change above, srell_ucfdata.hpp and
ucfdataout.cpp that outputs the former were replaced with
srell_ucfdata2.hpp that holds precomputed tables and ucfdataout2.cpp
that outputs the former.
* Changed the method of character class matching from linear search to
binary search.
* Changed the timing of optimisation of a character class from "when a
closing bracket ']' is found" to "every time a character or
character range is pushed to its character class array".
* Removed all asserts.
* Modified the pattern compiler to interpret sequential \uHHHH escapes
as a Unicode code point value if they represent a valid surrogate
pair. (By this change, incompatibilities with the ECMAScript
specification disappeared.)
* Fixed the position of an endif directive that caused a compiler
error when -DSRELL_NO_NAMEDCAPTURE is specified.
* Updated updataout.cpp to version 1.101.
* Added a standalone version of SRELL in the single-header directory.
20190914: version 2.401:
* Reduced the size of basic_regex. (It was bloated by my carelessness
when support for Unicode property escapes was added).
* Improved basic_regex::swap().
20190907: version 2.400:
* Improved the performance of character class matching.
* Modified the pattern compiler to interpret the \u escape sequence in
the group name in accordance with the ECMAScript specification.
* Updated ucfdataout.cpp to version 1.200. A new member has been added
to the unicode_casefolding class in srell_ucfdata.hpp that
ucfdataout.cpp generates.
Because SRELL 2.400 and later need this added member, they cannot be
used with srell_ucfdata.hpp output by ucfdataout.cpp version 1.101
or earlier. (No problem in using an older version of SRELL with a
newer version of srell_ucfdata.hpp).
* Some clean-ups and improvements.
20190902: version 2.304:
* Fixed regex_iterator that had been broken by the code clean-up in
version 2.303.
20190810: version 2.303:
* Refixed the problem that was fixed in version 2.302 as the fix was
incomplete.
* Cleaned up code.
20190809: version 2.302:
* Bug fix: When (?...) has a quantifier, strings captured by round
brackets inside it were not cleared in each repetition but carried
over to the next loop. For example,
/(?:(ab)|(cd))+/.exec("abcd") returned ["abcd", "ab", "cd"], instead
of ["abcd", undefined, "cd"]. (The latter is correct).
* Updated misc/sample01.cpp to version 1.102. Rewrote the chapter
numbers in accordance with ECMAScript 2019 (ES10).
20190724: version 2.301:
* In accordance with the ECMAScript spec, restricted the characters
which can be escaped by '\', to the following fifteen characters:
^$\.*+?()[]{}|/
Only in the character class, i.e., inside [], '-' also becomes a
member of the group.
20190717: version 2.300:
* Added a feature for specifying the limit until where the automaton
can lookbehind, separated from the beginning of a target sequence.
(Addition of the match_lblim_avail flag to match_flag_type and the
lookbehind_limit member to match_results).
And, lookbehind_limit of match_results being private and used
internally in regex_iterator is also set in its constructor.
* Removed order restriction of capturing parentheses and
backreferences, in accordance with the ECMAScript spec. Now /\1(.)/,
/(?<=(.)\1)/, and /\k<a>(?<a>.)/ are all okay.
* Updated misc/sample01.cpp to version 1.101. Added one compliance
test from misc.js.
20190714: version 2.230:
* Improved the performance of searching when regular expressions begin
with a character or character class followed by a '*' or '+'. (E.g.,
/[A-Za-z]+ing/).
20190707: version 2.221:
* Changed the feature test macro used for checking availability of
std::u8string, from __cpp_char8_t to __cpp_lib_char8_t.
* When icase specified, if all characters in a character class become
the same character as a result of case-folding, the pattern compiler
has been changed to convert the character class to the character
literal (e.g., /r[Ss\u017F]t/i -> /rst/i).
* Fixed a minor issue.
20190617: version 2.220:
* Changed the internal representation of repetition in the case that
it becomes more compact by not using the counter.
* Fixed an optimisation bug that caused searching for /a{1,2}?b/
against "aab" to return "ab" instead of "aab". (Condition: a
character or character class with a non-greedy quantifier is
followed by its exclusive character or character class).
20190613: version 2.210:
* Improved a method of matching for expressions like /ab|cd|ef/ (where
string literals separated by '|' begin with a character exclusive
to each other).
20190603: version 2.202:
* Fixed a bug that caused regex_match to behave like regex_search in
the situation where the BMH algorithm is used.
20190531: version 2.200:
* For searching with an ordinary (non-regex) string, added an
implementation based on the Boyer-Moore-Horspool algorithm.
* Improved UTF-8 iterators.
* Fixed behaviours of \b and \B when icase specified, to match /.\B./i
against "s\u017F".
* Fixed minor issues.
20190508: version 2.100:
* Fixed a bug that caused failure of capturing when 1) a pair of
capturing brackets exists in a lookbehind assertion, and 2) variable
length expressions exist in both the left side of and the inside of
the pair of brackets. E.g., given "1053" =~ /(?<=(\d+)(\d+))$/, no
appropriate string was set for $2.
* Updated srell_ucfdata.hpp and srell_updata.hpp to support Unicode
12.1.0.
* Updated unicode/updataout.cpp to support Unicode 12. (Support in
advance a new binary property and new script names that will be
available in RegExp of ECMAScript 2019 and new script names that are
anticipated to be available in RegExp of ECMAScript 2020).
* Changed the newline character in srell.hpp from CR+LF to LF.
* Modified unicode/*.cpp to output LF as a newline instead of CR+LF.
* Updated misc/sample01.cpp to version 1.100:
1. Rewrote the chapter numbers in subtitles of compliance tests, in
accordance with ECMAScript 2018 Language Specification (ES9).
(The old chapter numbers were based on ECMAScript specifications
up to version 5.1).
2. Added one compliance test from ECMAScript 2018 Language
Specification 21.2.2.3, NOTE.
* Modified the macros for detecting C++11 features.
* Changed the method of the character class.
* For all the constructors and assign functions of basic_regex to have
a default argument for flag_type, reimplemented syntax_option_type
and match_flag_type (missed changes between TR1 -> C++11).
* Experimental support for the char8_t type. If a compiler supports
char8_t (detected by the __cpp_char8_t macro), classes whose names
have the "u8-" prefix accept a sequence of char8_t and handle it as
a UTF-8 string. If char8_t is not supported, the classes handle a
sequence of char as a UTF-8 string, as before.
* As classes that always handle a sequence of char as a UTF-8 string,
new classes whose names have the "u8c-" prefix were added. They
correspond to the classes having the "u8-" prefix in their names up
to version 2.002:
* u8cregex; u8ccmatch, u8csmatch; u8ccsub_match, u8cssub_match;
u8ccregex_iterator, u8csregex_iterator; u8ccregex_token_iterator,
u8csregex_token_iterator.
20180717: version 2.002:
* Changed the maximum number of hexdigits in \u{h...} from six to
'unlimited' in accordance with the ECMAScript specification. ("one
to six hexadecimal digits" of the old implementation was based on
the proposal document).
* Updated updataout.cpp to version 1.001. Encountering unknown
(newly-encoded) script names is no longer treated as an error.
* Updated srell_ucfdata.hpp and srell_updata.hpp to support Unicode
11.0.0.
20180204: version 2.001:
* When icase is specified, [\W] (a character class containing \W) no
longer matches any of [KkSs\u017F\u212A] (ecma262 issue #512).
20180127: version 2.000:
* Added the following features that are to be included into RegExp of
ECMAScript 2018:
* New syntax option flag for '.' to match every code point, dotall,
was added to srell::regex_constants as a value of
syntax_option_type and to srell::basic_regex as a value of
flag_type.
* New expressions to support the Unicode property, \p{...} and
\P{...}.
* Named capture groups (?<NAME>...) and the new expression for
backreference to a named capture group, \k<NAME>.
* The behaviors of lookbehind assertions changed. Now both (?<=...)
and (?<!...) support variable-length lookbehind.
20180125; version 1.401:
* Limited the maximum of numbers that are recognised as backreference
in match_results.format() up to 99, in accordance with the
ECMAScript specification. (I.e., restricted to $1..$9 and $01..$99).
* Removed an unused macro and its related code.
20180101; version 1.400:
* Changed the behaviour of the pattern compiler so that an empty
non-capturing group can have a quantifier, for example, /(?:)*/. It
is a meaningless expression, but changed just for compatibility with
RegExp of ECMAScript.
* Fixed a hang bug: This occurred when 1) a non-capturing group has a
quantifier, 2) and the length of the group itself can be zero-width,
3) and a backreference that can be zero-width is included in the
group somewhere other than the last, such as /(.*)(?:\1.*)*/.
20171216; version 1.300:
* Fixed an important bug: /^(;[^;]*)*$/ did not match ";;;;" because
of a bug in optimisation. This problem occurred when a sequence of
regular expressions ended like /(A...B*)*$/ where a character or
character set that A represents and the one that B represents are
exclusive to each other.
20170621; version 1.200:
* Updated srell_ucfdata.hpp to support Unicode 10.0.0.
* Improved u8regex_traits to handle corrupt UTF-8 sequences more
safely.
20150618; version 1.141:
Updated srell_ucfdata.hpp to support Unicode 8.0.0.
20150517; version 1.140:
* Modified the method for regex_match() to determine whether a
sequence of regular expressions is matched against a sequence of
characters. (Issue raised at #2273 in C++ Standard Library Issues
List).
* Restricted the accepted range of X in the expression "\cX" to
[A-Za-z] in accordance with the ECMAScript specification.
* Fixed the problem that caused parens in a lookaround assertion not
to capture a sequence correctly in some circumstances because the
bug fix done in version 1.111 was imperfect.
20150503; version 1.130:
* Improved case-folding functions.
* Updated unicode/ucfdataout.cpp to version 1.100.
* Fixed a typo in #if directives for u(16|32)[cs]match.
20150425; version 1.120:
* Fixed the bug that caused characters in U+010000-U+10FFFF in UTF-8
(i.e., four octet length characters) not to have been recognised.
* Updated misc/sample01.cpp to version 1.010.
20150402; version 1.111:
* Fixed the problem that caused $2 of "aaa" =~ /((.*)*)/ to be empty
instead of "aaa" because of a bug in optimisation.
20141101; version 1.110:
* Several fixes based on a bug report:
1. Added "this->" to compile() in basic_regex::assign().
2. Implemented operator=() functions explicitly instead of using
default ones generated automatically.
* unicode/ucfdataout.cpp revised and updated to version 1.001.
20140622; version 1.101:
Updated srell_ucfdata.hpp to support Unicode 7.0.0.
20121118; version 1.100:
The first released version.

View file

@ -0,0 +1,421 @@
20220511; version 3.009:
・最適化バグにより /abcd|ab/ が "abc" にマッチしなかった問題を修正。
20220504; version 3.008:
・icase指定時の[^\P{...}]の振る舞いが、TC39で提案中のv-modeのそれに近
いものになっていた問題を修正。
20220429; version 3.007:
・カウンタの仕組みをさらに変更。
20220428; version 3.006:
・繰り返し処理用のカウンタを調整。
・小さな文字クラス用の線形探索を再削除。
20220424; version 3.005:
・multiline指定時に /(?<=$.*)/ が "a" の終わりにマッチしなかった問題を
修正。
・TC39で提案中の\A, \z, (?m:)の準備。
20220420; version 3.004:
・'*' または '+' 付きの文字クラスが後続する文字または文字クラスと排他
的になっていない表現用の最適化処理を追加。例:/[A-Za-z]+ing/,
/".*"/ など。
20220416; version 3.003:
・2つの最適化函数を1つに統合。
・先読み (lookahead)・戻り読み (lookbehind) 用のコード量を削減。
20220416; version 3.002:
・3.000で導入した簡易エントリーポイント選択の使用時に、regex_matchや
match_continuousフラグが指定されたregex_searchが機能しない場合があっ
た問題を修正。
20211025; version 3.001:
・カウンタ分割を廃止。効果がないかむしろ若干速度が低下しているように見
えるため。
・潜在的なバグを修正。
・その他細かな改良など。
20211023; version 3.000:
・srell_ucfdata2.hppとsrell_updata.hppとをUnicode 14.0.0対応に更新。
・unicode/updataout.cppをUnicode 14対応に更新（ECMAScript 2022で対応さ
れる見込みのスクリプト名の先行対応）。
・char32_t未対応のコンパイラでUnicode値を保持するため内部で使用する型
を「21ビット以上あるunsigned整数型」から「32ビット以上あるunsigned整
数型」に変更。
・char32_t未対応のコンパイラで繰り返し回数や文字クラス番号を保持するの
に使う型を「unsigned int」から「32ビット以上あるunsigned整数型」に変
更。
・数値用パーザにoverflowチェックを追加。例unsigned int型が32ビットの
幅の時、前の版まで /a{0,4294967297}/ は /a{0,1}/ 相当になってしまっ
ていましたが、前記のチェックを入れたことによりこのような場合には
error_braceがthrowされるようになっています。
・非multilineモード時に /[^;]*^;?/ が入力文字列の先頭にマッチしなかっ
たバグを修正。
・ごく簡易なエントリーポイント選択を実装。
20211004; version 2.930:
・WCHAR_MAXの値に基づいてUTF-16/UTF-32対応が切り替わるu1632w-型を新規
に追加WCHAR_MAXが0xFFFF以上・0x10FFFF未満ならu1632w-型はu16w-型の
別名となり、WCHAR_MAXが0x10FFFF以上ならu1632w-型はu32w-型の別名とな
ります)。
・Eytzinger layout検索時に使われるメモリ使用量を削減。
・その他細かな改良などいくつかはNIREに対するMarko Njezic氏の改善案に
基づきます)。
20210624; version 2.920:
・?{0,1}相当)用の最適化処理を追加。
・misc/sample01.cpp内で参照しているECMAScript仕様書の版を2021に変更。
20210429; version 2.912:
・2.900で導入した最適化処理のバグにより /aa|a|aa/ が "a" にマッチしな
くなっていた問題を修正（報告してくださったJan Schrötter氏に感謝しま
す）。
ちなみにこの最適化処理は、srell.hppをincludeする前に
SRELLDBG_NO_BRANCH_OPT2マクロを定義しておくと無効化できます。
20210424; version 2.911:
・2.900で導入した最適化処理内の不用意な行削除が原因で、/abc|ab|ac/ が
"ac" に対してマッチしなくなっていた問題を修正(バグ報告に感謝します)。
20210407; version 2.910:
・2.900以降、パターンコンパイラ内部でmove代入演算子が使われる時にメモ
リリークしていた問題を修正報告してくださったMichal Švec氏に感謝し
ます)。
20210214; version 2.901:
・不要なテンプレートの特殊化を削除。
20210214; version 2.900:
・文字列のみからなる選択(例:/abc|abd|acde/)用の最適化処理を新規に追
加。
・u(8|16)[cs]regex_(token_)?iteratorがコンパイルエラーとなり使用できな
かった問題を修正。
・その他細かな改良など。
20210131; version 2.810:
・UTF-8用内部iteratorの改良。
20200724; version 2.800:
・文字クラスの二分探索にEytzinger layoutを導入。
・小さな文字クラス用に線形探索を再実装。
・名前付き括弧の名前部分をパーズするためのプロパティーデータの扱いを変
更。basic_regex型インスタンス内に読み込むのを止めて、必要な時のみ読
み込むように。
20200714; version 2.730:
・入れ子になった捕獲括弧で冗長な退避・復元処理をせぬように変更。
・regex_iteratorの改良。
20200703; version 2.720:
・非ASCII文字を含むUTF-8文字列または非BMPの文字を含むUTF-16文字列を、
Boyer-Moore-Horspoolアルゴリズムを用いて、大文字小文字の区別無しで
(icase/case-insensitiveで) 検索する場合の処理の改良。
・Version 2.650での変更により、regex_iterator->prefix().firstが前回マ
ッチした位置の終端ではなく文字列全体の最初を指すようになってしまっ
ていたのを修正。
・上記修正に合わせて3イテレータ版のregex_search()が呼ばれる場合、
match_results.position()は戻り読みの逆行限界として渡された位置
regex_searchの第3引数を起点とした位置を返し、
match_results.prefix().firstは検索開始位置同第1引数を指すように
変更。
・BMH検索時に、不正なUTF-8シークウェンスの前後にある有効なシークウェン
スが読み飛ばされてしまう問題を修正2.630でUTF-8の処理方法を変えた時
に混入したバグ)。
20200701; version 2.710:
・Boyer-Moore-Horspool検索の調整。
20200630; version 2.700:
・最適化処理の調整。
20200620: version 2.651:
・グループ名のチェックを行う位置を\uエスケープの解釈後に移動。
・misc/sample01.cppをversion 1.103に更新。参照しているECMAScript仕様書
の版を2020(ES11)に変更。
20200618: version 2.650:
・名前付き括弧に捕獲された文字列へのアクセス用函数に、グループ名をポイ
ンタで指定するoverloadをmatch_resultsに追加。
・3イテレータ版のregex_search()使用時には、検索の開始位置ではなく戻り
読み (lookbehind) の逆行限界として渡された位置のほうを
match_results::prefix::firstにセットするよう変更。
・不要と思われる処理をいくつか削除。
20200601: version 2.643:
・syntax_option_typeおよびmatch_flag_typeのoperator函数にinline指定を
追加(これがないとリンク時に多重定義エラーが出ることがあるとのご指摘
がありました)。
・その他細かな改良など。
20200530: version 2.642:
・basic_regex型インスタンスが確保するメモリのサイズを削減。
20200528: version 2.641:
・2.640での修正1が不完全であったため再修正。
・最適化処理の調整。
20200516: version 2.640:
・最適化バグの修正1: regex_matchが入力文字列の終端を通り過ぎてしまうこ
とがあった問題を修正。
・最適化バグの修正2: multilineフラグ指定時に ^ や $ が適切な位置でのマ
ッチングをさせてもらえなくなってしまっていた問題を修正。
・srell_ucfdata2.hppとsrell_updata.hppとを更新。
20200509: version 2.630:
・正規表現中に不正なUTF-8のシークウェンスがあった場合、パターンコンパ
イラがregex_utf8をthrowするように仕様変更検索対象文字列中に不正な
UTF-8の並びがあってもエラー扱いされません
・UTF-8でBMH検索が行われる際、マッチした箇所の直後に余分な後続
(trailing) バイトが続いていた場合にその部分もマッチング結果に含めて
しまう問題を修正。
・basic_regex.flags() が正しい値を返さないことがあったのを修正。
・正規表現中で実際には使われていないグループ名 (NAME) を
match_results.format()に渡す書式文字列の中で$<NAME>のようにして指定
すると、その部分が空文字に置換されずそのまま残ってしまう問題を修正。
20200502: version 2.620:
・Boyer-Moore-Horspoolアルゴリズム用クラスからmatch_continuous指定時用
およびregex_match用の函数を削除。これらの処理時は以前のようにオート
マトンを使うように変更。
・その他クリーンナップ。
20200428: version 2.611:
・/\d*/ が "abc" の冒頭にマッチせず末尾にマッチする問題を修正Version
2.210で混入したバグ)。
20200426: version 2.610:
・Case-insensitive (icase) なBMH検索が行われる際、探している文字列が検
索対象テキスト全体の先頭にあった場合に読み飛ばされてしまうことがある
バグを修正UTF-8またはUTF-16で、検索文字列の末尾が複数のコードユニ
ットからなる文字である場合に発生)。
・キャプチャグループ名のパーズをECMAScriptの仕様書通りきっちり行うよう
に変更。これにより、前の版までは受理されていた /(?<,>...)/ のような
グループ名はregex_errorがthrowされるように。
20200418: version 2.600:
・戻り読み (lookbehind) の逆行限界を直接regex_search()に渡せるように
3イテレータ版のregex_search()を追加。
・[非互換変更] 2.300で導入したmatch_flag_typeのmatch_lblim_availフラグ
と、match_resultsのlookbehind_limitメンバとを廃止。
・srell_ucfdata2.hppとsrell_updata.hppとをUnicode 13.0.0対応に更新。
・unicode/updataout.cppをUnicode 13対応に更新ECMAScript 2020で対応さ
れる見込みのスクリプト名の先行対応)。
20191118: version 2.500:
・初めてbasic_regex型インスタンスが作られた時にcase foldingデータから
icaseマッチング用テーブルを展開するのに代えて、最初から計算済みテー
ブルを保持しているように仕様変更。
・上記変更に併せてsrell_ucfdata.hppおよびそれを出力するucfdataout.cpp
はお役御免とし、代わりに展開済みicase用テーブルを保持する
srell_ucfdata2.hppとそれを出力するucfdataout2.cppとを追加。
・文字クラスの照合方法を線形探索から二分探索に変更。
・文字クラスの最適化処理のタイミングを「']' が見つかった時にまとめて一
括」から「文字または文字コードの範囲をpushするたびごと逐次」に変更。
・assertをすべて削除。
・連続する\uHHHHがサロゲートペアをなしている場合はUnicode値として解釈
するように変更これによりECMAScript仕様との相違はなくなりました
・SRELL_NO_NAMEDCAPTUREマクロ使用時にコンパイルエラーが出ていたのを修
正。
・updataout.cppを1.101にヴァージョンアップ。
・単体版のsrellを追加single-headerディレクトリ内
20190914: version 2.401:
・basic_regex型インスタンスのサイズを削減Unicode property escapes対
応時にうっかり膨張させてしまっていました)。
・basic_regex::swap()の改良。
20190907: version 2.400:
・文字クラスの照合速度を改善。
・パターンコンパイル時にグループ名中の\uエスケープを解釈するように変更
ECMAScriptの仕様に準拠
・ucfdataout.cppを1.200にヴァージョンアップ。このプログラムが出力する
srell_ucfdata.hpp中のunicode_casefoldingクラスに、新たにメンバ変数が
追加されました。
SRELL 2.400以降はこの追加されたメンバ変数をコンパイル時に必要とする
ため、ucfdataout.cpp 1.101以前によって出力されたsrell_ucfdata.hppを
SRELL 2.400以降で使うことはできません古いSRELLで新しい
srell_ucfdata.hppを使うことは可
・その他コードの整理や改良など。
20190902: version 2.304:
・Version 2.303のコード整理で壊れてしまっていたregex_iteratorを修復。
20190810: version 2.303:
・2.302の修正が不完全であったため再修正。
・その他コードの整理。
20190809: version 2.302:
・(?...) に繰り返し指定がついている時、内側の括弧によって捕獲された文
字列がループごとにクリアされず持ち越されていたバグを修正。
例:/(?:(ab)|(cd))+/.exec("abcd") → 1番括弧はundefinedになるはずが
"ab"になってしまっていた。
・misc/sample01.cppをversion 1.102に更新。テスト名中の章番号を
ECMAScript 2019 (ES10) 準拠に変更
20190724: version 2.301:
・ECMAScriptの仕様に準じて、\でエスケープ可能な文字の種類を次の15字に
限定。^$\.*+?()[]{}|/
文字クラス内([]内ではこの15字に加えて '-' も対象に。
20190717: version 2.300:
・検索対象範囲とは別に、戻り読み (lookbehind) の逆行限界を指定できる機
能を追加match_flag_typeへのmatch_lblim_availフラグの追加と
match_resultsへのlookbehind_limitメンバの追加
これに併せてregex_iteratorのコンストラクタ内でも、内部で使うprivate
なmatch_results型インスタンスのlookbehind_limitメンバに値を設定する
ように変更。
・ECMAScriptの仕様に合わせて、後方参照が対応する捕獲括弧より先に出現し
てもエラー扱いせぬように変更。/\1(.)/, /(?<=(.)\1)/, /\k<a>(?<a>.)/
などすべてOKに。
・misc/sample01.cppをversion 1.101に更新。misc.jsより準拠テストを1つ追
加。
20190714: version 2.230:
・正規表現が '*' か '+' かを伴う文字または文字クラスで始まる場合の検索
速度を改善(例:/[A-Za-z]+ing/)。
20190707: version 2.221:
・std::u8stringの利用可否は__cpp_char8_tではなく__cpp_lib_char8_tを用
いて判断するように変更。
・icase指定時にcase-folding処理をした結果、文字クラス内の文字がすべて
同じ文字になった場合には、文字クラスを解消して文字リテラルとして処理
するように変更。例:/r[Ss\u017F]t/i → /rst/i。
・その他問題を修正。
20190617: version 2.220:
・カウンタを使わぬほうが内部表現がコンパクトになる繰り返しはカウンタを
使わぬように変更。
・最適化バグにより、/a{1,2}?b/.exec("aab") が "aab" ではなく "ab" を返
していたのを修正(発生条件:最短一致優先の回数指定が付いている文字ま
たは文字クラスの後ろに、その文字集合と排他的な文字または文字クラスが
続いている場合)。
20190613: version 2.210:
・/ab|cd|ef/ のような表現('|' で区切られている文字列の先頭文字が互い
に排他的な場合)の照合方法を改良。
20190603: version 2.202:
・BMHアルゴリズムが使われる状況で、regex_matchがregex_search相当の処理
をしてしまうバグを修正。
20190531: version 2.200:
・通常の正規表現ではないテキスト検索用に、Boyer-Moore-Horspoolアル
ゴリズムに基づく実装を追加。
・UTF-8用iteratorの改良。
・icase指定時の\b/\Bの挙動を修正。/.\B./i が "s\u017F" にマッチするよ
うに。
・その他問題を修正。
20190508: version 2.100:
・Lookbehind中に文字列のキャプチャがあり、かつその中および左方に可変長
の正規表現があった場合、文字列の捕獲に失敗することがあったのを修正。
例:"1053" =~ /(?<=(\d+)(\d+))$/ で$2に適切な文字列がセットされず。
・srell_ucfdata.hppとsrell_updata.hppとをUnicode 12.1.0対応に更新。
・unicode/updataout.cppをUnicode 12対応に更新ECMAScript 2020で対応さ
れる見込みのスクリプト名の先行対応)。
・srell.hpp中の改行コードをCR+LFからLFに変更。
・unicode/*.cppが出力するファイルの改行コードをCR+LFからLFに変更。
・misc/sample01.cppをversion 1.010に更新。
1. テスト名中の章番号をECMAScript 2018 (ES9) 準拠に変更(前版までは
ECMAScript 5.1までの章番号準拠でした)。
2. ECMAScript 2018規格の2.2.2.3 NOTEから準拠テストを1つ追加。
・C++11の機能の使用可否を判定するマクロを変更。
・文字クラスの処理方法を変更。
・basic_regexの全コンストラクタと全assign函数とでflag_typeのdefault引
数を指定できるように、syntax_option_typeとmatch_flag_typeとを再実装
TR1→C++11間の変更の見落とし
・char8_t型に試験対応。コンパイラがchar8_tに対応している場合
__cpp_char8_tマクロ定義の有無で判断、"u8-"というprefixの付いた
クラスは「char8_t型文字列を受け取り、それをUTF-8として扱う」ように。
char8_tに未対応の場合は従来通り、char型文字列をUTF-8として処理。
・常に「char型文字列をUTF-8として扱う」クラスとして新規に"u8c-"という
prefixに付いたクラスを追加。2.002までの"u8-"付きクラス相当。
・u8cregex; u8ccmatch, u8csmatch; u8ccsub_match, u8cssub_match;
u8ccregex_iterator, u8csregex_iterator; u8ccregex_token_iterator,
u8csregex_token_iterator.
20180717: version 2.002:
・ECMAScriptの仕様に合わせて \u{h...} の h... 部分の最大桁数を6から無
制限に変更変更前の16桁というのは提案書に基づく実装でした
・updataout.cppを1.001に更新。新規に追加されたスクリプト名をエラー扱い
せぬように修整。
・srell_ucfdata.hppとsrell_updata.hppとをUnicode 11.0.0対応に更新。
20180204: version 2.001:
・icase指定時に、[\W]\Wを含む文字classが [KkSs\u017F\u212A] のいず
れにもマッチせぬよう変更関連ecma262 issue #512
20180127; version 2.000:
・ECMAScript 2018のRegExpに追加されることになった次の機能を実装:
・'.' があらゆるコードポイントにマッチするようにするための指定
"dotall" フラグを、srell::regex_constants内の syntax_option_type
および srell::basic_regex内の flag_type に追加。
・Unicode property用の表現、\p{...} と \P{...} とを追加。
・名前付きキャプチャ (?<NAME>...) と、名前付きキャプチャによって捕獲
された文字列を後方参照するための正規表現、\k<NAME> とを追加。
・戻り読み (lookbehind) の振る舞いを変更。(?<=...), (?<!...) とも可変
幅の戻り読みに対応。
20180125; version 1.401:
・ECMAScriptの仕様に合わせて、match_results.format()内で後方参照として
認識される数値を99までに制限即ち$1$9および$01$99のみ有効
・長い間メンテナンスしていないマクロを削除。
20180101; version 1.400:
・/(?:)*/ のように、空のnon-capturingグループにも量指定子を付けられる
ように変更ECMAScriptのRegExpとの互換性確保のための変更で、使い道は
おそらくありません)。
・次の3条件が揃った時に固まってしまったのを修正: 1) non-capturingグル
ープに量指定子が付いていて、2) そのグループ自身が0幅になり得て、3)
そのグループ内の最後以外の場所に、0幅になり得る後方参照が現れる時。
たとえば /(.*)(?:\1.*)*/ のような表現。
20171216; version 1.300:
・最適化処理のバグにより、/^(;[^;]*)*$/ が ";;;;" にマッチしなかった問
題を修正。この問題の発生条件は次の通り:
・/(A...B*)*$/ のような終わり方をしていて、かつAとBとが互いに排他的
な文字または文字集合である場合。
20170621; version 1.200:
・srell_ucfdata.hppをUnicode 10.0.0対応に。
・不正なUTF-8 sequenceに対するu8regex_traitsの振る舞いを改善。
20150618; version 1.141:
srell_ucfdata.hppをUnicode 8.0.0対応に。
20150517; version 1.140:
・regex_match()がマッチの成否を判定する方法の変更。
C++ Standard Library Issues List #2273 への対応)
・ECMAScriptの仕様に合わせて \cX の X の範囲を [A-Za-z] に制限。
・look-around assertions中の丸括弧が、ある条件下で正しく文字列をキャプ
チャせぬ場合があった問題を修正。Version 1.111での修正が不完全であっ
たことによるもの。
20150503; version 1.130:
・case-folding用函数の改善。
・unicode/ucfdataout.cppをversion 1.100に。
・u(16|32)[cs]match用の#if directives中にあったtypoを修正。
20150425; version 1.120:
・UTF-8文字列においてU+010000-U+10FFFFの範囲の文字4オクテット長の文
字)が認識されぬバグを修正。
・misc/sample01.cppをversion 1.010に。
20150402; version 1.111:
・最適化処理のバグにより、"aaa" =~ /((.*)*)/ の $2 が "aaa" ではなく空
になってしまう問題を修正。
20141101; version 1.110:
・バグ報告による修正:
1. basic_regex::assign() 内の compile() に "this->" を追加。
2. operator=() 函数を明示的に実装。
・unicode/ucfdataout.cppをversion 1.001 に。
20140622; version 1.101:
srell_ucfdata.hppをUnicode 7.0.0対応に。
20121118; version 1.100:
最初のリリース版。

View file

@ -0,0 +1,32 @@
/*****************************************************************************
**
** SRELL (std::regex-like library) version 3.009
**
** Copyright (c) 2012-2022, Nozomu Katoo. All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are
** met:
**
** 1. Redistributions of source code must retain the above copyright notice,
** this list of conditions and the following disclaimer.
**
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
** IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
** THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
** PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
** EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
** PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
** LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
** NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**
******************************************************************************
**/

View file

@ -0,0 +1,379 @@
//
// A sample program for SRELL (tests and benchmarks).
// 2021/06/24; version 1.104
//
// Macro Options:
// -DSTD_REGEX: std::regex used.
// -DBOOST_REGEX: boost::regex used.
// -DBOOST_XPRESSIVE: boost::xpressive used.
// unspecified or others: SRELL used.
//
#include <cstdio>
#include <cstring>
#include <ctime>
#include <string>
#include <vector>
#if defined(STD_REGEX)
#include <regex>
#define RE_PREFIX std
#pragma message("std::regex selected.")
#elif defined(BOOST_REGEX)
#include <boost/regex.hpp>
#define RE_PREFIX boost
#pragma message("boost::regex selected.")
#elif defined(BOOST_XPRESSIVE)
#include <boost/xpressive/xpressive.hpp>
#define RE_PREFIX boost::xpressive
#pragma message("boost::xpressive selected.")
#else
#include "../srell.hpp"
#define RE_PREFIX srell
#pragma message("srell selected.")
#endif
// Compiles the pattern `exp`, searches `str` with it `max` times, then prints
// the elapsed time and every sub-match of the last search to stdout.
//
// str:      the subject string to search.
// exp:      the regular expression (ECMAScript syntax).
// max:      how many times the search is repeated (for timing).
// expected: optional list of expected sub-match strings, indexed like the
//           match results; an unmatched group is spelled "(undefined)".
//           When NULL, the results are printed without any verification.
//
// Returns true when no mismatch was detected, i.e. every sub-match equals
// its expected string and the number of sub-matches equals expected->size()
// (or `expected` is NULL). Returns false on a mismatch or when the regex
// library throws.
bool test(const std::string &str, const std::string &exp, const unsigned int max, const std::vector<std::string> *const expected = NULL)
{
#if !defined(BOOST_XPRESSIVE)
	RE_PREFIX::regex re;
#else
	boost::xpressive::cregex re;
#endif
	RE_PREFIX::cmatch mr;
	bool b = false;
	unsigned int num_of_failures = 0;

	try
	{
		std::string matched;
		std::string msg;

#if !defined(BOOST_XPRESSIVE)
		re.assign(exp, RE_PREFIX::regex::ECMAScript);
#else
		re = boost::xpressive::cregex::compile(exp, boost::xpressive::cregex::ECMAScript | boost::xpressive::cregex::not_dot_newline);
#endif

		const clock_t st = std::clock();

		// The loop body is the single statement selected by the #if below.
		for (unsigned int i = 0; i < max; i++)
#if !defined(BOOST_REGEX)
			b = RE_PREFIX::regex_search(str.c_str(), str.c_str() + str.size(), mr, re);
#else
			b = RE_PREFIX::regex_search(str.c_str(), str.c_str() + str.size(), mr, re, boost::regex_constants::match_not_dot_newline);
#endif

		const clock_t ed = std::clock();

//		std::fprintf(stdout, "\t\"%s\" =~ /%s/\n", str.c_str(), exp.c_str());	//  Perl 5 style.
		std::fprintf(stdout, "\t/%s/.exec(\"%s\");\n", exp.c_str(), str.c_str());	//  ECMAScript style.
		if (max > 1)
			std::fprintf(stdout, "\t%u times\n", max);
		std::fprintf(stdout, "\t%s (%ld msec)\n", b ? "Found" : "Not Found", static_cast<long>(static_cast<double>(ed - st) * 1000 / CLOCKS_PER_SEC));

		for (RE_PREFIX::cmatch::size_type i = 0; i < mr.size(); ++i)
		{
			// size_type is not necessarily unsigned int, so convert explicitly
			// to the type the conversion specifier expects.
			if (i)
				std::fprintf(stdout, "\t$%lu = ", static_cast<unsigned long>(i));
			else
				std::fputs("\t$& = ", stdout);

			if (mr[i].matched)
			{
				matched = mr[i].str();
				// The matched text is user data: print it through "%s" and
				// never as part of a format string.
				std::fprintf(stdout, "\"%s\" (%ld+%ld)", matched.c_str(), static_cast<long>(mr.position(i)), static_cast<long>(mr.length(i)));
				msg.clear();
			}
			else
				msg = matched = "(undefined)";

			if (expected)
			{
				if (i < expected->size())
				{
					if (matched == expected->operator[](i))
						msg += "; passed!";
					else
					{
						msg += "; failed... (expected: \"" + expected->operator[](i) + "\")";
						++num_of_failures;
					}
				}
				else
				{
					msg += "; failed...";	//  should not exist.
					++num_of_failures;
				}
			}
			msg += '\n';
			std::fputs(msg.c_str(), stdout);
		}

		// Only compare the counts when a list of expectations was supplied;
		// `expected` is NULL for callers that just want the output.
		if (!num_of_failures && expected && expected->size() != mr.size())
			++num_of_failures;

		std::fprintf(stdout, "Result: %s.\n\n", num_of_failures ? "failed" : "passed");
		return num_of_failures == 0;
	}
	catch (const RE_PREFIX::regex_error &e)
	{
		std::fprintf(stdout, "Error (regex_error): %d \"%s\"\n\n", e.code(), e.what());
	}
	catch (const std::exception &e)
	{
		std::fprintf(stdout, "Error (std::exception): \"%s\"\n\n", e.what());
	}
	return false;
}
// Entry point of the sample program: runs a fixed list of conformance tests
// and (unless SKIP_BENCHMARK is defined) benchmarks through test(), counts
// how many returned true, and prints both tallies. Always returns 0.
//
// NOTE: `expected` is reused across cases. A case that does not call
// resize() or does not assign every element deliberately inherits the size
// and remaining values left by the previous case, so the order of the cases
// matters.
int main()
{
// Base repetition count for the benchmarks (some multiply it further).
const unsigned int count = 100000;
std::string exp;
std::string str;
std::vector<std::string> expected;
unsigned int num_of_tests = 0;
unsigned int num_of_tests_passed = 0;
unsigned int num_of_benches = 0;
unsigned int num_of_benches_passed = 0;
// ---- Conformance tests: each one is searched a single time. ----
std::fputs("Test 1 (ECMAScript 2021 Language Specification 22.2.2.3, NOTE)\n", stdout);
str = "abc";
exp = "((a)|(ab))((c)|(bc))";
expected.resize(7);
expected[0] = "abc";
expected[1] = "a";
expected[2] = "a";
expected[3] = "(undefined)";
expected[4] = "bc";
expected[5] = "(undefined)";
expected[6] = "bc";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 2a (ECMAScript 2021 Language Specification 22.2.2.5.1, NOTE 2)\n", stdout);
str = "abcdefghi";
exp = "a[a-z]{2,4}";
expected.resize(1);
expected[0] = "abcde";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 2b (ECMAScript 2021 Language Specification 22.2.2.5.1, NOTE 2)\n", stdout);
str = "abcdefghi";
exp = "a[a-z]{2,4}?";
// expected keeps size 1 from Test 2a.
expected[0] = "abc";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 3 (ECMAScript 2021 Language Specification 22.2.2.5.1, NOTE 2)\n", stdout);
str = "aabaac";
exp = "(aa|aabaac|ba|b|c)*";
expected.resize(2);
expected[0] = "aaba";
expected[1] = "ba";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 4 (ECMAScript 2021 Language Specification 22.2.2.5.1, NOTE 3)\n", stdout);
str = "zaacbbbcac";
exp = "(z)((a+)?(b+)?(c))*";
expected.resize(6);
expected[0] = "zaacbbbcac";
expected[1] = "z";
expected[2] = "ac";
expected[3] = "a";
expected[4] = "(undefined)";
expected[5] = "c";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 5a (ECMAScript 2021 Language Specification 22.2.2.5.1, NOTE 4)\n", stdout);
str = "b";
exp = "(a*)*";
expected.resize(2);
expected[0] = "";
expected[1] = "";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 5b (ECMAScript 2021 Language Specification 22.2.2.5.1, NOTE 4)\n", stdout);
str = "baaaac";
exp = "(a*)b\\1+";
// expected keeps size 2 from Test 5a (also for Tests 6a and 6b below).
expected[0] = "b";
expected[1] = "";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 6a (ECMAScript 2021 Language Specification 22.2.2.8.2, NOTE 2)\n", stdout);
str = "baaabac";
exp = "(?=(a+))";
expected[0] = "";
expected[1] = "aaa";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 6b (ECMAScript 2021 Language Specification 22.2.2.8.2, NOTE 2)\n", stdout);
str = "baaabac";
exp = "(?=(a+))a*b\\1";
expected[0] = "aba";
expected[1] = "a";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 7 (ECMAScript 2021 Language Specification 22.2.2.8.2, NOTE 3)\n", stdout);
str = "baaabaac";
exp = "(.*?)a(?!(a+)b\\2c)\\2(.*)";
expected.resize(4);
expected[0] = "baaabaac";
expected[1] = "ba";
expected[2] = "(undefined)";
expected[3] = "abaac";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
std::fputs("Test 8 (from https://github.com/tc39/test262/tree/master/test/built-ins/RegExp/lookBehind/misc.js)\n", stdout);
str = "abc";
exp = "(abc\\1)";
expected.resize(2);
expected[0] = "abc";
expected[1] = "abc";
if (test(str, exp, 1, &expected))
++num_of_tests_passed;
++num_of_tests;
// ---- Benchmarks: repeated `count` (or a multiple of it) times. ----
#ifndef SKIP_BENCHMARK
std::fputs("Benchmark 01\n", stdout);
//0123456
str = "aaaabaa";
exp = "^(.*)*b\\1$";
expected.resize(2);
expected[0] = "aaaabaa";
expected[1] = "aa";
if (test(str, exp, count, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 02\n", stdout);
//012345678
str = "aaaabaaaa";
exp = "^(.*)*b\\1\\1$";
// expected keeps size 2 from Benchmark 01 (also for Benchmark 03).
expected[0] = "aaaabaaaa";
expected[1] = "aa";
if (test(str, exp, count, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 03\n", stdout);
//01
str = "ab";
exp = "(.*?)*b\\1";
expected[0] = "b";
expected[1] = "";
if (test(str, exp, count * 10, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 04\n", stdout);
//01234567
str = "acaaabbb";
exp = "(a(.)a|\\2(.)b){2}";
expected.resize(4);
expected[0] = "aaabb";
expected[1] = "bb";
expected[2] = "(undefined)";
expected[3] = "b";
if (test(str, exp, count * 10, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 05\n", stdout);
str = "aabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbaaaaaa";
exp = "(a*)(b)*\\1\\1\\1";
expected.resize(3);
expected[0] = "aabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbaaaaaa";
expected[1] = "aa";
expected[2] = "b";
if (test(str, exp, count, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 06a\n", stdout);
str = "aaaaaaaaaab";
exp = "(.*)*b";
expected.resize(2);
expected[0] = "aaaaaaaaaab";
expected[1] = "aaaaaaaaaa";
if (test(str, exp, count * 10, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 06b\n", stdout);
str = "aaaaaaaaaab";
exp = "(.*)+b";
if (test(str, exp, count * 10, &expected)) // the same results expected.
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 06c\n", stdout);
str = "aaaaaaaaaab";
exp = "(.*){2,}b";
// Only the capture changes; expected[0] is still the one set in 06a.
expected[1] = "";
if (test(str, exp, count * 10, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 07\n", stdout);
str = "aaaaaaaaaabc";
exp = "(?=(a+))(abc)";
expected.resize(3);
expected[0] = "abc";
expected[1] = "a";
expected[2] = "abc";
if (test(str, exp, count, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 08\n", stdout);
str = "1234-5678-1234-456";
exp = "(\\d{4}[-]){3}\\d{3,4}";
expected.resize(2);
expected[0] = "1234-5678-1234-456";
expected[1] = "1234-";
if (test(str, exp, count * 5, &expected))
++num_of_benches_passed;
++num_of_benches;
std::fputs("Benchmark 09\n", stdout);
str = "aaaaaaaaaaaaaaaaaaaaa";
exp = "(.*)*b";
// An empty expectation list: the subject contains no 'b', so the case
// passes only when the search yields zero sub-matches.
expected.resize(0);
if (test(str, exp, 1, &expected))
++num_of_benches_passed;
++num_of_benches;
#endif // !defined(SKIP_BENCHMARK)
std::fprintf(stdout, "Results of tests: %u/%u passed.\n", num_of_tests_passed, num_of_tests);
std::fprintf(stdout, "Results of benchmarks: %u/%u passed.\n", num_of_benches_passed, num_of_benches);
return 0;
// NOTE(review): everything below is unreachable because of the return
// above, so "Benchmark 10" never runs. Presumably it was disabled on
// purpose — confirm before re-enabling. It is also the only call that
// passes no `expected` list to test().
std::fputs("Benchmark 10\n", stdout);
str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz";
exp = "(x+y*)+a";
test(str, exp, 1);
return 0;
}

View file

@ -0,0 +1,21 @@
How to Use
Put the following three files in one directory, and include srell.hpp.
1. srell.hpp
2. srell_ucfdata2.hpp (data for case folding)
3. srell_updata.hpp (data for Unicode properties)
The files in the following directories are supplements. As SRELL does not use
them, it is safe to remove them.
* misc
Contains a source code file for a simple test and benchmark program.
* single-header
Contains a standalone version of srell.hpp into which srell_ucfdata2.hpp
and srell_updata.hpp have been merged.
* unicode
Contains source code files for programs that generate srell_ucfdata2.hpp and
srell_updata.hpp from the latest Unicode data text files.

View file

@ -0,0 +1,23 @@
■使用法
次のファイルを同じディレクトリに置き、srell.hppをincludeするだけです。
・srell.hpp
・srell_ucfdata2.hpp（case folding用データ）
・srell_updata.hpp（Unicode property用データ）
■付属物
以下のディレクトリ内にあるものはおまけのようなものです。
SRELL側からは参照していませんので、削除してしまってもライブラリの動作に
影響はありません。
・misc
簡単なテスト及びベンチマークプログラムのソースが入っています。
・single-header
srell.hppの中にsrell_ucfdata2.hppとsrell_updata.hppとを統合してしまい、
これ単体で使用できるようにしたstandalone版が入っています。
・unicode
最新のUnicodeデータからsrell_ucfdata.hpp及びsrell_updata.hppを作るため
のプログラムのソースが入っています。

File diff suppressed because it is too large Load diff

9868
lib/srell3_009/srell.hpp Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,76 @@
Contents of this directory:
1. ucfdataout2.cpp
2. updataout.cpp
----
1. ucfdataout2.cpp
This is a C++ source file for a program that generates a newer version
of srell_ucfdata2.hpp, which is a C++ source file that SRELL 2.500- includes
for case-folding. It is generated by ucfdataout with CaseFolding.txt provided
by the Unicode Consortium.
+---------------------------------------------------------------------------
| What is CaseFolding.txt?
|
| It is a data file needed for case-insensitive matching based on the
| Unicode Standard. Whenever a new version of the Unicode Standard is
| released, CaseFolding.txt may also be updated accordingly.
|
+---------------------------------------------------------------------------
1-1. Usage
1) compile ucfdataout2.cpp,
2) get the latest version of CaseFolding.txt, which is available at
http://www.unicode.org/Public/UNIDATA/CaseFolding.txt ,
3) put CaseFolding.txt and a binary file generated at 1) in the same
directory and run the binary file,
4) move the newly generated "srell_ucfdata2.hpp" to the directory where
SRELL is located.
1-2. Compatibility
srell_ucfdata2.hpp is not compatible with srell_ucfdata.hpp that SRELL up
to 2.401 was using.
----
2. updataout.cpp
This is a C++ source file for a program that generates a newer version
of srell_updata.hpp, which is a C++ source file that SRELL includes for
the Unicode property escapes (\p{...} and \P{...}). It is generated by
updataout with the following text files provided by the Unicode Consortium:
* DerivedCoreProperties.txt
* DerivedNormalizationProps.txt
* emoji-data.txt
* PropList.txt
* ScriptExtensions.txt
* Scripts.txt
* UnicodeData.txt
As well as CaseFolding.txt mentioned above, these files may be updated
accordingly whenever a new version of the Unicode Standard is released.
2-1. Usage
1) compile updataout.cpp,
2) get the latest versions of the text files mentioned above, which are
available at:
a. emoji-data.txt: http://www.unicode.org/Public/UNIDATA/emoji/
b. others: http://www.unicode.org/Public/UNIDATA/ ,
3) put the text files and a binary file generated at 1) in the same
directory and run the binary file,
4) move the newly generated "srell_updata.hpp" to the directory where
SRELL is located.
Note: emoji-data.txt has been moved from /Public/UNIDATA/ to
/Public/emoji/(version number)/ since Unicode 11.0.0.
Since Unicode 13.0.0, moved to /Public/UNIDATA/emoji/ .
2-2. Compatibility
srell_updata.hpp does not have compatibility issues as of this release.

View file

@ -0,0 +1,84 @@
■同梱物について
1. ucfdataout2.cpp
2. updataout.cpp
----
1. ucfdataout2.cpp
srell_ucfdata2.hppの最新版を作成するプログラムのソースファイルです。SRELLの
2.5以降はcase-insensitiveな大文字小文字の違いを無視した照合を行うために、
このsrell_ucfdata2.hppを必要とします。
ucfdataout2は、Unicode Consortiumより提供されているCaseFolding.txtというテキ
ストデータからsrell_ucfdata2.hppを自動生成します。
+---------------------------------------------------------------------------
| CaseFolding.txtとは
|
| Case-insensitiveな照合を行う際には、大文字小文字の違いを吸収するために
| "case-folding" と呼ばれる処理が行われます。Unicode規格に基づいた
| case-foldingを行うために、Unicode Consortiumから提供されているのが
| CaseFolding.txtです。
|
| このデータファイルはUnicode規格がアップデートされるとそれに合わせて
| アップデートされる可能性があります。
|
+---------------------------------------------------------------------------
1-1. 使用方法
1) ucfdataout2.cppをコンパイルします。
2) 最新版のCaseFolding.txtを次のURLより取得します。
http://www.unicode.org/Public/UNIDATA/CaseFolding.txt ,
3) CaseFolding.txtと、1)で作成したバイナリとを同じフォルダに置いて
バイナリを実行します。
4) srell_ucfdata2.hppが生成されますので、それをSRELLの置かれているディレク
トリへと移動させます。
1-2. 互換性
srell_ucfdata2.hppは、SRELL 2.401までが利用していたsrell_ucfdata.hppと互換
性がありません。
----
2. updataout.cpp
srell_updata.hppの最新版を作成するプログラムのソースファイルです。SRELLは
Unicode property escapes\p{...} と \P{...})を含む正規表現と文字列との照合
を行うために、このsrell_updata.hppを必要とします。
updataoutは、Unicode Consortiumより提供されている次のテキストデータから
srell_updata.hppを自動生成します。
・DerivedCoreProperties.txt
・DerivedNormalizationProps.txt
・emoji-data.txt
・PropList.txt
・ScriptExtensions.txt
・Scripts.txt
・UnicodeData.txt
先述のCaseFolding.txt同様、これらのテキストデータファイルもUnicode規格が
アップデートされるとそれに合わせてアップデートされる可能性があります。
2-1. 使用方法
1) updataout.cppをコンパイルします。
2) 前記テキストファイルの最新版を次のURLより取得します。
a. emoji-data.txt: http://www.unicode.org/Public/UNIDATA/emoji/
b. それ以外: http://www.unicode.org/Public/UNIDATA/
3) これらのテキストファイルと、1)で作成したバイナリとを同じフォルダに
置いてバイナリを実行します。
4) srell_updata.hppが生成されますので、それをSRELLの置かれているディレク
トリへと移動させます。
補註: Unicode 11.0.0以降、emoji-data.txt は /Public/UNIDATA/ から
/Public/emoji/(ヴァージョン番号)/ へ移されました。
さらに Unicode 13.0.0以降、/Public/UNIDATA/emoji/ へ移されました。
2-2. 互換性
srell_updata.hpp には非互換となるような変更はこれまでのところ加えられてい
ません。

View file

@ -0,0 +1,590 @@
//
// ucfdataout.cpp: version 2.100 (2020/05/13).
//
// This is a program that generates srell_ucfdata.hpp from CaseFolding.txt
// provided by the Unicode Consortium. The latest version is available at:
// http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
//
#include <cstdio>
#include <cstdlib>
#include <string>
#include <map>
#include "../srell.hpp"
#if defined(_MSC_VER) && _MSC_VER >= 1400
#pragma warning(disable:4996)
#endif
// Small helpers shared by the Unicode data generator programs: value
// formatting and whole-file reading/writing with progress messages on stdout.
namespace unishared
{
// Formats `value` with the printf-style format string `fmt` and returns the
// result as a std::string.
//
// BufSize is the size of the temporary buffer. The caller must choose it
// large enough for the formatted text plus the terminating NUL; the output
// length is not checked.
template <const std::size_t BufSize, typename Type>
std::string stringify(const Type value, const char *const fmt)
{
	char buffer[BufSize];
	std::sprintf(buffer, fmt, value);
	return std::string(buffer);
}

// Appends the contents of the file `dir` + `filename` to `str`.
//
// str:      receives the file contents; existing contents are kept.
// filename: name of the file to read.
// dir:      prefix prepended verbatim to `filename` (no separator is
//           inserted); may be NULL, which is treated as "".
//
// Prints "Reading '<path>'... " followed by "done." or "failed..." to stdout.
// Returns true when the file was opened and read to its end without an I/O
// error, false otherwise (`str` may then hold a partial read).
bool read_file(std::string &str, const char *const filename, const char *const dir)
{
	const std::string path(std::string(dir ? dir : "") + filename);
	FILE *const fp = std::fopen(path.c_str(), "r");

	std::fprintf(stdout, "Reading '%s'... ", path.c_str());

	if (fp)
	{
		// A fixed stack buffer avoids a heap allocation whose failure would
		// otherwise have to close the stream separately.
		char buffer[4096];
		std::size_t size;

		while ((size = std::fread(buffer, 1, sizeof(buffer), fp)) != 0)
			str.append(buffer, size);

		// fread() returns 0 both at end-of-file and on error; tell them apart
		// so a truncated read is not reported as success.
		const bool ok = std::ferror(fp) == 0;

		std::fclose(fp);

		if (ok)
		{
			std::fputs("done.\n", stdout);
			return true;
		}
	}
	std::fputs("failed...\n", stdout);
	return false;
}

// Writes `str` to the file `filename`, replacing any previous contents. The
// file is opened in binary mode, so the bytes are written unmodified.
//
// Prints "Writing '<filename>'... " followed by "done." or "failed..." to
// stdout. Returns true only when every byte was written and the stream was
// closed successfully.
bool write_file(const char *const filename, const std::string &str)
{
	FILE *const fp = std::fopen(filename, "wb");

	std::fprintf(stdout, "Writing '%s'... ", filename);

	if (fp)
	{
		const bool written = std::fwrite(str.c_str(), 1, str.size(), fp) == str.size();
		// fclose() flushes buffered data; a failure there means the file on
		// disk may be incomplete even though fwrite() succeeded.
		const bool closed = std::fclose(fp) == 0;

		if (written && closed)
		{
			std::fputs("done.\n", stdout);
			return true;
		}
	}
	std::fputs("failed...\n", stdout);
	return false;
}
}
// namespace unishared
// Command-line options of the case-folding data generator.
//
// Recognised options (each introduced by '-' or '/' and followed by a value
// in the next argument):
//   i   name of the input file          (default "CaseFolding.txt")
//   o   name of the output file         (default "srell_ucfdata2.hpp")
//   v   output format version, decimal  (default 2)
//   id  prefix prepended to the input file name (default "")
//
// After construction `errorno` is 0 on success, -1 if the last problem seen
// was an unknown option (or an argument that is not an option), and -2 if it
// was an option without a value. A message is printed to stdout per problem.
struct ucf_options
{
	const char *infilename;
	const char *outfilename;
	const char *indir;
	int version;
	int errorno;

	ucf_options(const int argc, const char *const *const argv)
		: infilename("CaseFolding.txt")
		, outfilename("srell_ucfdata2.hpp")
		, indir("")
		, version(2)
		, errorno(0)
	{
		bool explicit_outfile = false;

		for (int pos = 1; pos < argc; ++pos)
		{
			const char *const arg = argv[pos];
			const bool has_prefix = arg[0] == '-' || arg[0] == '/';
			const char *const key = has_prefix ? arg + 1 : NULL;

			const bool is_i = key && std::strcmp(key, "i") == 0;
			const bool is_o = key && std::strcmp(key, "o") == 0;
			const bool is_v = key && std::strcmp(key, "v") == 0;
			const bool is_id = key && std::strcmp(key, "id") == 0;

			if (!(is_i || is_o || is_v || is_id))
			{
				// Covers both unrecognised option names and arguments that
				// do not start with an option prefix at all.
				std::fprintf(stdout, "[Error] unknown option \"%s\" found.\n", arg);
				errorno = -1;
				continue;
			}

			if (pos + 1 >= argc)
			{
				// The option is the last argument, so its value is missing.
				std::fprintf(stdout, "[Error] no argument for \"%s\" specified.\n", arg);
				errorno = -2;
				continue;
			}

			// Consume the value that follows the option.
			const char *const value = argv[++pos];

			if (is_i)
				infilename = value;
			else if (is_o)
			{
				outfilename = value;
				explicit_outfile = true;
			}
			else if (is_v)
			{
				version = static_cast<int>(std::strtol(value, NULL, 10));

				// Versions below 2 use the old file name, unless an output
				// name has already been given explicitly.
				if (version < 2 && !explicit_outfile)
					outfilename = "srell_ucfdata.hpp";
			}
			else
				indir = value;
		}
	}
};
// struct ucf_options
class unicode_casefolding
{
public:
unicode_casefolding()
: maxdelta_(0L), maxdelta_cp_(0L), ucf_maxcodepoint_(0L), rev_maxcodepoint_(0L)
, ucf_numofsegs_(1U), rev_numofsegs_(1U), numofcps_from_(0U), numofcps_to_(0U)
, max_appearance_(0U), nextoffset_(0x100L), rev_charsets_(1, -1L)
{
}
int create_ucfdata(std::string &outdata, const ucf_options &opts)
{
const std::string indent("\t\t\t");
int errorno = opts.errorno;
std::string buf;
if (errorno)
return errorno;
if (unishared::read_file(buf, opts.infilename, opts.indir))
{
static const srell::regex re_line("^.*$", srell::regex::multiline);
const srell::cregex_iterator eos;
srell::cregex_iterator iter(buf.c_str(), buf.c_str() + buf.size(), re_line);
srell::cmatch match;
int colcount = 0;
for (; iter != eos; ++iter)
{
if (iter->length(0))
{
static const srell::regex re_datainfo("^# (.*)$");
if (!srell::regex_match((*iter)[0].first, (*iter)[0].second, match, re_datainfo))
{
outdata.append(1, '\n');
break;
}
outdata += "// " + match.str(1) + "\n";
}
}
if (opts.version <= 1)
outdata += "template <typename T1, typename T2, typename T3>\nstruct unicode_casefolding\n{\n\tstatic const T1 *table()\n\t{\n\t\tstatic const T1 ucftable[] =\n\t\t{\n";
else
outdata += "template <typename T2, typename T3>\nstruct unicode_casefolding\n{\n";
for (; iter != eos; ++iter)
{
static const srell::regex re_cfdata("^\\s*([0-9A-Fa-f]+); ([CS]); ([0-9A-Fa-f]+);\\s*#\\s*(.*)$");
const srell::cmatch &line = *iter;
if (srell::regex_match(line[0].first, line[0].second, match, re_cfdata))
{
const std::string from(match[1]);
const std::string to(match[3]);
const std::string type(match[2]);
const std::string name(match[4]);
update(from, to);
if (opts.version == 1)
outdata += indent + "{ 0x" + from + ", 0x" + to + " },\t// " + type + "; " + name + "\n";
else if (opts.version <= 0)
{
if (colcount == 0)
outdata += indent;
outdata += "{ 0x" + from + ", 0x" + to + " },";
if (++colcount == 4)
{
outdata.append(1, '\n');
colcount = 0;
}
}
}
else if (opts.version == 1)
{
static const srell::regex re_comment_or_emptyline("^#.*|^$");
if (!srell::regex_match(line[0].first, line[0].second, re_comment_or_emptyline))
outdata += indent + "// " + line.str(0) + "\n";
}
}
if (colcount > 0)
outdata.append(1, '\n');
if (opts.version <= 1)
outdata += indent + "{ 0, 0 }\n\t\t};\n\t\treturn ucftable;\n\t}\n";
outdata += "\tstatic const T2 ucf_maxcodepoint = 0x" + unishared::stringify<16>(ucf_maxcodepoint_, "%.4lX") + ";\n";
outdata += "\tstatic const T3 ucf_deltatablesize = 0x" + unishared::stringify<16>(ucf_numofsegs_ << 8, "%X") + ";\n";
outdata += "\tstatic const T2 rev_maxcodepoint = 0x" + unishared::stringify<16>(rev_maxcodepoint_, "%.4lX") + ";\n";
outdata += "\tstatic const T3 rev_indextablesize = 0x" + unishared::stringify<16>(rev_numofsegs_ << 8, "%X") + ";\n";
outdata += "\tstatic const T3 rev_charsettablesize = " + unishared::stringify<16>(numofcps_to_ * 2 + numofcps_from_ + 1, "%u") + ";\t// 1 + " + unishared::stringify<16>(numofcps_to_, "%u") + " * 2 + " + unishared::stringify<16>(numofcps_from_, "%u") + "\n";
outdata += "\tstatic const T3 rev_maxset = " + unishared::stringify<16>(maxset(), "%u") + ";\n";
outdata += "\tstatic const T2 eos = 0;\n";
if (opts.version >= 2)
{
outdata += "\n\tstatic const T2 ucf_deltatable[];\n\tstatic const T3 ucf_segmenttable[];\n\tstatic const T3 rev_indextable[];\n\tstatic const T3 rev_segmenttable[];\n\tstatic const T2 rev_charsettable[];\n\n\tstatic const T2 *ucf_deltatable_ptr()\n\t{\n\t\treturn ucf_deltatable;\n\t}\n\tstatic const T3 *ucf_segmenttable_ptr()\n\t{\n\t\treturn ucf_segmenttable;\n\t}\n\tstatic const T3 *rev_indextable_ptr()\n\t{\n\t\treturn rev_indextable;\n\t}\n\tstatic const T3 *rev_segmenttable_ptr()\n\t{\n\t\treturn rev_segmenttable;\n\t}\n\tstatic const T2 *rev_charsettable_ptr()\n\t{\n\t\treturn rev_charsettable;\n\t}\n};\n\n";
out_v2tables(outdata);
outdata += "#define SRELL_UCFDATA_VERSION 200\n";
}
else
outdata += "};\n#define SRELL_UCFDATA_VER 201909L\n";
std::fprintf(stdout, "MaxDelta: %+ld (U+%.4lX->U+%.4lX)\n", maxdelta_, maxdelta_cp_, maxdelta_cp_ + maxdelta_);
}
else
errorno = 1;
return errorno;
}
private:
// Records one folding pair.
//
// from, to  code points written as hexadecimal digits (parsed by strtol,
//           base 16).
//
// Forwards the pair to update_tables(), then refreshes the running
// statistics: largest delta, maximum code points, distinct segment and
// "folded to" counts, and how often each "to" string has been seen.
void update(const std::string &from, const std::string &to)
{
    const long src = std::strtol(from.c_str(), NULL, 16);
    const long dst = std::strtol(to.c_str(), NULL, 16);
    const long src_segment = src >> 8;
    const long dst_segment = dst >> 8;

    update_tables(src, dst, src_segment);
    ++numofcps_from_;

    // Keep the delta of largest magnitude and the code point it came from.
    const long shift = dst - src;
    if (std::abs(shift) > std::abs(maxdelta_))
    {
        maxdelta_cp_ = src;
        maxdelta_ = shift;
    }

    if (src > ucf_maxcodepoint_)
        ucf_maxcodepoint_ = src;

    // The reverse lookup covers both ends of the pair.
    if (dst > rev_maxcodepoint_)
        rev_maxcodepoint_ = dst;
    if (src > rev_maxcodepoint_)
        rev_maxcodepoint_ = src;

    // insert() reports through .second whether the key was new, so each
    // counter moves only the first time a key is seen.
    const char seen = 1;
    if (ucf_countedsegnos.insert(std::make_pair(src_segment, seen)).second)
        ++ucf_numofsegs_;
    if (rev_countedsegnos.insert(std::make_pair(dst_segment, seen)).second)
        ++rev_numofsegs_;
    if (rev_countedsegnos.insert(std::make_pair(src_segment, seen)).second)
        ++rev_numofsegs_;
    if (cps_counted_as_foldedto.insert(std::make_pair(dst, seen)).second)
        ++numofcps_to_;

    // operator[] creates a zero count for a "to" string not seen before.
    unsigned int &hits = appearance_counts_[to];
    ++hits;
    if (hits > max_appearance_)
        max_appearance_ = hits;
}
// Returns max_appearance_ plus one; create_ucfdata() emits this value as
// rev_maxset.
unsigned int maxset() const
{
    const unsigned int largest_set = 1U + max_appearance_;
    return largest_set;
}
// Appends the five out-of-class table definitions of the version 2 layout
// to outdata, separated by blank lines: ucf_deltatable, ucf_segmenttable,
// rev_indextable, rev_segmenttable and rev_charsettable. The reverse-lookup
// tables are built by create_revtables() before anything is written.
void out_v2tables(std::string &outdata)
{
    // Each writer emits pieces[0] + element type + pieces[1] + table name
    // + pieces[2] as the opening of its definition.
    const char *const pieces[] = {
        "template <typename T2, typename T3>\nconst ",
        " unicode_casefolding<T2, T3>::",
        "[] =\n{\n"
    };

    create_revtables();

    out_lowertable(outdata, pieces, "T2", "ucf_deltatable", ucf_deltas_, ucf_segments_);
    outdata += '\n';

    out_uppertable(outdata, pieces, "T3", "ucf_segmenttable", ucf_segments_);
    outdata += '\n';

    out_lowertable(outdata, pieces, "T3", "rev_indextable", rev_indices_, rev_segments_);
    outdata += '\n';

    out_uppertable(outdata, pieces, "T3", "rev_segmenttable", rev_segments_);
    outdata += '\n';

    out_cstable(outdata, pieces, "T2", "rev_charsettable", rev_charsets_);
}
// Records one folding pair in ucf_segments_, ucf_deltas_ and rev_charsets_.
//
// cp_from     code point being folded
// cp_to       code point it folds to
// segno_from  cp_from >> 8, as computed by the caller
//
// ucf_segments_[segno] holds the offset of that segment's 0x100-entry block
// inside ucf_deltas_; an offset of 0 means no block has been assigned yet.
// rev_charsets_ is a sequence of runs, each ending in -1; a new run is
// written as { cp_to, cp_from, -1 }.
void update_tables(const long cp_from, const long cp_to, const long segno_from)
{
    // Make sure the segment has a slot, then a block of deltas.
    if (static_cast<long>(ucf_segments_.size()) <= segno_from)
        ucf_segments_.resize(segno_from + 1, 0L);

    long &block_offset = ucf_segments_[segno_from];
    if (block_offset == 0L)
    {
        block_offset = nextoffset_;
        nextoffset_ += 0x100L;
        ucf_deltas_.resize(nextoffset_, 0L);
    }
    ucf_deltas_[block_offset + (cp_from & 0xffL)] = cp_to - cp_from;

    // Look for the first element of rev_charsets_ equal to cp_to.
    const long total = static_cast<long>(rev_charsets_.size());
    long pos = 0L;
    while (pos < total && rev_charsets_[pos] != cp_to)
        ++pos;

    if (pos == total)
    {
        // Not present: start a new run at the end.
        rev_charsets_.push_back(cp_to);
        rev_charsets_.push_back(cp_from);
        rev_charsets_.push_back(-1L);
        return;
    }

    // Present: step forward to the -1 closing that run and put cp_from
    // immediately before it.
    do
    {
        ++pos;
    }
    while (rev_charsets_[pos] != -1L);
    rev_charsets_.insert(pos, 1, cp_from);
}
// Builds rev_segments_ and rev_indices_ from rev_charsets_.
//
// For every code point stored in rev_charsets_, the entry of rev_indices_
// addressed through rev_segments_[cp >> 8] and the low byte of cp is set to
// the index at which that code point's -1-terminated run begins. Blocks of
// 0x100 entries are handed out starting at offset 0x100; a segment offset
// of 0 means no block has been assigned yet.
void create_revtables()
{
    const long total = static_cast<long>(rev_charsets_.size());
    long free_offset = 0x100L;
    long run_begin = 0L;

    for (long pos = 0L; pos < total; ++pos)
    {
        const long cp = rev_charsets_[pos];

        if (cp == -1L)
        {
            // A terminator: the next run starts right after it.
            run_begin = pos + 1;
            continue;
        }

        const long segno = cp >> 8L;
        if (static_cast<long>(rev_segments_.size()) <= segno)
            rev_segments_.resize(segno + 1, 0L);

        long &block_offset = rev_segments_[segno];
        if (block_offset == 0L)
        {
            block_offset = free_offset;
            free_offset += 0x100L;
            rev_indices_.resize(free_offset, 0L);
        }

        rev_indices_[block_offset + (cp & 0xffL)] = run_begin;
    }
}
// Appends the definition of a table whose entries come in blocks of 256.
//
// outdata   text being built
// headers   three fragments placed before type, between type and funcname,
//           and after funcname
// type      element type name; also used to wrap negative values in a
//           static_cast
// funcname  name of the table being defined
// table     values to print, sixteen per row
// segtable  segment table whose entries are block offsets into table; used
//           only to label each block with the segment that points at it
void out_lowertable(std::string &outdata, const char *const headers[], const char *const type, const char *const funcname, const std::basic_string<long> &table, const std::basic_string<long> &segtable) const
{
    const int count = static_cast<int>(table.size());
    const int segcount = static_cast<int>(segtable.size());

    outdata += headers[0];
    outdata += type;
    outdata += headers[1];
    outdata += funcname;
    outdata += headers[2];

    for (int pos = 0; pos < count; ++pos)
    {
        const int col = pos & 15;

        // Label the start of every 256-entry block.
        if ((pos & 255) == 0)
        {
            if (pos == 0)
            {
                outdata += "\t// For common (0)\n";
            }
            else
            {
                // First segment whose offset is this block, if there is one.
                int seg = 0;
                while (seg < segcount && segtable[seg] != pos)
                    ++seg;
                if (seg < segcount)
                    outdata += "\n\t// For u+" + unishared::stringify<16>(seg, "%.2X") + "xx (" + unishared::stringify<16>(pos, "%d") + ")\n";
            }
        }

        // Separator before the value. NOTE(review): as written, the
        // every-fourth-column case and the ordinary case emit the same
        // single space.
        if (col == 0)
            outdata += "\t";
        else if ((col & 3) == 0)
            outdata += " ";
        else
            outdata += " ";

        const long value = table[pos];
        if (value >= 0L)
        {
            outdata += unishared::stringify<16>(value, "%ld");
        }
        else
        {
            outdata += "static_cast<";
            outdata += type;
            outdata += ">(";
            outdata += unishared::stringify<16>(value, "%ld") + ")";
        }

        // No comma after the final entry; a line break after every
        // sixteenth.
        if (pos + 1 == count)
            outdata += '\n';
        else if (col == 15)
            outdata += ",\n";
        else
            outdata += ',';
    }

    outdata += "};\n";
}
// Appends the definition of a flat table, sixteen values per row.
//
// outdata   text being built
// headers   three fragments placed before type, between type and funcname,
//           and after funcname
// type      element type name; also used to wrap negative values in a
//           static_cast
// funcname  name of the table being defined
// table     values to print
void out_uppertable(std::string &outdata, const char *const headers[], const char *const type, const char *const funcname, const std::basic_string<long> &table) const
{
    const int count = static_cast<int>(table.size());

    outdata += headers[0];
    outdata += type;
    outdata += headers[1];
    outdata += funcname;
    outdata += headers[2];

    for (int pos = 0; pos < count; ++pos)
    {
        const int col = pos & 15;

        // Separator before the value. NOTE(review): as written, the
        // every-fourth-column case and the ordinary case emit the same
        // single space.
        if (col == 0)
            outdata += "\t";
        else if ((col & 3) == 0)
            outdata += " ";
        else
            outdata += " ";

        const long value = table[pos];
        if (value >= 0)
        {
            outdata += unishared::stringify<16>(value, "%ld");
        }
        else
        {
            outdata += "static_cast<";
            outdata += type;
            outdata += ">(";
            outdata += unishared::stringify<16>(value, "%ld") + ")";
        }

        // No comma after the final entry; a line break after every
        // sixteenth.
        if (pos + 1 == count)
            outdata += '\n';
        else if (col == 15)
            outdata += ",\n";
        else
            outdata += ',';
    }

    outdata += "};\n";
}
// Appends the definition of the character-set table, one -1-terminated run
// per output row.
//
// outdata   text being built
// headers   three fragments placed before type, between type and funcname,
//           and after funcname
// type      element type name
// funcname  name of the table being defined
// table     values to print; -1 is written as "eos", everything else as
//           0x-prefixed hexadecimal
//
// A row receives a trailing "// N" comment, N being the index at which the
// row's run starts, when N / 10 differs from that of the last commented
// row, and always on the final row.
void out_cstable(std::string &outdata, const char *const headers[], const char *const type, const char *const funcname, const std::basic_string<long> &table) const
{
    const int count = static_cast<int>(table.size());
    bool at_row_start = true;
    int run_begin = 0;
    int labelled_decade = -1;

    outdata += headers[0];
    outdata += type;
    outdata += headers[1];
    outdata += funcname;
    outdata += headers[2];

    for (int pos = 0; pos < count; ++pos)
    {
        const long cp = table[pos];
        const bool terminator = cp == -1L;
        const int next = pos + 1;
        const bool final_entry = next == count;

        outdata += at_row_start ? "\t" : " ";
        at_row_start = false;

        if (terminator)
        {
            outdata += "eos";
        }
        else
        {
            outdata += "0x";
            outdata += unishared::stringify<16>(cp, "%.4lX");
        }

        if (!final_entry)
            outdata += ',';

        if (!terminator)
            continue;

        // End of a run: optionally label the row, then start a new one.
        const int decade = run_begin / 10;
        if (labelled_decade != decade || final_entry)
        {
            outdata += "\t// ";
            outdata += unishared::stringify<16>(run_begin, "%d");
            labelled_decade = decade;
        }
        outdata += '\n';
        at_row_start = true;
        run_begin = next;
    }

    outdata += "};\n";
}
// A map used as a set of long keys: update() only ever tests for a key's
// presence and stores the value 1.
typedef std::map<long, char> flagset_type;
// Folding delta (to - from) of the largest magnitude seen by update().
// Starts at 0.
long maxdelta_; // = 0L;
// The "from" code point at which maxdelta_ was observed. Starts at 0.
long maxdelta_cp_; // = 0L;
long ucf_maxcodepoint_; // = 0L; // The max code point for case-folding.
long rev_maxcodepoint_; // = 0L; // The max code point for reverse lookup.
// Both segment counters start at 1 and grow by one for each distinct
// (code point >> 8) value seen; they are emitted shifted left by 8 as the
// table sizes.
unsigned int ucf_numofsegs_; // = 1U; // The number of segments in the delta table.
unsigned int rev_numofsegs_; // = 1U; // The number of segments in the table for reverse lookup.
// Incremented once per update() call.
unsigned int numofcps_from_; // = 0U; // The number of code points in "folded from"s.
// Incremented once per distinct "to" code point.
unsigned int numofcps_to_; // = 0U; // The number of code points in "folded to"s.
flagset_type ucf_countedsegnos; // The set of segment nos marked as "counted" for case-folding.
flagset_type rev_countedsegnos; // The set of segment nos marked as "counted" for reverse lookup.
flagset_type cps_counted_as_foldedto; // The set of code points marked as "folded to".
// Highest count held in appearance_counts_; maxset() returns this plus one.
unsigned int max_appearance_;
// How many times each "to" string has been passed to update(), keyed by the
// string exactly as received.
std::map<std::string, unsigned int> appearance_counts_;
// Offset inside ucf_deltas_ at which the next new segment's block will be
// placed. Starts at 0x100 and advances by 0x100 per block.
long nextoffset_;
// Folding deltas (to - from) in 0x100-entry blocks, indexed by block offset
// plus the low byte of the "from" code point.
std::basic_string<long> ucf_deltas_;
// Indexed by ("from" code point >> 8): offset of that segment's block in
// ucf_deltas_, or 0 when none has been assigned.
std::basic_string<long> ucf_segments_;
// Filled by create_revtables(): for each code point in rev_charsets_, the
// index at which its run begins.
std::basic_string<long> rev_indices_;
// Filled by create_revtables(): indexed by (code point >> 8), offset of
// that segment's block in rev_indices_, or 0 when none has been assigned.
std::basic_string<long> rev_segments_;
// NOTE(review): not referenced by any method visible in this class.
std::basic_string<long> rev_deltas_;
// Runs of code points, each run ending in -1. update_tables() writes a new
// run as { to, from, -1 } and inserts further "from" values before the -1.
// Starts as a single -1.
std::basic_string<long> rev_charsets_;
};
// class unicode_casefolding
int main(const int argc, const char *const *const argv)
{
ucf_options ucfopts(argc, argv);
std::string outdata;
unicode_casefolding ucf;
int errorno = ucf.create_ucfdata(outdata, ucfopts);
if (errorno == 0)
{
if (!unishared::write_file(ucfopts.outfilename, outdata))
errorno = 2;
}
return errorno;
}

File diff suppressed because it is too large Load diff