diff --git a/Makefile b/Makefile
index 99e6501..92071d1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,9 @@
-CC=gcc
-WINDRES=windres
-CFLAGS=-Iinc -Wall -Wl,--enable-stdcall-fixup -s
-LIBS=-lgdi32 -lwinmm
+-include config.mk
+
+WINDRES  ?= windres
+LDFLAGS   = -Iinc -Wall -Wl,--enable-stdcall-fixup -s
+CFLAGS    = -std=c99
+LIBS      = -lgdi32 -lwinmm
 
 FILES = src/debug.c \
         src/main.c \
@@ -21,8 +23,8 @@ FILES = src/debug.c \
 
 all:
 	$(WINDRES) -J rc ddraw.rc ddraw.rc.o
-	$(CC) $(CFLAGS) -shared -o ddraw.dll $(FILES) ddraw.def ddraw.rc.o $(LIBS)
-#	$(CC) $(CFLAGS) -nostdlib -shared -o ddraw.dll $(FILES) ddraw.def ddraw.rc.o $(LIBS) -lkernel32 -luser32 -lmsvcrt
+	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ddraw.dll $(FILES) ddraw.def ddraw.rc.o $(LIBS)
+#	$(CC) $(CFLAGS) $(LDFLAGS) -nostdlib -shared -o ddraw.dll $(FILES) ddraw.def ddraw.rc.o $(LIBS) -lkernel32 -luser32 -lmsvcrt
 
 clean:
-	rm -f ddraw.dll
+	$(RM) ddraw.dll ddraw.rc.o
diff --git a/cnc-ddraw.vcxproj b/cnc-ddraw.vcxproj
index 39864d4..3781845 100644
--- a/cnc-ddraw.vcxproj
+++ b/cnc-ddraw.vcxproj
@@ -13,6 +13,19 @@
   <ItemGroup>
     <ClCompile Include="src\clipper.c" />
     <ClCompile Include="src\debug.c" />
+    <ClCompile Include="src\detours\creatwth.cpp" />
+    <ClCompile Include="src\detours\detours.cpp" />
+    <ClCompile Include="src\detours\disasm.cpp" />
+    <ClCompile Include="src\detours\disolarm.cpp" />
+    <ClCompile Include="src\detours\disolarm64.cpp" />
+    <ClCompile Include="src\detours\disolia64.cpp" />
+    <ClCompile Include="src\detours\disolx64.cpp" />
+    <ClCompile Include="src\detours\disolx86.cpp" />
+    <ClCompile Include="src\detours\image.cpp" />
+    <ClCompile Include="src\detours\modules.cpp" />
+    <ClCompile Include="src\detours\uimports.cpp">
+      <ExcludedFromBuild>true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="src\dinput.c" />
     <ClCompile Include="src\hook.c" />
     <ClCompile Include="src\lodepng.c" />
@@ -99,7 +112,7 @@
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <AdditionalIncludeDirectories>inc</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>inc;src\detours</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -124,7 +137,7 @@ if exist "$(LocalDebuggerCommand)" if exist "$(LocalDebuggerWorkingDirectory)"
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <AdditionalIncludeDirectories>inc</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>inc;src\detours</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
diff --git a/cnc-ddraw.vcxproj.filters b/cnc-ddraw.vcxproj.filters
index 2423530..24513d4 100644
--- a/cnc-ddraw.vcxproj.filters
+++ b/cnc-ddraw.vcxproj.filters
@@ -13,6 +13,9 @@
       <UniqueIdentifier>{9b152f9d-a092-42a9-ac47-0594f135a640}</UniqueIdentifier>
       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
     </Filter>
+    <Filter Include="Source Files\detours">
+      <UniqueIdentifier>{af194dd7-3316-4887-93d6-9f2af2135f94}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="src\clipper.c">
@@ -60,6 +63,39 @@
     <ClCompile Include="src\hook.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="src\detours\disolx64.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\disolx86.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\image.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\modules.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\uimports.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\creatwth.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\detours.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\disasm.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\disolarm.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\disolarm64.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
+    <ClCompile Include="src\detours\disolia64.cpp">
+      <Filter>Source Files\detours</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="inc\clipper.h">
diff --git a/ddraw.rc b/ddraw.rc
index 15528d4..4af3f0c 100644
--- a/ddraw.rc
+++ b/ddraw.rc
@@ -4,7 +4,7 @@
 #define VERSION_MAJOR    1
 #define VERSION_MINOR    3
 #define VERSION_BUILD    4
-#define VERSION_REVISION 1  
+#define VERSION_REVISION 3  
 
 #define VERSION          VERSION_MAJOR, VERSION_MINOR, VERSION_BUILD, VERSION_REVISION
 #define VERSION_STRING   ver_str(VERSION_MAJOR, VERSION_MINOR, VERSION_BUILD, VERSION_REVISION)
diff --git a/inc/debug.h b/inc/debug.h
index 9c30178..19d9907 100644
--- a/inc/debug.h
+++ b/inc/debug.h
@@ -8,6 +8,7 @@ double CounterStop();
 void DebugPrint(const char *format, ...);
 void DrawFrameInfoStart();
 void DrawFrameInfoEnd();
+int dprintf(const char *fmt, ...);
 
 extern double DebugFrameTime;
 extern DWORD DebugFrameCount;
@@ -24,6 +25,8 @@ extern DWORD DebugFrameCount;
 
 #ifdef _DEBUG_S
 #define printf(format, ...) DebugPrint("xDBG " format, ##__VA_ARGS__)
+#else
+#define printf(format, ...) dprintf(format, ##__VA_ARGS__) 
 #endif 
 
 #else 
diff --git a/inc/hook.h b/inc/hook.h
index 8da0fec..c147065 100644
--- a/inc/hook.h
+++ b/inc/hook.h
@@ -3,6 +3,8 @@
 
 #include <windows.h>
 
+typedef HFONT(__stdcall* CREATEFONTINDIRECTA)(CONST LOGFONTA*); /* ANSI entry point: always LOGFONTA, even if UNICODE is defined */
+
 typedef BOOL (WINAPI* GETCURSORPOSPROC)(LPPOINT);
 typedef BOOL(WINAPI* CLIPCURSORPROC)(const RECT*);
 typedef int (WINAPI* SHOWCURSORPROC)(BOOL);
@@ -45,11 +47,13 @@ extern ENABLEWINDOWPROC real_EnableWindow;
 extern CREATEWINDOWEXAPROC real_CreateWindowExA;
 extern DESTROYWINDOWPROC real_DestroyWindow;
 
+extern int HookingMethod;
 extern BOOL Hook_Active;
 
 void Hook_Init();
+void Hook_Exit();
 void Hook_PatchIAT(HMODULE hMod, char *moduleName, char *functionName, PROC newFunction);
-PROC Hook_HotPatch(PROC function, PROC newFunction);
-void Hook_TryHotPatch(char *moduleName, char *functionName, PROC newFunction, PROC *function);
+void Hook_Create(char *moduleName, char *functionName, PROC newFunction, PROC *function);
+void Hook_Revert(char *moduleName, char *functionName, PROC newFunction, PROC *function);
 
 #endif
diff --git a/inc/main.h b/inc/main.h
index c1765d3..7d6dcbb 100644
--- a/inc/main.h
+++ b/inc/main.h
@@ -122,14 +122,12 @@ typedef struct IDirectDrawImpl
     BOOL handlemouse;
     char shader[MAX_PATH];
     BOOL wine;
-    LONG minimized;
     BOOL altenter;
     BOOL hidecursor;
     BOOL accurateTimers;
     int bnetHack;
     BOOL bnetActive;
     BOOL bnetD3d9Fullscreen;
-    BOOL hotPatch;
     SpeedLimiter ticksLimiter;
     SpeedLimiter flipLimiter;
     SpeedLimiter fpsLimiter;
diff --git a/src/clipper.c b/src/clipper.c
index 6660950..fd70511 100644
--- a/src/clipper.c
+++ b/src/clipper.c
@@ -20,7 +20,7 @@
 
 HRESULT __stdcall ddraw_clipper_QueryInterface(IDirectDrawClipperImpl *This, REFIID riid, void **obj)
 {
-    printf("DirectDrawClipper::QueryInterface(This=%p, riid=%08X, obj=%p) ???\n", This, (unsigned int)riid, obj);
+    printf("??? DirectDrawClipper::QueryInterface(This=%p, riid=%08X, obj=%p)\n", This, (unsigned int)riid, obj);
     return S_OK;
 }
 
@@ -52,37 +52,37 @@ ULONG __stdcall ddraw_clipper_Release(IDirectDrawClipperImpl *This)
 
 HRESULT __stdcall ddraw_clipper_GetClipList(IDirectDrawClipperImpl *This, LPRECT a, LPRGNDATA b, LPDWORD c)
 {
-    printf("IDirectDrawClipper::GetClipList(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawClipper::GetClipList(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_clipper_GetHWnd(IDirectDrawClipperImpl *This, HWND FAR *a)
 {
-    printf("IDirectDrawClipper::GetHWnd(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawClipper::GetHWnd(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_clipper_Initialize(IDirectDrawClipperImpl *This, LPDIRECTDRAW a, DWORD b)
 {
-    printf("IDirectDrawClipper::Initialize(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawClipper::Initialize(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_clipper_IsClipListChanged(IDirectDrawClipperImpl *This, BOOL FAR *a)
 {
-    printf("IDirectDrawClipper::IsClipListChanged(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawClipper::IsClipListChanged(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_clipper_SetClipList(IDirectDrawClipperImpl *This, LPRGNDATA a, DWORD b)
 {
-    printf("IDirectDrawClipper::SetClipList(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawClipper::SetClipList(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_clipper_SetHWnd(IDirectDrawClipperImpl *This, DWORD a, HWND b)
 {
-    printf("IDirectDrawClipper::SetHWnd(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawClipper::SetHWnd(This=%p, ...)\n", This);
     return DD_OK;
 }
 
diff --git a/src/debug.c b/src/debug.c
index 88690e1..b9e08f2 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -35,6 +35,36 @@ void DebugPrint(const char *format, ...)
 	OutputDebugStringA(buffer);
 }
 
+/* Serialized debug logger: writes "[thread id] HH:MM:SS.mmm " followed by the
+ * formatted message to stdout. Returns the result of vfprintf() for the message. */
+int dprintf(const char *fmt, ...)
+{
+    static CRITICAL_SECTION cs;
+    static volatile LONG state; /* 0 = uninitialized, 1 = initializing, 2 = ready */
+    va_list args;
+    SYSTEMTIME st;
+    int ret;
+
+    /* Exactly one caller creates the lock; concurrent first callers wait until it
+     * is ready instead of entering (or re-initializing) a half-built lock. */
+    if (InterlockedCompareExchange(&state, 1, 0) == 0)
+    {
+        InitializeCriticalSection(&cs);
+        InterlockedExchange(&state, 2);
+    }
+    while (state != 2)
+        Sleep(0);
+
+    EnterCriticalSection(&cs);
+    GetLocalTime(&st);
+    fprintf(stdout, "[%lu] %02d:%02d:%02d.%03d ", GetCurrentThreadId(), st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
+    va_start(args, fmt);
+    ret = vfprintf(stdout, fmt, args);
+    va_end(args);
+    LeaveCriticalSection(&cs);
+    return ret;
+}
+
 void DrawFrameInfoStart()
 {
     static DWORD tick_fps = 0;
diff --git a/src/detours/CREDITS.TXT b/src/detours/CREDITS.TXT
new file mode 100644
index 0000000..a122676
--- /dev/null
+++ b/src/detours/CREDITS.TXT
@@ -0,0 +1,115 @@
+==============================================================================
+The following individuals have helped identify specific bugs and improvements
+in Detours.  The entire Detours community has benefited from their help.
+==============================================================================
+
+* Jay Krell:          Identified issue with VirtualSize == 0 files created in
+                      NT 3.1 images. (Build_339)
+
+* Igor Odnovorov:     Identified an issue with the placement of the trampoline
+                      region when a function is detoured twice and the second
+                      trampoline region is outside of the +/- 2GB range of
+                      the target. (Build_337)
+
+* Jay Krell:          Identified need for some programs to enumerate the
+                      address of IAT entries. (Build_336)
+
+* Calvin Hsia:        Identified need for some program to change the excluded
+                      system region. (Build_336)
+
+* Adam Smith:         Identified error in failure handling when VirtualProtect
+                      cannot make pages executable because the Prohibit
+                      Dynamic Code Generation mitigation policy has been
+                      applied to a process. (Build_335)
+
+* Ben Faull:          Identified fix to detour_alloc_region_from_lo and
+                      detour_alloc_region_from_hi that preserves ASLR entropy.
+                      (Build_334)
+
+* Shaoxiang Su:       Reported errors building with Visual Studio 2015.
+                      (Build_332)
+
+* Jay Krell:          Identified and resolved significant gaps in the X86, X64
+                      and IA64 disassemblers for instruction found in code,
+                      but seldom found in function prologues. (Build_331)
+
+* Allan Murphy:       Identified error in rep and jmp ds: encodings. (Build_331)
+
+* Philip Bacon:       Identified incorrect entry point return for pure
+                      resource-only binaries. (Build_330)
+
+* Jay Krell:          Identified failure in DetourAttachEx to update nAlign.
+                      (Build_330)
+
+* Sumit Sarin:        Helped debug error with packed binaries.
+                      (Build_329)
+
+* Nitya Kumar Sharma: Reported bug in DetourAfterWithDll for 32/64 agnostic
+                      EXEs.
+                      (Build_327)
+
+* Richard Black:      Identified a large number of typos in documentation.
+                      (Build_326)
+
+* Michael Bilodeau:   Identified bug in DetourUpdateProcessWithDll when the
+                      target process contains a Detours payload *after* all
+                      valid PE binaries.
+                      (Build_324)
+
+* Meera Jindal:       Reported bug in identification of target address in
+                      DetourCopyInstruction for jmp[] and call[] on x86 & x64,
+                      the ff15 and ff25 opcodes.
+                      (Build_323)
+
+* Ken Johnson:        Assistance with SAL 2.0 annotations.
+                      (Build_319)
+
+* Nick Wood:          Identified bug in DetourFindFunction on ARM.
+                      (Build_314)
+
+* Mark Russinovich:   Helped debug DetourCreateProcessWithDllEx.
+                      (Build_314)
+
+* John Lin:           Implementation idea for DetoursCreateProcessWithDllEx.
+                      (Build_314)
+
+* Andrew Zawadowskiy  Reported an improper memory page permissions
+                      vulnerability in Detours 2.1.  (Vulnerability does not
+                      exist in versions later than Detours 2.1.)
+                      (Build_223)
+
+* Nightxie:           Identified bug in detour_alloc_round_up_to_region.
+                      (Build_310)
+
+* Diana Milirud:      Identified bug in B* instructions on ARM.
+                      (Build_309)
+
+* Juan Carlos         Identified correct MSIL entry point for unsigned MSIL.
+  Luciani:            (Build_308)
+
+* Lee Hunt            Suggested improvements in algorithm for allocation of
+  Lawrence Landauer   trampoline regions on x64 to avoid collisions with
+  Joe Laughlin:       system DLLs.
+                      (Build_307)
+
+* Tyler Sims          Identified bug in handling of "anycpu" MSIL binaries
+  Darren Kennedy:     on x64.
+                      (Build_307)
+
+* Andre Vachon:       Help with optimized binaries.
+                      (Build_301)
+
+* Chris Mann:         Identified fix not forward ported from 2.2 to 3.0.
+                      (Build_301)
+
+* Mark Irving:        Identified bug with EXEs missing second import table.
+                      (Build_300)
+
+* Ben Schwarz:        Identified bug in handling of multi-byte NOPs.
+                      (Build_300)
+
+* Aaron Giles         Coded initial ARM/Thumb2 disassembler.
+  Jared Henderson:    (Build_300)
+
+* Doug Brubacher:     Coded initial x86 disassembler.
+                      (Build_100)
diff --git a/src/detours/LICENSE.md b/src/detours/LICENSE.md
new file mode 100644
index 0000000..e6a4c56
--- /dev/null
+++ b/src/detours/LICENSE.md
@@ -0,0 +1,23 @@
+# Copyright (c) Microsoft Corporation
+
+All rights reserved.
+
+# MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/detours/README.md b/src/detours/README.md
new file mode 100644
index 0000000..b5c5239
--- /dev/null
+++ b/src/detours/README.md
@@ -0,0 +1,48 @@
+# Microsoft Research Detours Package
+
+Detours is a software package for monitoring and instrumenting API calls on Windows. Detours
+has been used by many ISVs and is also used by product teams at Microsoft. Detours is now available under
+a standard open source license (MIT).  This simplifies licensing for programmers using Detours
+and allows the community to support Detours using open source tools and processes.
+
+Detours is compatible with the Windows NT family of 
+operating systems: Windows NT, Windows XP, Windows Server 2003, Windows 7,
+Windows 8, and Windows 10.  It cannot be used by Windows Store apps
+because Detours requires APIs not available to those applications. 
+This repo contains the source code for version 4.0.1 of Detours.
+
+For technical documentation on Detours, see the [Detours Wiki](https://github.com/microsoft/Detours/wiki).
+For directions on how to build and run samples, see the
+samples [README.txt](https://github.com/Microsoft/Detours/blob/master/samples/README.TXT) file.
+
+## Contributing
+
+The [`Detours`](https://github.com/microsoft/detours) repository is where development is done.
+Here are some ways you can participate in the project:
+
+* [Answer questions](https://github.com/microsoft/detours/issues) about using Detours.
+* [Improve the Wiki](https://github.com/microsoft/detours/wiki).
+* [Submit bugs](https://github.com/microsoft/detours/issues) and help us verify fixes and changes as they are checked in.
+* Review [source code changes](https://github.com/microsoft/detours/pulls).
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+
+## Issues, questions, and feedback
+
+* Open an issue on [GitHub Issues](https://github.com/Microsoft/detours/issues).
+
+## Mailing list for announcements
+
+The detours-announce mailing list is a low-traffic email list for important announcements 
+about the project, such as the availability of new versions of Detours.  To join it, send 
+an email to listserv@lists.research.microsoft.com with a 
+message body containing only the text SUBSCRIBE DETOURS-ANNOUNCE.
+To leave it, send an email to listserv@lists.research.microsoft.com with a 
+message body containing only the text UNSUBSCRIBE DETOURS-ANNOUNCE.
+
+
+## License
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the [MIT](LICENSE.md) License.
diff --git a/src/detours/creatwth.cpp b/src/detours/creatwth.cpp
new file mode 100644
index 0000000..bbe7cc8
--- /dev/null
+++ b/src/detours/creatwth.cpp
@@ -0,0 +1,1586 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Create a process with a DLL (creatwth.cpp of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+
+#if _MSC_VER >= 1900
+#pragma warning(push)
+#pragma warning(disable:4091) // empty typedef
+#endif
+#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1
+#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1
+#include <windows.h>
+#include <stddef.h>
+#pragma warning(push)
+#if _MSC_VER > 1400
+#pragma warning(disable:6102 6103) // /analyze warnings
+#endif
+#include <strsafe.h>
+#pragma warning(pop)
+
+// #define DETOUR_DEBUG 1
+#define DETOURS_INTERNAL
+
+#include "detours.h"
+
+#if DETOURS_VERSION != 0x4c0c1   // 0xMAJORcMINORcPATCH
+#error detours.h version mismatch
+#endif
+
+#if _MSC_VER >= 1900
+#pragma warning(pop)
+#endif
+
+#define IMPORT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
+#define BOUND_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT]
+#define CLR_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR]
+#define IAT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT]
+
+//////////////////////////////////////////////////////////////////////////////
+//
+const GUID DETOUR_EXE_HELPER_GUID = { /* ea0251b9-5cde-41b5-98d0-2af4a26b0fee */
+    0xea0251b9, 0x5cde, 0x41b5,
+    { 0x98, 0xd0, 0x2a, 0xf4, 0xa2, 0x6b, 0x0f, 0xee }};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Enumerate through modules in the target process.
+// LoadNtHeaderFromProcess copies the NT headers of hModule in hProcess into *pNtHeader.
+static BOOL WINAPI LoadNtHeaderFromProcess(HANDLE hProcess,
+                                           HMODULE hModule,
+                                           PIMAGE_NT_HEADERS32 pNtHeader)
+{
+    PBYTE pbModule = (PBYTE)hModule;
+
+    if (pbModule == NULL) {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return FALSE;
+    }
+
+    MEMORY_BASIC_INFORMATION mbi;
+    ZeroMemory(&mbi, sizeof(mbi));
+
+    if (VirtualQueryEx(hProcess, hModule, &mbi, sizeof(mbi)) == 0) {
+        return FALSE;
+    }
+
+    IMAGE_DOS_HEADER idh;
+    // Read the DOS header from the start of the module image.
+    if (!ReadProcessMemory(hProcess, pbModule, &idh, sizeof(idh), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %d\n",
+                      pbModule, pbModule + sizeof(idh), GetLastError()));
+        return FALSE;
+    }
+    // Require the DOS signature and an e_lfanew within [sizeof(idh), mbi.RegionSize].
+    if (idh.e_magic != IMAGE_DOS_SIGNATURE ||
+        (DWORD)idh.e_lfanew > mbi.RegionSize ||
+        (DWORD)idh.e_lfanew < sizeof(idh)) {
+
+        SetLastError(ERROR_BAD_EXE_FORMAT);
+        return FALSE;
+    }
+    // Read the NT headers located e_lfanew bytes into the module.
+    if (!ReadProcessMemory(hProcess, pbModule + idh.e_lfanew,
+                           pNtHeader, sizeof(*pNtHeader), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p:%p) failed: %d\n",
+                      pbModule + idh.e_lfanew,
+                      pbModule + idh.e_lfanew + sizeof(*pNtHeader),
+                      pbModule,
+                      GetLastError()));
+        return FALSE;
+    }
+    // The headers just read must carry the NT signature.
+    if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+        SetLastError(ERROR_BAD_EXE_FORMAT);
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+static HMODULE WINAPI EnumerateModulesInProcess(HANDLE hProcess,
+                                                HMODULE hModuleLast,
+                                                PIMAGE_NT_HEADERS32 pNtHeader)
+{
+    PBYTE pbLast = (PBYTE)hModuleLast + MM_ALLOCATION_GRANULARITY; // Start one allocation granule past hModuleLast.
+
+    MEMORY_BASIC_INFORMATION mbi;
+    ZeroMemory(&mbi, sizeof(mbi));
+
+    // Find the next memory region that contains a mapped PE image.
+    // Returns its address with the NT headers copied to *pNtHeader, or NULL when the scan stops.
+
+    for (;; pbLast = (PBYTE)mbi.BaseAddress + mbi.RegionSize) {
+        if (VirtualQueryEx(hProcess, (PVOID)pbLast, &mbi, sizeof(mbi)) == 0) {
+            break;
+        }
+
+        // Usermode address space has such an unaligned region size always at the
+        // end and only at the end.
+        //
+        if ((mbi.RegionSize & 0xfff) == 0xfff) {
+            break;
+        }
+        if (((PBYTE)mbi.BaseAddress + mbi.RegionSize) < pbLast) { // Region ends below the query address; presumably an address wrap-around guard.
+            break;
+        }
+
+        // Skip uncommitted regions and guard pages.
+        //
+        if ((mbi.State != MEM_COMMIT) ||
+            ((mbi.Protect & 0xff) == PAGE_NOACCESS) ||
+            (mbi.Protect & PAGE_GUARD)) {
+            continue;
+        }
+
+        if (LoadNtHeaderFromProcess(hProcess, (HMODULE)pbLast, pNtHeader)) {
+            return (HMODULE)pbLast;
+        }
+    }
+    return NULL;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Find a region of memory in which we can create a replacement import table.
+//
+static PBYTE FindAndAllocateNearBase(HANDLE hProcess, PBYTE pbModule, PBYTE pbBase, DWORD cbAlloc)
+{
+    MEMORY_BASIC_INFORMATION mbi;
+    ZeroMemory(&mbi, sizeof(mbi));
+    // Walk the target's regions upward from pbBase; returns the committed block, or NULL if none fits.
+    PBYTE pbLast = pbBase;
+    for (;; pbLast = (PBYTE)mbi.BaseAddress + mbi.RegionSize) {
+        ZeroMemory(&mbi, sizeof(mbi));
+        if (VirtualQueryEx(hProcess, (PVOID)pbLast, &mbi, sizeof(mbi)) == 0) {
+            if (GetLastError() == ERROR_INVALID_PARAMETER) {
+                break;
+            }
+            DETOUR_TRACE(("VirtualQueryEx(%p) failed: %d\n",
+                          pbLast, GetLastError()));
+            break;
+        }
+        // Usermode address space has such an unaligned region size always at the
+        // end and only at the end.
+        //
+        if ((mbi.RegionSize & 0xfff) == 0xfff) {
+            break;
+        }
+
+        // Skip anything other than a pure free region.
+        //
+        if (mbi.State != MEM_FREE) {
+            continue;
+        }
+
+        // Use the max of mbi.BaseAddress and pbBase, in case mbi.BaseAddress < pbBase.
+        PBYTE pbAddress = (PBYTE)mbi.BaseAddress > pbBase ? (PBYTE)mbi.BaseAddress : pbBase;
+
+        // Round pbAddress up to the nearest MM allocation boundary.
+        const DWORD_PTR mmGranularityMinusOne = (DWORD_PTR)(MM_ALLOCATION_GRANULARITY -1);
+        pbAddress = (PBYTE)(((DWORD_PTR)pbAddress + mmGranularityMinusOne) & ~mmGranularityMinusOne);
+
+#ifdef _WIN64
+        // The offset from pbModule to any replacement import must fit into 32 bits.
+        // For simplicity, we check that the offset to the last byte fits into 32 bits,
+        // instead of the largest offset we'll actually use. The values are very similar.
+        const size_t GB4 = ((((size_t)1) << 32) - 1);
+        if ((size_t)(pbAddress + cbAlloc - 1 - pbModule) > GB4) {
+            DETOUR_TRACE(("FindAndAllocateNearBase(1) failing due to distance >4GB %p\n", pbAddress));
+            return NULL;
+        }
+#else
+        UNREFERENCED_PARAMETER(pbModule);
+#endif
+
+        DETOUR_TRACE(("Free region %p..%p\n",
+                      mbi.BaseAddress,
+                      (PBYTE)mbi.BaseAddress + mbi.RegionSize));
+
+        for (; pbAddress < (PBYTE)mbi.BaseAddress + mbi.RegionSize; pbAddress += MM_ALLOCATION_GRANULARITY) {
+#ifdef _WIN64
+            // The offset from pbModule to any replacement import must fit into 32 bits.
+            // Checked before allocating so a too-distant block is never left committed in the target.
+            if ((size_t)(pbAddress + cbAlloc - 1 - pbModule) > GB4) {
+                DETOUR_TRACE(("FindAndAllocateNearBase(2) failing due to distance >4GB %p\n", pbAddress));
+                return NULL;
+            }
+#endif
+            PBYTE pbAlloc = (PBYTE)VirtualAllocEx(hProcess, pbAddress, cbAlloc,
+                                                  MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+            if (pbAlloc == NULL) {
+                DETOUR_TRACE(("VirtualAllocEx(%p) failed: %d\n", pbAddress, GetLastError()));
+                continue;
+            }
+            DETOUR_TRACE(("[%p..%p] Allocated for import table.\n",
+                          pbAlloc, pbAlloc + cbAlloc));
+            return pbAlloc;
+        }
+    }
+    return NULL;
+}
+
+static inline DWORD PadToDword(DWORD dw)
+{
+    return (dw + 3) & ~3u;   // Round dw up to the next multiple of 4 (aligned values are unchanged).
+}
+
+static inline DWORD PadToDwordPtr(DWORD dw)
+{
+    return (dw + 7) & ~7u;   // Round dw up to the next multiple of 8 (aligned values are unchanged).
+}
+
+static inline HRESULT ReplaceOptionalSizeA(_Inout_z_count_(cchDest) LPSTR pszDest,
+                                           _In_ size_t cchDest,
+                                           _In_z_ LPCSTR pszSize)
+{
+    if (cchDest == 0 || pszDest == NULL || pszSize == NULL ||
+        pszSize[0] == '\0' || pszSize[1] == '\0' || pszSize[2] != '\0') {
+        // Can not write into empty buffer or with string other than two chars.
+        // Return a failing HRESULT: the bare Win32 code is positive and would pass SUCCEEDED().
+        return HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
+    }
+
+    for (; cchDest >= 2; cchDest--, pszDest++) { // Overwrite the first "??" with the two chars of pszSize.
+        if (pszDest[0] == '?' && pszDest[1] == '?') {
+            pszDest[0] = pszSize[0];
+            pszDest[1] = pszSize[1];
+            break;
+        }
+    }
+    // S_OK is returned whether or not a "??" placeholder was found.
+    return S_OK;
+}
+
+static BOOL RecordExeRestore(HANDLE hProcess, HMODULE hModule, DETOUR_EXE_RESTORE& der)
+{
+    // Save the various headers for DetourRestoreAfterWith; FALSE if a read fails or they do not fit.
+    ZeroMemory(&der, sizeof(der));
+    der.cb = sizeof(der);
+    // Read the DOS header from the base of the module.
+    der.pidh = (PBYTE)hModule;
+    der.cbidh = sizeof(der.idh);
+    if (!ReadProcessMemory(hProcess, der.pidh, &der.idh, sizeof(der.idh), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %d\n",
+                      der.pidh, der.pidh + der.cbidh, GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("IDH: %p..%p\n", der.pidh, der.pidh + der.cbidh));
+
+    // We read the NT header in two passes to get the full size.
+    // First we read just the Signature and FileHeader.
+    der.pinh = der.pidh + der.idh.e_lfanew;
+    der.cbinh = FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader);
+    if (!ReadProcessMemory(hProcess, der.pinh, &der.inh, der.cbinh, NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %d\n",
+                      der.pinh, der.pinh + der.cbinh, GetLastError()));
+        return FALSE;
+    }
+
+    // Second we read the OptionalHeader and Section headers.
+    der.cbinh = (FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader) +
+                 der.inh.FileHeader.SizeOfOptionalHeader +
+                 der.inh.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER));
+    // Give up if the combined headers are larger than der.raw.
+    if (der.cbinh > sizeof(der.raw)) {
+        return FALSE;
+    }
+
+    if (!ReadProcessMemory(hProcess, der.pinh, &der.inh, der.cbinh, NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %d\n",
+                      der.pinh, der.pinh + der.cbinh, GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("INH: %p..%p\n", der.pinh, der.pinh + der.cbinh));
+
+    // Third, we read the CLR header
+
+    if (der.inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
+        if (der.inh32.CLR_DIRECTORY.VirtualAddress != 0 &&
+            der.inh32.CLR_DIRECTORY.Size != 0) {
+
+            DETOUR_TRACE(("CLR32.VirtAddr=%x, CLR.Size=%x\n",
+                          der.inh32.CLR_DIRECTORY.VirtualAddress,
+                          der.inh32.CLR_DIRECTORY.Size));
+
+            der.pclr = ((PBYTE)hModule) + der.inh32.CLR_DIRECTORY.VirtualAddress;
+        }
+    }
+    else if (der.inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
+        if (der.inh64.CLR_DIRECTORY.VirtualAddress != 0 &&
+            der.inh64.CLR_DIRECTORY.Size != 0) {
+
+            DETOUR_TRACE(("CLR64.VirtAddr=%x, CLR.Size=%x\n",
+                          der.inh64.CLR_DIRECTORY.VirtualAddress,
+                          der.inh64.CLR_DIRECTORY.Size));
+
+            der.pclr = ((PBYTE)hModule) + der.inh64.CLR_DIRECTORY.VirtualAddress;
+        }
+    }
+    // der.pclr is only set when a non-empty CLR directory was found above.
+    if (der.pclr != 0) {
+        der.cbclr = sizeof(der.clr);
+        if (!ReadProcessMemory(hProcess, der.pclr, &der.clr, der.cbclr, NULL)) {
+            DETOUR_TRACE(("ReadProcessMemory(clr@%p..%p) failed: %d\n",
+                          der.pclr, der.pclr + der.cbclr, GetLastError()));
+            return FALSE;
+        }
+        DETOUR_TRACE(("CLR: %p..%p\n", der.pclr, der.pclr + der.cbclr));
+    }
+
+    return TRUE;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+#if DETOURS_32BIT
+// Build the 32-bit import updater: the *_XX macros below select the 32-bit
+// PE types and the function name (UpdateImports32), then uimports.cpp is
+// textually included so its code is compiled against those definitions.
+// NOTE(review): DETOUR_EXE_RESTORE_FIELD_XX is #undef'd but never #define'd
+// in this section, and DETOURS_BITS_XX is #define'd but never #undef'd here
+// -- confirm whether uimports.cpp takes care of either.
+#define DWORD_XX                        DWORD32
+#define IMAGE_NT_HEADERS_XX             IMAGE_NT_HEADERS32
+#define IMAGE_NT_OPTIONAL_HDR_MAGIC_XX  IMAGE_NT_OPTIONAL_HDR32_MAGIC
+#define IMAGE_ORDINAL_FLAG_XX           IMAGE_ORDINAL_FLAG32
+#define UPDATE_IMPORTS_XX               UpdateImports32
+#define DETOURS_BITS_XX                 32
+#include "uimports.cpp"
+#undef DETOUR_EXE_RESTORE_FIELD_XX
+#undef DWORD_XX
+#undef IMAGE_NT_HEADERS_XX
+#undef IMAGE_NT_OPTIONAL_HDR_MAGIC_XX
+#undef IMAGE_ORDINAL_FLAG_XX
+#undef UPDATE_IMPORTS_XX
+#endif // DETOURS_32BIT
+
+#if DETOURS_64BIT
+// Same include-as-template pattern for the 64-bit updater (UpdateImports64),
+// this time with the 64-bit PE types.
+#define DWORD_XX                        DWORD64
+#define IMAGE_NT_HEADERS_XX             IMAGE_NT_HEADERS64
+#define IMAGE_NT_OPTIONAL_HDR_MAGIC_XX  IMAGE_NT_OPTIONAL_HDR64_MAGIC
+#define IMAGE_ORDINAL_FLAG_XX           IMAGE_ORDINAL_FLAG64
+#define UPDATE_IMPORTS_XX               UpdateImports64
+#define DETOURS_BITS_XX                 64
+#include "uimports.cpp"
+#undef DETOUR_EXE_RESTORE_FIELD_XX
+#undef DWORD_XX
+#undef IMAGE_NT_HEADERS_XX
+#undef IMAGE_NT_OPTIONAL_HDR_MAGIC_XX
+#undef IMAGE_ORDINAL_FLAG_XX
+#undef UPDATE_IMPORTS_XX
+#endif // DETOURS_64BIT
+
+//////////////////////////////////////////////////////////////////////////////
+//
+#if DETOURS_64BIT
+
+C_ASSERT(sizeof(IMAGE_NT_HEADERS64) == sizeof(IMAGE_NT_HEADERS32) + 16);
+
+// Rewrites the in-memory PE headers of a 32-bit image in the target process
+// as 64-bit headers.
+//
+//   hProcess - target process whose memory is read and written.
+//   hModule  - base address of the image inside the target process.
+//   machine  - value stored into FileHeader.Machine of the new header.
+//   der      - restore record; refreshed via RecordExeRestore once the new
+//              headers have been written.
+//
+// Returns TRUE on success, FALSE if any remote read/write/protect call fails
+// or the image has more sections than the local sects[] buffer can hold.
+// NOTE(review): the failure returns after the protection change do not put
+// the original page protection back -- confirm that this is acceptable.
+static BOOL UpdateFrom32To64(HANDLE hProcess, HMODULE hModule, WORD machine,
+                             DETOUR_EXE_RESTORE& der)
+{
+    IMAGE_DOS_HEADER idh;
+    IMAGE_NT_HEADERS32 inh32;
+    IMAGE_NT_HEADERS64 inh64;
+    IMAGE_SECTION_HEADER sects[32];
+    PBYTE pbModule = (PBYTE)hModule;
+    DWORD n;
+
+    ZeroMemory(&inh32, sizeof(inh32));
+    ZeroMemory(&inh64, sizeof(inh64));
+    ZeroMemory(sects, sizeof(sects));
+
+    DETOUR_TRACE(("UpdateFrom32To64(%04x)\n", machine));
+    //////////////////////////////////////////////////////// Read old headers.
+    //
+    if (!ReadProcessMemory(hProcess, pbModule, &idh, sizeof(idh), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %d\n",
+                      pbModule, pbModule + sizeof(idh), GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p)\n",
+                  pbModule, pbModule + sizeof(idh)));
+
+    // The NT headers live at the offset given by the DOS header.
+    PBYTE pnh = pbModule + idh.e_lfanew;
+    if (!ReadProcessMemory(hProcess, pnh, &inh32, sizeof(inh32), NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %d\n",
+                      pnh, pnh + sizeof(inh32), GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p)\n", pnh, pnh + sizeof(inh32)));
+
+    // Refuse images whose section table would not fit in sects[].
+    if (inh32.FileHeader.NumberOfSections > (sizeof(sects)/sizeof(sects[0]))) {
+        return FALSE;
+    }
+
+    // The section table follows the (32-bit sized) optional header.
+    PBYTE psects = pnh +
+        FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader) +
+        inh32.FileHeader.SizeOfOptionalHeader;
+    ULONG cb = inh32.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER);
+    if (!ReadProcessMemory(hProcess, psects, &sects, cb, NULL)) {
+        DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p) failed: %d\n",
+                      psects, psects + cb, GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p)\n", psects, psects + cb));
+
+    ////////////////////////////////////////////////////////// Convert header.
+    //
+    // Copy the 32-bit header field by field; only Machine, Magic and
+    // SizeOfOptionalHeader are given new (64-bit) values.
+    inh64.Signature = inh32.Signature;
+    inh64.FileHeader = inh32.FileHeader;
+    inh64.FileHeader.Machine = machine;
+    inh64.FileHeader.SizeOfOptionalHeader = sizeof(IMAGE_OPTIONAL_HEADER64);
+
+    inh64.OptionalHeader.Magic = IMAGE_NT_OPTIONAL_HDR64_MAGIC;
+    inh64.OptionalHeader.MajorLinkerVersion = inh32.OptionalHeader.MajorLinkerVersion;
+    inh64.OptionalHeader.MinorLinkerVersion = inh32.OptionalHeader.MinorLinkerVersion;
+    inh64.OptionalHeader.SizeOfCode = inh32.OptionalHeader.SizeOfCode;
+    inh64.OptionalHeader.SizeOfInitializedData = inh32.OptionalHeader.SizeOfInitializedData;
+    inh64.OptionalHeader.SizeOfUninitializedData = inh32.OptionalHeader.SizeOfUninitializedData;
+    inh64.OptionalHeader.AddressOfEntryPoint = inh32.OptionalHeader.AddressOfEntryPoint;
+    inh64.OptionalHeader.BaseOfCode = inh32.OptionalHeader.BaseOfCode;
+    inh64.OptionalHeader.ImageBase = inh32.OptionalHeader.ImageBase;
+    inh64.OptionalHeader.SectionAlignment = inh32.OptionalHeader.SectionAlignment;
+    inh64.OptionalHeader.FileAlignment = inh32.OptionalHeader.FileAlignment;
+    inh64.OptionalHeader.MajorOperatingSystemVersion
+        = inh32.OptionalHeader.MajorOperatingSystemVersion;
+    inh64.OptionalHeader.MinorOperatingSystemVersion
+        = inh32.OptionalHeader.MinorOperatingSystemVersion;
+    inh64.OptionalHeader.MajorImageVersion = inh32.OptionalHeader.MajorImageVersion;
+    inh64.OptionalHeader.MinorImageVersion = inh32.OptionalHeader.MinorImageVersion;
+    inh64.OptionalHeader.MajorSubsystemVersion = inh32.OptionalHeader.MajorSubsystemVersion;
+    inh64.OptionalHeader.MinorSubsystemVersion = inh32.OptionalHeader.MinorSubsystemVersion;
+    inh64.OptionalHeader.Win32VersionValue = inh32.OptionalHeader.Win32VersionValue;
+    inh64.OptionalHeader.SizeOfImage = inh32.OptionalHeader.SizeOfImage;
+    inh64.OptionalHeader.SizeOfHeaders = inh32.OptionalHeader.SizeOfHeaders;
+    inh64.OptionalHeader.CheckSum = inh32.OptionalHeader.CheckSum;
+    inh64.OptionalHeader.Subsystem = inh32.OptionalHeader.Subsystem;
+    inh64.OptionalHeader.DllCharacteristics = inh32.OptionalHeader.DllCharacteristics;
+    inh64.OptionalHeader.SizeOfStackReserve = inh32.OptionalHeader.SizeOfStackReserve;
+    inh64.OptionalHeader.SizeOfStackCommit = inh32.OptionalHeader.SizeOfStackCommit;
+    inh64.OptionalHeader.SizeOfHeapReserve = inh32.OptionalHeader.SizeOfHeapReserve;
+    inh64.OptionalHeader.SizeOfHeapCommit = inh32.OptionalHeader.SizeOfHeapCommit;
+    inh64.OptionalHeader.LoaderFlags = inh32.OptionalHeader.LoaderFlags;
+    inh64.OptionalHeader.NumberOfRvaAndSizes = inh32.OptionalHeader.NumberOfRvaAndSizes;
+    for (n = 0; n < IMAGE_NUMBEROF_DIRECTORY_ENTRIES; n++) {
+        inh64.OptionalHeader.DataDirectory[n] = inh32.OptionalHeader.DataDirectory[n];
+    }
+
+    /////////////////////////////////////////////////////// Write new headers.
+    //
+    // Make the header region writable; the previous protection is kept in
+    // dwProtect so it can be reinstated at the end.
+    DWORD dwProtect = 0;
+    if (!DetourVirtualProtectSameExecuteEx(hProcess, pbModule, inh64.OptionalHeader.SizeOfHeaders,
+                                           PAGE_EXECUTE_READWRITE, &dwProtect)) {
+        return FALSE;
+    }
+
+    if (!WriteProcessMemory(hProcess, pnh, &inh64, sizeof(inh64), NULL)) {
+        DETOUR_TRACE(("WriteProcessMemory(inh@%p..%p) failed: %d\n",
+                      pnh, pnh + sizeof(inh64), GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("WriteProcessMemory(inh@%p..%p)\n", pnh, pnh + sizeof(inh64)));
+
+    // The optional header grew, so the section table is written back at its
+    // new position right after the 64-bit optional header.
+    psects = pnh +
+        FIELD_OFFSET(IMAGE_NT_HEADERS, OptionalHeader) +
+        inh64.FileHeader.SizeOfOptionalHeader;
+    cb = inh64.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER);
+    if (!WriteProcessMemory(hProcess, psects, &sects, cb, NULL)) {
+        DETOUR_TRACE(("WriteProcessMemory(ish@%p..%p) failed: %d\n",
+                      psects, psects + cb, GetLastError()));
+        return FALSE;
+    }
+    DETOUR_TRACE(("WriteProcessMemory(ish@%p..%p)\n", psects, psects + cb));
+
+    // Record the updated headers.
+    if (!RecordExeRestore(hProcess, hModule, der)) {
+        return FALSE;
+    }
+
+    // Remove the import table.
+    // Only done when a CLR header was found and bit 0 of its Flags is set;
+    // the headers recorded just above still carry the import directory.
+    if (der.pclr != NULL && (der.clr.Flags & 1)) {
+        inh64.IMPORT_DIRECTORY.VirtualAddress = 0;
+        inh64.IMPORT_DIRECTORY.Size = 0;
+
+        if (!WriteProcessMemory(hProcess, pnh, &inh64, sizeof(inh64), NULL)) {
+            DETOUR_TRACE(("WriteProcessMemory(inh@%p..%p) failed: %d\n",
+                          pnh, pnh + sizeof(inh64), GetLastError()));
+            return FALSE;
+        }
+    }
+
+    // Put back the page protection that was in effect before the rewrite.
+    DWORD dwOld = 0;
+    if (!VirtualProtectEx(hProcess, pbModule, inh64.OptionalHeader.SizeOfHeaders,
+                          dwProtect, &dwOld)) {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+#endif // DETOURS_64BIT
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Locates the executable image in the target process, decides whether the
+// process must be treated as 32-bit, and forwards to
+// DetourUpdateProcessWithDllEx.
+//
+//   hProcess - handle to the target process.
+//   rlpDlls  - array of nDlls DLL names, passed through unchanged.
+//   nDlls    - number of entries in rlpDlls.
+//
+// Returns FALSE with ERROR_INVALID_OPERATION when no non-DLL image is found,
+// FALSE when IsWow64Process fails, and otherwise whatever
+// DetourUpdateProcessWithDllEx returns.
+BOOL WINAPI DetourUpdateProcessWithDll(_In_ HANDLE hProcess,
+                                       _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                       _In_ DWORD nDlls)
+{
+    BOOL bHas64BitDll = FALSE;
+    BOOL bHas32BitExe = FALSE;
+    BOOL bIs32BitProcess = FALSE;
+    HMODULE hModule = NULL;
+    HMODULE hLast = NULL;
+
+    DETOUR_TRACE(("DetourUpdateProcessWithDll(%p,dlls=%d)\n", hProcess, nDlls));
+
+    // Walk every mapped PE image in the process, remembering the last
+    // non-DLL image (the EXE) and whether any 64-bit DLL is present.
+    for (;;) {
+        IMAGE_NT_HEADERS32 inh;
+
+        if ((hLast = EnumerateModulesInProcess(hProcess, hLast, &inh)) == NULL) {
+            break;
+        }
+
+        DETOUR_TRACE(("%p  machine=%04x magic=%04x\n",
+                      hLast, inh.FileHeader.Machine, inh.OptionalHeader.Magic));
+
+        if ((inh.FileHeader.Characteristics & IMAGE_FILE_DLL) == 0) {
+            hModule = hLast;
+            if (inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC
+                && inh.FileHeader.Machine != 0) {
+
+                bHas32BitExe = TRUE;
+            }
+            DETOUR_TRACE(("%p  Found EXE\n", hLast));
+        }
+        else {
+            if (inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC
+                && inh.FileHeader.Machine != 0) {
+
+                bHas64BitDll = TRUE;
+            }
+        }
+    }
+
+    if (hModule == NULL) {
+        SetLastError(ERROR_INVALID_OPERATION);
+        return FALSE;
+    }
+
+    // A 32-bit EXE with no 64-bit DLL loaded is a 32-bit process; a 32-bit
+    // EXE alongside a 64-bit DLL is ambiguous, so ask the OS.
+    if (!bHas32BitExe) {
+        bIs32BitProcess = FALSE;
+    }
+    else if (!bHas64BitDll) {
+        bIs32BitProcess = TRUE;
+    }
+    else {
+        if (!IsWow64Process(hProcess, &bIs32BitProcess)) {
+            return FALSE;
+        }
+    }
+
+    // The format string previously had one %d for two arguments, so the
+    // process bitness was never printed.
+    DETOUR_TRACE(("    32BitExe=%d 32BitProcess=%d\n", bHas32BitExe, bIs32BitProcess));
+
+    return DetourUpdateProcessWithDllEx(hProcess,
+                                        hModule,
+                                        bIs32BitProcess,
+                                        rlpDlls,
+                                        nDlls);
+}
+
+BOOL WINAPI DetourUpdateProcessWithDllEx(_In_ HANDLE hProcess,
+                                         _In_ HMODULE hModule,
+                                         _In_ BOOL bIs32BitProcess,
+                                         _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                         _In_ DWORD nDlls)
+{
+    // Find the next memory region that contains a mapped PE image.
+    //
+    BOOL bIs32BitExe = FALSE;
+
+    DETOUR_TRACE(("DetourUpdateProcessWithDllEx(%p,%p,dlls=%d)\n", hProcess, hModule, nDlls));
+
+    IMAGE_NT_HEADERS32 inh;
+
+    if (hModule == NULL || LoadNtHeaderFromProcess(hProcess, hModule, &inh) == NULL) {
+        SetLastError(ERROR_INVALID_OPERATION);
+        return FALSE;
+    }
+
+    if (inh.OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC
+        && inh.FileHeader.Machine != 0) {
+
+        bIs32BitExe = TRUE;
+    }
+
+    DETOUR_TRACE(("    32BitExe=%d 32BitProcess\n", bIs32BitExe, bIs32BitProcess));
+
+    if (hModule == NULL) {
+        SetLastError(ERROR_INVALID_OPERATION);
+        return FALSE;
+    }
+
+    // Save the various headers for DetourRestoreAfterWith.
+    //
+    DETOUR_EXE_RESTORE der;
+
+    if (!RecordExeRestore(hProcess, hModule, der)) {
+        return FALSE;
+    }
+
+#if defined(DETOURS_64BIT)
+    // Try to convert a neutral 32-bit managed binary to a 64-bit managed binary.
+    if (bIs32BitExe && !bIs32BitProcess) {
+        if (!der.pclr                       // Native binary
+            || (der.clr.Flags & 1) == 0     // Or mixed-mode MSIL
+            || (der.clr.Flags & 2) != 0) {  // Or 32BIT Required MSIL
+
+            SetLastError(ERROR_INVALID_HANDLE);
+            return FALSE;
+        }
+
+        if (!UpdateFrom32To64(hProcess, hModule,
+#if defined(DETOURS_X64)
+                              IMAGE_FILE_MACHINE_AMD64,
+#elif defined(DETOURS_IA64)
+                              IMAGE_FILE_MACHINE_IA64,
+#elif defined(DETOURS_ARM64)
+                              IMAGE_FILE_MACHINE_ARM64,
+#else
+#error Must define one of DETOURS_X64 or DETOURS_IA64 or DETOURS_ARM64 on 64-bit.
+#endif
+                              der)) {
+            return FALSE;
+        }
+        bIs32BitExe = FALSE;
+    }
+#endif // DETOURS_64BIT
+
+    // Now decide if we can insert the detour.
+
+#if defined(DETOURS_32BIT)
+    if (bIs32BitProcess) {
+        // 32-bit native or 32-bit managed process on any platform.
+        if (!UpdateImports32(hProcess, hModule, rlpDlls, nDlls)) {
+            return FALSE;
+        }
+    }
+    else {
+        // 64-bit native or 64-bit managed process.
+        //
+        // Can't detour a 64-bit process with 32-bit code.
+        // Note: This happens for 32-bit PE binaries containing only
+        // manage code that have been marked as 64-bit ready.
+        //
+        SetLastError(ERROR_INVALID_HANDLE);
+        return FALSE;
+    }
+#elif defined(DETOURS_64BIT)
+    if (bIs32BitProcess || bIs32BitExe) {
+        // Can't detour a 32-bit process with 64-bit code.
+        SetLastError(ERROR_INVALID_HANDLE);
+        return FALSE;
+    }
+    else {
+        // 64-bit native or 64-bit managed process on any platform.
+        if (!UpdateImports64(hProcess, hModule, rlpDlls, nDlls)) {
+            return FALSE;
+        }
+    }
+#else
+#pragma Must define one of DETOURS_32BIT or DETOURS_64BIT.
+#endif // DETOURS_64BIT
+
+    /////////////////////////////////////////////////// Update the CLR header.
+    //
+    if (der.pclr != NULL) {
+        DETOUR_CLR_HEADER clr;
+        CopyMemory(&clr, &der.clr, sizeof(clr));
+        clr.Flags &= 0xfffffffe;    // Clear the IL_ONLY flag.
+
+        DWORD dwProtect;
+        if (!DetourVirtualProtectSameExecuteEx(hProcess, der.pclr, sizeof(clr), PAGE_READWRITE, &dwProtect)) {
+            DETOUR_TRACE(("VirtualProtectEx(clr) write failed: %d\n", GetLastError()));
+            return FALSE;
+        }
+
+        if (!WriteProcessMemory(hProcess, der.pclr, &clr, sizeof(clr), NULL)) {
+            DETOUR_TRACE(("WriteProcessMemory(clr) failed: %d\n", GetLastError()));
+            return FALSE;
+        }
+
+        if (!VirtualProtectEx(hProcess, der.pclr, sizeof(clr), dwProtect, &dwProtect)) {
+            DETOUR_TRACE(("VirtualProtectEx(clr) restore failed: %d\n", GetLastError()));
+            return FALSE;
+        }
+        DETOUR_TRACE(("CLR: %p..%p\n", der.pclr, der.pclr + der.cbclr));
+
+#if DETOURS_64BIT
+        if (der.clr.Flags & 0x2) { // Is the 32BIT Required Flag set?
+            // X64 never gets here because the process appears as a WOW64 process.
+            // However, on IA64, it doesn't appear to be a WOW process.
+            DETOUR_TRACE(("CLR Requires 32-bit\n", der.pclr, der.pclr + der.cbclr));
+            SetLastError(ERROR_INVALID_HANDLE);
+            return FALSE;
+        }
+#endif // DETOURS_64BIT
+    }
+
+    //////////////////////////////// Save the undo data to the target process.
+    //
+    if (!DetourCopyPayloadToProcess(hProcess, DETOUR_EXE_RESTORE_GUID, &der, sizeof(der))) {
+        DETOUR_TRACE(("DetourCopyPayloadToProcess failed: %d\n", GetLastError()));
+        return FALSE;
+    }
+    return TRUE;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Creates a process (ANSI variant) in the suspended state, injects lpDllName
+// through DetourUpdateProcessWithDll, and resumes the main thread unless the
+// caller asked for CREATE_SUSPENDED.
+//
+// The first ten parameters are passed to the create-process routine as
+// given, except that CREATE_SUSPENDED is always added to dwCreationFlags.
+//
+//   lpProcessInformation - when non-NULL, receives the process information;
+//                          it is zero-filled if creation fails.
+//   lpDllName            - DLL to inject; NULL injects nothing.
+//   pfCreateProcessA     - create-process routine; NULL selects CreateProcessA.
+//
+// Returns FALSE if process creation fails, or if the import update fails (in
+// which case the new process is terminated); TRUE otherwise.
+BOOL WINAPI DetourCreateProcessWithDllA(_In_opt_ LPCSTR lpApplicationName,
+                                        _Inout_opt_ LPSTR lpCommandLine,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                        _In_ BOOL bInheritHandles,
+                                        _In_ DWORD dwCreationFlags,
+                                        _In_opt_ LPVOID lpEnvironment,
+                                        _In_opt_ LPCSTR lpCurrentDirectory,
+                                        _In_ LPSTARTUPINFOA lpStartupInfo,
+                                        _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                        _In_ LPCSTR lpDllName,
+                                        _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA)
+{
+    DWORD dwMyCreationFlags = (dwCreationFlags | CREATE_SUSPENDED);
+    PROCESS_INFORMATION pi;
+    BOOL fResult = FALSE;
+
+    // pi is copied to the caller even when creation fails, so it must not
+    // hold indeterminate stack contents.
+    ZeroMemory(&pi, sizeof(pi));
+
+    if (pfCreateProcessA == NULL) {
+        pfCreateProcessA = CreateProcessA;
+    }
+
+    fResult = pfCreateProcessA(lpApplicationName,
+                               lpCommandLine,
+                               lpProcessAttributes,
+                               lpThreadAttributes,
+                               bInheritHandles,
+                               dwMyCreationFlags,
+                               lpEnvironment,
+                               lpCurrentDirectory,
+                               lpStartupInfo,
+                               &pi);
+
+    if (lpProcessInformation != NULL) {
+        CopyMemory(lpProcessInformation, &pi, sizeof(pi));
+    }
+
+    if (!fResult) {
+        return FALSE;
+    }
+
+    LPCSTR rlpDlls[2];
+    DWORD nDlls = 0;
+    if (lpDllName != NULL) {
+        rlpDlls[nDlls++] = lpDllName;
+    }
+
+    if (!DetourUpdateProcessWithDll(pi.hProcess, rlpDlls, nDlls)) {
+        TerminateProcess(pi.hProcess, ~0u);
+        return FALSE;
+    }
+
+    // Only undo the suspension this function added itself.
+    if (!(dwCreationFlags & CREATE_SUSPENDED)) {
+        ResumeThread(pi.hThread);
+    }
+    return TRUE;
+}
+
+
+// Creates a process (wide-character variant) in the suspended state, injects
+// lpDllName through DetourUpdateProcessWithDll, and resumes the main thread
+// unless the caller asked for CREATE_SUSPENDED.
+//
+// The first ten parameters are passed to the create-process routine as
+// given, except that CREATE_SUSPENDED is always added to dwCreationFlags.
+//
+//   lpProcessInformation - when non-NULL, receives the process information;
+//                          it is zero-filled if creation fails.
+//   lpDllName            - DLL to inject (ANSI string); NULL injects nothing.
+//   pfCreateProcessW     - create-process routine; NULL selects CreateProcessW.
+//
+// Returns FALSE if process creation fails, or if the import update fails (in
+// which case the new process is terminated); TRUE otherwise.
+BOOL WINAPI DetourCreateProcessWithDllW(_In_opt_ LPCWSTR lpApplicationName,
+                                        _Inout_opt_ LPWSTR lpCommandLine,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                        _In_ BOOL bInheritHandles,
+                                        _In_ DWORD dwCreationFlags,
+                                        _In_opt_ LPVOID lpEnvironment,
+                                        _In_opt_ LPCWSTR lpCurrentDirectory,
+                                        _In_ LPSTARTUPINFOW lpStartupInfo,
+                                        _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                        _In_ LPCSTR lpDllName,
+                                        _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW)
+{
+    DWORD dwMyCreationFlags = (dwCreationFlags | CREATE_SUSPENDED);
+    PROCESS_INFORMATION pi;
+
+    // pi is copied to the caller even when creation fails, so it must not
+    // hold indeterminate stack contents.
+    ZeroMemory(&pi, sizeof(pi));
+
+    if (pfCreateProcessW == NULL) {
+        pfCreateProcessW = CreateProcessW;
+    }
+
+    BOOL fResult = pfCreateProcessW(lpApplicationName,
+                                    lpCommandLine,
+                                    lpProcessAttributes,
+                                    lpThreadAttributes,
+                                    bInheritHandles,
+                                    dwMyCreationFlags,
+                                    lpEnvironment,
+                                    lpCurrentDirectory,
+                                    lpStartupInfo,
+                                    &pi);
+
+    if (lpProcessInformation) {
+        CopyMemory(lpProcessInformation, &pi, sizeof(pi));
+    }
+
+    if (!fResult) {
+        return FALSE;
+    }
+
+    LPCSTR rlpDlls[2];
+    DWORD nDlls = 0;
+    if (lpDllName != NULL) {
+        rlpDlls[nDlls++] = lpDllName;
+    }
+
+    if (!DetourUpdateProcessWithDll(pi.hProcess, rlpDlls, nDlls)) {
+        TerminateProcess(pi.hProcess, ~0u);
+        return FALSE;
+    }
+
+    // Only undo the suspension this function added itself.
+    if (!(dwCreationFlags & CREATE_SUSPENDED)) {
+        ResumeThread(pi.hThread);
+    }
+    return TRUE;
+}
+
+// Copies a GUID-tagged payload into freshly allocated memory in the target
+// process, wrapped in a minimal PE-style layout: DOS header, NT headers, one
+// ".detour" section header, a DETOUR_SECTION_HEADER, a DETOUR_SECTION_RECORD
+// and finally the payload bytes.
+//
+//   hProcess - target process; memory is allocated and written in it.
+//   rguid    - identifier stored in the section record.
+//   pvData   - payload bytes to copy.
+//   cbData   - number of bytes at pvData.
+//
+// Returns TRUE once every piece has been written completely. On failure it
+// returns FALSE, releases the remote allocation (earlier versions leaked it)
+// and leaves the last-error value of the failing call in place.
+BOOL WINAPI DetourCopyPayloadToProcess(_In_ HANDLE hProcess,
+                                       _In_ REFGUID rguid,
+                                       _In_reads_bytes_(cbData) PVOID pvData,
+                                       _In_ DWORD cbData)
+{
+    DWORD cbTotal = (sizeof(IMAGE_DOS_HEADER) +
+                     sizeof(IMAGE_NT_HEADERS) +
+                     sizeof(IMAGE_SECTION_HEADER) +
+                     sizeof(DETOUR_SECTION_HEADER) +
+                     sizeof(DETOUR_SECTION_RECORD) +
+                     cbData);
+
+    PBYTE pbBase = (PBYTE)VirtualAllocEx(hProcess, NULL, cbTotal,
+                                         MEM_COMMIT, PAGE_READWRITE);
+    if (pbBase == NULL) {
+        DETOUR_TRACE(("VirtualAllocEx(%d) failed: %d\n", cbTotal, GetLastError()));
+        return FALSE;
+    }
+
+    // All locals are declared before the first goto so that no jump to
+    // Cleanup bypasses an initialization.
+    PBYTE pbTarget = pbBase;
+    IMAGE_DOS_HEADER idh;
+    IMAGE_NT_HEADERS inh;
+    IMAGE_SECTION_HEADER ish;
+    DETOUR_SECTION_HEADER dsh;
+    DETOUR_SECTION_RECORD dsr;
+    SIZE_T cbWrote = 0;
+    BOOL fCopied = FALSE;
+
+    // DOS header: only the signature and the offset of the NT headers.
+    ZeroMemory(&idh, sizeof(idh));
+    idh.e_magic = IMAGE_DOS_SIGNATURE;
+    idh.e_lfanew = sizeof(idh);
+    if (!WriteProcessMemory(hProcess, pbTarget, &idh, sizeof(idh), &cbWrote) ||
+        cbWrote != sizeof(idh)) {
+        DETOUR_TRACE(("WriteProcessMemory(idh) failed: %d\n", GetLastError()));
+        goto Cleanup;
+    }
+    pbTarget += sizeof(idh);
+
+    // NT headers describing a DLL image with a single section.
+    ZeroMemory(&inh, sizeof(inh));
+    inh.Signature = IMAGE_NT_SIGNATURE;
+    inh.FileHeader.SizeOfOptionalHeader = sizeof(inh.OptionalHeader);
+    inh.FileHeader.Characteristics = IMAGE_FILE_DLL;
+    inh.FileHeader.NumberOfSections = 1;
+    inh.OptionalHeader.Magic = IMAGE_NT_OPTIONAL_HDR_MAGIC;
+    if (!WriteProcessMemory(hProcess, pbTarget, &inh, sizeof(inh), &cbWrote) ||
+        cbWrote != sizeof(inh)) {
+        goto Cleanup;
+    }
+    pbTarget += sizeof(inh);
+
+    // Section header; the section data starts right after this header.
+    ZeroMemory(&ish, sizeof(ish));
+    memcpy(ish.Name, ".detour", sizeof(ish.Name));
+    ish.VirtualAddress = (DWORD)((pbTarget + sizeof(ish)) - pbBase);
+    ish.SizeOfRawData = (sizeof(DETOUR_SECTION_HEADER) +
+                         sizeof(DETOUR_SECTION_RECORD) +
+                         cbData);
+    if (!WriteProcessMemory(hProcess, pbTarget, &ish, sizeof(ish), &cbWrote) ||
+        cbWrote != sizeof(ish)) {
+        goto Cleanup;
+    }
+    pbTarget += sizeof(ish);
+
+    ZeroMemory(&dsh, sizeof(dsh));
+    dsh.cbHeaderSize = sizeof(dsh);
+    dsh.nSignature = DETOUR_SECTION_HEADER_SIGNATURE;
+    dsh.nDataOffset = sizeof(DETOUR_SECTION_HEADER);
+    dsh.cbDataSize = (sizeof(DETOUR_SECTION_HEADER) +
+                      sizeof(DETOUR_SECTION_RECORD) +
+                      cbData);
+    if (!WriteProcessMemory(hProcess, pbTarget, &dsh, sizeof(dsh), &cbWrote) ||
+        cbWrote != sizeof(dsh)) {
+        goto Cleanup;
+    }
+    pbTarget += sizeof(dsh);
+
+    ZeroMemory(&dsr, sizeof(dsr));
+    dsr.cbBytes = cbData + sizeof(DETOUR_SECTION_RECORD);
+    dsr.nReserved = 0;
+    dsr.guid = rguid;
+    if (!WriteProcessMemory(hProcess, pbTarget, &dsr, sizeof(dsr), &cbWrote) ||
+        cbWrote != sizeof(dsr)) {
+        goto Cleanup;
+    }
+    pbTarget += sizeof(dsr);
+
+    if (!WriteProcessMemory(hProcess, pbTarget, pvData, cbData, &cbWrote) ||
+        cbWrote != cbData) {
+        goto Cleanup;
+    }
+    pbTarget += cbData;
+
+    DETOUR_TRACE(("Copied %d byte payload into target process at %p\n",
+                  cbTotal, pbTarget - cbTotal));
+    fCopied = TRUE;
+
+  Cleanup:
+    if (!fCopied) {
+        // Release the half-written region, keeping the error code of the
+        // call that actually failed.
+        DWORD dwError = GetLastError();
+        VirtualFreeEx(hProcess, pbBase, 0, MEM_RELEASE);
+        SetLastError(dwError);
+    }
+    return fCopied;
+}
+
+// Set to TRUE by DetourIsHelperProcess once it has looked for the helper
+// payload, so the lookup is performed at most once.
+static BOOL s_fSearchedForHelper = FALSE;
+// Helper payload found by DetourIsHelperProcess, or NULL when none (or an
+// undersized one) was found; read by DetourFinishHelperProcess.
+static PDETOUR_EXE_HELPER s_pHelper = NULL;
+
+// Entry point run inside the helper process: opens the target process named
+// in the helper payload (s_pHelper), rebuilds the DLL name array from the
+// packed strings in the payload, applies DetourUpdateProcessWithDll to the
+// target, and exits. The four parameters are unused.
+//
+// This function does not return; the outcome is the process exit code:
+//   0    - success
+//   9900 - failure before any specific code was assigned (including a
+//          failed allocation of the name array)
+//   9901 - OpenProcess failed
+//   9902 - a DLL name in the payload could not be measured within bounds
+//   9903 - DetourUpdateProcessWithDll failed
+//   9905 - no helper payload (s_pHelper is NULL)
+VOID CALLBACK DetourFinishHelperProcess(_In_ HWND,
+                                        _In_ HINSTANCE,
+                                        _In_ LPSTR,
+                                        _In_ INT)
+{
+    LPCSTR * rlpDlls = NULL;
+    DWORD Result = 9900;
+    DWORD cOffset = 0;
+    DWORD cSize = 0;
+    HANDLE hProcess = NULL;
+
+    if (s_pHelper == NULL) {
+        DETOUR_TRACE(("DetourFinishHelperProcess called with s_pHelper = NULL.\n"));
+        Result = 9905;
+        goto Cleanup;
+    }
+
+    hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, s_pHelper->pid);
+    if (hProcess == NULL) {
+        DETOUR_TRACE(("OpenProcess(pid=%d) failed: %d\n",
+                      s_pHelper->pid, GetLastError()));
+        Result = 9901;
+        goto Cleanup;
+    }
+
+    rlpDlls = new NOTHROW LPCSTR [s_pHelper->nDlls];
+    if (rlpDlls == NULL) {
+        // The array is written through below, so a failed allocation must
+        // stop here; Result keeps its generic failure value.
+        goto Cleanup;
+    }
+
+    // The names are stored back to back, each NUL-terminated, in rDlls.
+    cSize = s_pHelper->cb - sizeof(DETOUR_EXE_HELPER);
+    for (DWORD n = 0; n < s_pHelper->nDlls; n++) {
+        size_t cchDest = 0;
+        HRESULT hr = StringCchLengthA(&s_pHelper->rDlls[cOffset], cSize - cOffset, &cchDest);
+        if (!SUCCEEDED(hr)) {
+            Result = 9902;
+            goto Cleanup;
+        }
+
+        rlpDlls[n] = &s_pHelper->rDlls[cOffset];
+        cOffset += (DWORD)cchDest + 1;
+    }
+
+    if (!DetourUpdateProcessWithDll(hProcess, rlpDlls, s_pHelper->nDlls)) {
+        DETOUR_TRACE(("DetourUpdateProcessWithDll(pid=%d) failed: %d\n",
+                      s_pHelper->pid, GetLastError()));
+        Result = 9903;
+        goto Cleanup;
+    }
+    Result = 0;
+
+  Cleanup:
+    if (rlpDlls != NULL) {
+        delete[] rlpDlls;
+        rlpDlls = NULL;
+    }
+    if (hProcess != NULL) {
+        CloseHandle(hProcess);
+        hProcess = NULL;
+    }
+
+    ExitProcess(Result);
+}
+
+// Reports whether this process carries a DETOUR_EXE_HELPER payload. The
+// payload lookup happens on the first call only; the result is cached in
+// s_pHelper and later calls answer from that cache.
+BOOL WINAPI DetourIsHelperProcess(VOID)
+{
+    if (!s_fSearchedForHelper) {
+        s_fSearchedForHelper = TRUE;
+
+        DWORD cbPayload;
+        PVOID pvPayload = DetourFindPayloadEx(DETOUR_EXE_HELPER_GUID, &cbPayload);
+
+        // Accept the payload only if both the payload size and the size it
+        // declares for itself cover a whole DETOUR_EXE_HELPER.
+        if (pvPayload != NULL && cbPayload >= sizeof(DETOUR_EXE_HELPER)) {
+            PDETOUR_EXE_HELPER pCandidate = (PDETOUR_EXE_HELPER)pvPayload;
+            if (pCandidate->cb >= sizeof(*pCandidate)) {
+                s_pHelper = pCandidate;
+            }
+        }
+    }
+
+    return (s_pHelper != NULL);
+}
+
+// Allocates and fills a DETOUR_EXE_HELPER describing the target process and
+// the DLLs to inject.
+//
+//   pHelper     - receives the new helper on success, NULL otherwise.
+//   dwTargetPid - stored in the helper's pid field.
+//   nDlls       - number of DLL names; must be between 1 and 4096.
+//   rlpDlls     - the DLL names; each must be shorter than 4096 characters.
+//
+// Returns TRUE on success, in which case the caller owns *pHelper (it is a
+// BYTE array released with delete[], as FreeExeHelper does). Returns FALSE
+// on any failure; ERROR_INVALID_PARAMETER is set only for a bad nDlls.
+static
+BOOL WINAPI AllocExeHelper(_Out_ PDETOUR_EXE_HELPER *pHelper,
+                           _In_ DWORD dwTargetPid,
+                           _In_ DWORD nDlls,
+                           _In_reads_(nDlls) LPCSTR *rlpDlls)
+{
+    PDETOUR_EXE_HELPER Helper = NULL;
+    BOOL Result = FALSE;
+    _Field_range_(0, cSize - 4) DWORD cOffset = 0;
+    DWORD cSize = 4;
+
+    if (pHelper == NULL) {
+        goto Cleanup;
+    }
+    *pHelper = NULL;
+
+    if (nDlls < 1 || nDlls > 4096) {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        goto Cleanup;
+    }
+
+    // First pass: add up the space needed for every name plus its NUL.
+    for (DWORD n = 0; n < nDlls; n++) {
+        HRESULT hr;
+        size_t cchDest = 0;
+
+        hr = StringCchLengthA(rlpDlls[n], 4096, &cchDest);
+        if (!SUCCEEDED(hr)) {
+            goto Cleanup;
+        }
+
+        cSize += (DWORD)cchDest + 1;
+    }
+
+    Helper = (PDETOUR_EXE_HELPER) new NOTHROW BYTE[sizeof(DETOUR_EXE_HELPER) + cSize];
+    if (Helper == NULL) {
+        goto Cleanup;
+    }
+
+    Helper->cb = sizeof(DETOUR_EXE_HELPER) + cSize;
+    Helper->pid = dwTargetPid;
+    Helper->nDlls = nDlls;
+
+    // Second pass: copy the names back to back into rDlls, swapping the
+    // bitness marker in each one.
+    for (DWORD n = 0; n < nDlls; n++) {
+        HRESULT hr;
+        size_t cchDest = 0;
+
+        // Bounds checks ahead of the copy; the _Analysis_assume_ lines below
+        // restate them for the static analyzer.
+        if (cOffset > 0x10000 || cSize > 0x10000 || cOffset + 2 >= cSize) {
+            goto Cleanup;
+        }
+
+        // NOTE(review): the first half of this condition repeats the check
+        // just above -- confirm whether the second half is still needed.
+        if (cOffset + 2 >= cSize || cOffset + 65536 < cSize) {
+            goto Cleanup;
+        }
+
+        _Analysis_assume_(cOffset + 1 < cSize);
+        _Analysis_assume_(cOffset < 0x10000);
+        _Analysis_assume_(cSize < 0x10000);
+
+        PCHAR psz = &Helper->rDlls[cOffset];
+
+        hr = StringCchCopyA(psz, cSize - cOffset, rlpDlls[n]);
+        if (!SUCCEEDED(hr)) {
+            goto Cleanup;
+        }
+
+// REVIEW 28020 The expression '1<=_Param_(2)& &_Param_(2)<=2147483647' is not true at this call.
+// REVIEW 28313 Analysis will not proceed past this point because of annotation evaluation. The annotation expression *_Param_(3)<_Param_(2)&&*_Param_(3)<=stringLength$(_Param_(1)) cannot be true under any assumptions at this point in the program.
+#pragma warning(suppress:28020 28313)
+        hr = StringCchLengthA(psz, cSize - cOffset, &cchDest);
+        if (!SUCCEEDED(hr)) {
+            goto Cleanup;
+        }
+
+        // Replace "32." with "64." or "64." with "32."
+        // The scan runs from the end of the string towards the start and
+        // stops at the first match, so only the last occurrence is changed.
+
+        for (DWORD c = (DWORD)cchDest + 1; c > 3; c--) {
+#if DETOURS_32BIT
+            if (psz[c - 3] == '3' && psz[c - 2] == '2' && psz[c - 1] == '.') {
+                psz[c - 3] = '6'; psz[c - 2] = '4';
+                break;
+            }
+#else
+            if (psz[c - 3] == '6' && psz[c - 2] == '4' && psz[c - 1] == '.') {
+                psz[c - 3] = '3'; psz[c - 2] = '2';
+                break;
+            }
+#endif
+        }
+
+        cOffset += (DWORD)cchDest + 1;
+    }
+
+    // Success: hand the buffer to the caller and clear the local pointer so
+    // the cleanup code below does not free it.
+    *pHelper = Helper;
+    Helper = NULL;
+    Result = TRUE;
+
+  Cleanup:
+    if (Helper != NULL) {
+        delete[] (PBYTE)Helper;
+        Helper = NULL;
+    }
+    return Result;
+}
+
+// Releases a helper obtained from AllocExeHelper and resets the caller's
+// pointer to NULL. Does nothing when *pHelper is already NULL.
+static
+VOID WINAPI FreeExeHelper(PDETOUR_EXE_HELPER *pHelper)
+{
+    PDETOUR_EXE_HELPER pOwned = *pHelper;
+    if (pOwned == NULL) {
+        return;
+    }
+
+    // The helper was allocated as a BYTE array, so free it as one.
+    *pHelper = NULL;
+    delete[] (PBYTE)pOwned;
+}
+
+// Single-DLL convenience wrapper (ANSI): treats lpDllName as a one-element
+// array and forwards to DetourProcessViaHelperDllsA.
+BOOL WINAPI DetourProcessViaHelperA(_In_ DWORD dwTargetPid,
+                                    _In_ LPCSTR lpDllName,
+                                    _In_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA)
+{
+    LPCSTR *rlpSingleDll = &lpDllName;
+    const DWORD nSingleDll = 1;
+
+    return DetourProcessViaHelperDllsA(dwTargetPid, nSingleDll, rlpSingleDll,
+                                       pfCreateProcessA);
+}
+
+
+// Injects the given DLLs into process dwTargetPid by way of a helper
+// rundll32.exe process (ANSI variant). The helper is created suspended,
+// handed a DETOUR_EXE_HELPER payload naming the target and the DLLs, then
+// resumed and waited for.
+//
+//   dwTargetPid      - process to be updated by the helper.
+//   nDlls            - number of DLL names; must be between 1 and 4096.
+//   rlpDlls          - the DLL names.
+//   pfCreateProcessA - routine used to start rundll32.exe.
+//
+// Returns TRUE only when the helper process ran and exited with code 0.
+BOOL WINAPI DetourProcessViaHelperDllsA(_In_ DWORD dwTargetPid,
+                                        _In_ DWORD nDlls,
+                                        _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                        _In_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA)
+{
+    // Every local is declared up front so that no "goto Cleanup" jumps past
+    // an initialization.
+    BOOL fSucceeded = FALSE;
+    PROCESS_INFORMATION procInfo;
+    STARTUPINFOA startInfo;
+    CHAR szRundllPath[MAX_PATH];
+    CHAR szCmdLine[MAX_PATH];
+    PDETOUR_EXE_HELPER pPayload = NULL;
+    HRESULT hrStr;
+    DWORD dwExitCode = 500;
+    DWORD cchWinDir = GetEnvironmentVariableA("WINDIR", szRundllPath, ARRAYSIZE(szRundllPath));
+
+    DETOUR_TRACE(("DetourProcessViaHelperDlls(pid=%d,dlls=%d)\n", dwTargetPid, nDlls));
+    if (nDlls < 1 || nDlls > 4096) {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        goto Cleanup;
+    }
+    if (!AllocExeHelper(&pPayload, dwTargetPid, nDlls, rlpDlls)) {
+        goto Cleanup;
+    }
+
+    // WINDIR must exist and fit in the buffer.
+    if (cchWinDir == 0 || cchWinDir >= ARRAYSIZE(szRundllPath)) {
+        goto Cleanup;
+    }
+
+    // Pick the rundll32.exe of the other bitness when DETOURS_OPTION_BITS is
+    // enabled, the regular system32 one otherwise.
+#if DETOURS_OPTION_BITS
+#if DETOURS_32BIT
+    hrStr = StringCchCatA(szRundllPath, ARRAYSIZE(szRundllPath), "\\sysnative\\rundll32.exe");
+#else // !DETOURS_32BIT
+    hrStr = StringCchCatA(szRundllPath, ARRAYSIZE(szRundllPath), "\\syswow64\\rundll32.exe");
+#endif // !DETOURS_32BIT
+#else // DETOURS_OPTIONS_BITS
+    hrStr = StringCchCatA(szRundllPath, ARRAYSIZE(szRundllPath), "\\system32\\rundll32.exe");
+#endif // DETOURS_OPTIONS_BITS
+    if (FAILED(hrStr)) {
+        goto Cleanup;
+    }
+
+    // The command line asks rundll32 to call export #1 of the first DLL.
+    hrStr = StringCchPrintfA(szCmdLine, ARRAYSIZE(szCmdLine),
+                             "rundll32.exe \"%hs\",#1", &pPayload->rDlls[0]);
+    if (FAILED(hrStr)) {
+        goto Cleanup;
+    }
+
+    ZeroMemory(&procInfo, sizeof(procInfo));
+    ZeroMemory(&startInfo, sizeof(startInfo));
+    startInfo.cb = sizeof(startInfo);
+
+    DETOUR_TRACE(("DetourProcessViaHelperDlls(\"%hs\", \"%hs\")\n", szRundllPath, szCmdLine));
+    if (!pfCreateProcessA(szRundllPath, szCmdLine, NULL, NULL, FALSE, CREATE_SUSPENDED,
+                          NULL, NULL, &startInfo, &procInfo)) {
+        DETOUR_TRACE(("CreateProcess failed: %d\n", GetLastError()));
+        goto Cleanup;
+    }
+
+    if (!DetourCopyPayloadToProcess(procInfo.hProcess,
+                                    DETOUR_EXE_HELPER_GUID,
+                                    pPayload, pPayload->cb)) {
+        DETOUR_TRACE(("DetourCopyPayloadToProcess failed: %d\n", GetLastError()));
+        TerminateProcess(procInfo.hProcess, ~0u);
+        CloseHandle(procInfo.hProcess);
+        CloseHandle(procInfo.hThread);
+        goto Cleanup;
+    }
+
+    // Let the helper run to completion and collect its exit code.
+    ResumeThread(procInfo.hThread);
+    WaitForSingleObject(procInfo.hProcess, INFINITE);
+
+    GetExitCodeProcess(procInfo.hProcess, &dwExitCode);
+
+    CloseHandle(procInfo.hProcess);
+    CloseHandle(procInfo.hThread);
+
+    if (dwExitCode != 0) {
+        DETOUR_TRACE(("Rundll32.exe failed: result=%d\n", dwExitCode));
+        goto Cleanup;
+    }
+    fSucceeded = TRUE;
+
+  Cleanup:
+    FreeExeHelper(&pPayload);
+    return fSucceeded;
+}
+
+BOOL WINAPI DetourProcessViaHelperW(_In_ DWORD dwTargetPid,   // process id forwarded unchanged
+                                    _In_ LPCSTR lpDllName,    // one DLL name (narrow string, even in this W variant)
+                                    _In_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) // forwarded unchanged
+{   // Single-DLL convenience wrapper: forwards a one-element DLL list to DetourProcessViaHelperDllsW.
+    return DetourProcessViaHelperDllsW(dwTargetPid, 1, &lpDllName, pfCreateProcessW);
+}
+
+BOOL WINAPI DetourProcessViaHelperDllsW(_In_ DWORD dwTargetPid,              // target pid, passed to AllocExeHelper
+                                        _In_ DWORD nDlls,                    // entries in rlpDlls; must be 1..4096
+                                        _In_reads_(nDlls) LPCSTR *rlpDlls,   // DLL names (narrow strings)
+                                        _In_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW) // launches rundll32.exe
+{   // Runs rundll32.exe on the helper payload; returns TRUE only when it exits with code 0.
+    BOOL Result = FALSE;
+    PROCESS_INFORMATION pi;
+    STARTUPINFOW si;
+    WCHAR szExe[MAX_PATH];
+    WCHAR szCommand[MAX_PATH];
+    PDETOUR_EXE_HELPER helper = NULL;
+    HRESULT hr;
+    DWORD nLen = GetEnvironmentVariableW(L"WINDIR", szExe, ARRAYSIZE(szExe));
+
+    DETOUR_TRACE(("DetourProcessViaHelperDlls(pid=%d,dlls=%d)\n", dwTargetPid, nDlls));
+    if (nDlls < 1 || nDlls > 4096) {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        goto Cleanup;
+    }
+    if (!AllocExeHelper(&helper, dwTargetPid, nDlls, rlpDlls)) {
+        goto Cleanup;
+    }
+    // nLen is 0 on failure and at least the buffer size when %WINDIR% did not fit in szExe.
+    if (nLen == 0 || nLen >= ARRAYSIZE(szExe)) {
+        goto Cleanup;
+    }
+    // Append the rundll32.exe location selected by the build configuration.
+#if DETOURS_OPTION_BITS
+#if DETOURS_32BIT
+    hr = StringCchCatW(szExe, ARRAYSIZE(szExe), L"\\sysnative\\rundll32.exe");
+#else // !DETOURS_32BIT
+    hr = StringCchCatW(szExe, ARRAYSIZE(szExe), L"\\syswow64\\rundll32.exe");
+#endif // !DETOURS_32BIT
+#else // !DETOURS_OPTION_BITS
+    hr = StringCchCatW(szExe, ARRAYSIZE(szExe), L"\\system32\\rundll32.exe");
+#endif // DETOURS_OPTION_BITS
+    if (!SUCCEEDED(hr)) {
+        goto Cleanup;
+    }
+    // Command line: rundll32 loads the DLL named at helper->rDlls and calls its export ordinal #1.
+    hr = StringCchPrintfW(szCommand, ARRAYSIZE(szCommand),
+                          L"rundll32.exe \"%hs\",#1", &helper->rDlls[0]);
+    if (!SUCCEEDED(hr)) {
+        goto Cleanup;
+    }
+
+    ZeroMemory(&pi, sizeof(pi));
+    ZeroMemory(&si, sizeof(si));
+    si.cb = sizeof(si);
+
+    DETOUR_TRACE(("DetourProcessViaHelperDlls(\"%ls\", \"%ls\")\n", szExe, szCommand));
+    if (pfCreateProcessW(szExe, szCommand, NULL, NULL, FALSE, CREATE_SUSPENDED,
+                         NULL, NULL, &si, &pi)) {
+        // The helper is created suspended so the payload can be copied in before it runs.
+        if (!DetourCopyPayloadToProcess(pi.hProcess,
+                                        DETOUR_EXE_HELPER_GUID,
+                                        helper, helper->cb)) {
+            DETOUR_TRACE(("DetourCopyPayloadToProcess failed: %d\n", GetLastError()));
+            TerminateProcess(pi.hProcess, ~0u);
+            CloseHandle(pi.hProcess);
+            CloseHandle(pi.hThread);
+            goto Cleanup;
+        }
+
+        // Resume the helper exactly once (CREATE_SUSPENDED leaves a suspend count
+        // of 1), then block until rundll32.exe has exited.
+        ResumeThread(pi.hThread);
+        WaitForSingleObject(pi.hProcess, INFINITE);
+
+        DWORD dwResult = 500;   // stays non-zero if GetExitCodeProcess does not update it
+        GetExitCodeProcess(pi.hProcess, &dwResult);
+
+        CloseHandle(pi.hProcess);
+        CloseHandle(pi.hThread);
+
+        if (dwResult != 0) {
+            DETOUR_TRACE(("Rundll32.exe failed: result=%d\n", dwResult));
+            goto Cleanup;
+        }
+        Result = TRUE;
+    }
+    else {
+        DETOUR_TRACE(("CreateProcess failed: %d\n", GetLastError()));
+        goto Cleanup;
+    }
+
+  Cleanup:
+    FreeExeHelper(&helper);
+    return Result;
+}
+
+BOOL WINAPI DetourCreateProcessWithDllExA(_In_opt_ LPCSTR lpApplicationName,
+                                          _Inout_opt_ LPSTR lpCommandLine,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                          _In_ BOOL bInheritHandles,
+                                          _In_ DWORD dwCreationFlags,
+                                          _In_opt_ LPVOID lpEnvironment,
+                                          _In_opt_ LPCSTR lpCurrentDirectory,
+                                          _In_ LPSTARTUPINFOA lpStartupInfo,
+                                          _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                          _In_ LPCSTR lpDllName,
+                                          _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA)
+{
+    // Creates the process suspended, hands lpDllName to DetourUpdateProcessWithDll
+    // (helper route on failure), then resumes it unless CREATE_SUSPENDED was requested.
+    PROCESS_INFORMATION backup;
+    const BOOL fLocalInfo = (lpProcessInformation == NULL);
+    if (fLocalInfo) {   // NULL selects a zeroed local buffer
+        ZeroMemory(&backup, sizeof(backup));
+        lpProcessInformation = &backup;
+    }
+    if (pfCreateProcessA == NULL) {   // NULL selects the system routine
+        pfCreateProcessA = CreateProcessA;
+    }
+
+    const BOOL fCreated = pfCreateProcessA(lpApplicationName,
+                                           lpCommandLine,
+                                           lpProcessAttributes,
+                                           lpThreadAttributes,
+                                           bInheritHandles,
+                                           dwCreationFlags | CREATE_SUSPENDED,
+                                           lpEnvironment,
+                                           lpCurrentDirectory,
+                                           lpStartupInfo,
+                                           lpProcessInformation);
+    if (!fCreated) {
+        return FALSE;
+    }
+
+    LPCSTR szDllName = lpDllName;
+    BOOL fInjected = DetourUpdateProcessWithDll(lpProcessInformation->hProcess, &szDllName, 1);
+    if (!fInjected) {
+        fInjected = DetourProcessViaHelperA(lpProcessInformation->dwProcessId,
+                                            lpDllName, pfCreateProcessA);
+    }
+    if (!fInjected) {   // both routes failed: kill the child and drop its handles
+        TerminateProcess(lpProcessInformation->hProcess, ~0u);
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+        return FALSE;
+    }
+    if ((dwCreationFlags & CREATE_SUSPENDED) == 0) {
+        ResumeThread(lpProcessInformation->hThread);
+    }
+    if (fLocalInfo) {   // handles held in the local fallback never reach the caller
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+    }
+    return TRUE;
+}
+
+BOOL WINAPI DetourCreateProcessWithDllExW(_In_opt_ LPCWSTR lpApplicationName,
+                                          _Inout_opt_  LPWSTR lpCommandLine,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                          _In_ BOOL bInheritHandles,
+                                          _In_ DWORD dwCreationFlags,
+                                          _In_opt_ LPVOID lpEnvironment,
+                                          _In_opt_ LPCWSTR lpCurrentDirectory,
+                                          _In_ LPSTARTUPINFOW lpStartupInfo,
+                                          _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                          _In_ LPCSTR lpDllName,
+                                          _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW)
+{
+    // Creates the process suspended, hands lpDllName to DetourUpdateProcessWithDll
+    // (helper route on failure), then resumes it unless CREATE_SUSPENDED was requested.
+    PROCESS_INFORMATION backup;
+    const BOOL fLocalInfo = (lpProcessInformation == NULL);
+    if (fLocalInfo) {   // NULL selects a zeroed local buffer
+        ZeroMemory(&backup, sizeof(backup));
+        lpProcessInformation = &backup;
+    }
+    if (pfCreateProcessW == NULL) {   // NULL selects the system routine
+        pfCreateProcessW = CreateProcessW;
+    }
+
+    const BOOL fCreated = pfCreateProcessW(lpApplicationName,
+                                           lpCommandLine,
+                                           lpProcessAttributes,
+                                           lpThreadAttributes,
+                                           bInheritHandles,
+                                           dwCreationFlags | CREATE_SUSPENDED,
+                                           lpEnvironment,
+                                           lpCurrentDirectory,
+                                           lpStartupInfo,
+                                           lpProcessInformation);
+    if (!fCreated) {
+        return FALSE;
+    }
+
+    LPCSTR szDllName = lpDllName;
+    BOOL fInjected = DetourUpdateProcessWithDll(lpProcessInformation->hProcess, &szDllName, 1);
+    if (!fInjected) {
+        fInjected = DetourProcessViaHelperW(lpProcessInformation->dwProcessId,
+                                            lpDllName, pfCreateProcessW);
+    }
+    if (!fInjected) {   // both routes failed: kill the child and drop its handles
+        TerminateProcess(lpProcessInformation->hProcess, ~0u);
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+        return FALSE;
+    }
+    if ((dwCreationFlags & CREATE_SUSPENDED) == 0) {
+        ResumeThread(lpProcessInformation->hThread);
+    }
+    if (fLocalInfo) {   // handles held in the local fallback never reach the caller
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+    }
+    return TRUE;
+}
+
+BOOL WINAPI DetourCreateProcessWithDllsA(_In_opt_ LPCSTR lpApplicationName,
+                                         _Inout_opt_ LPSTR lpCommandLine,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                         _In_ BOOL bInheritHandles,
+                                         _In_ DWORD dwCreationFlags,
+                                         _In_opt_ LPVOID lpEnvironment,
+                                         _In_opt_ LPCSTR lpCurrentDirectory,
+                                         _In_ LPSTARTUPINFOA lpStartupInfo,
+                                         _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                         _In_ DWORD nDlls,
+                                         _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                         _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA)
+{
+    // Creates the process suspended, hands the nDlls names to DetourUpdateProcessWithDll
+    // (helper route on failure), then resumes it unless CREATE_SUSPENDED was requested.
+    PROCESS_INFORMATION backup;
+    const BOOL fLocalInfo = (lpProcessInformation == NULL);
+    if (fLocalInfo) {   // NULL selects a zeroed local buffer
+        ZeroMemory(&backup, sizeof(backup));
+        lpProcessInformation = &backup;
+    }
+    if (pfCreateProcessA == NULL) {   // NULL selects the system routine
+        pfCreateProcessA = CreateProcessA;
+    }
+
+    const BOOL fCreated = pfCreateProcessA(lpApplicationName,
+                                           lpCommandLine,
+                                           lpProcessAttributes,
+                                           lpThreadAttributes,
+                                           bInheritHandles,
+                                           dwCreationFlags | CREATE_SUSPENDED,
+                                           lpEnvironment,
+                                           lpCurrentDirectory,
+                                           lpStartupInfo,
+                                           lpProcessInformation);
+    if (!fCreated) {
+        return FALSE;
+    }
+
+    BOOL fInjected = DetourUpdateProcessWithDll(lpProcessInformation->hProcess, rlpDlls, nDlls);
+    if (!fInjected) {
+        fInjected = DetourProcessViaHelperDllsA(lpProcessInformation->dwProcessId,
+                                                nDlls, rlpDlls, pfCreateProcessA);
+    }
+    if (!fInjected) {   // both routes failed: kill the child and drop its handles
+        TerminateProcess(lpProcessInformation->hProcess, ~0u);
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+        return FALSE;
+    }
+    if ((dwCreationFlags & CREATE_SUSPENDED) == 0) {
+        ResumeThread(lpProcessInformation->hThread);
+    }
+    if (fLocalInfo) {   // handles held in the local fallback never reach the caller
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+    }
+    return TRUE;
+}
+
+BOOL WINAPI DetourCreateProcessWithDllsW(_In_opt_ LPCWSTR lpApplicationName,
+                                         _Inout_opt_ LPWSTR lpCommandLine,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                         _In_ BOOL bInheritHandles,
+                                         _In_ DWORD dwCreationFlags,
+                                         _In_opt_ LPVOID lpEnvironment,
+                                         _In_opt_ LPCWSTR lpCurrentDirectory,
+                                         _In_ LPSTARTUPINFOW lpStartupInfo,
+                                         _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                         _In_ DWORD nDlls,
+                                         _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                         _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW)
+{
+    // Creates the process suspended, hands the nDlls names to DetourUpdateProcessWithDll
+    // (helper route on failure), then resumes it unless CREATE_SUSPENDED was requested.
+    PROCESS_INFORMATION backup;
+    const BOOL fLocalInfo = (lpProcessInformation == NULL);
+    if (fLocalInfo) {   // NULL selects a zeroed local buffer
+        ZeroMemory(&backup, sizeof(backup));
+        lpProcessInformation = &backup;
+    }
+    if (pfCreateProcessW == NULL) {   // NULL selects the system routine
+        pfCreateProcessW = CreateProcessW;
+    }
+
+    const BOOL fCreated = pfCreateProcessW(lpApplicationName,
+                                           lpCommandLine,
+                                           lpProcessAttributes,
+                                           lpThreadAttributes,
+                                           bInheritHandles,
+                                           dwCreationFlags | CREATE_SUSPENDED,
+                                           lpEnvironment,
+                                           lpCurrentDirectory,
+                                           lpStartupInfo,
+                                           lpProcessInformation);
+    if (!fCreated) {
+        return FALSE;
+    }
+
+    BOOL fInjected = DetourUpdateProcessWithDll(lpProcessInformation->hProcess, rlpDlls, nDlls);
+    if (!fInjected) {
+        fInjected = DetourProcessViaHelperDllsW(lpProcessInformation->dwProcessId,
+                                                nDlls, rlpDlls, pfCreateProcessW);
+    }
+    if (!fInjected) {   // both routes failed: kill the child and drop its handles
+        TerminateProcess(lpProcessInformation->hProcess, ~0u);
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+        return FALSE;
+    }
+    if ((dwCreationFlags & CREATE_SUSPENDED) == 0) {
+        ResumeThread(lpProcessInformation->hThread);
+    }
+    if (fLocalInfo) {   // handles held in the local fallback never reach the caller
+        CloseHandle(lpProcessInformation->hProcess);
+        CloseHandle(lpProcessInformation->hThread);
+    }
+    return TRUE;
+}
+
+//
+///////////////////////////////////////////////////////////////// End of File.
diff --git a/src/detours/detours.cpp b/src/detours/detours.cpp
new file mode 100644
index 0000000..fb18ef8
--- /dev/null
+++ b/src/detours/detours.cpp
@@ -0,0 +1,2489 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Core Detours Functionality (detours.cpp of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+
+#pragma warning(disable:4068) // unknown pragma (suppress)
+
+#if _MSC_VER >= 1900
+#pragma warning(push)
+#pragma warning(disable:4091) // empty typedef
+#endif
+
+#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1
+#include <windows.h>
+
+#if (_MSC_VER < 1299)
+#pragma warning(disable: 4710)
+#endif
+
+//#define DETOUR_DEBUG 1
+#define DETOURS_INTERNAL
+
+#include "detours.h"
+
+#if DETOURS_VERSION != 0x4c0c1   // 0xMAJORcMINORcPATCH
+#error detours.h version mismatch
+#endif
+
+#if _MSC_VER >= 1900
+#pragma warning(pop)
+#endif
+
+#define NOTHROW
+
+//////////////////////////////////////////////////////////////////////////////
+//
+struct _DETOUR_ALIGN   // two bit-fields packed into one byte (3 + 5 bits)
+{
+    BYTE    obTarget        : 3;   // 3-bit field; presumably an offset into the target code -- TODO confirm
+    BYTE    obTrampoline    : 5;   // 5-bit field; presumably an offset into the trampoline code -- TODO confirm
+};
+// Compile-time check that both bit-fields share a single byte.
+C_ASSERT(sizeof(_DETOUR_ALIGN) == 1);
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Region reserved for system DLLs, which cannot be used for trampolines.
+//
+static PVOID    s_pSystemRegionLowerBound   = (PVOID)(ULONG_PTR)0x70000000;   // initial lower bound of the reserved region
+static PVOID    s_pSystemRegionUpperBound   = (PVOID)(ULONG_PTR)0x80000000;   // initial upper bound of the reserved region
+
+//////////////////////////////////////////////////////////////////////////////
+//
+static bool detour_is_imported(PBYTE pbCode, PBYTE pbAddress)
+{   // True when pbAddress lies inside the IAT directory of the PE image found at pbCode's allocation base.
+    MEMORY_BASIC_INFORMATION mbi;
+    VirtualQuery((PVOID)pbCode, &mbi, sizeof(mbi));   // NOTE(review): result unchecked; mbi is uninitialized if the query fails
+    __try {
+        PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)mbi.AllocationBase;
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            return false;
+        }
+        // e_lfanew is the byte offset from the DOS header to the NT headers.
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            return false;
+        }
+        // Half-open range test: [image base + IAT rva, image base + IAT rva + IAT size).
+        if (pbAddress >= ((PBYTE)pDosHeader +
+                          pNtHeader->OptionalHeader
+                          .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress) &&
+            pbAddress < ((PBYTE)pDosHeader +
+                         pNtHeader->OptionalHeader
+                         .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress +
+                         pNtHeader->OptionalHeader
+                         .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size)) {
+            return true;
+        }
+    }
+#pragma prefast(suppress:28940, "A bad pointer means this probably isn't a PE header.")
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {   // only access violations are handled here
+        return false;
+    }
+    return false;   // headers were valid but pbAddress is outside the IAT range
+}
+
+inline ULONG_PTR detour_2gb_below(ULONG_PTR address)
+{   // address - 0x7ff80000, clamped to 0x80000 when address is too small to subtract from.
+    return (address <= (ULONG_PTR)0x7ff80000) ? 0x80000 : address - 0x7ff80000;
+}
+
+inline ULONG_PTR detour_2gb_above(ULONG_PTR address)
+{   // address + 0x7ff80000, clamped to a fixed ceiling when address is already in the top 2GB.
+#if !defined(DETOURS_64BIT)
+    return (address >= (ULONG_PTR)0x80000000) ? (ULONG_PTR)0xfff80000 : address + 0x7ff80000;
+#else
+    return (address >= (ULONG_PTR)0xffffffff80000000) ? (ULONG_PTR)0xfffffffffff80000 : address + 0x7ff80000;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////// X86.
+//
+#ifdef DETOURS_X86
+
+struct _DETOUR_TRAMPOLINE   // X86 trampoline record; total size is pinned to 72 bytes below
+{
+    BYTE            rbCode[30];     // target code + jmp to pbRemain
+    BYTE            cbCode;         // size of moved target code.
+    BYTE            cbCodeBreak;    // padding to make debugging easier.
+    BYTE            rbRestore[22];  // original target code.
+    BYTE            cbRestore;      // size of original target code.
+    BYTE            cbRestoreBreak; // padding to make debugging easier.
+    _DETOUR_ALIGN   rAlign[8];      // instruction alignment array.
+    PBYTE           pbRemain;       // first instruction after moved code. [free list]
+    PBYTE           pbDetour;       // first instruction of detour function.
+};
+// Compile-time check that the X86 layout above is exactly 72 bytes.
+C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 72);
+
+enum {
+    SIZE_OF_JMP = 5     // matches detour_gen_jmp_immediate: 0xE9 opcode + 32-bit displacement
+};
+
+inline PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE pbJmpVal)
+{   // Writes a 5-byte relative jump at pbCode that targets pbJmpVal; returns the byte after it.
+    PBYTE pbJmpSrc = pbCode + 5;   // the displacement is measured from the end of the instruction
+    *pbCode++ = 0xE9;   // jmp +imm32
+    *((INT32*&)pbCode)++ = (INT32)(pbJmpVal - pbJmpSrc);   // store the 32-bit displacement and advance pbCode by 4
+    return pbCode;
+}
+
+inline PBYTE detour_gen_jmp_indirect(PBYTE pbCode, PBYTE *ppbJmpVal)
+{   // Writes a 6-byte indirect jump (FF 25) through the pointer slot ppbJmpVal; returns the byte after it.
+    *pbCode++ = 0xff;   // jmp [+imm32]
+    *pbCode++ = 0x25;
+    *((INT32*&)pbCode)++ = (INT32)((PBYTE)ppbJmpVal);   // operand is the slot's address itself, stored as 32 bits
+    return pbCode;
+}
+
+inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
+{   // Fills [pbCode, pbLimit) with 0xcc (brk) bytes and returns the advanced cursor.
+    for (; pbCode < pbLimit; pbCode++) {
+        *pbCode = 0xcc;
+    }
+    return pbCode;
+}
+
+inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
+{   // Follows import-vector and patch jumps found at pbCode and returns the address finally reached.
+    if (pbCode == NULL) {
+        return NULL;
+    }
+    if (ppGlobals != NULL) {   // optional output; always cleared to NULL in this X86 variant
+        *ppGlobals = NULL;
+    }
+
+    // First, skip over the import vector if there is one.
+    if (pbCode[0] == 0xff && pbCode[1] == 0x25) {   // jmp [imm32]
+        // Looks like an import alias jump, then get the code it points to.
+        PBYTE pbTarget = *(UNALIGNED PBYTE *)&pbCode[2];   // absolute slot address stored in the instruction
+        if (detour_is_imported(pbCode, pbTarget)) {
+            PBYTE pbNew = *(UNALIGNED PBYTE *)pbTarget;   // pointer currently held in that slot
+            DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
+            pbCode = pbNew;
+        }
+    }
+
+    // Then, skip over a patch jump
+    if (pbCode[0] == 0xeb) {   // jmp +imm8
+        PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];   // displacement byte read as CHAR, relative to the end of the 2-byte jump
+        DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
+        pbCode = pbNew;
+
+        // The patch jump may itself land on an import vector; skip over that too.
+        if (pbCode[0] == 0xff && pbCode[1] == 0x25) {   // jmp [imm32]
+            // Looks like an import alias jump, then get the code it points to.
+            PBYTE pbTarget = *(UNALIGNED PBYTE *)&pbCode[2];
+            if (detour_is_imported(pbCode, pbTarget)) {
+                pbNew = *(UNALIGNED PBYTE *)pbTarget;
+                DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
+                pbCode = pbNew;
+            }
+        }
+        // Finally, skip over a long jump if it is the target of the patch jump.
+        else if (pbCode[0] == 0xe9) {   // jmp +imm32
+            pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];   // signed 32-bit displacement, relative to the end of the 5-byte jump
+            DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
+            pbCode = pbNew;
+        }
+    }
+    return pbCode;
+}
+
+inline void detour_find_jmp_bounds(PBYTE pbCode,
+                                   PDETOUR_TRAMPOLINE *ppLower,   // out: lowest address allowed for a trampoline
+                                   PDETOUR_TRAMPOLINE *ppUpper)   // out: highest address allowed for a trampoline
+{
+    // We have to place trampolines within +/- 2GB of code.
+    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
+    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
+    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));
+
+    // And, within +/- 2GB of relative jmp targets.
+    if (pbCode[0] == 0xe9) {   // jmp +imm32
+        PBYTE pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];   // destination of that jump
+        // Only the bound on the far side of the jump target is recomputed.
+        if (pbNew < pbCode) {   // target lies below the code: recompute the upper bound from it
+            hi = detour_2gb_above((ULONG_PTR)pbNew);
+        }
+        else {   // target lies at or above the code: recompute the lower bound from it
+            lo = detour_2gb_below((ULONG_PTR)pbNew);
+        }
+        DETOUR_TRACE(("[%p..%p..%p] +imm32\n", lo, pbCode, hi));
+    }
+
+    *ppLower = (PDETOUR_TRAMPOLINE)lo;
+    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
+}
+
+inline BOOL detour_does_code_end_function(PBYTE pbCode)
+{
+    // TRUE when the bytes at pbCode start one of the instructions this file
+    // treats as the end of a function; dispatch is on the first byte.
+    switch (pbCode[0]) {
+      case 0xeb:    // jmp +imm8
+      case 0xe9:    // jmp +imm32
+      case 0xe0:    // jmp eax  NOTE(review): a lone E0 decodes as LOOPNE rel8; jmp eax is FF E0 -- confirm
+      case 0xc2:    // ret +imm8
+      case 0xc3:    // ret
+      case 0xcc:    // brk
+        return TRUE;
+      case 0xf3:    // rep ret
+        return (pbCode[1] == 0xc3) ? TRUE : FALSE;
+      case 0xff:    // jmp [+imm32]
+        return (pbCode[1] == 0x25) ? TRUE : FALSE;
+      case 0x26:    // jmp es:
+      case 0x2e:    // jmp cs:
+      case 0x36:    // jmp ss:
+      case 0x3e:    // jmp ds:
+      case 0x64:    // jmp fs:
+      case 0x65:    // jmp gs:
+        // Segment-override prefix followed by jmp [+imm32].
+        return (pbCode[1] == 0xff && pbCode[2] == 0x25) ? TRUE : FALSE;
+      default:
+        return FALSE;
+    }
+}
+
+inline ULONG detour_is_code_filler(PBYTE pbCode)
+{
+    // Returns the byte length of the filler instruction that starts at pbCode
+    // (one of the NOP encodings listed below, or int 3), or 0 when pbCode does
+    // not start with a recognised filler.
+    //
+    // Each table row is { length, pattern bytes... }.  Rows are probed from top
+    // to bottom and the first row that matches in full decides the result.
+
+    static const BYTE s_rbFiller[12][12] = {
+        // 1-byte NOP.
+        {  1, 0x90 },
+        // 2-byte NOP.
+        {  2, 0x66, 0x90 },
+        // 3-byte NOP.
+        {  3, 0x0F, 0x1F, 0x00 },
+        // 4-byte NOP.
+        {  4, 0x0F, 0x1F, 0x40, 0x00 },
+        // 5-byte NOP.
+        {  5, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+        // 6-byte NOP.
+        {  6, 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+        // 7-byte NOP.
+        {  7, 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
+        // 8-byte NOP.
+        {  8, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // 9-byte NOP.
+        {  9, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // 10-byte NOP.
+        { 10, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // 11-byte NOP.
+        { 11, 0x66, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // int 3.
+        {  1, 0xcc },
+    };
+    // Number of rows in the table above.
+    const ULONG cRows = (ULONG)(sizeof(s_rbFiller) / sizeof(s_rbFiller[0]));
+
+    for (ULONG iRow = 0; iRow < cRows; iRow++) {
+        const BYTE *pbRow = s_rbFiller[iRow];
+        const ULONG cbRow = pbRow[0];
+        ULONG cbSame = 0;
+
+        // Compare one byte at a time and stop at the first mismatch, so that
+        // for each row no byte of pbCode past the mismatch is read.
+        while (cbSame < cbRow && pbCode[cbSame] == pbRow[cbSame + 1]) {
+            cbSame++;
+        }
+
+        // Every pattern byte of this row matched.
+        if (cbSame == cbRow) {
+            return cbRow;
+        }
+    }
+
+    // No filler pattern starts at pbCode.
+    return 0;
+}
+
+#endif // DETOURS_X86
+
+///////////////////////////////////////////////////////////////////////// X64.
+//
+#ifdef DETOURS_X64
+
+struct _DETOUR_TRAMPOLINE   // X64 trampoline record; total size is pinned to 96 bytes below
+{
+    // An X64 instruction can be 15 bytes long.
+    // In practice 11 seems to be the limit.
+    BYTE            rbCode[30];     // target code + jmp to pbRemain.
+    BYTE            cbCode;         // size of moved target code.
+    BYTE            cbCodeBreak;    // padding to make debugging easier.
+    BYTE            rbRestore[30];  // original target code.
+    BYTE            cbRestore;      // size of original target code.
+    BYTE            cbRestoreBreak; // padding to make debugging easier.
+    _DETOUR_ALIGN   rAlign[8];      // instruction alignment array.
+    PBYTE           pbRemain;       // first instruction after moved code. [free list]
+    PBYTE           pbDetour;       // first instruction of detour function.
+    BYTE            rbCodeIn[8];    // jmp [pbDetour]
+};
+// Compile-time check that the X64 layout above is exactly 96 bytes.
+C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 96);
+
+enum {
+    SIZE_OF_JMP = 5     // matches detour_gen_jmp_immediate: 0xE9 opcode + 32-bit displacement
+};
+
+inline PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE pbJmpVal)
+{   // Writes a 5-byte relative jump at pbCode that targets pbJmpVal; returns the byte after it.
+    PBYTE pbJmpSrc = pbCode + 5;   // the displacement is measured from the end of the instruction
+    *pbCode++ = 0xE9;   // jmp +imm32
+    *((INT32*&)pbCode)++ = (INT32)(pbJmpVal - pbJmpSrc);   // difference truncated to 32 bits; advances pbCode by 4
+    return pbCode;
+}
+
+inline PBYTE detour_gen_jmp_indirect(PBYTE pbCode, PBYTE *ppbJmpVal)
+{   // Writes a 6-byte indirect jump (FF 25) through the pointer slot ppbJmpVal; returns the byte after it.
+    PBYTE pbJmpSrc = pbCode + 6;   // the operand is measured from the end of the 6-byte instruction
+    *pbCode++ = 0xff;   // jmp [+imm32]
+    *pbCode++ = 0x25;
+    *((INT32*&)pbCode)++ = (INT32)((PBYTE)ppbJmpVal - pbJmpSrc);   // slot offset relative to pbJmpSrc, truncated to 32 bits
+    return pbCode;
+}
+
+inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
+{   // Fills [pbCode, pbLimit) with 0xcc (brk) bytes and returns the advanced cursor.
+    for (; pbCode < pbLimit; pbCode++) {
+        *pbCode = 0xcc;
+    }
+    return pbCode;
+}
+
+inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
+{   // Follows import-vector and patch jumps found at pbCode and returns the address finally reached.
+    if (pbCode == NULL) {
+        return NULL;
+    }
+    if (ppGlobals != NULL) {   // optional output; always cleared to NULL in this X64 variant
+        *ppGlobals = NULL;
+    }
+
+    // First, skip over the import vector if there is one.
+    if (pbCode[0] == 0xff && pbCode[1] == 0x25) {   // jmp [+imm32]
+        // Looks like an import alias jump, then get the code it points to.
+        PBYTE pbTarget = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2];   // slot address: end of the 6-byte jump + signed 32-bit offset
+        if (detour_is_imported(pbCode, pbTarget)) {
+            PBYTE pbNew = *(UNALIGNED PBYTE *)pbTarget;   // pointer currently held in that slot
+            DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
+            pbCode = pbNew;
+        }
+    }
+
+    // Then, skip over a patch jump
+    if (pbCode[0] == 0xeb) {   // jmp +imm8
+        PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];   // displacement byte read as CHAR, relative to the end of the 2-byte jump
+        DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
+        pbCode = pbNew;
+
+        // The patch jump may itself land on an import vector; skip over that too.
+        if (pbCode[0] == 0xff && pbCode[1] == 0x25) {   // jmp [+imm32]
+            // Looks like an import alias jump, then get the code it points to.
+            PBYTE pbTarget = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2];
+            if (detour_is_imported(pbCode, pbTarget)) {
+                pbNew = *(UNALIGNED PBYTE *)pbTarget;
+                DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
+                pbCode = pbNew;
+            }
+        }
+        // Finally, skip over a long jump if it is the target of the patch jump.
+        else if (pbCode[0] == 0xe9) {   // jmp +imm32
+            pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];   // signed 32-bit displacement, relative to the end of the 5-byte jump
+            DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
+            pbCode = pbNew;
+        }
+    }
+    return pbCode;
+}
+
+inline void detour_find_jmp_bounds(PBYTE pbCode,
+                                   PDETOUR_TRAMPOLINE *ppLower,   // out: lowest address allowed for a trampoline
+                                   PDETOUR_TRAMPOLINE *ppUpper)   // out: highest address allowed for a trampoline
+{
+    // We have to place trampolines within +/- 2GB of code.
+    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
+    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
+    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));
+
+    // And, within +/- 2GB of relative jmp vectors.
+    if (pbCode[0] == 0xff && pbCode[1] == 0x25) {   // jmp [+imm32]
+        PBYTE pbNew = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2];   // address of the vector the jump reads
+        // Only the bound on the far side of the vector is recomputed.
+        if (pbNew < pbCode) {   // vector lies below the code: recompute the upper bound from it
+            hi = detour_2gb_above((ULONG_PTR)pbNew);
+        }
+        else {   // vector lies at or above the code: recompute the lower bound from it
+            lo = detour_2gb_below((ULONG_PTR)pbNew);
+        }
+        DETOUR_TRACE(("[%p..%p..%p] [+imm32]\n", lo, pbCode, hi));
+    }
+    // And, within +/- 2GB of relative jmp targets.
+    else if (pbCode[0] == 0xe9) {   // jmp +imm32
+        PBYTE pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];   // destination of that jump
+        // Only the bound on the far side of the jump target is recomputed.
+        if (pbNew < pbCode) {   // target lies below the code: recompute the upper bound from it
+            hi = detour_2gb_above((ULONG_PTR)pbNew);
+        }
+        else {   // target lies at or above the code: recompute the lower bound from it
+            lo = detour_2gb_below((ULONG_PTR)pbNew);
+        }
+        DETOUR_TRACE(("[%p..%p..%p] +imm32\n", lo, pbCode, hi));
+    }
+
+    *ppLower = (PDETOUR_TRAMPOLINE)lo;
+    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
+}
+
+inline BOOL detour_does_code_end_function(PBYTE pbCode)
+{
+    // TRUE when the bytes at pbCode start one of the instructions this file
+    // treats as the end of a function; dispatch is on the first byte.
+    switch (pbCode[0]) {
+      case 0xeb:    // jmp +imm8
+      case 0xe9:    // jmp +imm32
+      case 0xe0:    // jmp eax  NOTE(review): a lone E0 decodes as LOOPNE rel8; jmp eax is FF E0 -- confirm
+      case 0xc2:    // ret +imm8
+      case 0xc3:    // ret
+      case 0xcc:    // brk
+        return TRUE;
+      case 0xf3:    // rep ret
+        return (pbCode[1] == 0xc3) ? TRUE : FALSE;
+      case 0xff:    // jmp [+imm32]
+        return (pbCode[1] == 0x25) ? TRUE : FALSE;
+      case 0x26:    // jmp es:
+      case 0x2e:    // jmp cs:
+      case 0x36:    // jmp ss:
+      case 0x3e:    // jmp ds:
+      case 0x64:    // jmp fs:
+      case 0x65:    // jmp gs:
+        // Segment-override prefix followed by jmp [+imm32].
+        return (pbCode[1] == 0xff && pbCode[2] == 0x25) ? TRUE : FALSE;
+      default:
+        return FALSE;
+    }
+}
+
+inline ULONG detour_is_code_filler(PBYTE pbCode)
+{
+    // Returns the byte length of the filler instruction that starts at pbCode
+    // (one of the NOP encodings listed below, or int 3), or 0 when pbCode does
+    // not start with a recognised filler.
+    //
+    // Each table row is { length, pattern bytes... }.  Rows are probed from top
+    // to bottom and the first row that matches in full decides the result.
+
+    static const BYTE s_rbFiller[12][12] = {
+        // 1-byte NOP.
+        {  1, 0x90 },
+        // 2-byte NOP.
+        {  2, 0x66, 0x90 },
+        // 3-byte NOP.
+        {  3, 0x0F, 0x1F, 0x00 },
+        // 4-byte NOP.
+        {  4, 0x0F, 0x1F, 0x40, 0x00 },
+        // 5-byte NOP.
+        {  5, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+        // 6-byte NOP.
+        {  6, 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+        // 7-byte NOP.
+        {  7, 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
+        // 8-byte NOP.
+        {  8, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // 9-byte NOP.
+        {  9, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // 10-byte NOP.
+        { 10, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // 11-byte NOP.
+        { 11, 0x66, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        // int 3.
+        {  1, 0xcc },
+    };
+    // Number of rows in the table above.
+    const ULONG cRows = (ULONG)(sizeof(s_rbFiller) / sizeof(s_rbFiller[0]));
+
+    for (ULONG iRow = 0; iRow < cRows; iRow++) {
+        const BYTE *pbRow = s_rbFiller[iRow];
+        const ULONG cbRow = pbRow[0];
+        ULONG cbSame = 0;
+
+        // Compare one byte at a time and stop at the first mismatch, so that
+        // for each row no byte of pbCode past the mismatch is read.
+        while (cbSame < cbRow && pbCode[cbSame] == pbRow[cbSame + 1]) {
+            cbSame++;
+        }
+
+        // Every pattern byte of this row matched.
+        if (cbSame == cbRow) {
+            return cbRow;
+        }
+    }
+
+    // No filler pattern starts at pbCode.
+    return 0;
+}
+
+#endif // DETOURS_X64
+
+//////////////////////////////////////////////////////////////////////// IA64.
+//
+#ifdef DETOURS_IA64
+
+struct _DETOUR_TRAMPOLINE
+{
+    // On the IA64, a trampoline is used for both incoming and outgoing calls.
+    //
+    // The trampoline contains the following bundles for the outgoing call:
+    //      movl gp=target_gp;
+    //      <relocated target bundle>
+    //      brl  target_code;
+    //
+    // The trampoline contains the following bundles for the incoming call:
+    //      alloc  r41=ar.pfs, b, 0, 8, 0
+    //      mov    r40=rp
+    //
+    //      adds   r50=0, r39
+    //      adds   r49=0, r38
+    //      adds   r48=0, r37 ;;
+    //
+    //      adds   r47=0, r36
+    //      adds   r46=0, r35
+    //      adds   r45=0, r34
+    //
+    //      adds   r44=0, r33
+    //      adds   r43=0, r32
+    //      adds   r42=0, gp ;;
+    //
+    //      movl   gp=ffffffff`ffffffff ;;
+    //
+    //      brl.call.sptk.few rp=disas!TestCodes+20e0 (00000000`00404ea0) ;;
+    //
+    //      adds   gp=0, r42
+    //      mov    rp=r40, +0 ;;
+    //      mov.i  ar.pfs=r41
+    //
+    //      br.ret.sptk.many rp ;;
+    //
+    // This way, we only have to relocate a single bundle.
+    //
+    // The complicated incoming trampoline is required because we have to
+    // create an additional stack frame so that we save and restore the gp.
+    // We must do this because gp is a caller-saved register, but not saved
+    // if the caller thinks the target is in the same DLL, which changes
+    // when we insert a detour.
+    //
+    DETOUR_IA64_BUNDLE  bMovlTargetGp;  // Bundle which sets target GP
+    BYTE                rbCode[sizeof(DETOUR_IA64_BUNDLE)]; // moved bundle.
+    DETOUR_IA64_BUNDLE  bBrlRemainEip;  // Brl to pbRemain
+    // This must be adjacent to bBranchIslands.
+
+    // Each instruction in the moved bundle could be an IP-relative chk or branch or call.
+    // Any such instructions are changed to point to a brl in bBranchIslands.
+    // This must be adjacent to bBrlRemainEip -- see "pbPool".
+    DETOUR_IA64_BUNDLE bBranchIslands[DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE];
+
+    // Target of brl inserted in target function
+    DETOUR_IA64_BUNDLE  bAllocFrame;    // alloc frame
+    DETOUR_IA64_BUNDLE  bSave37to39;    // save r37, r38, r39.
+    DETOUR_IA64_BUNDLE  bSave34to36;    // save r34, r35, r36.
+    DETOUR_IA64_BUNDLE  bSaveGPto33;    // save gp, r32, r33.
+    DETOUR_IA64_BUNDLE  bMovlDetourGp;  // set detour GP.
+    DETOUR_IA64_BUNDLE  bCallDetour;    // call detour.
+    DETOUR_IA64_BUNDLE  bPopFrameGp;    // pop frame and restore gp.
+    DETOUR_IA64_BUNDLE  bReturn;        // return to caller.
+
+    PLABEL_DESCRIPTOR   pldTrampoline;
+
+    BYTE                rbRestore[sizeof(DETOUR_IA64_BUNDLE)]; // original target bundle.
+    BYTE                cbRestore;      // size of original target code.
+    BYTE                cbCode;         // size of moved target code.
+    _DETOUR_ALIGN       rAlign[14];     // instruction alignment array.
+    PBYTE               pbRemain;       // first instruction after moved code. [free list]
+    PBYTE               pbDetour;       // first instruction of detour function.
+    PPLABEL_DESCRIPTOR  ppldDetour;     // [pbDetour,gpDetour]
+    PPLABEL_DESCRIPTOR  ppldTarget;     // [pbTarget,gpDetour]
+};
+
+C_ASSERT(sizeof(DETOUR_IA64_BUNDLE) == 16);   // a bundle must be exactly 16 bytes
+C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 256 + DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE * 16);   // pins the struct layout above
+
+enum {
+    SIZE_OF_JMP = sizeof(DETOUR_IA64_BUNDLE)   // on IA64 the jmp size is one whole bundle
+};
+
+inline PBYTE detour_skip_jmp(PBYTE pPointer, PVOID *ppGlobals)
+{   // IA64: pPointer is read as a PLABEL_DESCRIPTOR; returns its code address, following one import jump if present.
+    PBYTE pGlobals = NULL;
+    PBYTE pbCode = NULL;
+
+    if (pPointer != NULL) {
+        PPLABEL_DESCRIPTOR ppld = (PPLABEL_DESCRIPTOR)pPointer;
+        pbCode = (PBYTE)ppld->EntryPoint;
+        pGlobals = (PBYTE)ppld->GlobalPointer;
+    }
+    if (ppGlobals != NULL) {    // ppGlobals is optional; it receives NULL when pPointer is NULL
+        *ppGlobals = pGlobals;
+    }
+    if (pbCode == NULL) {
+        return NULL;
+    }
+
+    DETOUR_IA64_BUNDLE *pb = (DETOUR_IA64_BUNDLE *)pbCode;
+
+    // IA64 Local Import Jumps look like:
+    //      addl   r2=ffffffff`ffe021c0, gp ;;
+    //      ld8    r2=[r2]
+    //      nop.i  0 ;;
+    //
+    //      ld8    r3=[r2], 8 ;;
+    //      ld8    gp=[r2]
+    //      mov    b6=r3, +0
+    //
+    //      nop.m  0
+    //      nop.i  0
+    //      br.cond.sptk.few b6
+    //
+
+    // Match the three bundles; only the first word is masked (immediate bits ignored).
+    if ((pb[0].wide[0] & 0xfffffc000603ffff) == 0x002024000200100b &&
+        pb[0].wide[1] == 0x0004000000203008 &&
+        pb[1].wide[0] == 0x001014180420180a &&
+        pb[1].wide[1] == 0x07000830c0203008 &&
+        pb[2].wide[0] == 0x0000000100000010 &&
+        pb[2].wide[1] == 0x0080006000000200) {
+
+        ULONG64 offset =        // reassemble the addl immediate from its three fields
+            ((pb[0].wide[0] & 0x0000000001fc0000) >> 18) |  // imm7b
+            ((pb[0].wide[0] & 0x000001ff00000000) >> 25) |  // imm9d
+            ((pb[0].wide[0] & 0x00000000f8000000) >> 11);   // imm5c
+        if (pb[0].wide[0] & 0x0000020000000000) {           // sign
+            offset |= 0xffffffffffe00000;                   // extend the sign through bit 63
+        }
+        PBYTE pbTarget = pGlobals + offset;                 // gp-relative slot holding the label pointer
+        DETOUR_TRACE(("%p: potential import jump, target=%p\n", pb, pbTarget));
+
+        if (detour_is_imported(pbCode, pbTarget) && *(PBYTE*)pbTarget != NULL) {
+            DETOUR_TRACE(("%p: is import jump, label=%p\n", pb, *(PBYTE *)pbTarget));
+
+            PPLABEL_DESCRIPTOR ppld = (PPLABEL_DESCRIPTOR)*(PBYTE *)pbTarget;
+            pbCode = (PBYTE)ppld->EntryPoint;
+            pGlobals = (PBYTE)ppld->GlobalPointer;
+            if (ppGlobals != NULL) {        // overwrite the globals reported earlier with the import's
+                *ppGlobals = pGlobals;
+            }
+        }
+    }
+    return pbCode;
+}
+
+
+inline void detour_find_jmp_bounds(PBYTE pbCode,
+                                   PDETOUR_TRAMPOLINE *ppLower,
+                                   PDETOUR_TRAMPOLINE *ppUpper)
+{   // IA64: the bounds are fixed constants and do not depend on pbCode.
+    (void)pbCode;
+    *ppLower = (PDETOUR_TRAMPOLINE)(ULONG_PTR)0x0000000000080000;
+    *ppUpper = (PDETOUR_TRAMPOLINE)(ULONG_PTR)0xfffffffffff80000;
+}
+
+inline BOOL detour_does_code_end_function(PBYTE pbCode)
+{
+    // Routine not needed on IA64; this stub ignores pbCode and always answers FALSE.
+    (void)pbCode;
+    return FALSE;
+}
+
+inline ULONG detour_is_code_filler(PBYTE pbCode)
+{
+    // Routine not needed on IA64; this stub ignores pbCode and always reports 0 bytes.
+    (void)pbCode;
+    return 0;
+}
+
+#endif // DETOURS_IA64
+
+#ifdef DETOURS_ARM
+
+struct _DETOUR_TRAMPOLINE
+{
+    // ARM trampoline layout.  A Thumb-2 instruction can be 2 or 4 bytes long.
+    BYTE            rbCode[62];     // target code + jmp to pbRemain
+    BYTE            cbCode;         // size of moved target code.
+    BYTE            cbCodeBreak;    // padding to make debugging easier.
+    BYTE            rbRestore[22];  // original target code.
+    BYTE            cbRestore;      // size of original target code.
+    BYTE            cbRestoreBreak; // padding to make debugging easier.
+    _DETOUR_ALIGN   rAlign[8];      // instruction alignment array.
+    PBYTE           pbRemain;       // first instruction after moved code. [free list]
+    PBYTE           pbDetour;       // first instruction of detour function.
+};
+
+C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 104);   // pins the struct layout above
+
+enum {
+    SIZE_OF_JMP = 8     // NOTE(review): presumably bytes, as on IA64 where it is a sizeof -- confirm
+};
+
+inline PBYTE align4(PBYTE pValue)
+{   // Clears the two low bits of pValue, i.e. rounds the address down to a multiple of 4.
+    return (PBYTE)(((ULONG)pValue) & ~(ULONG)3u);   // the pointer is converted through ULONG
+}
+
+inline ULONG fetch_thumb_opcode(PBYTE pbCode)
+{   // Reads one 16-bit unit at pbCode, or two combined when the first is >= 0xe800.
+    ULONG Opcode = *(UINT16 *)&pbCode[0];
+    if (Opcode >= 0xe800) {     // first unit becomes the high half, the next unit the low half
+        Opcode = (Opcode << 16) | *(UINT16 *)&pbCode[2];
+    }
+    return Opcode;
+}
+
+inline void write_thumb_opcode(PBYTE &pbCode, ULONG Opcode)
+{   // Stores Opcode as one or two 16-bit units and advances pbCode (by reference) past them.
+    if (Opcode >= 0x10000) {    // value needs two units: emit the high half first
+        *((UINT16*&)pbCode)++ = Opcode >> 16;
+    }
+    *((UINT16*&)pbCode)++ = (UINT16)Opcode;     // low half (or the whole opcode when it fits in 16 bits)
+}
+
+PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal)
+{   // ARM: writes "LDR PC,[PC+n]" at pbCode plus a 4-byte literal holding pbJmpVal; returns the advanced pbCode.
+    PBYTE pbLiteral;
+    if (ppPool != NULL) {           // caller supplied a pool: take 4 bytes just below *ppPool
+        *ppPool = *ppPool - 4;
+        pbLiteral = *ppPool;
+    }
+    else {                          // no pool: the literal goes inline, 4-aligned, after the instruction
+        pbLiteral = align4(pbCode + 6);
+    }
+
+    *((PBYTE*&)pbLiteral) = DETOURS_PBYTE_TO_PFUNC(pbJmpVal);
+    LONG delta = pbLiteral - align4(pbCode + 4);    // offset of the literal from align4(pbCode + 4)
+
+    write_thumb_opcode(pbCode, 0xf8dff000 | delta);     // LDR PC,[PC+n]
+
+    if (ppPool == NULL) {
+        if (((ULONG)pbCode & 2) != 0) {                 // not yet 4-aligned: pad with one BREAK
+            write_thumb_opcode(pbCode, 0xdefe);         // BREAK
+        }
+        pbCode += 4;                                    // step over the inline literal
+    }
+    return pbCode;
+}
+
+inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
+{   // ARM: writes 0xdefe opcodes from pbCode until pbCode reaches pbLimit.
+    while (pbCode < pbLimit) {
+        write_thumb_opcode(pbCode, 0xdefe);     // advances pbCode by one 16-bit unit
+    }
+    return pbCode;                              // first address at or beyond pbLimit
+}
+
+inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
+{   // ARM: returns the code address for pbCode, following a movw/movt/ldr import jump if one is found.
+    if (pbCode == NULL) {
+        return NULL;
+    }
+    if (ppGlobals != NULL) {    // this path never produces a globals pointer
+        *ppGlobals = NULL;
+    }
+
+    // Skip over the import jump if there is one.
+    pbCode = (PBYTE)DETOURS_PFUNC_TO_PBYTE(pbCode);
+    ULONG Opcode = fetch_thumb_opcode(pbCode);
+
+    if ((Opcode & 0xfbf08f00) == 0xf2400c00) {          // movw r12,#xxxx
+        ULONG Opcode2 = fetch_thumb_opcode(pbCode+4);
+
+        if ((Opcode2 & 0xfbf08f00) == 0xf2c00c00) {      // movt r12,#xxxx
+            ULONG Opcode3 = fetch_thumb_opcode(pbCode+8);
+            if (Opcode3 == 0xf8dcf000) {                 // ldr  pc,[r12]
+                PBYTE pbTarget = (PBYTE)(((Opcode2 << 12) & 0xf7000000) |     // upper 16 bits come from the movt,
+                                         ((Opcode2 <<  1) & 0x08000000) |
+                                         ((Opcode2 << 16) & 0x00ff0000) |
+                                         ((Opcode  >>  4) & 0x0000f700) |     // lower 16 bits from the movw
+                                         ((Opcode  >> 15) & 0x00000800) |
+                                         ((Opcode  >>  0) & 0x000000ff));
+                if (detour_is_imported(pbCode, pbTarget)) {
+                    PBYTE pbNew = *(PBYTE *)pbTarget;   // pointer stored at the reassembled address
+                    pbNew = DETOURS_PFUNC_TO_PBYTE(pbNew);
+                    DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
+                    return pbNew;
+                }
+            }
+        }
+    }
+    return pbCode;      // no import jump recognised
+}
+
+inline void detour_find_jmp_bounds(PBYTE pbCode,
+                                   PDETOUR_TRAMPOLINE *ppLower,
+                                   PDETOUR_TRAMPOLINE *ppUpper)
+{   // ARM: stores the detour_2gb_below/detour_2gb_above results for pbCode in *ppLower/*ppUpper.
+    // We have to place trampolines within +/- 2GB of code.
+    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
+    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
+    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));
+
+    *ppLower = (PDETOUR_TRAMPOLINE)lo;
+    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
+}
+
+
+inline BOOL detour_does_code_end_function(PBYTE pbCode)
+{   // ARM: TRUE when the Thumb opcode at pbCode matches one of the bx/b/pop-pc patterns below.
+    ULONG Opcode = fetch_thumb_opcode(pbCode);
+    if ((Opcode & 0xffffff87) == 0x4700 ||          // bx <reg>
+        (Opcode & 0xf800d000) == 0xf0009000) {      // b <imm20>
+        return TRUE;
+    }
+    if ((Opcode & 0xffff8000) == 0xe8bd8000) {      // pop {...,pc}  (two-unit encoding)
+        __debugbreak();                             // NOTE(review): traps before returning; confirm this is intended outside debugging
+        return TRUE;
+    }
+    if ((Opcode & 0xffffff00) == 0x0000bd00) {      // pop {...,pc}  (single-unit encoding)
+        __debugbreak();                             // NOTE(review): same trap as above
+        return TRUE;
+    }
+    return FALSE;
+}
+
+inline ULONG detour_is_code_filler(PBYTE pbCode)
+{
+    // Both recognised ARM fillers are a single 2-byte unit whose first byte is
+    // 0x00; only the second byte tells them apart.  Returns 2 for filler, else 0.
+    if (pbCode[0] != 0x00) {
+        return 0;
+    }
+    const BYTE bSecond = pbCode[1];
+    return (bSecond == 0xbf /* nop. */ || bSecond == 0x00 /* zero-filled padding. */) ? 2 : 0;
+}
+
+#endif // DETOURS_ARM
+
+#ifdef DETOURS_ARM64
+
+struct _DETOUR_TRAMPOLINE
+{
+    // ARM64 trampoline layout.  An ARM64 instruction is 4 bytes long.
+    //
+    // The overwrite is always 2 instructions plus a literal, so 16 bytes, 4 instructions.
+    //
+    // Copied instructions can expand.
+    //
+    // The scheme using MovImmediate can cause an instruction
+    // to grow as much as 6 times.
+    // That would be Bcc or Tbz with a large address space:
+    //   4 instructions to form immediate
+    //   inverted tbz/bcc
+    //   br
+    //
+    // An expansion of 4 is not uncommon -- bl/blr and small address space:
+    //   3 instructions to form immediate
+    //   br or brl
+    //
+    // A theoretical maximum for rbCode is therefore 4*4*6 + 16 = 112 (another 16 for jmp to pbRemain).
+    //
+    // With literals, the maximum expansion is 5, including the literals: 4*4*5 + 16 = 96.
+    //
+    // The number is rounded up to 128. m_rbScratchDst should match this.
+    //
+    BYTE            rbCode[128];    // target code + jmp to pbRemain
+    BYTE            cbCode;         // size of moved target code.
+    BYTE            cbCodeBreak[3]; // padding to make debugging easier.
+    BYTE            rbRestore[24];  // original target code.
+    BYTE            cbRestore;      // size of original target code.
+    BYTE            cbRestoreBreak[3]; // padding to make debugging easier.
+    _DETOUR_ALIGN   rAlign[8];      // instruction alignment array.
+    PBYTE           pbRemain;       // first instruction after moved code. [free list]
+    PBYTE           pbDetour;       // first instruction of detour function.
+};
+
+C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 184);   // pins the struct layout above
+
+enum {
+    SIZE_OF_JMP = 16    // matches the 16-byte overwrite described in the struct comment
+};
+
+inline ULONG fetch_opcode(PBYTE pbCode)
+{   // Reads one ULONG-sized instruction word at pbCode.
+    return *(ULONG *)pbCode;
+}
+
+inline void write_opcode(PBYTE &pbCode, ULONG Opcode)
+{   // Stores Opcode at pbCode and advances pbCode (passed by reference) by 4 bytes.
+    *(ULONG *)pbCode = Opcode;
+    pbCode += 4;
+}
+
+PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal)
+{   // ARM64: writes "LDR X17,[PC+n]; BR X17" at pbCode plus an 8-byte literal holding pbJmpVal.
+    PBYTE pbLiteral;
+    if (ppPool != NULL) {           // caller supplied a pool: take 8 bytes just below *ppPool
+        *ppPool = *ppPool - 8;
+        pbLiteral = *ppPool;
+    }
+    else {                          // no pool: the literal goes right after the two instructions
+        pbLiteral = pbCode + 8;
+    }
+
+    *((PBYTE*&)pbLiteral) = pbJmpVal;
+    LONG delta = (LONG)(pbLiteral - pbCode);    // byte distance from the LDR to its literal
+
+    write_opcode(pbCode, 0x58000011 | ((delta / 4) << 5));  // LDR X17,[PC+n]
+    write_opcode(pbCode, 0xd61f0000 | (17 << 5));           // BR X17
+
+    if (ppPool == NULL) {
+        pbCode += 8;                // step over the inline literal
+    }
+    return pbCode;                  // first byte after everything written inline
+}
+
+inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
+{   // ARM64: writes the same 4-byte opcode repeatedly from pbCode until pbCode reaches pbLimit.
+    while (pbCode < pbLimit) {
+        write_opcode(pbCode, 0xd4100000 | (0xf000 << 5));   // advances pbCode by 4
+    }
+    return pbCode;                                          // first address at or beyond pbLimit
+}
+
+inline INT64 detour_sign_extend(UINT64 value, UINT bits)
+{
+    // Sign-extends the low `bits` bits of value to 64 bits (1 <= bits <= 63; value
+    // must have no bits set at or above `bits`).  The fill mask has to start at
+    // bit `bits`, not at bit 64 - bits, and is built unsigned so -1 is never shifted.
+    const UINT64 signBit = (UINT64)1 << (bits - 1);
+    return (INT64)((value & signBit) ? (value | (~(UINT64)0 << bits)) : value);
+}
+
+inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
+{   // ARM64: returns the code address for pbCode, following an adrp/ldr/br import jump if one is found.
+    if (pbCode == NULL) {
+        return NULL;
+    }
+    if (ppGlobals != NULL) {    // this path never produces a globals pointer
+        *ppGlobals = NULL;
+    }
+
+    // Skip over the import jump if there is one.
+    pbCode = (PBYTE)pbCode;     // self-assignment; has no effect
+    ULONG Opcode = fetch_opcode(pbCode);
+
+    if ((Opcode & 0x9f00001f) == 0x90000010) {           // adrp  x16, IAT
+        ULONG Opcode2 = fetch_opcode(pbCode + 4);
+
+        if ((Opcode2 & 0xffe003ff) == 0xf9400210) {      // ldr   x16, [x16, IAT]
+            ULONG Opcode3 = fetch_opcode(pbCode + 8);
+
+            if (Opcode3 == 0xd61f0200) {                 // br    x16
+
+/* https://static.docs.arm.com/ddi0487/bb/DDI0487B_b_armv8_arm.pdf
+    The ADRP instruction shifts a signed, 21-bit immediate left by 12 bits, adds it to the value of the program counter with
+    the bottom 12 bits cleared to zero, and then writes the result to a general-purpose register. This permits the
+    calculation of the address at a 4KB aligned memory region. In conjunction with an ADD (immediate) instruction, or
+    a Load/Store instruction with a 12-bit immediate offset, this allows for the calculation of, or access to, any address
+    within +/- 4GB of the current PC.
+
+PC-rel. addressing
+    This section describes the encoding of the PC-rel. addressing instruction class. The encodings in this section are
+    decoded from Data Processing -- Immediate on page C4-226.
+    Add/subtract (immediate)
+    This section describes the encoding of the Add/subtract (immediate) instruction class. The encodings in this section
+    are decoded from Data Processing -- Immediate on page C4-226.
+    Decode fields
+    Instruction page
+    op
+    0 ADR
+    1 ADRP
+
+C6.2.10 ADRP
+    Form PC-relative address to 4KB page adds an immediate value that is shifted left by 12 bits, to the PC value to
+    form a PC-relative address, with the bottom 12 bits masked out, and writes the result to the destination register.
+    ADRP <Xd>, <label>
+    imm = SignExtend(immhi:immlo:Zeros(12), 64);
+
+    31  30 29 28 27 26 25 24 23 5    4 0
+    1   immlo  1  0  0  0  0  immhi  Rd
+         9             0
+
+Rd is hardcoded as 0x10 above.
+Immediate is 21 signed bits split into 2 bits and 19 bits, and is scaled by 4K.
+*/
+                UINT64 const pageLow2 = (Opcode >> 29) & 3;                     // immlo, bits 29-30
+                UINT64 const pageHigh19 = (Opcode >> 5) & ~(~0ui64 << 19);      // immhi, bits 5-23
+                INT64 const page = detour_sign_extend((pageHigh19 << 2) | pageLow2, 21) << 12;
+
+/* https://static.docs.arm.com/ddi0487/bb/DDI0487B_b_armv8_arm.pdf
+
+    C6.2.101 LDR (immediate)
+    Load Register (immediate) loads a word or doubleword from memory and writes it to a register. The address that is
+    used for the load is calculated from a base register and an immediate offset.
+    The Unsigned offset variant scales the immediate offset value by the size of the value accessed before adding it
+    to the base register value.
+
+Unsigned offset
+64-bit variant Applies when size == 11.
+    31 30 29 28  27 26 25 24  23 22  21   10   9 5   4 0
+     1  x  1  1   1  0  0  1   0  1  imm12      Rn    Rt
+         F             9        4              200    10
+
+That is, two low 5 bit fields are registers, hardcoded as 0x10 and 0x10 << 5 above,
+then unsigned size-unscaled (8) 12-bit offset, then opcode bits 0xF94.
+*/
+                UINT64 const offset = ((Opcode2 >> 10) & ~(~0ui64 << 12)) << 3;  // imm12 scaled by 8
+
+                PBYTE const pbTarget = (PBYTE)((ULONG64)pbCode & 0xfffffffffffff000ULL) + page + offset;  // 4KB page of pbCode + page + offset
+
+                if (detour_is_imported(pbCode, pbTarget)) {
+                    PBYTE pbNew = *(PBYTE *)pbTarget;   // pointer stored at the computed address
+                    DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
+                    return pbNew;
+                }
+            }
+        }
+    }
+    return pbCode;      // no import jump recognised
+}
+
+inline void detour_find_jmp_bounds(PBYTE pbCode,
+                                   PDETOUR_TRAMPOLINE *ppLower,
+                                   PDETOUR_TRAMPOLINE *ppUpper)
+{   // ARM64: stores the detour_2gb_below/detour_2gb_above results for pbCode in *ppLower/*ppUpper.
+    // We have to place trampolines within +/- 2GB of code.
+    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
+    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
+    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));
+
+    *ppLower = (PDETOUR_TRAMPOLINE)lo;
+    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
+}
+
+inline BOOL detour_does_code_end_function(PBYTE pbCode)
+{   // ARM64: TRUE when the opcode at pbCode matches one of the two branch patterns below.
+    ULONG Opcode = fetch_opcode(pbCode);
+    if ((Opcode & 0xfffffc1f) == 0xd65f0000 ||      // br <reg> -- NOTE(review): BR is written as 0xd61f.... elsewhere in this file; 0xd65f0000 may be RET -- confirm
+        (Opcode & 0xfc000000) == 0x14000000) {      // b <imm26>
+        return TRUE;
+    }
+    return FALSE;
+}
+
+inline ULONG detour_is_code_filler(PBYTE pbCode)
+{
+    // ARM64 filler is one 4-byte instruction word; returns 4 for filler, else 0.
+    switch (*(ULONG *)pbCode) {
+    case 0xd503201f:    // nop.
+    case 0x00000000:    // zero-filled padding.
+        return 4;
+    }
+    return 0;
+}
+
+#endif // DETOURS_ARM64
+
+//////////////////////////////////////////////// Trampoline Memory Management.
+//
+struct DETOUR_REGION    // header placed at the start of each allocated trampoline region
+{
+    ULONG               dwSignature;    // set to DETOUR_REGION_SIGNATURE when the region is created
+    DETOUR_REGION *     pNext;  // Next region in list of regions.
+    DETOUR_TRAMPOLINE * pFree;  // List of free trampolines in this region.
+};
+typedef DETOUR_REGION * PDETOUR_REGION;
+
+const ULONG DETOUR_REGION_SIGNATURE = 'Rrtd';   // multi-character constant used as a validity tag
+const ULONG DETOUR_REGION_SIZE = 0x10000;       // 64KB per region
+const ULONG DETOUR_TRAMPOLINES_PER_REGION = (DETOUR_REGION_SIZE
+                                             / sizeof(DETOUR_TRAMPOLINE)) - 1;  // one slot is given up for the header
+static PDETOUR_REGION s_pRegions = NULL;            // List of all regions.
+static PDETOUR_REGION s_pRegion = NULL;             // Default region.
+
+static DWORD detour_writable_trampoline_regions()
+{
+    // Make every region read/write/execute; the first failure aborts the walk
+    DWORD dwPrevious;   // and its Win32 error code is returned, otherwise NO_ERROR.
+    for (PDETOUR_REGION pCursor = s_pRegions; pCursor != NULL; pCursor = pCursor->pNext) {
+        const BOOL fChanged = VirtualProtect(pCursor, DETOUR_REGION_SIZE, PAGE_EXECUTE_READWRITE, &dwPrevious);
+        if (!fChanged) {
+            return GetLastError();
+        }
+    }
+    return NO_ERROR;
+}
+
+static void detour_runnable_trampoline_regions()
+{
+    // Switch every region to execute/read and flush the instruction cache over
+    // it.  The return values of both calls are not checked.
+    const HANDLE hSelf = GetCurrentProcess();
+    DWORD dwPrevious;
+    for (PDETOUR_REGION pCursor = s_pRegions; pCursor != NULL; pCursor = pCursor->pNext) {
+        VirtualProtect(pCursor, DETOUR_REGION_SIZE, PAGE_EXECUTE_READ, &dwPrevious);
+        FlushInstructionCache(hSelf, pCursor, DETOUR_REGION_SIZE);
+    }
+}
+
+static PBYTE detour_alloc_round_down_to_region(PBYTE pbTry)
+{
+    // Snap pbTry down to the DETOUR_REGION_SIZE boundary at or below it.  Needed
+    // because WinXP64 returns free areas that aren't REGION aligned to 32-bit
+    // applications.
+    const ULONG_PTR cbPastBoundary = (ULONG_PTR)pbTry & (DETOUR_REGION_SIZE - 1);
+
+    return pbTry - cbPastBoundary;
+}
+
+static PBYTE detour_alloc_round_up_to_region(PBYTE pbTry)
+{
+    // Snap pbTry up to the next DETOUR_REGION_SIZE boundary (unchanged if already
+    // on one).  WinXP64 returns non-REGION-aligned free areas to 32-bit applications.
+    const ULONG_PTR cbPastBoundary = (ULONG_PTR)pbTry & (DETOUR_REGION_SIZE - 1);
+    if (cbPastBoundary == 0) {
+        return pbTry;
+    }
+    return pbTry + (DETOUR_REGION_SIZE - cbPastBoundary);
+}
+
+// Starting at pbLo, try to allocate a memory region, continue until pbHi.
+
+static PVOID detour_alloc_region_from_lo(PBYTE pbLo, PBYTE pbHi)
+{   // Returns a newly committed DETOUR_REGION_SIZE block found scanning upward, or NULL.
+    PBYTE pbTry = detour_alloc_round_up_to_region(pbLo);
+
+    DETOUR_TRACE((" Looking for free region in %p..%p from %p:\n", pbLo, pbHi, pbTry));
+
+    for (; pbTry < pbHi;) {
+        MEMORY_BASIC_INFORMATION mbi;
+
+        if (pbTry >= s_pSystemRegionLowerBound && pbTry <= s_pSystemRegionUpperBound) {
+            // Skip region reserved for system DLLs, but preserve address space entropy.
+            pbTry += 0x08000000;
+            continue;
+        }
+
+        ZeroMemory(&mbi, sizeof(mbi));
+        if (!VirtualQuery(pbTry, &mbi, sizeof(mbi))) {  // query failed: give up the scan
+            break;
+        }
+
+        DETOUR_TRACE(("  Try %p => %p..%p %6x\n",
+                      pbTry,
+                      mbi.BaseAddress,
+                      (PBYTE)mbi.BaseAddress + mbi.RegionSize - 1,
+                      mbi.State));
+
+        if (mbi.State == MEM_FREE && mbi.RegionSize >= DETOUR_REGION_SIZE) {
+
+            PVOID pv = VirtualAlloc(pbTry,
+                                    DETOUR_REGION_SIZE,
+                                    MEM_COMMIT|MEM_RESERVE,
+                                    PAGE_EXECUTE_READWRITE);
+            if (pv != NULL) {
+                return pv;
+            }
+            pbTry += DETOUR_REGION_SIZE;    // allocation failed: try the next region-sized slot
+        }
+        else {                              // not usable: jump past the block VirtualQuery described
+            pbTry = detour_alloc_round_up_to_region((PBYTE)mbi.BaseAddress + mbi.RegionSize);
+        }
+    }
+    return NULL;
+}
+
+// Starting at pbHi, try to allocate a memory region, continue until pbLo.
+
+static PVOID detour_alloc_region_from_hi(PBYTE pbLo, PBYTE pbHi)
+{   // Returns a newly committed DETOUR_REGION_SIZE block found scanning downward, or NULL.
+    PBYTE pbTry = detour_alloc_round_down_to_region(pbHi - DETOUR_REGION_SIZE);
+
+    DETOUR_TRACE((" Looking for free region in %p..%p from %p:\n", pbLo, pbHi, pbTry));
+
+    for (; pbTry > pbLo;) {
+        MEMORY_BASIC_INFORMATION mbi;
+
+        DETOUR_TRACE(("  Try %p\n", pbTry));
+        if (pbTry >= s_pSystemRegionLowerBound && pbTry <= s_pSystemRegionUpperBound) {
+            // Skip region reserved for system DLLs, but preserve address space entropy.
+            pbTry -= 0x08000000;
+            continue;
+        }
+
+        ZeroMemory(&mbi, sizeof(mbi));
+        if (!VirtualQuery(pbTry, &mbi, sizeof(mbi))) {  // query failed: give up the scan
+            break;
+        }
+
+        DETOUR_TRACE(("  Try %p => %p..%p %6x\n",
+                      pbTry,
+                      mbi.BaseAddress,
+                      (PBYTE)mbi.BaseAddress + mbi.RegionSize - 1,
+                      mbi.State));
+
+        if (mbi.State == MEM_FREE && mbi.RegionSize >= DETOUR_REGION_SIZE) {
+
+            PVOID pv = VirtualAlloc(pbTry,
+                                    DETOUR_REGION_SIZE,
+                                    MEM_COMMIT|MEM_RESERVE,
+                                    PAGE_EXECUTE_READWRITE);
+            if (pv != NULL) {
+                return pv;
+            }
+            pbTry -= DETOUR_REGION_SIZE;    // allocation failed: try the next slot down
+        }
+        else {  // NOTE(review): this branch is also reached for small MEM_FREE blocks; confirm AllocationBase is meaningful there
+            pbTry = detour_alloc_round_down_to_region((PBYTE)mbi.AllocationBase
+                                                      - DETOUR_REGION_SIZE);
+        }
+    }
+    return NULL;
+}
+
+static PVOID detour_alloc_trampoline_allocate_new(PBYTE pbTarget,
+                                                  PDETOUR_TRAMPOLINE pLo,
+                                                  PDETOUR_TRAMPOLINE pHi)
+{   // Tries a sequence of search windows around pbTarget; returns the first region allocated, or NULL.
+    PVOID pbTry = NULL;
+
+    // NB: We must always also start the search at an offset from pbTarget
+    //     in order to maintain ASLR entropy.
+
+#if defined(DETOURS_64BIT)
+    // Try looking 1GB below or lower.
+    if (pbTry == NULL && pbTarget > (PBYTE)0x40000000) {
+        pbTry = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget - 0x40000000);
+    }
+    // Try looking 1GB above or higher.
+    if (pbTry == NULL && pbTarget < (PBYTE)0xffffffff40000000) {
+        pbTry = detour_alloc_region_from_lo(pbTarget + 0x40000000, (PBYTE)pHi);
+    }
+    // Try looking 1GB below or higher.
+    if (pbTry == NULL && pbTarget > (PBYTE)0x40000000) {
+        pbTry = detour_alloc_region_from_lo(pbTarget - 0x40000000, pbTarget);
+    }
+    // Try looking 1GB above or lower.
+    if (pbTry == NULL && pbTarget < (PBYTE)0xffffffff40000000) {
+        pbTry = detour_alloc_region_from_hi(pbTarget, pbTarget + 0x40000000);
+    }
+#endif
+
+    // Try anything below (scanning downward from pbTarget to pLo).
+    if (pbTry == NULL) {
+        pbTry = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget);
+    }
+    // Try anything above (scanning upward from pbTarget to pHi).
+    if (pbTry == NULL) {
+        pbTry = detour_alloc_region_from_lo(pbTarget, (PBYTE)pHi);
+    }
+
+    return pbTry;   // NULL when every window failed
+}
+
+PVOID WINAPI DetourAllocateRegionWithinJumpBounds(_In_ LPCVOID pbTarget,
+                                                  _Out_ PDWORD pcbAllocatedSize)
+{   // Allocates one region within the jump bounds of pbTarget; *pcbAllocatedSize gets its size, or 0 on failure.
+    PDETOUR_TRAMPOLINE pLo;
+    PDETOUR_TRAMPOLINE pHi;
+    detour_find_jmp_bounds((PBYTE)pbTarget, &pLo, &pHi);
+
+    PVOID pbNewlyAllocated =
+        detour_alloc_trampoline_allocate_new((PBYTE)pbTarget, pLo, pHi);
+    if (pbNewlyAllocated == NULL) {
+        DETOUR_TRACE(("Couldn't find available memory region!\n"));
+        *pcbAllocatedSize = 0;
+        return NULL;
+    }
+
+    *pcbAllocatedSize = DETOUR_REGION_SIZE;     // the block is not added to s_pRegions here
+    return pbNewlyAllocated;
+}
+
+static PDETOUR_TRAMPOLINE detour_alloc_trampoline(PBYTE pbTarget)
+{   // Returns a trampoline slot within the jump bounds of pbTarget (filled with 0xcc), or NULL.
+    // We have to place trampolines within +/- 2GB of target.
+
+    PDETOUR_TRAMPOLINE pLo;
+    PDETOUR_TRAMPOLINE pHi;
+
+    detour_find_jmp_bounds(pbTarget, &pLo, &pHi);
+
+    PDETOUR_TRAMPOLINE pTrampoline = NULL;
+
+    // Ensure that there is a default region.
+    if (s_pRegion == NULL && s_pRegions != NULL) {
+        s_pRegion = s_pRegions;
+    }
+
+    // First check the default region for a valid free block.
+    if (s_pRegion != NULL && s_pRegion->pFree != NULL &&
+        s_pRegion->pFree >= pLo && s_pRegion->pFree <= pHi) {
+
+      found_region:     // also entered by goto from the two searches below, with s_pRegion already chosen
+        pTrampoline = s_pRegion->pFree;
+        // do a last sanity check on region.
+        if (pTrampoline < pLo || pTrampoline > pHi) {
+            return NULL;
+        }
+        s_pRegion->pFree = (PDETOUR_TRAMPOLINE)pTrampoline->pbRemain;   // pop the head of the free list
+        memset(pTrampoline, 0xcc, sizeof(*pTrampoline));
+        return pTrampoline;
+    }
+
+    // Then check the existing regions for a valid free block.
+    for (s_pRegion = s_pRegions; s_pRegion != NULL; s_pRegion = s_pRegion->pNext) {
+        if (s_pRegion != NULL && s_pRegion->pFree != NULL &&
+            s_pRegion->pFree >= pLo && s_pRegion->pFree <= pHi) {
+            goto found_region;
+        }
+    }
+
+    // We need to allocate a new region.
+
+    // Round pbTarget down to 64KB block.
+    pbTarget = pbTarget - (PtrToUlong(pbTarget) & 0xffff);
+
+    PVOID pbNewlyAllocated =
+        detour_alloc_trampoline_allocate_new(pbTarget, pLo, pHi);
+    if (pbNewlyAllocated != NULL) {
+        s_pRegion = (DETOUR_REGION*)pbNewlyAllocated;   // the new region becomes the default and the list head
+        s_pRegion->dwSignature = DETOUR_REGION_SIGNATURE;
+        s_pRegion->pFree = NULL;
+        s_pRegion->pNext = s_pRegions;
+        s_pRegions = s_pRegion;
+        DETOUR_TRACE(("  Allocated region %p..%p\n\n",
+                      s_pRegion, ((PBYTE)s_pRegion) + DETOUR_REGION_SIZE - 1));
+
+        // Put everything but the first trampoline on the free list.
+        PBYTE pFree = NULL;
+        pTrampoline = ((PDETOUR_TRAMPOLINE)s_pRegion) + 1;  // slot 0 of the block holds the region header
+        for (int i = DETOUR_TRAMPOLINES_PER_REGION - 1; i > 1; i--) {   // NOTE(review): i > 1 leaves slots [0] and [1] off the list; confirm intended
+            pTrampoline[i].pbRemain = pFree;
+            pFree = (PBYTE)&pTrampoline[i];
+        }
+        s_pRegion->pFree = (PDETOUR_TRAMPOLINE)pFree;
+        goto found_region;
+    }
+
+    DETOUR_TRACE(("Couldn't find available memory region!\n"));
+    return NULL;
+}
+
+static void detour_free_trampoline(PDETOUR_TRAMPOLINE pTrampoline)
+{   // Zeroes pTrampoline and pushes it onto the free list of the region that contains it.
+    PDETOUR_REGION pRegion = (PDETOUR_REGION)
+        ((ULONG_PTR)pTrampoline & ~(ULONG_PTR)0xffff);     // region header = address rounded down to 64KB
+
+    memset(pTrampoline, 0, sizeof(*pTrampoline));
+    pTrampoline->pbRemain = (PBYTE)pRegion->pFree;          // pbRemain doubles as the free-list link
+    pRegion->pFree = pTrampoline;
+}
+
+static BOOL detour_is_region_empty(PDETOUR_REGION pRegion)
+{   // TRUE when pRegion carries the signature and no slot's pbRemain points outside the region.
+    // Stop if the region isn't a region (this would be bad).
+    if (pRegion->dwSignature != DETOUR_REGION_SIGNATURE) {
+        return FALSE;
+    }
+
+    PBYTE pbRegionBeg = (PBYTE)pRegion;
+    PBYTE pbRegionLim  = pbRegionBeg + DETOUR_REGION_SIZE;
+
+    // Stop if any of the trampolines aren't free.
+    PDETOUR_TRAMPOLINE pTrampoline = ((PDETOUR_TRAMPOLINE)pRegion) + 1;     // skip the header slot
+    for (int i = 0; i < DETOUR_TRAMPOLINES_PER_REGION; i++) {
+        if (pTrampoline[i].pbRemain != NULL &&                  // NULL or an in-region pointer counts as free;
+            (pTrampoline[i].pbRemain < pbRegionBeg ||           // anything pointing outside the region is
+             pTrampoline[i].pbRemain >= pbRegionLim)) {         // treated as in use
+            return FALSE;
+        }
+    }
+
+    // OK, the region is empty.
+    return TRUE;
+}
+
+static void detour_free_unused_trampoline_regions()
+{   // Unlinks and releases every region that detour_is_region_empty reports as empty.
+    PDETOUR_REGION *ppRegionBase = &s_pRegions;     // address of the link that points at pRegion
+    PDETOUR_REGION pRegion = s_pRegions;
+
+    while (pRegion != NULL) {
+        if (detour_is_region_empty(pRegion)) {
+            *ppRegionBase = pRegion->pNext;         // unlink before the memory is released
+
+            VirtualFree(pRegion, 0, MEM_RELEASE);
+            s_pRegion = NULL;                       // reset the default region whenever any region is freed
+        }
+        else {
+            ppRegionBase = &pRegion->pNext;
+        }
+        pRegion = *ppRegionBase;                    // next candidate in either case
+    }
+}
+
+///////////////////////////////////////////////////////// Transaction Structs.
+//
+struct DetourThread     // singly linked list node holding one thread handle
+{
+    DetourThread *      pNext;      // next node, or NULL
+    HANDLE              hThread;    // thread handle; NOTE(review): ownership not visible here
+};
+
+struct DetourOperation
+{
+    DetourOperation *   pNext;          // next entry in the s_pPendingOperations list
+    BOOL                fIsRemove;      // TRUE when queued by DetourDetach, FALSE by DetourAttachEx
+    PBYTE *             ppbPointer;     // caller's pointer variable, rewritten at commit
+    PBYTE               pbTarget;       // start of the target code that gets patched or restored
+    PDETOUR_TRAMPOLINE  pTrampoline;    // trampoline associated with this target
+    ULONG               dwPerm;         // page protection to put back once the operation is done
+};
+
+static BOOL                 s_fIgnoreTooSmall       = FALSE; // changed via DetourSetIgnoreTooSmall
+static BOOL                 s_fRetainRegions        = FALSE; // TRUE: commit keeps unused regions
+
+static LONG                 s_nPendingThreadId      = 0; // Thread owning pending transaction.
+static LONG                 s_nPendingError         = NO_ERROR; // status of the open transaction
+static PVOID *              s_ppPendingError        = NULL; // pointer argument tied to the failure, if any
+static DetourThread *       s_pPendingThreads       = NULL; // threads suspended by DetourUpdateThread
+static DetourOperation *    s_pPendingOperations    = NULL; // queued attach/detach operations
+
+//////////////////////////////////////////////////////////////////////////////
+//
+PVOID WINAPI DetourCodeFromPointer(_In_ PVOID pPointer, _Out_opt_ PVOID *ppGlobals)
+{
+    PBYTE pbStart = (PBYTE)pPointer; // detour_skip_jmp() is handed a byte pointer
+    return detour_skip_jmp(pbStart, ppGlobals);
+}
+
+//////////////////////////////////////////////////////////// Transaction APIs.
+//
+BOOL WINAPI DetourSetIgnoreTooSmall(_In_ BOOL fIgnore)
+{
+    const BOOL fWas = s_fIgnoreTooSmall; // the replaced setting goes back to the caller
+    s_fIgnoreTooSmall = fIgnore;
+    return fWas;
+}
+
+BOOL WINAPI DetourSetRetainRegions(_In_ BOOL fRetain)
+{
+    const BOOL fWas = s_fRetainRegions; // the replaced setting goes back to the caller
+    s_fRetainRegions = fRetain;
+    return fWas;
+}
+
+PVOID WINAPI DetourSetSystemRegionLowerBound(_In_ PVOID pSystemRegionLowerBound)
+{
+    PVOID const pWas = s_pSystemRegionLowerBound; // old bound is returned to the caller
+    s_pSystemRegionLowerBound = pSystemRegionLowerBound;
+    return pWas;
+}
+
+PVOID WINAPI DetourSetSystemRegionUpperBound(_In_ PVOID pSystemRegionUpperBound)
+{
+    PVOID const pWas = s_pSystemRegionUpperBound; // old bound is returned to the caller
+    s_pSystemRegionUpperBound = pSystemRegionUpperBound;
+    return pWas;
+}
+
+LONG WINAPI DetourTransactionBegin()
+{
+    // Unsynchronized early-out when some transaction is already open.
+_Benign_race_begin_
+    if (s_nPendingThreadId != 0) {
+        return ERROR_INVALID_OPERATION;
+    }
+_Benign_race_end_
+
+    // Claim ownership atomically; the exchange only succeeds from the value 0.
+    const LONG nSelf = (LONG)GetCurrentThreadId();
+    if (InterlockedCompareExchange(&s_nPendingThreadId, nSelf, 0) != 0) {
+        return ERROR_INVALID_OPERATION;
+    }
+
+    s_ppPendingError = NULL;        // no failure recorded yet
+    s_pPendingThreads = NULL;       // no threads suspended yet
+    s_pPendingOperations = NULL;    // no attach/detach queued yet
+
+    // The transaction's initial status is the result of making the trampoline pages writable.
+    s_nPendingError = detour_writable_trampoline_regions();
+    return s_nPendingError;
+}
+
+LONG WINAPI DetourTransactionAbort()
+{
+    // Only the thread that owns the pending transaction may abort it.
+    if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
+        return ERROR_INVALID_OPERATION;
+    }
+
+    // Walk the queued operations, putting back each target's page protection.
+    DetourOperation *pOp = s_pPendingOperations;
+    while (pOp != NULL) {
+        DetourOperation *pNextOp = pOp->pNext;
+        // A failure here is ignored: the code is still accessible either way.
+        DWORD dwIgnored;
+        VirtualProtect(pOp->pbTarget, pOp->pTrampoline->cbRestore,
+                       pOp->dwPerm, &dwIgnored);
+
+        // Only attaches give their trampoline back; a detach leaves it alone.
+        if (!pOp->fIsRemove && pOp->pTrampoline) {
+            detour_free_trampoline(pOp->pTrampoline);
+            pOp->pTrampoline = NULL;
+        }
+
+        delete pOp;
+        pOp = pNextOp;
+    }
+    s_pPendingOperations = NULL;
+
+    // Make sure the trampoline pages are no longer writable.
+    detour_runnable_trampoline_regions();
+
+    // Let every thread suspended for this transaction run again.
+    DetourThread *pThr = s_pPendingThreads;
+    while (pThr != NULL) {
+        DetourThread *pNextThr = pThr->pNext;
+        ResumeThread(pThr->hThread); // nothing can be done if this fails
+        delete pThr;
+        pThr = pNextThr;
+    }
+    s_pPendingThreads = NULL;
+    s_nPendingThreadId = 0;
+
+    return NO_ERROR;
+}
+
+LONG WINAPI DetourTransactionCommit()
+{
+    return DetourTransactionCommitEx(NULL); // NULL: the failed-pointer output is not requested
+}
+
+static BYTE detour_align_from_trampoline(PDETOUR_TRAMPOLINE pTrampoline, BYTE obTrampoline)
+{
+    LONG i = 0; // ends up as the index of the first matching rAlign entry, if any
+    while (i < ARRAYSIZE(pTrampoline->rAlign) &&
+           pTrampoline->rAlign[i].obTrampoline != obTrampoline) {
+        i++;
+    }
+    return (i < ARRAYSIZE(pTrampoline->rAlign)) ? pTrampoline->rAlign[i].obTarget : 0;
+}
+
+static LONG detour_align_from_target(PDETOUR_TRAMPOLINE pTrampoline, LONG obTarget)
+{
+    LONG i = 0; // ends up as the index of the first matching rAlign entry, if any
+    while (i < ARRAYSIZE(pTrampoline->rAlign) &&
+           pTrampoline->rAlign[i].obTarget != obTarget) {
+        i++;
+    }
+    return (i < ARRAYSIZE(pTrampoline->rAlign)) ? pTrampoline->rAlign[i].obTrampoline : 0;
+}
+
+LONG WINAPI DetourTransactionCommitEx(_Out_opt_ PVOID **pppFailedPointer)
+{
+    // Applies every queued attach/detach, then ends the calling thread's transaction.
+    if (pppFailedPointer != NULL) {
+        *pppFailedPointer = s_ppPendingError; // report the last recorded failure up front
+    }
+    if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
+        return ERROR_INVALID_OPERATION;
+    }
+
+    // If any of the pending operations failed, then we abort the whole transaction.
+    if (s_nPendingError != NO_ERROR) {
+        DETOUR_BREAK();
+        DetourTransactionAbort();
+        return s_nPendingError;
+    }
+
+    // Cursors shared by the passes below; freed notes whether a trampoline was released.
+    DetourOperation *o;
+    DetourThread *t;
+    BOOL freed = FALSE;
+
+    // Insert or remove each of the detours.
+    for (o = s_pPendingOperations; o != NULL; o = o->pNext) {
+        if (o->fIsRemove) {
+            CopyMemory(o->pbTarget,
+                       o->pTrampoline->rbRestore,
+                       o->pTrampoline->cbRestore);
+#ifdef DETOURS_IA64
+            *o->ppbPointer = (PBYTE)o->pTrampoline->ppldTarget;
+#endif // DETOURS_IA64
+
+#ifdef DETOURS_X86
+            *o->ppbPointer = o->pbTarget;
+#endif // DETOURS_X86
+
+#ifdef DETOURS_X64
+            *o->ppbPointer = o->pbTarget;
+#endif // DETOURS_X64
+
+#ifdef DETOURS_ARM
+            *o->ppbPointer = DETOURS_PBYTE_TO_PFUNC(o->pbTarget);
+#endif // DETOURS_ARM
+
+#ifdef DETOURS_ARM64
+            *o->ppbPointer = o->pbTarget;
+#endif // DETOURS_ARM64
+        }
+        else {
+            DETOUR_TRACE(("detours: pbTramp =%p, pbRemain=%p, pbDetour=%p, cbRestore=%d\n",
+                          o->pTrampoline,
+                          o->pTrampoline->pbRemain,
+                          o->pTrampoline->pbDetour,
+                          o->pTrampoline->cbRestore));
+
+            DETOUR_TRACE(("detours: pbTarget=%p: "
+                          "%02x %02x %02x %02x "
+                          "%02x %02x %02x %02x "
+                          "%02x %02x %02x %02x [before]\n",
+                          o->pbTarget,
+                          o->pbTarget[0], o->pbTarget[1], o->pbTarget[2], o->pbTarget[3],
+                          o->pbTarget[4], o->pbTarget[5], o->pbTarget[6], o->pbTarget[7],
+                          o->pbTarget[8], o->pbTarget[9], o->pbTarget[10], o->pbTarget[11]));
+
+#ifdef DETOURS_IA64
+            ((DETOUR_IA64_BUNDLE*)o->pbTarget)
+                ->SetBrl((UINT64)&o->pTrampoline->bAllocFrame);
+            *o->ppbPointer = (PBYTE)&o->pTrampoline->pldTrampoline;
+#endif // DETOURS_IA64
+
+#ifdef DETOURS_X64
+            detour_gen_jmp_indirect(o->pTrampoline->rbCodeIn, &o->pTrampoline->pbDetour);
+            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, o->pTrampoline->rbCodeIn);
+            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
+            *o->ppbPointer = o->pTrampoline->rbCode;
+            UNREFERENCED_PARAMETER(pbCode);
+#endif // DETOURS_X64
+
+#ifdef DETOURS_X86
+            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, o->pTrampoline->pbDetour);
+            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
+            *o->ppbPointer = o->pTrampoline->rbCode;
+            UNREFERENCED_PARAMETER(pbCode);
+#endif // DETOURS_X86
+
+#ifdef DETOURS_ARM
+            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, NULL, o->pTrampoline->pbDetour);
+            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
+            *o->ppbPointer = DETOURS_PBYTE_TO_PFUNC(o->pTrampoline->rbCode);
+            UNREFERENCED_PARAMETER(pbCode);
+#endif // DETOURS_ARM
+
+#ifdef DETOURS_ARM64
+            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, NULL, o->pTrampoline->pbDetour);
+            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
+            *o->ppbPointer = o->pTrampoline->rbCode;
+            UNREFERENCED_PARAMETER(pbCode);
+#endif // DETOURS_ARM64
+
+            DETOUR_TRACE(("detours: pbTarget=%p: "
+                          "%02x %02x %02x %02x "
+                          "%02x %02x %02x %02x "
+                          "%02x %02x %02x %02x [after]\n",
+                          o->pbTarget,
+                          o->pbTarget[0], o->pbTarget[1], o->pbTarget[2], o->pbTarget[3],
+                          o->pbTarget[4], o->pbTarget[5], o->pbTarget[6], o->pbTarget[7],
+                          o->pbTarget[8], o->pbTarget[9], o->pbTarget[10], o->pbTarget[11]));
+
+            DETOUR_TRACE(("detours: pbTramp =%p: "
+                          "%02x %02x %02x %02x "
+                          "%02x %02x %02x %02x "
+                          "%02x %02x %02x %02x\n",
+                          o->pTrampoline,
+                          o->pTrampoline->rbCode[0], o->pTrampoline->rbCode[1],
+                          o->pTrampoline->rbCode[2], o->pTrampoline->rbCode[3],
+                          o->pTrampoline->rbCode[4], o->pTrampoline->rbCode[5],
+                          o->pTrampoline->rbCode[6], o->pTrampoline->rbCode[7],
+                          o->pTrampoline->rbCode[8], o->pTrampoline->rbCode[9],
+                          o->pTrampoline->rbCode[10], o->pTrampoline->rbCode[11]));
+
+#ifdef DETOURS_IA64
+            DETOUR_TRACE(("\n"));
+            DETOUR_TRACE(("detours:  &pldTrampoline  =%p\n",
+                          &o->pTrampoline->pldTrampoline));
+            DETOUR_TRACE(("detours:  &bMovlTargetGp  =%p [%p]\n",
+                          &o->pTrampoline->bMovlTargetGp,
+                          o->pTrampoline->bMovlTargetGp.GetMovlGp()));
+            DETOUR_TRACE(("detours:  &rbCode         =%p [%p]\n",
+                          &o->pTrampoline->rbCode,
+                          ((DETOUR_IA64_BUNDLE&)o->pTrampoline->rbCode).GetBrlTarget()));
+            DETOUR_TRACE(("detours:  &bBrlRemainEip  =%p [%p]\n",
+                          &o->pTrampoline->bBrlRemainEip,
+                          o->pTrampoline->bBrlRemainEip.GetBrlTarget()));
+            DETOUR_TRACE(("detours:  &bMovlDetourGp  =%p [%p]\n",
+                          &o->pTrampoline->bMovlDetourGp,
+                          o->pTrampoline->bMovlDetourGp.GetMovlGp()));
+            DETOUR_TRACE(("detours:  &bBrlDetourEip  =%p [%p]\n",
+                          &o->pTrampoline->bCallDetour,
+                          o->pTrampoline->bCallDetour.GetBrlTarget()));
+            DETOUR_TRACE(("detours:  pldDetour       =%p [%p]\n",
+                          o->pTrampoline->ppldDetour->EntryPoint,
+                          o->pTrampoline->ppldDetour->GlobalPointer));
+            DETOUR_TRACE(("detours:  pldTarget       =%p [%p]\n",
+                          o->pTrampoline->ppldTarget->EntryPoint,
+                          o->pTrampoline->ppldTarget->GlobalPointer));
+            DETOUR_TRACE(("detours:  pbRemain        =%p\n",
+                          o->pTrampoline->pbRemain));
+            DETOUR_TRACE(("detours:  pbDetour        =%p\n",
+                          o->pTrampoline->pbDetour));
+            DETOUR_TRACE(("\n"));
+#endif // DETOURS_IA64
+        }
+    }
+
+    // Move the instruction pointer of any suspended thread caught inside patched code.
+    for (t = s_pPendingThreads; t != NULL; t = t->pNext) {
+        CONTEXT cxt;
+        cxt.ContextFlags = CONTEXT_CONTROL;
+
+#undef DETOURS_EIP
+
+#ifdef DETOURS_X86
+#define DETOURS_EIP         Eip
+#endif // DETOURS_X86
+
+#ifdef DETOURS_X64
+#define DETOURS_EIP         Rip
+#endif // DETOURS_X64
+
+#ifdef DETOURS_IA64
+#define DETOURS_EIP         StIIP
+#endif // DETOURS_IA64
+
+#ifdef DETOURS_ARM
+#define DETOURS_EIP         Pc
+#endif // DETOURS_ARM
+
+#ifdef DETOURS_ARM64
+#define DETOURS_EIP         Pc
+#endif // DETOURS_ARM64
+
+typedef ULONG_PTR DETOURS_EIP_TYPE;
+
+        if (GetThreadContext(t->hThread, &cxt)) {
+            for (o = s_pPendingOperations; o != NULL; o = o->pNext) {
+                if (o->fIsRemove) {
+                    if (cxt.DETOURS_EIP >= (DETOURS_EIP_TYPE)(ULONG_PTR)o->pTrampoline &&
+                        cxt.DETOURS_EIP < (DETOURS_EIP_TYPE)((ULONG_PTR)o->pTrampoline
+                                                             + sizeof(*o->pTrampoline))
+                       ) {
+                        // Thread is inside the trampoline being removed (the range spans the
+                        // trampoline object, not a pointer): send it back to the target.
+                        cxt.DETOURS_EIP = (DETOURS_EIP_TYPE)
+                            ((ULONG_PTR)o->pbTarget
+                             + detour_align_from_trampoline(o->pTrampoline,
+                                                            (BYTE)(cxt.DETOURS_EIP
+                                                                   - (DETOURS_EIP_TYPE)(ULONG_PTR)
+                                                                   o->pTrampoline)));
+
+                        SetThreadContext(t->hThread, &cxt);
+                    }
+                }
+                else {
+                    if (cxt.DETOURS_EIP >= (DETOURS_EIP_TYPE)(ULONG_PTR)o->pbTarget &&
+                        cxt.DETOURS_EIP < (DETOURS_EIP_TYPE)((ULONG_PTR)o->pbTarget
+                                                             + o->pTrampoline->cbRestore)
+                       ) {
+                        // Thread is inside the overwritten target bytes: continue in the trampoline copy.
+                        cxt.DETOURS_EIP = (DETOURS_EIP_TYPE)
+                            ((ULONG_PTR)o->pTrampoline
+                             + detour_align_from_target(o->pTrampoline,
+                                                        (BYTE)(cxt.DETOURS_EIP
+                                                               - (DETOURS_EIP_TYPE)(ULONG_PTR)
+                                                               o->pbTarget)));
+
+                        SetThreadContext(t->hThread, &cxt);
+                    }
+                }
+            }
+        }
+#undef DETOURS_EIP
+    }
+
+    // Restore all of the page permissions and flush the icache.
+    HANDLE hProcess = GetCurrentProcess();
+    for (o = s_pPendingOperations; o != NULL;) {
+        // We don't care if this fails, because the code is still accessible.
+        DWORD dwOld;
+        VirtualProtect(o->pbTarget, o->pTrampoline->cbRestore, o->dwPerm, &dwOld);
+        FlushInstructionCache(hProcess, o->pbTarget, o->pTrampoline->cbRestore);
+
+        if (o->fIsRemove && o->pTrampoline) {
+            detour_free_trampoline(o->pTrampoline);
+            o->pTrampoline = NULL;
+            freed = TRUE;
+        }
+
+        DetourOperation *n = o->pNext;
+        delete o;
+        o = n;
+    }
+    s_pPendingOperations = NULL;
+
+    // Free any trampoline regions that are now unused.
+    if (freed && !s_fRetainRegions) {
+        detour_free_unused_trampoline_regions();
+    }
+
+    // Make sure the trampoline pages are no longer writable.
+    detour_runnable_trampoline_regions();
+
+    // Resume any suspended threads.
+    for (t = s_pPendingThreads; t != NULL;) {
+        // There is nothing we can do if this fails.
+        ResumeThread(t->hThread);
+
+        DetourThread *n = t->pNext;
+        delete t;
+        t = n;
+    }
+    s_pPendingThreads = NULL;
+    s_nPendingThreadId = 0;
+
+    if (pppFailedPointer != NULL) {
+        *pppFailedPointer = s_ppPendingError;
+    }
+
+    return s_nPendingError;
+}
+
+LONG WINAPI DetourUpdateThread(_In_ HANDLE hThread)
+{
+    // A transaction that has already failed takes no further work.
+    if (s_nPendingError != NO_ERROR) {
+        return s_nPendingError;
+    }
+
+    // The calling thread is never suspended; such requests quietly succeed.
+    if (hThread == GetCurrentThread()) {
+        return NO_ERROR;
+    }
+
+    LONG error;
+    DetourThread *pNode = new NOTHROW DetourThread;
+    if (pNode == NULL) {
+        error = ERROR_NOT_ENOUGH_MEMORY;
+        goto fail;
+    }
+    if (SuspendThread(hThread) == (DWORD)-1) {
+        error = GetLastError();
+        DETOUR_BREAK();
+        goto fail;
+    }
+
+    // Suspended: push the thread onto the pending list so commit/abort can resume it.
+    pNode->hThread = hThread;
+    pNode->pNext = s_pPendingThreads;
+    s_pPendingThreads = pNode;
+    return NO_ERROR;
+
+  fail:
+    // Record the failure for the whole transaction (no pointer is associated).
+    if (pNode != NULL) {
+        delete pNode;
+    }
+    s_nPendingError = error;
+    s_ppPendingError = NULL;
+    DETOUR_BREAK();
+    return error;
+}
+
+///////////////////////////////////////////////////////////// Transacted APIs.
+//
+LONG WINAPI DetourAttach(_Inout_ PVOID *ppPointer,
+                         _In_ PVOID pDetour)
+{
+    return DetourAttachEx(ppPointer, pDetour, NULL, NULL, NULL); // none of the optional outputs requested
+}
+
+LONG WINAPI DetourAttachEx(_Inout_ PVOID *ppPointer,
+                           _In_ PVOID pDetour,
+                           _Out_opt_ PDETOUR_TRAMPOLINE *ppRealTrampoline,
+                           _Out_opt_ PVOID *ppRealTarget,
+                           _Out_opt_ PVOID *ppRealDetour)
+{
+    LONG error = NO_ERROR;
+    // Queues an attach: the trampoline is built here, the target is patched at commit.
+    if (ppRealTrampoline != NULL) {
+        *ppRealTrampoline = NULL;
+    }
+    if (ppRealTarget != NULL) {
+        *ppRealTarget = NULL;
+    }
+    if (ppRealDetour != NULL) {
+        *ppRealDetour = NULL;
+    }
+    if (pDetour == NULL) {
+        DETOUR_TRACE(("empty detour\n"));
+        return ERROR_INVALID_PARAMETER;
+    }
+
+    if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
+        DETOUR_TRACE(("transaction conflict with thread id=%d\n", s_nPendingThreadId));
+        return ERROR_INVALID_OPERATION;
+    }
+
+    // If any of the pending operations failed, then we don't need to do this.
+    if (s_nPendingError != NO_ERROR) {
+        DETOUR_TRACE(("pending transaction error=%d\n", s_nPendingError));
+        return s_nPendingError;
+    }
+
+    if (ppPointer == NULL) {
+        DETOUR_TRACE(("ppPointer is null\n"));
+        return ERROR_INVALID_HANDLE;
+    }
+    if (*ppPointer == NULL) {
+        error = ERROR_INVALID_HANDLE;
+        s_nPendingError = error;
+        s_ppPendingError = ppPointer;
+        DETOUR_TRACE(("*ppPointer is null (ppPointer=%p)\n", ppPointer));
+        DETOUR_BREAK();
+        return error;
+    }
+
+    PBYTE pbTarget = (PBYTE)*ppPointer;
+    PDETOUR_TRAMPOLINE pTrampoline = NULL;
+    DetourOperation *o = NULL;
+
+#ifdef DETOURS_IA64
+    PPLABEL_DESCRIPTOR ppldDetour = (PPLABEL_DESCRIPTOR)pDetour;
+    PPLABEL_DESCRIPTOR ppldTarget = (PPLABEL_DESCRIPTOR)pbTarget;
+    PVOID pDetourGlobals = NULL;
+    PVOID pTargetGlobals = NULL;
+
+    pDetour = (PBYTE)DetourCodeFromPointer(ppldDetour, &pDetourGlobals);
+    pbTarget = (PBYTE)DetourCodeFromPointer(ppldTarget, &pTargetGlobals);
+    DETOUR_TRACE(("  ppldDetour=%p, code=%p [gp=%p]\n",
+                  ppldDetour, pDetour, pDetourGlobals));
+    DETOUR_TRACE(("  ppldTarget=%p, code=%p [gp=%p]\n",
+                  ppldTarget, pbTarget, pTargetGlobals));
+#else // !DETOURS_IA64
+    pbTarget = (PBYTE)DetourCodeFromPointer(pbTarget, NULL);
+    pDetour = DetourCodeFromPointer(pDetour, NULL);
+#endif // !DETOURS_IA64
+
+    // Don't follow a jump if its destination is the target function.
+    // This happens when the detour does nothing other than call the target.
+    if (pDetour == (PVOID)pbTarget) {
+        if (s_fIgnoreTooSmall) {
+            goto stop;
+        }
+        else {
+            DETOUR_BREAK();
+            goto fail; // NOTE(review): error is still NO_ERROR on this path
+        }
+    }
+
+    if (ppRealTarget != NULL) {
+        *ppRealTarget = pbTarget;
+    }
+    if (ppRealDetour != NULL) {
+        *ppRealDetour = pDetour;
+    }
+
+    o = new NOTHROW DetourOperation;
+    if (o == NULL) {
+        error = ERROR_NOT_ENOUGH_MEMORY;
+      fail: // error exit: the failure becomes the transaction's pending error
+        s_nPendingError = error;
+        DETOUR_BREAK();
+      stop: // shared cleanup; reached directly when s_fIgnoreTooSmall skips recording the error
+        if (pTrampoline != NULL) {
+            detour_free_trampoline(pTrampoline);
+            pTrampoline = NULL;
+            if (ppRealTrampoline != NULL) {
+                *ppRealTrampoline = NULL;
+            }
+        }
+        if (o != NULL) {
+            delete o;
+            o = NULL;
+        }
+        s_ppPendingError = ppPointer;
+        return error;
+    }
+
+    pTrampoline = detour_alloc_trampoline(pbTarget);
+    if (pTrampoline == NULL) {
+        error = ERROR_NOT_ENOUGH_MEMORY;
+        DETOUR_BREAK();
+        goto fail;
+    }
+
+    if (ppRealTrampoline != NULL) {
+        *ppRealTrampoline = pTrampoline;
+    }
+
+    DETOUR_TRACE(("detours: pbTramp=%p, pDetour=%p\n", pTrampoline, pDetour));
+
+    memset(pTrampoline->rAlign, 0, sizeof(pTrampoline->rAlign)); // start with an empty offset table
+
+    // Determine the number of movable target instructions.
+    PBYTE pbSrc = pbTarget;
+    PBYTE pbTrampoline = pTrampoline->rbCode;
+#ifdef DETOURS_IA64
+    PBYTE pbPool = (PBYTE)(&pTrampoline->bBranchIslands + 1);
+#else
+    PBYTE pbPool = pbTrampoline + sizeof(pTrampoline->rbCode);
+#endif
+    ULONG cbTarget = 0;             // target bytes consumed so far
+    ULONG cbJump = SIZE_OF_JMP;     // target bytes that must be consumed before attaching
+    ULONG nAlign = 0;               // rAlign entries filled so far
+
+#ifdef DETOURS_ARM
+    // On ARM, we need an extra instruction when the function isn't 32-bit aligned.
+    // Check if the existing code is another detour (or at least a similar
+    // "ldr pc, [PC+0]" jump.
+    if ((ULONG)pbTarget & 2) {
+        cbJump += 2;
+
+        ULONG op = fetch_thumb_opcode(pbSrc);
+        if (op == 0xbf00) {
+            op = fetch_thumb_opcode(pbSrc + 2);
+            if (op == 0xf8dff000) { // LDR PC,[PC]
+                *((PUSHORT&)pbTrampoline)++ = *((PUSHORT&)pbSrc)++;
+                *((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
+                *((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
+                cbTarget = (LONG)(pbSrc - pbTarget);
+                // We will fall through the "while" because cbTarget is now >= cbJump.
+            }
+        }
+    }
+    else {
+        ULONG op = fetch_thumb_opcode(pbSrc);
+        if (op == 0xf8dff000) { // LDR PC,[PC]
+            *((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
+            *((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
+            cbTarget = (LONG)(pbSrc - pbTarget);
+            // We will fall through the "while" because cbTarget is now >= cbJump.
+        }
+    }
+#endif
+
+    while (cbTarget < cbJump) {
+        PBYTE pbOp = pbSrc;
+        LONG lExtra = 0;
+
+        DETOUR_TRACE((" DetourCopyInstruction(%p,%p)\n",
+                      pbTrampoline, pbSrc));
+        pbSrc = (PBYTE)
+            DetourCopyInstruction(pbTrampoline, (PVOID*)&pbPool, pbSrc, NULL, &lExtra);
+        DETOUR_TRACE((" DetourCopyInstruction() = %p (%d bytes)\n",
+                      pbSrc, (int)(pbSrc - pbOp)));
+        pbTrampoline += (pbSrc - pbOp) + lExtra;
+        cbTarget = (LONG)(pbSrc - pbTarget);
+        pTrampoline->rAlign[nAlign].obTarget = cbTarget; // where this instruction ends in the target...
+        pTrampoline->rAlign[nAlign].obTrampoline = pbTrampoline - pTrampoline->rbCode; // ...and in the copy
+        nAlign++;
+
+        if (nAlign >= ARRAYSIZE(pTrampoline->rAlign)) {
+            break;
+        }
+
+        if (detour_does_code_end_function(pbOp)) {
+            break;
+        }
+    }
+
+    // Consume, but don't duplicate padding if it is needed and available.
+    while (cbTarget < cbJump) {
+        LONG cFiller = detour_is_code_filler(pbSrc);
+        if (cFiller == 0) {
+            break;
+        }
+
+        pbSrc += cFiller;
+        cbTarget = (LONG)(pbSrc - pbTarget);
+    }
+
+#if DETOUR_DEBUG
+    {
+        DETOUR_TRACE((" detours: rAlign ["));
+        LONG n = 0;
+        for (n = 0; n < ARRAYSIZE(pTrampoline->rAlign); n++) {
+            if (pTrampoline->rAlign[n].obTarget == 0 &&
+                pTrampoline->rAlign[n].obTrampoline == 0) {
+                break;
+            }
+            DETOUR_TRACE((" %d/%d",
+                          pTrampoline->rAlign[n].obTarget,
+                          pTrampoline->rAlign[n].obTrampoline
+                          ));
+
+        }
+        DETOUR_TRACE((" ]\n"));
+    }
+#endif
+
+    if (cbTarget < cbJump || nAlign > ARRAYSIZE(pTrampoline->rAlign)) {
+        // Too few instructions.
+        // NOTE(review): the copy loop breaks at nAlign >= ARRAYSIZE, so the second test never fires.
+        error = ERROR_INVALID_BLOCK;
+        if (s_fIgnoreTooSmall) {
+            goto stop;
+        }
+        else {
+            DETOUR_BREAK();
+            goto fail;
+        }
+    }
+
+    if (pbTrampoline > pbPool) { // the copied code ran past pbPool
+        __debugbreak();
+    }
+
+    pTrampoline->cbCode = (BYTE)(pbTrampoline - pTrampoline->rbCode);
+    pTrampoline->cbRestore = (BYTE)cbTarget;
+    CopyMemory(pTrampoline->rbRestore, pbTarget, cbTarget); // saved so a later detach can restore them
+
+#if !defined(DETOURS_IA64)
+    if (cbTarget > sizeof(pTrampoline->rbCode) - cbJump) {
+        // Too many instructions.
+        error = ERROR_INVALID_HANDLE;
+        DETOUR_BREAK();
+        goto fail;
+    }
+#endif // !DETOURS_IA64
+
+    pTrampoline->pbRemain = pbTarget + cbTarget;
+    pTrampoline->pbDetour = (PBYTE)pDetour;
+
+#ifdef DETOURS_IA64
+    pTrampoline->ppldDetour = ppldDetour;
+    pTrampoline->ppldTarget = ppldTarget;
+    pTrampoline->pldTrampoline.EntryPoint = (UINT64)&pTrampoline->bMovlTargetGp;
+    pTrampoline->pldTrampoline.GlobalPointer = (UINT64)pDetourGlobals;
+
+    ((DETOUR_IA64_BUNDLE *)pTrampoline->rbCode)->SetStop();
+
+    pTrampoline->bMovlTargetGp.SetMovlGp((UINT64)pTargetGlobals);
+    pTrampoline->bBrlRemainEip.SetBrl((UINT64)pTrampoline->pbRemain);
+
+    // Alloc frame:      alloc r41=ar.pfs,11,0,8,0; mov r40=rp
+    pTrampoline->bAllocFrame.wide[0] = 0x00000580164d480c;
+    pTrampoline->bAllocFrame.wide[1] = 0x00c4000500000200;
+    // save r36, r37, r38.
+    pTrampoline->bSave37to39.wide[0] = 0x031021004e019001;
+    pTrampoline->bSave37to39.wide[1] = 0x8401280600420098;
+    // save r34,r35,r36: adds r47=0,r36; adds r46=0,r35; adds r45=0,r34
+    pTrampoline->bSave34to36.wide[0] = 0x02e0210048017800;
+    pTrampoline->bSave34to36.wide[1] = 0x84011005a042008c;
+    // save gp,r32,r33"  adds r44=0,r33; adds r43=0,r32; adds r42=0,gp ;;
+    pTrampoline->bSaveGPto33.wide[0] = 0x02b0210042016001;
+    pTrampoline->bSaveGPto33.wide[1] = 0x8400080540420080;
+    // set detour GP.
+    pTrampoline->bMovlDetourGp.SetMovlGp((UINT64)pDetourGlobals);
+    // call detour:      brl.call.sptk.few rp=detour ;;
+    pTrampoline->bCallDetour.wide[0] = 0x0000000100000005;
+    pTrampoline->bCallDetour.wide[1] = 0xd000001000000000;
+    pTrampoline->bCallDetour.SetBrlTarget((UINT64)pDetour);
+    // pop frame & gp:   adds gp=0,r42; mov rp=r40,+0;; mov.i ar.pfs=r41
+    pTrampoline->bPopFrameGp.wide[0] = 0x4000210054000802;
+    pTrampoline->bPopFrameGp.wide[1] = 0x00aa029000038005;
+    // return to caller: br.ret.sptk.many rp ;;
+    pTrampoline->bReturn.wide[0] = 0x0000000100000019;
+    pTrampoline->bReturn.wide[1] = 0x0084000880000200;
+
+    DETOUR_TRACE(("detours: &bMovlTargetGp=%p\n", &pTrampoline->bMovlTargetGp));
+    DETOUR_TRACE(("detours: &bMovlDetourGp=%p\n", &pTrampoline->bMovlDetourGp));
+#endif // DETOURS_IA64
+
+    pbTrampoline = pTrampoline->rbCode + pTrampoline->cbCode;
+#ifdef DETOURS_X64
+    pbTrampoline = detour_gen_jmp_indirect(pbTrampoline, &pTrampoline->pbRemain);
+    pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
+#endif // DETOURS_X64
+
+#ifdef DETOURS_X86
+    pbTrampoline = detour_gen_jmp_immediate(pbTrampoline, pTrampoline->pbRemain);
+    pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
+#endif // DETOURS_X86
+
+#ifdef DETOURS_ARM
+    pbTrampoline = detour_gen_jmp_immediate(pbTrampoline, &pbPool, pTrampoline->pbRemain);
+    pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
+#endif // DETOURS_ARM
+
+#ifdef DETOURS_ARM64
+    pbTrampoline = detour_gen_jmp_immediate(pbTrampoline, &pbPool, pTrampoline->pbRemain);
+    pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
+#endif // DETOURS_ARM64
+
+    (void)pbTrampoline;
+
+    DWORD dwOld = 0; // receives the previous protection, kept in o->dwPerm below
+    if (!VirtualProtect(pbTarget, cbTarget, PAGE_EXECUTE_READWRITE, &dwOld)) {
+        error = GetLastError();
+        DETOUR_BREAK();
+        goto fail;
+    }
+
+    DETOUR_TRACE(("detours: pbTarget=%p: "
+                  "%02x %02x %02x %02x "
+                  "%02x %02x %02x %02x "
+                  "%02x %02x %02x %02x\n",
+                  pbTarget,
+                  pbTarget[0], pbTarget[1], pbTarget[2], pbTarget[3],
+                  pbTarget[4], pbTarget[5], pbTarget[6], pbTarget[7],
+                  pbTarget[8], pbTarget[9], pbTarget[10], pbTarget[11]));
+    DETOUR_TRACE(("detours: pbTramp =%p: "
+                  "%02x %02x %02x %02x "
+                  "%02x %02x %02x %02x "
+                  "%02x %02x %02x %02x\n",
+                  pTrampoline,
+                  pTrampoline->rbCode[0], pTrampoline->rbCode[1],
+                  pTrampoline->rbCode[2], pTrampoline->rbCode[3],
+                  pTrampoline->rbCode[4], pTrampoline->rbCode[5],
+                  pTrampoline->rbCode[6], pTrampoline->rbCode[7],
+                  pTrampoline->rbCode[8], pTrampoline->rbCode[9],
+                  pTrampoline->rbCode[10], pTrampoline->rbCode[11]));
+    // Everything succeeded: push the operation onto the pending list for commit.
+    o->fIsRemove = FALSE;
+    o->ppbPointer = (PBYTE*)ppPointer;
+    o->pTrampoline = pTrampoline;
+    o->pbTarget = pbTarget;
+    o->dwPerm = dwOld;
+    o->pNext = s_pPendingOperations;
+    s_pPendingOperations = o;
+
+    return NO_ERROR;
+}
+
+LONG WINAPI DetourDetach(_Inout_ PVOID *ppPointer,
+                         _In_ PVOID pDetour)
+{
+    LONG error = NO_ERROR;
+
+    if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
+        return ERROR_INVALID_OPERATION;
+    }
+
+    // If any of the pending operations failed, then we don't need to do this.
+    if (s_nPendingError != NO_ERROR) {
+        return s_nPendingError;
+    }
+
+    if (pDetour == NULL) {
+        return ERROR_INVALID_PARAMETER;
+    }
+    if (ppPointer == NULL) {
+        return ERROR_INVALID_HANDLE;
+    }
+    if (*ppPointer == NULL) {
+        error = ERROR_INVALID_HANDLE;
+        s_nPendingError = error;
+        s_ppPendingError = ppPointer;
+        DETOUR_BREAK();
+        return error;
+    }
+
+    DetourOperation *o = new NOTHROW DetourOperation;
+    if (o == NULL) {
+        error = ERROR_NOT_ENOUGH_MEMORY;
+      fail:
+        s_nPendingError = error;
+        DETOUR_BREAK();
+      stop:
+        if (o != NULL) {
+            delete o;
+            o = NULL;
+        }
+        s_ppPendingError = ppPointer;
+        return error;
+    }
+
+
+#ifdef DETOURS_IA64
+    PPLABEL_DESCRIPTOR ppldTrampo = (PPLABEL_DESCRIPTOR)*ppPointer;
+    PPLABEL_DESCRIPTOR ppldDetour = (PPLABEL_DESCRIPTOR)pDetour;
+    PVOID pDetourGlobals = NULL;
+    PVOID pTrampoGlobals = NULL;
+
+    pDetour = (PBYTE)DetourCodeFromPointer(ppldDetour, &pDetourGlobals);
+    PDETOUR_TRAMPOLINE pTrampoline = (PDETOUR_TRAMPOLINE)
+        DetourCodeFromPointer(ppldTrampo, &pTrampoGlobals);
+    DETOUR_TRACE(("  ppldDetour=%p, code=%p [gp=%p]\n",
+                  ppldDetour, pDetour, pDetourGlobals));
+    DETOUR_TRACE(("  ppldTrampo=%p, code=%p [gp=%p]\n",
+                  ppldTrampo, pTrampoline, pTrampoGlobals));
+
+
+    DETOUR_TRACE(("\n"));
+    DETOUR_TRACE(("detours:  &pldTrampoline  =%p\n",
+                  &pTrampoline->pldTrampoline));
+    DETOUR_TRACE(("detours:  &bMovlTargetGp  =%p [%p]\n",
+                  &pTrampoline->bMovlTargetGp,
+                  pTrampoline->bMovlTargetGp.GetMovlGp()));
+    DETOUR_TRACE(("detours:  &rbCode         =%p [%p]\n",
+                  &pTrampoline->rbCode,
+                  ((DETOUR_IA64_BUNDLE&)pTrampoline->rbCode).GetBrlTarget()));
+    DETOUR_TRACE(("detours:  &bBrlRemainEip  =%p [%p]\n",
+                  &pTrampoline->bBrlRemainEip,
+                  pTrampoline->bBrlRemainEip.GetBrlTarget()));
+    DETOUR_TRACE(("detours:  &bMovlDetourGp  =%p [%p]\n",
+                  &pTrampoline->bMovlDetourGp,
+                  pTrampoline->bMovlDetourGp.GetMovlGp()));
+    DETOUR_TRACE(("detours:  &bBrlDetourEip  =%p [%p]\n",
+                  &pTrampoline->bCallDetour,
+                  pTrampoline->bCallDetour.GetBrlTarget()));
+    DETOUR_TRACE(("detours:  pldDetour       =%p [%p]\n",
+                  pTrampoline->ppldDetour->EntryPoint,
+                  pTrampoline->ppldDetour->GlobalPointer));
+    DETOUR_TRACE(("detours:  pldTarget       =%p [%p]\n",
+                  pTrampoline->ppldTarget->EntryPoint,
+                  pTrampoline->ppldTarget->GlobalPointer));
+    DETOUR_TRACE(("detours:  pbRemain        =%p\n",
+                  pTrampoline->pbRemain));
+    DETOUR_TRACE(("detours:  pbDetour        =%p\n",
+                  pTrampoline->pbDetour));
+    DETOUR_TRACE(("\n"));
+#else // !DETOURS_IA64
+    PDETOUR_TRAMPOLINE pTrampoline =
+        (PDETOUR_TRAMPOLINE)DetourCodeFromPointer(*ppPointer, NULL);
+    pDetour = DetourCodeFromPointer(pDetour, NULL);
+#endif // !DETOURS_IA64
+
+    ////////////////////////////////////// Verify that Trampoline is in place.
+    //
+    LONG cbTarget = pTrampoline->cbRestore;
+    PBYTE pbTarget = pTrampoline->pbRemain - cbTarget;
+    if (cbTarget == 0 || cbTarget > sizeof(pTrampoline->rbCode)) {
+        error = ERROR_INVALID_BLOCK;
+        if (s_fIgnoreTooSmall) {
+            goto stop;
+        }
+        else {
+            DETOUR_BREAK();
+            goto fail;
+        }
+    }
+
+    if (pTrampoline->pbDetour != pDetour) {
+        error = ERROR_INVALID_BLOCK;
+        if (s_fIgnoreTooSmall) {
+            goto stop;
+        }
+        else {
+            DETOUR_BREAK();
+            goto fail;
+        }
+    }
+
+    DWORD dwOld = 0;
+    if (!VirtualProtect(pbTarget, cbTarget,
+                        PAGE_EXECUTE_READWRITE, &dwOld)) {
+        error = GetLastError();
+        DETOUR_BREAK();
+        goto fail;
+    }
+
+    o->fIsRemove = TRUE;
+    o->ppbPointer = (PBYTE*)ppPointer;
+    o->pTrampoline = pTrampoline;
+    o->pbTarget = pbTarget;
+    o->dwPerm = dwOld;
+    o->pNext = s_pPendingOperations;
+    s_pPendingOperations = o;
+
+    return NO_ERROR;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Helpers for manipulating page protection.
+//
+
+// For reference:
+//   PAGE_NOACCESS          0x01
+//   PAGE_READONLY          0x02
+//   PAGE_READWRITE         0x04
+//   PAGE_WRITECOPY         0x08
+//   PAGE_EXECUTE           0x10
+//   PAGE_EXECUTE_READ      0x20
+//   PAGE_EXECUTE_READWRITE 0x40
+//   PAGE_EXECUTE_WRITECOPY 0x80
+//   PAGE_GUARD             ...
+//   PAGE_NOCACHE           ...
+//   PAGE_WRITECOMBINE      ...
+
+#define DETOUR_PAGE_EXECUTE_ALL    (PAGE_EXECUTE |              \
+                                    PAGE_EXECUTE_READ |         \
+                                    PAGE_EXECUTE_READWRITE |    \
+                                    PAGE_EXECUTE_WRITECOPY)     // all executable access values
+
+#define DETOUR_PAGE_NO_EXECUTE_ALL (PAGE_NOACCESS |             \
+                                    PAGE_READONLY |             \
+                                    PAGE_READWRITE |            \
+                                    PAGE_WRITECOPY)             // all non-executable access values
+// Every bit outside the two access masks above (PAGE_GUARD, PAGE_NOCACHE, PAGE_WRITECOMBINE, ...).
+#define DETOUR_PAGE_ATTRIBUTES     (~(DETOUR_PAGE_EXECUTE_ALL | DETOUR_PAGE_NO_EXECUTE_ALL))
+// Compile-time check: the executable mask is the non-executable mask shifted left by 4 bits.
+C_ASSERT((DETOUR_PAGE_NO_EXECUTE_ALL << 4) == DETOUR_PAGE_EXECUTE_ALL);
+
+static DWORD DetourPageProtectAdjustExecute(_In_  DWORD dwOldProtect,
+                                            _In_  DWORD dwNewProtect)
+//  Returns dwNewProtect with its access bits shifted into the executable or
+//  non-executable mask so that its executability matches dwOldProtect.
+{
+    DWORD const dwAttributes = dwNewProtect & DETOUR_PAGE_ATTRIBUTES;
+    bool const fWasExecute = ((dwOldProtect & DETOUR_PAGE_EXECUTE_ALL) != 0);
+    bool const fWantsExecute = ((dwNewProtect & DETOUR_PAGE_EXECUTE_ALL) != 0);
+
+    if (fWasExecute == fWantsExecute) {
+        return dwNewProtect;    // Executability already agrees; leave untouched.
+    }
+    if (fWasExecute) {          // Old was executable: move access bits up by 4.
+        return ((dwNewProtect & DETOUR_PAGE_NO_EXECUTE_ALL) << 4) | dwAttributes;
+    }
+    return ((dwNewProtect & DETOUR_PAGE_EXECUTE_ALL) >> 4) | dwAttributes;
+}
+
+_Success_(return != FALSE)
+BOOL WINAPI DetourVirtualProtectSameExecuteEx(_In_  HANDLE hProcess,
+                                              _In_  PVOID pAddress,
+                                              _In_  SIZE_T nSize,
+                                              _In_  DWORD dwNewProtect,
+                                              _Out_ PDWORD pdwOldProtect)
+// Applies dwNewProtect to nSize bytes at pAddress in hProcess while keeping the
+// region's current executability, because some systems refuse to change it.
+// Intended as a drop-in replacement for some uses of VirtualProtectEx; it is not
+// needed when merely "restoring" a previously saved page protection.
+{
+    // Look up the protection currently in effect at pAddress.
+    MEMORY_BASIC_INFORMATION mbiCurrent;
+    ZeroMemory(&mbiCurrent, sizeof(mbiCurrent));
+
+    if (VirtualQueryEx(hProcess, pAddress, &mbiCurrent, sizeof(mbiCurrent)) == 0) {
+        return FALSE;
+    }
+
+    DWORD const dwAdjusted =
+        DetourPageProtectAdjustExecute(mbiCurrent.Protect, dwNewProtect);
+
+    return VirtualProtectEx(hProcess, pAddress, nSize, dwAdjusted, pdwOldProtect);
+}
+
+_Success_(return != FALSE)
+BOOL WINAPI DetourVirtualProtectSameExecute(_In_  PVOID pAddress,
+                                            _In_  SIZE_T nSize,
+                                            _In_  DWORD dwNewProtect,
+                                            _Out_ PDWORD pdwOldProtect)
+{
+    HANDLE const hSelf = GetCurrentProcess();   // Same operation, applied to the calling process.
+    return DetourVirtualProtectSameExecuteEx(hSelf, pAddress, nSize, dwNewProtect, pdwOldProtect);
+}
+
+//  End of File
diff --git a/src/detours/detours.h b/src/detours/detours.h
new file mode 100644
index 0000000..adb34eb
--- /dev/null
+++ b/src/detours/detours.h
@@ -0,0 +1,1080 @@
+/////////////////////////////////////////////////////////////////////////////
+//
+//  Core Detours Functionality (detours.h of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+
+#pragma once
+#ifndef _DETOURS_H_
+#define _DETOURS_H_
+
+#define DETOURS_VERSION     0x4c0c1   // 0xMAJORcMINORcPATCH
+
+//////////////////////////////////////////////////////////////////////////////
+//
+
+#undef DETOURS_X64
+#undef DETOURS_X86
+#undef DETOURS_IA64
+#undef DETOURS_ARM
+#undef DETOURS_ARM64
+#undef DETOURS_BITS
+#undef DETOURS_32BIT
+#undef DETOURS_64BIT
+
+#if defined(_X86_)
+#define DETOURS_X86
+#define DETOURS_OPTION_BITS 64
+
+#elif defined(_AMD64_)
+#define DETOURS_X64
+#define DETOURS_OPTION_BITS 32
+
+#elif defined(_IA64_)
+#define DETOURS_IA64
+#define DETOURS_OPTION_BITS 32
+
+#elif defined(_ARM_)
+#define DETOURS_ARM
+
+#elif defined(_ARM64_)
+#define DETOURS_ARM64
+
+#else
+#error Unknown architecture (x86, amd64, ia64, arm, arm64)
+#endif
+
+#ifdef _WIN64
+#undef DETOURS_32BIT
+#define DETOURS_64BIT 1
+#define DETOURS_BITS 64
+// If all 64bit kernels can run one and only one 32bit architecture.
+//#define DETOURS_OPTION_BITS 32
+#else
+#define DETOURS_32BIT 1
+#undef DETOURS_64BIT
+#define DETOURS_BITS 32
+// If all 64bit kernels can run one and only one 32bit architecture.
+//#define DETOURS_OPTION_BITS 32
+#endif
+
+#define VER_DETOURS_BITS    DETOURS_STRINGIFY(DETOURS_BITS)  // "32" or "64"; helper macro is defined further down in this header
+
+//////////////////////////////////////////////////////////////////////////////
+//
+
+#if (_MSC_VER < 1299)
+typedef LONG LONG_PTR;
+typedef ULONG ULONG_PTR;
+#endif
+
+///////////////////////////////////////////////// SAL 2.0 Annotations w/o SAL.
+//
+//  These definitions are included so that Detours will build even if the
+//  compiler doesn't have full SAL 2.0 support.
+//
+#ifndef DETOURS_DONT_REMOVE_SAL_20
+
+#ifdef DETOURS_TEST_REMOVE_SAL_20
+#undef _Analysis_assume_
+#undef _Benign_race_begin_
+#undef _Benign_race_end_
+#undef _Field_range_
+#undef _Field_size_
+#undef _In_
+#undef _In_bytecount_
+#undef _In_count_
+#undef _In_opt_
+#undef _In_opt_bytecount_
+#undef _In_opt_count_
+#undef _In_opt_z_
+#undef _In_range_
+#undef _In_reads_
+#undef _In_reads_bytes_
+#undef _In_reads_opt_
+#undef _In_reads_opt_bytes_
+#undef _In_reads_or_z_
+#undef _In_z_
+#undef _Inout_
+#undef _Inout_opt_
+#undef _Inout_z_count_
+#undef _Out_
+#undef _Out_opt_
+#undef _Out_writes_
+#undef _Outptr_result_maybenull_
+#undef _Readable_bytes_
+#undef _Success_
+#undef _Writable_bytes_
+#undef _Pre_notnull_
+#endif
+
+#if defined(_Deref_out_opt_z_) && !defined(_Outptr_result_maybenull_)
+#define _Outptr_result_maybenull_ _Deref_out_opt_z_
+#endif
+
+#if defined(_In_count_) && !defined(_In_reads_)
+#define _In_reads_(x) _In_count_(x)
+#endif
+
+#if defined(_In_opt_count_) && !defined(_In_reads_opt_)
+#define _In_reads_opt_(x) _In_opt_count_(x)
+#endif
+
+#if defined(_In_opt_bytecount_) && !defined(_In_reads_opt_bytes_)
+#define _In_reads_opt_bytes_(x) _In_opt_bytecount_(x)
+#endif
+
+#if defined(_In_bytecount_) && !defined(_In_reads_bytes_)
+#define _In_reads_bytes_(x) _In_bytecount_(x)
+#endif
+
+#ifndef _In_
+#define _In_
+#endif
+
+#ifndef _In_bytecount_
+#define _In_bytecount_(x)
+#endif
+
+#ifndef _In_count_
+#define _In_count_(x)
+#endif
+
+#ifndef _In_opt_
+#define _In_opt_
+#endif
+
+#ifndef _In_opt_bytecount_
+#define _In_opt_bytecount_(x)
+#endif
+
+#ifndef _In_opt_count_
+#define _In_opt_count_(x)
+#endif
+
+#ifndef _In_opt_z_
+#define _In_opt_z_
+#endif
+
+#ifndef _In_range_
+#define _In_range_(x,y)
+#endif
+
+#ifndef _In_reads_
+#define _In_reads_(x)
+#endif
+
+#ifndef _In_reads_bytes_
+#define _In_reads_bytes_(x)
+#endif
+
+#ifndef _In_reads_opt_
+#define _In_reads_opt_(x)
+#endif
+
+#ifndef _In_reads_opt_bytes_
+#define _In_reads_opt_bytes_(x)
+#endif
+
+#ifndef _In_reads_or_z_
+#define _In_reads_or_z_
+#endif
+
+#ifndef _In_z_
+#define _In_z_
+#endif
+
+#ifndef _Inout_
+#define _Inout_
+#endif
+
+#ifndef _Inout_opt_
+#define _Inout_opt_
+#endif
+
+#ifndef _Inout_z_count_
+#define _Inout_z_count_(x)
+#endif
+
+#ifndef _Out_
+#define _Out_
+#endif
+
+#ifndef _Out_opt_
+#define _Out_opt_
+#endif
+
+#ifndef _Out_writes_
+#define _Out_writes_(x)
+#endif
+
+#ifndef _Outptr_result_maybenull_
+#define _Outptr_result_maybenull_
+#endif
+
+#ifndef _Writable_bytes_
+#define _Writable_bytes_(x)
+#endif
+
+#ifndef _Readable_bytes_
+#define _Readable_bytes_(x)
+#endif
+
+#ifndef _Success_
+#define _Success_(x)
+#endif
+
+#ifndef _Pre_notnull_
+#define _Pre_notnull_
+#endif
+
+#ifdef DETOURS_INTERNAL
+
+#pragma warning(disable:4615) // unknown warning type (suppress with older compilers)
+
+#ifndef _Benign_race_begin_
+#define _Benign_race_begin_
+#endif
+
+#ifndef _Benign_race_end_
+#define _Benign_race_end_
+#endif
+
+#ifndef _Field_size_
+#define _Field_size_(x)
+#endif
+
+#ifndef _Field_range_
+#define _Field_range_(x,y)
+#endif
+
+#ifndef _Analysis_assume_
+#define _Analysis_assume_(x)
+#endif
+
+#endif // DETOURS_INTERNAL
+#endif // DETOURS_DONT_REMOVE_SAL_20
+
+//////////////////////////////////////////////////////////////////////////////
+//
+#ifndef GUID_DEFINED
+#define GUID_DEFINED
+typedef struct  _GUID
+{
+    DWORD Data1;
+    WORD Data2;
+    WORD Data3;
+    BYTE Data4[ 8 ];
+} GUID;
+
+#ifdef INITGUID
+#define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
+        const GUID name \
+                = { l, w1, w2, { b1, b2,  b3,  b4,  b5,  b6,  b7,  b8 } }
+#else
+#define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
+    const GUID name
+#endif // INITGUID
+#endif // !GUID_DEFINED
+
+#if defined(__cplusplus)
+#ifndef _REFGUID_DEFINED
+#define _REFGUID_DEFINED
+#define REFGUID             const GUID &
+#endif // !_REFGUID_DEFINED
+#else // !__cplusplus
+#ifndef _REFGUID_DEFINED
+#define _REFGUID_DEFINED
+#define REFGUID             const GUID * const
+#endif // !_REFGUID_DEFINED
+#endif // !__cplusplus
+
+#ifndef ARRAYSIZE
+#define ARRAYSIZE(x)    (sizeof(x)/sizeof((x)[0]))  // element count of a true array (not a pointer); argument parenthesised
+#endif
+
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/////////////////////////////////////////////////// Instruction Target Macros.
+//
+#define DETOUR_INSTRUCTION_TARGET_NONE          ((PVOID)0)
+#define DETOUR_INSTRUCTION_TARGET_DYNAMIC       ((PVOID)(LONG_PTR)-1)
+#define DETOUR_SECTION_HEADER_SIGNATURE         0x00727444   // "Dtr\0"
+
+extern const GUID DETOUR_EXE_RESTORE_GUID;
+extern const GUID DETOUR_EXE_HELPER_GUID;
+
+#define DETOUR_TRAMPOLINE_SIGNATURE             0x21727444  // Dtr!
+typedef struct _DETOUR_TRAMPOLINE DETOUR_TRAMPOLINE, *PDETOUR_TRAMPOLINE;
+
+/////////////////////////////////////////////////////////// Binary Structures.
+//
+#pragma pack(push, 8)
+typedef struct _DETOUR_SECTION_HEADER   // fixed 16-DWORD header; DETOUR_SECTION_HEADER_DECLARE builds an initializer for it
+{
+    DWORD       cbHeaderSize;           // sizeof(DETOUR_SECTION_HEADER) in DETOUR_SECTION_HEADER_DECLARE
+    DWORD       nSignature;             // DETOUR_SECTION_HEADER_SIGNATURE in DETOUR_SECTION_HEADER_DECLARE
+    DWORD       nDataOffset;            // sizeof(DETOUR_SECTION_HEADER) in DETOUR_SECTION_HEADER_DECLARE
+    DWORD       cbDataSize;             // the cbSectionSize argument of DETOUR_SECTION_HEADER_DECLARE
+
+    DWORD       nOriginalImportVirtualAddress;      // this and all later fields are 0 in DETOUR_SECTION_HEADER_DECLARE
+    DWORD       nOriginalImportSize;
+    DWORD       nOriginalBoundImportVirtualAddress;
+    DWORD       nOriginalBoundImportSize;
+
+    DWORD       nOriginalIatVirtualAddress;
+    DWORD       nOriginalIatSize;
+    DWORD       nOriginalSizeOfImage;
+    DWORD       cbPrePE;                // byte count of the data that follows this header (see note below)
+
+    DWORD       nOriginalClrFlags;
+    DWORD       reserved1;
+    DWORD       reserved2;
+    DWORD       reserved3;
+
+    // Followed by cbPrePE bytes of data.
+} DETOUR_SECTION_HEADER, *PDETOUR_SECTION_HEADER;
+
+typedef struct _DETOUR_SECTION_RECORD
+{
+    DWORD       cbBytes;
+    DWORD       nReserved;
+    GUID        guid;
+} DETOUR_SECTION_RECORD, *PDETOUR_SECTION_RECORD;
+
+typedef struct _DETOUR_CLR_HEADER
+{
+    // Header versioning
+    ULONG                   cb;
+    USHORT                  MajorRuntimeVersion;
+    USHORT                  MinorRuntimeVersion;
+
+    // Symbol table and startup information
+    IMAGE_DATA_DIRECTORY    MetaData;
+    ULONG                   Flags;
+
+    // Followed by the rest of the IMAGE_COR20_HEADER
+} DETOUR_CLR_HEADER, *PDETOUR_CLR_HEADER;
+
+typedef struct _DETOUR_EXE_RESTORE      // NOTE(review): presumably saved copies of an image's DOS/NT/CLR headers - confirm in the .cpp users
+{
+    DWORD               cb;
+    DWORD               cbidh;          // NOTE(review): cb*/p* look like the size of / pointer to idh, inh, clr - confirm
+    DWORD               cbinh;
+    DWORD               cbclr;
+
+    PBYTE               pidh;
+    PBYTE               pinh;
+    PBYTE               pclr;
+
+    IMAGE_DOS_HEADER    idh;
+    union {                             // overlapping views of the same NT-header storage
+        IMAGE_NT_HEADERS    inh;        // all environments have this
+#ifdef IMAGE_NT_OPTIONAL_HDR32_MAGIC    // some environments do not have this
+        IMAGE_NT_HEADERS32  inh32;
+#endif
+#ifdef IMAGE_NT_OPTIONAL_HDR64_MAGIC    // some environments do not have this
+        IMAGE_NT_HEADERS64  inh64;
+#endif
+#ifdef IMAGE_NT_OPTIONAL_HDR64_MAGIC    // some environments do not have this
+        BYTE                raw[sizeof(IMAGE_NT_HEADERS64) +
+                                sizeof(IMAGE_SECTION_HEADER) * 32];    // 64-bit NT headers plus 32 section headers
+#else
+        BYTE                raw[0x108 + sizeof(IMAGE_SECTION_HEADER) * 32];    // 0x108 == sizeof(IMAGE_NT_HEADERS64); see the C_ASSERT after this struct
+#endif
+    };
+    DETOUR_CLR_HEADER   clr;
+
+} DETOUR_EXE_RESTORE, *PDETOUR_EXE_RESTORE;
+
+#ifdef IMAGE_NT_OPTIONAL_HDR64_MAGIC
+C_ASSERT(sizeof(IMAGE_NT_HEADERS64) == 0x108);
+#endif
+
+// The size can change, but assert for clarity due to the muddying #ifdefs.
+#ifdef _WIN64
+C_ASSERT(sizeof(DETOUR_EXE_RESTORE) == 0x688);
+#else
+C_ASSERT(sizeof(DETOUR_EXE_RESTORE) == 0x678);
+#endif
+
+typedef struct _DETOUR_EXE_HELPER
+{
+    DWORD               cb;
+    DWORD               pid;
+    DWORD               nDlls;
+    CHAR                rDlls[4];
+} DETOUR_EXE_HELPER, *PDETOUR_EXE_HELPER;
+
+#pragma pack(pop)
+
+#define DETOUR_SECTION_HEADER_DECLARE(cbSectionSize) \
+{ \
+      sizeof(DETOUR_SECTION_HEADER), /* cbHeaderSize */ \
+      DETOUR_SECTION_HEADER_SIGNATURE, /* nSignature */ \
+      sizeof(DETOUR_SECTION_HEADER), /* nDataOffset */ \
+      (cbSectionSize), /* cbDataSize */ \
+      \
+      0, /* nOriginalImportVirtualAddress */ \
+      0, /* nOriginalImportSize */ \
+      0, /* nOriginalBoundImportVirtualAddress */ \
+      0, /* nOriginalBoundImportSize */ \
+      \
+      0, /* nOriginalIatVirtualAddress */ \
+      0, /* nOriginalIatSize */ \
+      0, /* nOriginalSizeOfImage */ \
+      0, /* cbPrePE */ \
+}   // positional initializer for DETOUR_SECTION_HEADER; the four fields after cbPrePE are implicitly zero
+
+/////////////////////////////////////////////////////////////// Helper Macros.
+//
+#define DETOURS_STRINGIFY(x)    DETOURS_STRINGIFY_(x)
+#define DETOURS_STRINGIFY_(x)    #x
+
+///////////////////////////////////////////////////////////// Binary Typedefs.
+//
+typedef BOOL (CALLBACK *PF_DETOUR_BINARY_BYWAY_CALLBACK)(
+    _In_opt_ PVOID pContext,
+    _In_opt_ LPCSTR pszFile,
+    _Outptr_result_maybenull_ LPCSTR *ppszOutFile);
+
+typedef BOOL (CALLBACK *PF_DETOUR_BINARY_FILE_CALLBACK)(
+    _In_opt_ PVOID pContext,
+    _In_ LPCSTR pszOrigFile,
+    _In_ LPCSTR pszFile,
+    _Outptr_result_maybenull_ LPCSTR *ppszOutFile);
+
+typedef BOOL (CALLBACK *PF_DETOUR_BINARY_SYMBOL_CALLBACK)(
+    _In_opt_ PVOID pContext,
+    _In_ ULONG nOrigOrdinal,
+    _In_ ULONG nOrdinal,
+    _Out_ ULONG *pnOutOrdinal,
+    _In_opt_ LPCSTR pszOrigSymbol,
+    _In_opt_ LPCSTR pszSymbol,
+    _Outptr_result_maybenull_ LPCSTR *ppszOutSymbol);
+
+typedef BOOL (CALLBACK *PF_DETOUR_BINARY_COMMIT_CALLBACK)(
+    _In_opt_ PVOID pContext);
+
+typedef BOOL (CALLBACK *PF_DETOUR_ENUMERATE_EXPORT_CALLBACK)(_In_opt_ PVOID pContext,
+                                                             _In_ ULONG nOrdinal,
+                                                             _In_opt_ LPCSTR pszName,
+                                                             _In_opt_ PVOID pCode);
+
+typedef BOOL (CALLBACK *PF_DETOUR_IMPORT_FILE_CALLBACK)(_In_opt_ PVOID pContext,
+                                                        _In_opt_ HMODULE hModule,
+                                                        _In_opt_ LPCSTR pszFile);
+
+typedef BOOL (CALLBACK *PF_DETOUR_IMPORT_FUNC_CALLBACK)(_In_opt_ PVOID pContext,
+                                                        _In_ DWORD nOrdinal,
+                                                        _In_opt_ LPCSTR pszFunc,
+                                                        _In_opt_ PVOID pvFunc);
+
+// Same as PF_DETOUR_IMPORT_FUNC_CALLBACK but extra indirection on last parameter.
+typedef BOOL (CALLBACK *PF_DETOUR_IMPORT_FUNC_CALLBACK_EX)(_In_opt_ PVOID pContext,
+                                                           _In_ DWORD nOrdinal,
+                                                           _In_opt_ LPCSTR pszFunc,
+                                                           _In_opt_ PVOID* ppvFunc);
+
+typedef VOID * PDETOUR_BINARY;
+typedef VOID * PDETOUR_LOADED_BINARY;
+
+//////////////////////////////////////////////////////////// Transaction APIs.
+//
+LONG WINAPI DetourTransactionBegin(VOID);
+LONG WINAPI DetourTransactionAbort(VOID);
+LONG WINAPI DetourTransactionCommit(VOID);
+LONG WINAPI DetourTransactionCommitEx(_Out_opt_ PVOID **pppFailedPointer);
+
+LONG WINAPI DetourUpdateThread(_In_ HANDLE hThread);
+
+LONG WINAPI DetourAttach(_Inout_ PVOID *ppPointer,
+                         _In_ PVOID pDetour);
+
+LONG WINAPI DetourAttachEx(_Inout_ PVOID *ppPointer,
+                           _In_ PVOID pDetour,
+                           _Out_opt_ PDETOUR_TRAMPOLINE *ppRealTrampoline,
+                           _Out_opt_ PVOID *ppRealTarget,
+                           _Out_opt_ PVOID *ppRealDetour);
+
+LONG WINAPI DetourDetach(_Inout_ PVOID *ppPointer,
+                         _In_ PVOID pDetour);
+
+BOOL WINAPI DetourSetIgnoreTooSmall(_In_ BOOL fIgnore);
+BOOL WINAPI DetourSetRetainRegions(_In_ BOOL fRetain);
+PVOID WINAPI DetourSetSystemRegionLowerBound(_In_ PVOID pSystemRegionLowerBound);
+PVOID WINAPI DetourSetSystemRegionUpperBound(_In_ PVOID pSystemRegionUpperBound);
+
+////////////////////////////////////////////////////////////// Code Functions.
+//
+PVOID WINAPI DetourFindFunction(_In_ LPCSTR pszModule,
+                                _In_ LPCSTR pszFunction);
+PVOID WINAPI DetourCodeFromPointer(_In_ PVOID pPointer,
+                                   _Out_opt_ PVOID *ppGlobals);
+PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst,
+                                   _Inout_opt_ PVOID *ppDstPool,
+                                   _In_ PVOID pSrc,
+                                   _Out_opt_ PVOID *ppTarget,
+                                   _Out_opt_ LONG *plExtra);
+BOOL WINAPI DetourSetCodeModule(_In_ HMODULE hModule,
+                                _In_ BOOL fLimitReferencesToModule);
+PVOID WINAPI DetourAllocateRegionWithinJumpBounds(_In_ LPCVOID pbTarget,
+                                                  _Out_ PDWORD pcbAllocatedSize);
+
+///////////////////////////////////////////////////// Loaded Binary Functions.
+//
+HMODULE WINAPI DetourGetContainingModule(_In_ PVOID pvAddr);
+HMODULE WINAPI DetourEnumerateModules(_In_opt_ HMODULE hModuleLast);
+PVOID WINAPI DetourGetEntryPoint(_In_opt_ HMODULE hModule);
+ULONG WINAPI DetourGetModuleSize(_In_opt_ HMODULE hModule);
+BOOL WINAPI DetourEnumerateExports(_In_ HMODULE hModule,
+                                   _In_opt_ PVOID pContext,
+                                   _In_ PF_DETOUR_ENUMERATE_EXPORT_CALLBACK pfExport);
+BOOL WINAPI DetourEnumerateImports(_In_opt_ HMODULE hModule,
+                                   _In_opt_ PVOID pContext,
+                                   _In_opt_ PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile,
+                                   _In_opt_ PF_DETOUR_IMPORT_FUNC_CALLBACK pfImportFunc);
+
+BOOL WINAPI DetourEnumerateImportsEx(_In_opt_ HMODULE hModule,
+                                     _In_opt_ PVOID pContext,
+                                     _In_opt_ PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile,
+                                     _In_opt_ PF_DETOUR_IMPORT_FUNC_CALLBACK_EX pfImportFuncEx);
+
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourFindPayload(_In_opt_ HMODULE hModule,
+                               _In_ REFGUID rguid,
+                               _Out_ DWORD *pcbData);
+
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourFindPayloadEx(_In_ REFGUID rguid,
+                                 _Out_ DWORD * pcbData);
+
+DWORD WINAPI DetourGetSizeOfPayloads(_In_opt_ HMODULE hModule);
+
+///////////////////////////////////////////////// Persistent Binary Functions.
+//
+
+PDETOUR_BINARY WINAPI DetourBinaryOpen(_In_ HANDLE hFile);
+
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourBinaryEnumeratePayloads(_In_ PDETOUR_BINARY pBinary,
+                                           _Out_opt_ GUID *pGuid,
+                                           _Out_ DWORD *pcbData,
+                                           _Inout_ DWORD *pnIterator);
+
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourBinaryFindPayload(_In_ PDETOUR_BINARY pBinary,
+                                     _In_ REFGUID rguid,
+                                     _Out_ DWORD *pcbData);
+
+PVOID WINAPI DetourBinarySetPayload(_In_ PDETOUR_BINARY pBinary,
+                                    _In_ REFGUID rguid,
+                                    _In_reads_opt_(cbData) PVOID pData,
+                                    _In_ DWORD cbData);
+BOOL WINAPI DetourBinaryDeletePayload(_In_ PDETOUR_BINARY pBinary, _In_ REFGUID rguid);
+BOOL WINAPI DetourBinaryPurgePayloads(_In_ PDETOUR_BINARY pBinary);
+BOOL WINAPI DetourBinaryResetImports(_In_ PDETOUR_BINARY pBinary);
+BOOL WINAPI DetourBinaryEditImports(_In_ PDETOUR_BINARY pBinary,
+                                    _In_opt_ PVOID pContext,
+                                    _In_opt_ PF_DETOUR_BINARY_BYWAY_CALLBACK pfByway,
+                                    _In_opt_ PF_DETOUR_BINARY_FILE_CALLBACK pfFile,
+                                    _In_opt_ PF_DETOUR_BINARY_SYMBOL_CALLBACK pfSymbol,
+                                    _In_opt_ PF_DETOUR_BINARY_COMMIT_CALLBACK pfCommit);
+BOOL WINAPI DetourBinaryWrite(_In_ PDETOUR_BINARY pBinary, _In_ HANDLE hFile);
+BOOL WINAPI DetourBinaryClose(_In_ PDETOUR_BINARY pBinary);
+
+/////////////////////////////////////////////////// Create Process & Load Dll.
+//
+typedef BOOL (WINAPI *PDETOUR_CREATE_PROCESS_ROUTINEA)(
+    _In_opt_ LPCSTR lpApplicationName,
+    _Inout_opt_ LPSTR lpCommandLine,
+    _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+    _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+    _In_ BOOL bInheritHandles,
+    _In_ DWORD dwCreationFlags,
+    _In_opt_ LPVOID lpEnvironment,
+    _In_opt_ LPCSTR lpCurrentDirectory,
+    _In_ LPSTARTUPINFOA lpStartupInfo,
+    _Out_ LPPROCESS_INFORMATION lpProcessInformation);
+
+typedef BOOL (WINAPI *PDETOUR_CREATE_PROCESS_ROUTINEW)(
+    _In_opt_ LPCWSTR lpApplicationName,
+    _Inout_opt_ LPWSTR lpCommandLine,
+    _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+    _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+    _In_ BOOL bInheritHandles,
+    _In_ DWORD dwCreationFlags,
+    _In_opt_ LPVOID lpEnvironment,
+    _In_opt_ LPCWSTR lpCurrentDirectory,
+    _In_ LPSTARTUPINFOW lpStartupInfo,
+    _Out_ LPPROCESS_INFORMATION lpProcessInformation);
+
+BOOL WINAPI DetourCreateProcessWithDllA(_In_opt_ LPCSTR lpApplicationName,
+                                        _Inout_opt_ LPSTR lpCommandLine,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                        _In_ BOOL bInheritHandles,
+                                        _In_ DWORD dwCreationFlags,
+                                        _In_opt_ LPVOID lpEnvironment,
+                                        _In_opt_ LPCSTR lpCurrentDirectory,
+                                        _In_ LPSTARTUPINFOA lpStartupInfo,
+                                        _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                        _In_ LPCSTR lpDllName,
+                                        _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA);
+
+BOOL WINAPI DetourCreateProcessWithDllW(_In_opt_ LPCWSTR lpApplicationName,
+                                        _Inout_opt_ LPWSTR lpCommandLine,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                        _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                        _In_ BOOL bInheritHandles,
+                                        _In_ DWORD dwCreationFlags,
+                                        _In_opt_ LPVOID lpEnvironment,
+                                        _In_opt_ LPCWSTR lpCurrentDirectory,
+                                        _In_ LPSTARTUPINFOW lpStartupInfo,
+                                        _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                        _In_ LPCSTR lpDllName,
+                                        _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW);
+
+#ifdef UNICODE
+#define DetourCreateProcessWithDll      DetourCreateProcessWithDllW
+#define PDETOUR_CREATE_PROCESS_ROUTINE  PDETOUR_CREATE_PROCESS_ROUTINEW
+#else
+#define DetourCreateProcessWithDll      DetourCreateProcessWithDllA
+#define PDETOUR_CREATE_PROCESS_ROUTINE  PDETOUR_CREATE_PROCESS_ROUTINEA
+#endif // !UNICODE
+
+BOOL WINAPI DetourCreateProcessWithDllExA(_In_opt_ LPCSTR lpApplicationName,
+                                          _Inout_opt_ LPSTR lpCommandLine,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                          _In_ BOOL bInheritHandles,
+                                          _In_ DWORD dwCreationFlags,
+                                          _In_opt_ LPVOID lpEnvironment,
+                                          _In_opt_ LPCSTR lpCurrentDirectory,
+                                          _In_ LPSTARTUPINFOA lpStartupInfo,
+                                          _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                          _In_ LPCSTR lpDllName,
+                                          _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA);
+
+BOOL WINAPI DetourCreateProcessWithDllExW(_In_opt_ LPCWSTR lpApplicationName,
+                                          _Inout_opt_  LPWSTR lpCommandLine,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                          _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                          _In_ BOOL bInheritHandles,
+                                          _In_ DWORD dwCreationFlags,
+                                          _In_opt_ LPVOID lpEnvironment,
+                                          _In_opt_ LPCWSTR lpCurrentDirectory,
+                                          _In_ LPSTARTUPINFOW lpStartupInfo,
+                                          _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                          _In_ LPCSTR lpDllName,
+                                          _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW);
+
+#ifdef UNICODE
+#define DetourCreateProcessWithDllEx    DetourCreateProcessWithDllExW
+#else
+#define DetourCreateProcessWithDllEx    DetourCreateProcessWithDllExA
+#endif // !UNICODE
+
+BOOL WINAPI DetourCreateProcessWithDllsA(_In_opt_ LPCSTR lpApplicationName,
+                                         _Inout_opt_ LPSTR lpCommandLine,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                         _In_ BOOL bInheritHandles,
+                                         _In_ DWORD dwCreationFlags,
+                                         _In_opt_ LPVOID lpEnvironment,
+                                         _In_opt_ LPCSTR lpCurrentDirectory,
+                                         _In_ LPSTARTUPINFOA lpStartupInfo,
+                                         _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                         _In_ DWORD nDlls,
+                                         _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                         _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA);
+
+BOOL WINAPI DetourCreateProcessWithDllsW(_In_opt_ LPCWSTR lpApplicationName,
+                                         _Inout_opt_ LPWSTR lpCommandLine,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpProcessAttributes,
+                                         _In_opt_ LPSECURITY_ATTRIBUTES lpThreadAttributes,
+                                         _In_ BOOL bInheritHandles,
+                                         _In_ DWORD dwCreationFlags,
+                                         _In_opt_ LPVOID lpEnvironment,
+                                         _In_opt_ LPCWSTR lpCurrentDirectory,
+                                         _In_ LPSTARTUPINFOW lpStartupInfo,
+                                         _Out_ LPPROCESS_INFORMATION lpProcessInformation,
+                                         _In_ DWORD nDlls,
+                                         _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                         _In_opt_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW);
+
+#ifdef UNICODE
+#define DetourCreateProcessWithDlls     DetourCreateProcessWithDllsW
+#else
+#define DetourCreateProcessWithDlls     DetourCreateProcessWithDllsA
+#endif // !UNICODE
+
+BOOL WINAPI DetourProcessViaHelperA(_In_ DWORD dwTargetPid,
+                                    _In_ LPCSTR lpDllName,
+                                    _In_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA);
+
+BOOL WINAPI DetourProcessViaHelperW(_In_ DWORD dwTargetPid,
+                                    _In_ LPCSTR lpDllName,
+                                    _In_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW);
+
+#ifdef UNICODE
+#define DetourProcessViaHelper          DetourProcessViaHelperW
+#else
+#define DetourProcessViaHelper          DetourProcessViaHelperA
+#endif // !UNICODE
+
+BOOL WINAPI DetourProcessViaHelperDllsA(_In_ DWORD dwTargetPid,
+                                        _In_ DWORD nDlls,
+                                        _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                        _In_ PDETOUR_CREATE_PROCESS_ROUTINEA pfCreateProcessA);
+
+BOOL WINAPI DetourProcessViaHelperDllsW(_In_ DWORD dwTargetPid,
+                                        _In_ DWORD nDlls,
+                                        _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                        _In_ PDETOUR_CREATE_PROCESS_ROUTINEW pfCreateProcessW);
+
+#ifdef UNICODE
+#define DetourProcessViaHelperDlls      DetourProcessViaHelperDllsW
+#else
+#define DetourProcessViaHelperDlls      DetourProcessViaHelperDllsA
+#endif // !UNICODE
+
+BOOL WINAPI DetourUpdateProcessWithDll(_In_ HANDLE hProcess,
+                                       _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                       _In_ DWORD nDlls);
+
+BOOL WINAPI DetourUpdateProcessWithDllEx(_In_ HANDLE hProcess,
+                                         _In_ HMODULE hImage,
+                                         _In_ BOOL bIs32Bit,
+                                         _In_reads_(nDlls) LPCSTR *rlpDlls,
+                                         _In_ DWORD nDlls);
+
+BOOL WINAPI DetourCopyPayloadToProcess(_In_ HANDLE hProcess,
+                                       _In_ REFGUID rguid,
+                                       _In_reads_bytes_(cbData) PVOID pvData,
+                                       _In_ DWORD cbData);
+BOOL WINAPI DetourRestoreAfterWith(VOID);
+BOOL WINAPI DetourRestoreAfterWithEx(_In_reads_bytes_(cbData) PVOID pvData,
+                                     _In_ DWORD cbData);
+BOOL WINAPI DetourIsHelperProcess(VOID);
+VOID CALLBACK DetourFinishHelperProcess(_In_ HWND,
+                                        _In_ HINSTANCE,
+                                        _In_ LPSTR,
+                                        _In_ INT);
+
+//
+//////////////////////////////////////////////////////////////////////////////
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+//////////////////////////////////////////////// Detours Internal Definitions.
+//
+#ifdef __cplusplus
+#ifdef DETOURS_INTERNAL
+
+#define NOTHROW
+// #define NOTHROW (nothrow)
+
+//////////////////////////////////////////////////////////////////////////////
+//
+#if (_MSC_VER < 1299)
+#include <imagehlp.h>
+typedef IMAGEHLP_MODULE IMAGEHLP_MODULE64;
+typedef PIMAGEHLP_MODULE PIMAGEHLP_MODULE64;
+typedef IMAGEHLP_SYMBOL SYMBOL_INFO;
+typedef PIMAGEHLP_SYMBOL PSYMBOL_INFO;
+
+static inline
+LONG InterlockedCompareExchange(_Inout_ LONG *ptr, _In_ LONG nval, _In_ LONG oval)
+{
+    return (LONG)::InterlockedCompareExchange((PVOID*)ptr, (PVOID)nval, (PVOID)oval);
+}
+#else
+#pragma warning(push)
+#pragma warning(disable:4091) // empty typedef
+#include <dbghelp.h>
+#pragma warning(pop)
+#endif
+
+#ifdef IMAGEAPI // defined by DBGHELP.H
+typedef LPAPI_VERSION (NTAPI *PF_ImagehlpApiVersionEx)(_In_ LPAPI_VERSION AppVersion);
+
+typedef BOOL (NTAPI *PF_SymInitialize)(_In_ HANDLE hProcess,
+                                       _In_opt_ LPCSTR UserSearchPath,
+                                       _In_ BOOL fInvadeProcess);
+typedef DWORD (NTAPI *PF_SymSetOptions)(_In_ DWORD SymOptions);
+typedef DWORD (NTAPI *PF_SymGetOptions)(VOID);
+typedef DWORD64 (NTAPI *PF_SymLoadModule64)(_In_ HANDLE hProcess,
+                                            _In_opt_ HANDLE hFile,
+                                            _In_ LPSTR ImageName,
+                                            _In_opt_ LPSTR ModuleName,
+                                            _In_ DWORD64 BaseOfDll,
+                                            _In_opt_ DWORD SizeOfDll);
+typedef BOOL (NTAPI *PF_SymGetModuleInfo64)(_In_ HANDLE hProcess,
+                                            _In_ DWORD64 qwAddr,
+                                            _Out_ PIMAGEHLP_MODULE64 ModuleInfo);
+typedef BOOL (NTAPI *PF_SymFromName)(_In_ HANDLE hProcess,
+                                     _In_ LPSTR Name,
+                                     _Out_ PSYMBOL_INFO Symbol);
+
+typedef struct _DETOUR_SYM_INFO
+{
+    HANDLE                  hProcess;
+    HMODULE                 hDbgHelp;
+    PF_ImagehlpApiVersionEx pfImagehlpApiVersionEx;
+    PF_SymInitialize        pfSymInitialize;
+    PF_SymSetOptions        pfSymSetOptions;
+    PF_SymGetOptions        pfSymGetOptions;
+    PF_SymLoadModule64      pfSymLoadModule64;
+    PF_SymGetModuleInfo64   pfSymGetModuleInfo64;
+    PF_SymFromName          pfSymFromName;
+} DETOUR_SYM_INFO, *PDETOUR_SYM_INFO;
+
+PDETOUR_SYM_INFO DetourLoadImageHlp(VOID);
+
+#endif // IMAGEAPI
+
+#if defined(_INC_STDIO) && !defined(_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS)
+#error detours.h must be included before stdio.h (or at least define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS earlier)
+#endif
+#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1
+
+#ifndef DETOUR_TRACE
+#if DETOUR_DEBUG
+#define DETOUR_TRACE(x) printf x
+#define DETOUR_BREAK()  __debugbreak()
+#include <stdio.h>
+#include <limits.h>
+#else
+#define DETOUR_TRACE(x)
+#define DETOUR_BREAK()
+#endif
+#endif
+
+#if 1 || defined(DETOURS_IA64)
+
+//
+// IA64 instructions are 41 bits, 3 per bundle, plus 5 bit bundle template => 128 bits per bundle.
+//
+
+#define DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE (3)
+
+#define DETOUR_IA64_TEMPLATE_OFFSET (0)
+#define DETOUR_IA64_TEMPLATE_SIZE   (5)
+
+#define DETOUR_IA64_INSTRUCTION_SIZE (41)
+#define DETOUR_IA64_INSTRUCTION0_OFFSET (DETOUR_IA64_TEMPLATE_SIZE)
+#define DETOUR_IA64_INSTRUCTION1_OFFSET (DETOUR_IA64_TEMPLATE_SIZE + DETOUR_IA64_INSTRUCTION_SIZE)
+#define DETOUR_IA64_INSTRUCTION2_OFFSET (DETOUR_IA64_TEMPLATE_SIZE + DETOUR_IA64_INSTRUCTION_SIZE + DETOUR_IA64_INSTRUCTION_SIZE)
+
+C_ASSERT(DETOUR_IA64_TEMPLATE_SIZE + DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE * DETOUR_IA64_INSTRUCTION_SIZE == 128);
+
+__declspec(align(16)) struct DETOUR_IA64_BUNDLE
+{
+  public:
+    union
+    {
+        BYTE    data[16];
+        UINT64  wide[2];
+    };
+
+    enum {
+        A_UNIT  = 1u,
+        I_UNIT  = 2u,
+        M_UNIT  = 3u,
+        B_UNIT  = 4u,
+        F_UNIT  = 5u,
+        L_UNIT  = 6u,
+        X_UNIT  = 7u,
+    };
+    struct DETOUR_IA64_METADATA
+    {
+        ULONG       nTemplate       : 8;    // Instruction template.
+        ULONG       nUnit0          : 4;    // Unit for slot 0
+        ULONG       nUnit1          : 4;    // Unit for slot 1
+        ULONG       nUnit2          : 4;    // Unit for slot 2
+    };
+
+  protected:
+    static const DETOUR_IA64_METADATA s_rceCopyTable[33];
+
+    UINT RelocateBundle(_Inout_ DETOUR_IA64_BUNDLE* pDst, _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const;
+
+    bool RelocateInstruction(_Inout_ DETOUR_IA64_BUNDLE* pDst,
+                             _In_ BYTE slot,
+                             _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const;
+
+    // 120 112 104 96 88 80 72 64 56 48 40 32 24 16  8  0
+    //  f.  e.  d. c. b. a. 9. 8. 7. 6. 5. 4. 3. 2. 1. 0.
+
+    //                                      00
+    // f.e. d.c. b.a. 9.8. 7.6. 5.4. 3.2. 1.0.
+    // 0000 0000 0000 0000 0000 0000 0000 001f : Template [4..0]
+    // 0000 0000 0000 0000 0000 03ff ffff ffe0 : Zero [ 41..  5]
+    // 0000 0000 0000 0000 0000 3c00 0000 0000 : Zero [ 45.. 42]
+    // 0000 0000 0007 ffff ffff c000 0000 0000 : One  [ 82.. 46]
+    // 0000 0000 0078 0000 0000 0000 0000 0000 : One  [ 86.. 83]
+    // 0fff ffff ff80 0000 0000 0000 0000 0000 : Two  [123.. 87]
+    // f000 0000 0000 0000 0000 0000 0000 0000 : Two  [127..124]
+    BYTE    GetTemplate() const;
+    // Get 4 bit opcodes.
+    BYTE    GetInst0() const;
+    BYTE    GetInst1() const;
+    BYTE    GetInst2() const;
+    BYTE    GetUnit(BYTE slot) const;
+    BYTE    GetUnit0() const;
+    BYTE    GetUnit1() const;
+    BYTE    GetUnit2() const;
+    // Get 37 bit data.
+    UINT64  GetData0() const;
+    UINT64  GetData1() const;
+    UINT64  GetData2() const;
+
+    // Get/set the full 41 bit instructions.
+    UINT64  GetInstruction(BYTE slot) const;
+    UINT64  GetInstruction0() const;
+    UINT64  GetInstruction1() const;
+    UINT64  GetInstruction2() const;
+    void    SetInstruction(BYTE slot, UINT64 instruction);
+    void    SetInstruction0(UINT64 instruction);
+    void    SetInstruction1(UINT64 instruction);
+    void    SetInstruction2(UINT64 instruction);
+
+    // Get/set bitfields.
+    static UINT64 GetBits(UINT64 Value, UINT64 Offset, UINT64 Count);
+    static UINT64 SetBits(UINT64 Value, UINT64 Offset, UINT64 Count, UINT64 Field);
+
+    // Get specific read-only fields.
+    static UINT64 GetOpcode(UINT64 instruction); // 4bit opcode
+    static UINT64 GetX(UINT64 instruction); // 1bit opcode extension
+    static UINT64 GetX3(UINT64 instruction); // 3bit opcode extension
+    static UINT64 GetX6(UINT64 instruction); // 6bit opcode extension
+
+    // Get/set specific fields.
+    static UINT64 GetImm7a(UINT64 instruction);
+    static UINT64 SetImm7a(UINT64 instruction, UINT64 imm7a);
+    static UINT64 GetImm13c(UINT64 instruction);
+    static UINT64 SetImm13c(UINT64 instruction, UINT64 imm13c);
+    static UINT64 GetSignBit(UINT64 instruction);
+    static UINT64 SetSignBit(UINT64 instruction, UINT64 signBit);
+    static UINT64 GetImm20a(UINT64 instruction);
+    static UINT64 SetImm20a(UINT64 instruction, UINT64 imm20a);
+    static UINT64 GetImm20b(UINT64 instruction);
+    static UINT64 SetImm20b(UINT64 instruction, UINT64 imm20b);
+
+    static UINT64 SignExtend(UINT64 Value, UINT64 Offset);
+
+    BOOL    IsMovlGp() const;
+
+    VOID    SetInst(BYTE Slot, BYTE nInst);
+    VOID    SetInst0(BYTE nInst);
+    VOID    SetInst1(BYTE nInst);
+    VOID    SetInst2(BYTE nInst);
+    VOID    SetData(BYTE Slot, UINT64 nData);
+    VOID    SetData0(UINT64 nData);
+    VOID    SetData1(UINT64 nData);
+    VOID    SetData2(UINT64 nData);
+    BOOL    SetNop(BYTE Slot);
+    BOOL    SetNop0();
+    BOOL    SetNop1();
+    BOOL    SetNop2();
+
+  public:
+    BOOL    IsBrl() const;
+    VOID    SetBrl();
+    VOID    SetBrl(UINT64 target);
+    UINT64  GetBrlTarget() const;
+    VOID    SetBrlTarget(UINT64 target);
+    VOID    SetBrlImm(UINT64 imm);
+    UINT64  GetBrlImm() const;
+
+    UINT64  GetMovlGp() const;
+    VOID    SetMovlGp(UINT64 gp);
+
+    VOID    SetStop();
+
+    UINT    Copy(_Out_ DETOUR_IA64_BUNDLE *pDst, _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra = NULL) const;
+};
+#endif // DETOURS_IA64
+
+#ifdef DETOURS_ARM
+
+#define DETOURS_PFUNC_TO_PBYTE(p)  ((PBYTE)(((ULONG_PTR)(p)) & ~(ULONG_PTR)1))
+#define DETOURS_PBYTE_TO_PFUNC(p)  ((PBYTE)(((ULONG_PTR)(p)) | (ULONG_PTR)1))
+
+#endif // DETOURS_ARM
+
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define DETOUR_OFFLINE_LIBRARY(x)                                       \
+PVOID WINAPI DetourCopyInstruction##x(_In_opt_ PVOID pDst,              \
+                                      _Inout_opt_ PVOID *ppDstPool,     \
+                                      _In_ PVOID pSrc,                  \
+                                      _Out_opt_ PVOID *ppTarget,        \
+                                      _Out_opt_ LONG *plExtra);         \
+                                                                        \
+BOOL WINAPI DetourSetCodeModule##x(_In_ HMODULE hModule,                \
+                                   _In_ BOOL fLimitReferencesToModule); \
+
+DETOUR_OFFLINE_LIBRARY(X86)
+DETOUR_OFFLINE_LIBRARY(X64)
+DETOUR_OFFLINE_LIBRARY(ARM)
+DETOUR_OFFLINE_LIBRARY(ARM64)
+DETOUR_OFFLINE_LIBRARY(IA64)
+
+#undef DETOUR_OFFLINE_LIBRARY
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Helpers for manipulating page protection.
+//
+
+_Success_(return != FALSE)
+BOOL WINAPI DetourVirtualProtectSameExecuteEx(_In_  HANDLE hProcess,
+                                              _In_  PVOID pAddress,
+                                              _In_  SIZE_T nSize,
+                                              _In_  DWORD dwNewProtect,
+                                              _Out_ PDWORD pdwOldProtect);
+
+_Success_(return != FALSE)
+BOOL WINAPI DetourVirtualProtectSameExecute(_In_  PVOID pAddress,
+                                            _In_  SIZE_T nSize,
+                                            _In_  DWORD dwNewProtect,
+                                            _Out_ PDWORD pdwOldProtect);
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+//////////////////////////////////////////////////////////////////////////////
+
+#define MM_ALLOCATION_GRANULARITY 0x10000
+
+//////////////////////////////////////////////////////////////////////////////
+
+#endif // DETOURS_INTERNAL
+#endif // __cplusplus
+
+#endif // _DETOURS_H_
+//
+////////////////////////////////////////////////////////////////  End of File.
diff --git a/src/detours/detver.h b/src/detours/detver.h
new file mode 100644
index 0000000..f0aae9b
--- /dev/null
+++ b/src/detours/detver.h
@@ -0,0 +1,27 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Common version parameters.
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+
+#define _USING_V110_SDK71_ 1
+#include "winver.h"
+#if 0
+#include <windows.h>
+#include <detours.h>
+#else
+#ifndef DETOURS_STRINGIFY
+#define DETOURS_STRINGIFY(x)    DETOURS_STRINGIFY_(x)
+#define DETOURS_STRINGIFY_(x)    #x
+#endif
+
+#define VER_FILEFLAGSMASK   0x3fL
+#define VER_FILEFLAGS       0x0L
+#define VER_FILEOS          0x00040004L
+#define VER_FILETYPE        0x00000002L
+#define VER_FILESUBTYPE     0x00000000L
+#endif
+#define VER_DETOURS_BITS    DETOURS_STRINGIFY(DETOURS_BITS) // was DETOUR_STRINGIFY, which is never defined
diff --git a/src/detours/disasm.cpp b/src/detours/disasm.cpp
new file mode 100644
index 0000000..0ce5993
--- /dev/null
+++ b/src/detours/disasm.cpp
@@ -0,0 +1,4344 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Detours Disassembler (disasm.cpp of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+
+#if _MSC_VER >= 1900
+#pragma warning(push)
+#pragma warning(disable:4091) // empty typedef
+#endif
+
+#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1
+#include <windows.h>
+#include <limits.h>
+
+// #define DETOUR_DEBUG 1
+#define DETOURS_INTERNAL
+
+#include "detours.h"
+
+#if DETOURS_VERSION != 0x4c0c1   // 0xMAJORcMINORcPATCH
+#error detours.h version mismatch
+#endif
+
+#if _MSC_VER >= 1900
+#pragma warning(pop)
+#endif
+
+#undef ASSERT
+#define ASSERT(x)
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Special macros to handle the case when we are building disassembler for
+//  offline processing.
+//
+
+
+#if defined(DETOURS_X86_OFFLINE_LIBRARY) \
+ || defined(DETOURS_X64_OFFLINE_LIBRARY) \
+ || defined(DETOURS_ARM_OFFLINE_LIBRARY) \
+ || defined(DETOURS_ARM64_OFFLINE_LIBRARY) \
+ || defined(DETOURS_IA64_OFFLINE_LIBRARY)
+
+#undef DETOURS_X64
+#undef DETOURS_X86
+#undef DETOURS_IA64
+#undef DETOURS_ARM
+#undef DETOURS_ARM64
+
+#if defined(DETOURS_X86_OFFLINE_LIBRARY)
+
+#define DetourCopyInstruction   DetourCopyInstructionX86
+#define DetourSetCodeModule     DetourSetCodeModuleX86
+#define CDetourDis              CDetourDisX86
+#define DETOURS_X86
+
+#elif defined(DETOURS_X64_OFFLINE_LIBRARY)
+
+#if !defined(DETOURS_64BIT)
+// Fix this as/if bugs are discovered.
+//#error X64 disassembler can only build for 64-bit.
+#endif
+
+#define DetourCopyInstruction   DetourCopyInstructionX64
+#define DetourSetCodeModule     DetourSetCodeModuleX64
+#define CDetourDis              CDetourDisX64
+#define DETOURS_X64
+
+#elif defined(DETOURS_ARM_OFFLINE_LIBRARY)
+
+#define DetourCopyInstruction   DetourCopyInstructionARM
+#define DetourSetCodeModule     DetourSetCodeModuleARM
+#define CDetourDis              CDetourDisARM
+#define DETOURS_ARM
+
+#elif defined(DETOURS_ARM64_OFFLINE_LIBRARY)
+
+#define DetourCopyInstruction   DetourCopyInstructionARM64
+#define DetourSetCodeModule     DetourSetCodeModuleARM64
+#define CDetourDis              CDetourDisARM64
+#define DETOURS_ARM64
+
+#elif defined(DETOURS_IA64_OFFLINE_LIBRARY)
+
+#define DetourCopyInstruction   DetourCopyInstructionIA64
+#define DetourSetCodeModule     DetourSetCodeModuleIA64
+#define DETOURS_IA64
+
+#else
+
+#error
+
+#endif
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Function:
+//      DetourCopyInstruction(PVOID pDst,
+//                            PVOID *ppDstPool,
+//                            PVOID pSrc,
+//                            PVOID *ppTarget,
+//                            LONG *plExtra)
+//  Purpose:
+//      Copy a single instruction from pSrc to pDst.
+//
+//  Arguments:
+//      pDst:
+//          Destination address for the instruction.  May be NULL in which
+//          case DetourCopyInstruction is used to measure an instruction.
+//          If not NULL then the source instruction is copied to the
+//          destination instruction and any relative arguments are adjusted.
+//      ppDstPool:
+//          Destination address for the end of the constant pool.  The
+//          constant pool works backwards toward pDst.  All memory between
+//          pDst and *ppDstPool must be available for use by this function.
+//          ppDstPool may be NULL if pDst is NULL.
+//      pSrc:
+//          Source address of the instruction.
+//      ppTarget:
+//          Out parameter for any target instruction address pointed to by
+//          the instruction.  For example, a branch or a jump instruction has
+//          a target, but a load or store instruction doesn't.  A target is
+//          another instruction that may be executed as a result of this
+//          instruction.  ppTarget may be NULL.
+//      plExtra:
+//          Out parameter for the number of extra bytes needed by the
+//          instruction to reach the target.  For example, lExtra = 3 if the
+//          instruction had an 8-bit relative offset, but needs a 32-bit
+//          relative offset.
+//
+//  Returns:
+//      Returns the address of the next instruction (the one following in the
+//      source).  By subtracting pSrc from the return value, the caller
+//      can determine the size of the instruction copied.
+//
+//  Comments:
+//      By following the pTarget, the caller can follow alternate
+//      instruction streams.  However, it is not always possible to determine
+//      the target based on static analysis.  For example, the destination of
+//      a jump relative to a register cannot be determined from just the
+//      instruction stream.  The output value, pTarget, can have any of the
+//      following outputs:
+//          DETOUR_INSTRUCTION_TARGET_NONE:
+//              The instruction has no targets.
+//          DETOUR_INSTRUCTION_TARGET_DYNAMIC:
+//              The instruction has a non-deterministic (dynamic) target.
+//              (i.e. the jump is to an address held in a register.)
+//          Address:   The instruction has the specified target.
+//
+//      When copying instructions, DetourCopyInstruction ensures that any
+//      targets remain constant.  It does so by adjusting any IP relative
+//      offsets.
+//
+
+#pragma data_seg(".detourd")
+#pragma const_seg(".detourc")
+
+//////////////////////////////////////////////////// X86 and X64 Disassembler.
+//
+//  Includes full support for all x86 chips prior to the Pentium III, and some newer stuff.
+//
+#if defined(DETOURS_X64) || defined(DETOURS_X86)
+
+class CDetourDis    // Table-driven x86/x64 instruction copier; DetourCopyInstruction creates one per call.
+{
+  public:
+    CDetourDis(_Out_opt_ PBYTE *ppbTarget,
+               _Out_opt_ LONG *plExtra);
+
+    PBYTE   CopyInstruction(PBYTE pbDst, PBYTE pbSrc); // NULL if pbSrc is NULL; else the opcode handler's result.
+    static BOOL SanityCheckSystem();
+    static BOOL SetCodeModule(PBYTE pbBeg, PBYTE pbEnd, BOOL fLimitReferencesToModule);
+
+  public:
+    struct COPYENTRY;
+    typedef const COPYENTRY * REFCOPYENTRY;
+
+    typedef PBYTE (CDetourDis::* COPYFUNC)(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // Per-opcode handler.
+
+    // nFlagBits flags.
+    enum {
+        DYNAMIC     = 0x1u,
+        ADDRESS     = 0x2u,
+        NOENLARGE   = 0x4u,
+        RAX         = 0x8u,
+    };
+
+    // ModR/M Flags
+    enum {
+        SIB         = 0x10u,
+        RIP         = 0x20u,
+        NOTSIB      = 0x0fu,
+    };
+
+    struct COPYENTRY
+    {
+        // Many of these fields are often ignored. See ENTRY_DataIgnored.
+        ULONG       nOpcode         : 8;    // Opcode (ignored)
+        ULONG       nFixedSize      : 4;    // Fixed size of opcode
+        ULONG       nFixedSize16    : 4;    // Fixed size when 16 bit operand
+        ULONG       nModOffset      : 4;    // Offset to mod/rm byte (0=none)
+        ULONG       nRelOffset      : 4;    // Offset to relative target.
+        ULONG       nFlagBits       : 4;    // Flags for DYNAMIC, etc.
+        COPYFUNC    pfCopy;                 // Function pointer.
+    };
+
+  protected:
+// These macros define common uses of nFixedSize, nFixedSize16, nModOffset, nRelOffset, nFlagBits, pfCopy.
+#define ENTRY_DataIgnored           0, 0, 0, 0, 0,
+#define ENTRY_CopyBytes1            1, 1, 0, 0, 0, &CDetourDis::CopyBytes
+#ifdef DETOURS_X64
+#define ENTRY_CopyBytes1Address     9, 5, 0, 0, ADDRESS, &CDetourDis::CopyBytes
+#else
+#define ENTRY_CopyBytes1Address     5, 3, 0, 0, ADDRESS, &CDetourDis::CopyBytes
+#endif
+#define ENTRY_CopyBytes1Dynamic     1, 1, 0, 0, DYNAMIC, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2            2, 2, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2Jump        ENTRY_DataIgnored &CDetourDis::CopyBytesJump
+#define ENTRY_CopyBytes2CantJump    2, 2, 0, 1, NOENLARGE, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2Dynamic     2, 2, 0, 0, DYNAMIC, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3            3, 3, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3Dynamic     3, 3, 0, 0, DYNAMIC, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3Or5         5, 3, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3Or5Dynamic  5, 3, 0, 0, DYNAMIC, &CDetourDis::CopyBytes // x86 only
+#ifdef DETOURS_X64
+#define ENTRY_CopyBytes3Or5Rax      5, 3, 0, 0, RAX, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3Or5Target   5, 5, 0, 1, 0, &CDetourDis::CopyBytes
+#else
+#define ENTRY_CopyBytes3Or5Rax      5, 3, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3Or5Target   5, 3, 0, 1, 0, &CDetourDis::CopyBytes
+#endif
+#define ENTRY_CopyBytes4            4, 4, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes5            5, 5, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes5Or7Dynamic  7, 5, 0, 0, DYNAMIC, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes7            7, 7, 0, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2Mod         2, 2, 1, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2ModDynamic  2, 2, 1, 0, DYNAMIC, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2Mod1        3, 3, 1, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes2ModOperand  6, 4, 1, 0, 0, &CDetourDis::CopyBytes
+#define ENTRY_CopyBytes3Mod         3, 3, 2, 0, 0, &CDetourDis::CopyBytes // SSE3 0F 38 opcode modrm
+#define ENTRY_CopyBytes3Mod1        4, 4, 2, 0, 0, &CDetourDis::CopyBytes // SSE3 0F 3A opcode modrm .. imm8
+#define ENTRY_CopyBytesPrefix       ENTRY_DataIgnored &CDetourDis::CopyBytesPrefix
+#define ENTRY_CopyBytesSegment      ENTRY_DataIgnored &CDetourDis::CopyBytesSegment
+#define ENTRY_CopyBytesRax          ENTRY_DataIgnored &CDetourDis::CopyBytesRax
+#define ENTRY_CopyF2                ENTRY_DataIgnored &CDetourDis::CopyF2
+#define ENTRY_CopyF3                ENTRY_DataIgnored &CDetourDis::CopyF3   // 32bit x86 only
+#define ENTRY_Copy0F                ENTRY_DataIgnored &CDetourDis::Copy0F
+#define ENTRY_Copy0F78              ENTRY_DataIgnored &CDetourDis::Copy0F78
+#define ENTRY_Copy0F00              ENTRY_DataIgnored &CDetourDis::Copy0F00 // 32bit x86 only
+#define ENTRY_Copy0FB8              ENTRY_DataIgnored &CDetourDis::Copy0FB8 // 32bit x86 only
+#define ENTRY_Copy66                ENTRY_DataIgnored &CDetourDis::Copy66
+#define ENTRY_Copy67                ENTRY_DataIgnored &CDetourDis::Copy67
+#define ENTRY_CopyF6                ENTRY_DataIgnored &CDetourDis::CopyF6
+#define ENTRY_CopyF7                ENTRY_DataIgnored &CDetourDis::CopyF7
+#define ENTRY_CopyFF                ENTRY_DataIgnored &CDetourDis::CopyFF
+#define ENTRY_CopyVex2              ENTRY_DataIgnored &CDetourDis::CopyVex2
+#define ENTRY_CopyVex3              ENTRY_DataIgnored &CDetourDis::CopyVex3
+#define ENTRY_CopyEvex              ENTRY_DataIgnored &CDetourDis::CopyEvex // 62, 3 byte payload, then normal with implied prefixes like vex
+#define ENTRY_CopyXop               ENTRY_DataIgnored &CDetourDis::CopyXop   // 0x8F ... POP /0 or AMD XOP
+#define ENTRY_CopyBytesXop          5, 5, 4, 0, 0, &CDetourDis::CopyBytes // 0x8F xop1 xop2 opcode modrm
+#define ENTRY_CopyBytesXop1         6, 6, 4, 0, 0, &CDetourDis::CopyBytes // 0x8F xop1 xop2 opcode modrm ... imm8
+#define ENTRY_CopyBytesXop4         9, 9, 4, 0, 0, &CDetourDis::CopyBytes // 0x8F xop1 xop2 opcode modrm ... imm32
+#define ENTRY_Invalid               ENTRY_DataIgnored &CDetourDis::Invalid
+#define ENTRY_End                   ENTRY_DataIgnored NULL
+
+    PBYTE CopyBytes(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyBytesPrefix(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyBytesSegment(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyBytesRax(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyBytesJump(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+
+    PBYTE Invalid(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+
+    PBYTE AdjustTarget(PBYTE pbDst, PBYTE pbSrc, UINT cbOp,
+                       UINT cbTargetOffset, UINT cbTargetSize);
+
+  protected:
+    PBYTE Copy0F(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE Copy0F00(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // x86 only sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6/dynamic invalid/7
+    PBYTE Copy0F78(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // vmread, 66/extrq/ib/ib, F2/insertq/ib/ib
+    PBYTE Copy0FB8(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // jmpe or F3/popcnt
+    PBYTE Copy66(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE Copy67(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyF2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyF3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // x86 only
+    PBYTE CopyF6(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyF7(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyVex2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyVex3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p);
+    PBYTE CopyEvex(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+    PBYTE CopyXop(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
+
+  protected:
+    static const COPYENTRY  s_rceCopyTable[257];    // Indexed by the first instruction byte (see CopyInstruction).
+    static const COPYENTRY  s_rceCopyTable0F[257];
+    static const BYTE       s_rbModRm[256];         // Indexed by the ModR/M byte; holds the "ModR/M Flags" above.
+    static PBYTE            s_pbModuleBeg;
+    static PBYTE            s_pbModuleEnd;
+    static BOOL             s_fLimitReferencesToModule;
+
+  protected:
+    BOOL                m_bOperandOverride; // When set, CopyBytes uses nFixedSize16.
+    BOOL                m_bAddressOverride; // Same, but only for ADDRESS-flagged entries.
+    BOOL                m_bRaxOverride; // AMD64 only
+    BOOL                m_bVex;
+    BOOL                m_bEvex;
+    BOOL                m_bF2;
+    BOOL                m_bF3; // x86 only
+    BYTE                m_nSegmentOverride; // Prefix byte recorded by CopyBytesSegment.
+
+    PBYTE *             m_ppbTarget;    // Caller's out-parameter, or &m_pbScratchTarget if none given.
+    LONG *              m_plExtra;      // Caller's out-parameter, or &m_lScratchExtra if none given.
+
+    LONG                m_lScratchExtra;
+    PBYTE               m_pbScratchTarget;
+    BYTE                m_rbScratchDst[64]; // matches or exceeds rbCode
+};
+
+PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst,
+                                   _Inout_opt_ PVOID *ppDstPool,
+                                   _In_ PVOID pSrc,
+                                   _Out_opt_ PVOID *ppTarget,
+                                   _Out_opt_ LONG *plExtra)
+{
+    // There is no constant pool on x86 & x64, so ppDstPool is deliberately ignored.
+    UNREFERENCED_PARAMETER(ppDstPool);
+
+    return CDetourDis((PBYTE*)ppTarget, plExtra).CopyInstruction((PBYTE)pDst, (PBYTE)pSrc);
+}
+
+/////////////////////////////////////////////////////////// Disassembler Code.
+//
+CDetourDis::CDetourDis(_Out_opt_ PBYTE *ppbTarget, _Out_opt_ LONG *plExtra)
+{ // Clears all prefix state and binds the two out-parameters (or their scratch stand-ins).
+    m_bOperandOverride = FALSE;
+    m_bAddressOverride = FALSE;
+    m_bRaxOverride = FALSE;
+    m_bF2 = FALSE;
+    m_bF3 = FALSE;
+    m_bVex = FALSE;
+    m_bEvex = FALSE;
+    m_nSegmentOverride = 0;     // Previously left uninitialized; 0 = no segment prefix recorded.
+
+    m_ppbTarget = ppbTarget ? ppbTarget : &m_pbScratchTarget;   // NULL from the caller means
+    m_plExtra = plExtra ? plExtra : &m_lScratchExtra;           // "write into my own scratch".
+    *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_NONE;
+    *m_plExtra = 0;
+}
+
+PBYTE CDetourDis::CopyInstruction(PBYTE pbDst, PBYTE pbSrc)
+{
+    // A missing source instruction cannot be decoded or copied.
+    if (pbSrc == NULL) {
+        SetLastError(ERROR_INVALID_DATA);
+        return NULL;
+    }
+
+    // No destination supplied: copy into the internal scratch buffer instead.
+    PBYTE const pbOut = (pbDst != NULL) ? pbDst : m_rbScratchDst;
+
+    // Dispatch on the first byte.  The table entry's handler works out the
+    // instruction size, performs the copy, and records the target, if any.
+    //
+    REFCOPYENTRY const pEntry = &s_rceCopyTable[pbSrc[0]];
+    COPYFUNC const pfCopy = pEntry->pfCopy;
+    return (this->*pfCopy)(pEntry, pbOut, pbSrc);
+}
+
+PBYTE CDetourDis::CopyBytes(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{ // Generic handler: sizes the instruction from its table entry, copies it, then fixes up any relative target.
+    UINT nBytesFixed;   // Length taken from the table entry, before ModR/M-driven extras.
+
+    if (m_bVex || m_bEvex)
+    {
+        ASSERT(pEntry->nFlagBits == 0);     // NB: ASSERT is defined to nothing in this file.
+        ASSERT(pEntry->nFixedSize == pEntry->nFixedSize16);
+    }
+
+    UINT const nModOffset = pEntry->nModOffset;
+    UINT const nFlagBits = pEntry->nFlagBits;
+    UINT const nFixedSize = pEntry->nFixedSize;
+    UINT const nFixedSize16 = pEntry->nFixedSize16;
+
+    if (nFlagBits & ADDRESS) {  // ADDRESS entries follow the address override, not the operand override.
+        nBytesFixed = m_bAddressOverride ? nFixedSize16 : nFixedSize;
+    }
+#ifdef DETOURS_X64
+    // REX.W trumps 66
+    else if (m_bRaxOverride) {  // RAX-flagged entries grow by 4 bytes; others keep nFixedSize.
+        nBytesFixed = nFixedSize + ((nFlagBits & RAX) ? 4 : 0);
+    }
+#endif
+    else {
+        nBytesFixed = m_bOperandOverride ? nFixedSize16 : nFixedSize;
+    }
+
+    UINT nBytes = nBytesFixed;
+    UINT nRelOffset = pEntry->nRelOffset;
+    UINT cbTarget = nBytes - nRelOffset;    // Relative operand size: from nRelOffset to the end of the fixed part.
+    if (nModOffset > 0) {
+        ASSERT(nRelOffset == 0);
+        BYTE const bModRm = pbSrc[nModOffset];
+        BYTE const bFlags = s_rbModRm[bModRm];  // Low nibble (NOTSIB) = extra byte count; SIB/RIP are flag bits.
+
+        nBytes += bFlags & NOTSIB;
+
+        if (bFlags & SIB) {
+            BYTE const bSib = pbSrc[nModOffset + 1];
+
+            if ((bSib & 0x07) == 0x05) {    // Low three SIB bits == 5: add bytes chosen by the top two ModR/M bits.
+                if ((bModRm & 0xc0) == 0x00) {
+                    nBytes += 4;
+                }
+                else if ((bModRm & 0xc0) == 0x40) {
+                    nBytes += 1;
+                }
+                else if ((bModRm & 0xc0) == 0x80) {
+                    nBytes += 4;
+                }
+            }
+            cbTarget = nBytes - nRelOffset;
+        }
+#ifdef DETOURS_X64
+        else if (bFlags & RIP) {    // Treat the 4 bytes right after the ModR/M byte as the relative operand.
+            nRelOffset = nModOffset + 1;
+            cbTarget = 4;
+        }
+#endif
+    }
+    CopyMemory(pbDst, pbSrc, nBytes);   // Verbatim copy first; AdjustTarget below works on this copy.
+
+    if (nRelOffset) {
+        *m_ppbTarget = AdjustTarget(pbDst, pbSrc, nBytes, nRelOffset, cbTarget);
+#ifdef DETOURS_X64
+        if (pEntry->nRelOffset == 0) {
+            // This is a data target, not a code target, so we shouldn't return it.
+            *m_ppbTarget = NULL;
+        }
+#endif
+    }
+    if (nFlagBits & NOENLARGE) {    // NOENLARGE entries report the extra-byte count with its sign flipped.
+        *m_plExtra = -*m_plExtra;
+    }
+    if (nFlagBits & DYNAMIC) {      // Overrides whatever target was recorded above.
+        *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+    }
+    return pbSrc + nBytes;          // First byte after the source instruction.
+}
+
+PBYTE CDetourDis::CopyBytesPrefix(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Emit the one byte prefix, then dispatch on the byte that follows it.
+    *pbDst = *pbSrc;
+    pEntry = &s_rceCopyTable[*(pbSrc + 1)];
+    return (this->*(pEntry->pfCopy))(pEntry, &pbDst[1], &pbSrc[1]);
+}
+
+PBYTE CDetourDis::CopyBytesSegment(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+{   // Remember which segment-override byte was seen, then handle it like any other prefix.
+    m_nSegmentOverride = *pbSrc;
+    return this->CopyBytesPrefix(0, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyBytesRax(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+{   // AMD64 only: bit 3 of the prefix byte turns on the Rax override; the byte is then copied as a prefix.
+    if ((*pbSrc & 0x8) != 0) {
+        m_bRaxOverride = TRUE;      // left untouched when bit 3 is clear
+    }
+    return this->CopyBytesPrefix(0, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyBytesJump(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Re-encodes a 2 byte jump (0xEB, or 0x70-0x7F) at pbSrc with a 32-bit offset at pbDst; returns pbSrc + 2.
+    (void)pEntry;
+
+    PVOID pvSrcAddr = &pbSrc[1];                    // the signed 8-bit offset follows the opcode byte
+    PVOID pvDstAddr = NULL;
+    LONG_PTR nOldOffset = (LONG_PTR)*(signed char*&)pvSrcAddr;
+    LONG_PTR nNewOffset = 0;
+    // The jump target is relative to the end of the 2 byte source instruction.
+    *m_ppbTarget = pbSrc + 2 + nOldOffset;
+
+    if (pbSrc[0] == 0xeb) {                         // 0xEB becomes 0xE9 + 32-bit offset (5 bytes)
+        pbDst[0] = 0xe9;
+        pvDstAddr = &pbDst[1];
+        nNewOffset = nOldOffset - ((pbDst - pbSrc) + 3);    // 3 == 5 byte copy - 2 byte original
+        *(UNALIGNED LONG*&)pvDstAddr = (LONG)nNewOffset;
+
+        *m_plExtra = 3;                             // the copy is 3 bytes longer than the original
+        return pbSrc + 2;
+    }
+
+    ASSERT(pbSrc[0] >= 0x70 && pbSrc[0] <= 0x7f);
+    // 0x7x becomes 0x0F 0x8x + 32-bit offset (6 bytes).
+    pbDst[0] = 0x0f;
+    pbDst[1] = 0x80 | (pbSrc[0] & 0xf);
+    pvDstAddr = &pbDst[2];
+    nNewOffset = nOldOffset - ((pbDst - pbSrc) + 4);        // 4 == 6 byte copy - 2 byte original
+    *(UNALIGNED LONG*&)pvDstAddr = (LONG)nNewOffset;
+
+    *m_plExtra = 4;                                 // the copy is 4 bytes longer than the original
+    return pbSrc + 2;
+}
+
+PBYTE CDetourDis::AdjustTarget(PBYTE pbDst, PBYTE pbSrc, UINT cbOp,
+                               UINT cbTargetOffset, UINT cbTargetSize)
+{   // Rebases the relative offset of an instruction copied from pbSrc to pbDst; returns the original target.
+    PBYTE pbTarget = NULL;
+#if 1 // fault injection to test test code
+#if defined(DETOURS_X64)
+    typedef LONGLONG T;
+#else
+    typedef LONG T;
+#endif
+    T nOldOffset;
+    T nNewOffset;
+    PVOID pvTargetAddr = &pbDst[cbTargetOffset];    // the offset field inside the copy
+    // Read the old offset, cbTargetSize bytes wide, as a signed value.
+    switch (cbTargetSize) {
+      case 1:
+        nOldOffset = *(signed char*&)pvTargetAddr;
+        break;
+      case 2:
+        nOldOffset = *(UNALIGNED SHORT*&)pvTargetAddr;
+        break;
+      case 4:
+        nOldOffset = *(UNALIGNED LONG*&)pvTargetAddr;
+        break;
+#if defined(DETOURS_X64)
+      case 8:
+        nOldOffset = *(UNALIGNED LONGLONG*&)pvTargetAddr;
+        break;
+#endif
+      default:
+        ASSERT(!"cbTargetSize is invalid.");
+        nOldOffset = 0;
+        break;
+    }
+    // The target is relative to the end of the cbOp byte source instruction; the new offset compensates for the move.
+    pbTarget = pbSrc + cbOp + nOldOffset;
+    nNewOffset = nOldOffset - (T)(pbDst - pbSrc);
+    // Store the new offset; for 1/2/4 byte fields that cannot hold it, *m_plExtra gets sizeof(ULONG) minus the field size.
+    switch (cbTargetSize) {
+      case 1:
+        *(CHAR*&)pvTargetAddr = (CHAR)nNewOffset;
+        if (nNewOffset < SCHAR_MIN || nNewOffset > SCHAR_MAX) {
+            *m_plExtra = sizeof(ULONG) - 1;
+        }
+        break;
+      case 2:
+        *(UNALIGNED SHORT*&)pvTargetAddr = (SHORT)nNewOffset;
+        if (nNewOffset < SHRT_MIN || nNewOffset > SHRT_MAX) {
+            *m_plExtra = sizeof(ULONG) - 2;
+        }
+        break;
+      case 4:
+        *(UNALIGNED LONG*&)pvTargetAddr = (LONG)nNewOffset;
+        if (nNewOffset < LONG_MIN || nNewOffset > LONG_MAX) {
+            *m_plExtra = sizeof(ULONG) - 4;
+        }
+        break;
+#if defined(DETOURS_X64)
+      case 8:
+        *(UNALIGNED LONGLONG*&)pvTargetAddr = nNewOffset;
+        break;
+#endif
+    }
+#ifdef DETOURS_X64
+    // When we are only computing size, source and dest can be
+    // far apart, distance not encodable in 32bits. Ok.
+    // At least still check the lower 32bits.
+
+    if (pbDst >= m_rbScratchDst && pbDst < (sizeof(m_rbScratchDst) + m_rbScratchDst)) {
+        ASSERT((((size_t)pbDst + cbOp + nNewOffset) & 0xFFFFFFFF) == (((size_t)pbTarget) & 0xFFFFFFFF));
+    }
+    else
+#endif
+    {
+        ASSERT(pbDst + cbOp + nNewOffset == pbTarget);  // the rebased copy must reach the same target
+    }
+#endif
+    return pbTarget;
+}
+
+PBYTE CDetourDis::Invalid(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Handler for bytes that do not decode: assert, copy nothing, step over one source byte.
+    ASSERT(!"Invalid Instruction");
+    (void)pEntry;   // unused
+    (void)pbDst;    // unused
+    return &pbSrc[1];
+}
+
+////////////////////////////////////////////////////// Individual Bytes Codes.
+//
+PBYTE CDetourDis::Copy0F(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Copy the escape byte, then dispatch on the following byte through s_rceCopyTable0F.
+    *pbDst = *pbSrc;
+    pEntry = &s_rceCopyTable0F[*(pbSrc + 1)];
+    return (this->*(pEntry->pfCopy))(pEntry, &pbDst[1], &pbSrc[1]);
+}
+
+PBYTE CDetourDis::Copy0F78(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+{   // 0F 78 is vmread, or extrq with a 66 prefix, or insertq with an F2 prefix.
+    static const COPYENTRY vmread = { 0x78, ENTRY_CopyBytes2Mod };
+    static const COPYENTRY extrq_insertq = { 0x78, ENTRY_CopyBytes4 };
+
+    ASSERT(!(m_bF2 && m_bOperandOverride));
+
+    // insertq (and presumably, despite the documentation, extrq) requires mode == 11; this is not checked.
+    // With mode == 11 the mod/rm byte is a single byte; together with the two immediate bytes and
+    // the 0x78 opcode that makes 4 bytes. The 66/F2/0F bytes are accounted for elsewhere.
+    REFCOPYENTRY pChosen = &vmread;
+    if (m_bF2 || m_bOperandOverride) {
+        pChosen = &extrq_insertq;
+    }
+
+    return (this->*(pChosen->pfCopy))(pChosen, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::Copy0F00(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+{   // 0F 00 group, selected by bits 5..3 of pbSrc[1]. jmpe (/6) is 32bit x86 only.
+    // The sizes are the same either way, but jmpe is marked as "dynamic".
+    static const COPYENTRY other = { 0xB8, ENTRY_CopyBytes2Mod }; // sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6 invalid/7
+    static const COPYENTRY jmpe = { 0xB8, ENTRY_CopyBytes2ModDynamic }; // jmpe/6 x86-on-IA64 syscalls
+    BYTE const bReg = (BYTE)((pbSrc[1] >> 3) & 7);
+    if (bReg == 6) {
+        return (this->*jmpe.pfCopy)(&jmpe, pbDst, pbSrc);
+    }
+    return (this->*other.pfCopy)(&other, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::Copy0FB8(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+{   // With an F3 prefix 0F B8 is popcnt; otherwise it is jmpe (32bit x86 only).
+    static const COPYENTRY popcnt = { 0xB8, ENTRY_CopyBytes2Mod };
+    static const COPYENTRY jmpe = { 0xB8, ENTRY_CopyBytes3Or5Dynamic }; // jmpe x86-on-IA64 syscalls
+    if (m_bF3) {
+        return (this->*popcnt.pfCopy)(&popcnt, pbDst, pbSrc);
+    }
+    return (this->*jmpe.pfCopy)(&jmpe, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::Copy66(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Operand-size override prefix: note it, then treat the byte as an ordinary prefix.
+    this->m_bOperandOverride = TRUE;
+    return this->CopyBytesPrefix(pEntry, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::Copy67(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Address-size override prefix: note it, then treat the byte as an ordinary prefix.
+    this->m_bAddressOverride = TRUE;
+    return this->CopyBytesPrefix(pEntry, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyF2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // F2 prefix: remembered in m_bF2 (consulted by Copy0F78), then handled as an ordinary prefix.
+    this->m_bF2 = TRUE;
+    return this->CopyBytesPrefix(pEntry, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyF3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // x86 only. F3 prefix: remembered in m_bF3 (consulted by Copy0FB8), then handled as an ordinary prefix.
+    this->m_bF3 = TRUE;
+    return this->CopyBytesPrefix(pEntry, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyF6(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Opcode F6 group: the reg field (bits 543) of the ModR/M byte selects the operation.
+    static const COPYENTRY ceTest = { 0xf6, ENTRY_CopyBytes2Mod1 };
+    static const COPYENTRY ceOther = { 0xf6, ENTRY_CopyBytes2Mod };
+    (void)pEntry;
+
+    // TEST BYTE /0 uses ceTest; every other reg value uses ceOther:
+    // NOT /2
+    // NEG /3
+    // MUL /4
+    // IMUL /5
+    // DIV /6
+    // IDIV /7
+    REFCOPYENTRY pChosen = &ceOther;
+    if ((pbSrc[1] & 0x38) == 0x00) {
+        pChosen = &ceTest;
+    }
+    return (this->*(pChosen->pfCopy))(pChosen, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyF7(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Opcode F7 group: the reg field (bits 543) of the ModR/M byte selects the operation.
+    static const COPYENTRY ceTest = { 0xf7, ENTRY_CopyBytes2ModOperand };
+    static const COPYENTRY ceOther = { 0xf7, ENTRY_CopyBytes2Mod };
+    (void)pEntry;
+
+    // TEST WORD /0 uses ceTest; every other reg value uses ceOther:
+    // NOT /2
+    // NEG /3
+    // MUL /4
+    // IMUL /5
+    // DIV /6
+    // IDIV /7
+    REFCOPYENTRY pChosen = &ceOther;
+    if ((pbSrc[1] & 0x38) == 0x00) {
+        pChosen = &ceTest;
+    }
+    return (this->*(pChosen->pfCopy))(pChosen, pbDst, pbSrc);
+}
+
+PBYTE CDetourDis::CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
+{   // Opcode FF group: INC /0, DEC /1, CALL /2, CALL /3, JMP /4, JMP /5, PUSH /6, invalid /7.
+    // The instruction is copied as a plain ModR/M instruction; afterwards *m_ppbTarget is set:
+    //  - ModR/M 0x15 or 0x25 (CALL [] / JMP [] through a pointer): the pointer is read and its
+    //    value is reported, unless a disallowed segment override is active or module limiting
+    //    is on and the pointer lies outside the module range; then the target is dynamic.
+    //  - any other CALL or JMP form: the target is dynamic.
+    //  - all other operations: *m_ppbTarget is not touched here.
+    // Returns whatever the underlying copy returned.
+    (void)pEntry;
+
+    static const COPYENTRY ce = { 0xff, ENTRY_CopyBytes2Mod };
+    PBYTE pbOut = (this->*ce.pfCopy)(&ce, pbDst, pbSrc);
+
+    BYTE const b1 = pbSrc[1];
+
+    if (0x15 == b1 || 0x25 == b1) {         // CALL [], JMP []
+#ifdef DETOURS_X64
+        // All segments but FS and GS are equivalent.
+        if (m_nSegmentOverride != 0x64 && m_nSegmentOverride != 0x65)
+#else
+        if (m_nSegmentOverride == 0 || m_nSegmentOverride == 0x2E)
+#endif
+        {
+#ifdef DETOURS_X64
+            INT32 offset = *(UNALIGNED INT32*)&pbSrc[2];            // 32-bit displacement after ModR/M
+            PBYTE *ppbTarget = (PBYTE *)(pbSrc + 6 + offset);       // relative to the end of the 6 byte instruction
+#else
+            PBYTE *ppbTarget = (PBYTE *)(SIZE_T)*(UNALIGNED ULONG*)&pbSrc[2];   // absolute 32-bit address
+#endif
+            if (s_fLimitReferencesToModule &&
+                (ppbTarget < (PVOID)s_pbModuleBeg || ppbTarget >= (PVOID)s_pbModuleEnd)) {
+
+                *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+            }
+            else {
+                // This can access violate on random bytes. Use DetourSetCodeModule.
+                *m_ppbTarget = *ppbTarget;
+            }
+        }
+        else {
+            *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+        }
+    }
+    else if (0x10 == (0x30 & b1) || // CALL /2 or /3  --> reg(bits 543) of ModR/M == 010 or 011
+             0x20 == (0x30 & b1)) { // JMP /4 or /5 --> reg(bits 543) of ModR/M == 100 or 101
+        *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+    }
+    return pbOut;
+}
+
+PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p)
+// Shared tail of VEX/EVEX decoding: p is the implied prefix, m the opcode map (1 = 0F, 2 = 0F 38, 3 = 0F 3A).
+// m comes first in the hope that pbDst/pbSrc stay in the registers they arrived in.
+{
+    static const COPYENTRY ceF38 = { 0x38, ENTRY_CopyBytes2Mod };
+    static const COPYENTRY ceF3A = { 0x3A, ENTRY_CopyBytes2Mod1 };
+    static const COPYENTRY ceInvalid = { 0xC4, ENTRY_Invalid };
+
+    BYTE const bImplied = (BYTE)(p & 3);    // 0 = none, 1 = 66, 2 = F3, 3 = F2
+    if (bImplied == 1) { m_bOperandOverride = TRUE; }
+    if (bImplied == 2) { m_bF3 = TRUE; }
+    if (bImplied == 3) { m_bF2 = TRUE; }
+
+    if (m == 1) {
+        REFCOPYENTRY pMapEntry = &s_rceCopyTable0F[pbSrc[0]];
+        return (this->*(pMapEntry->pfCopy))(pMapEntry, pbDst, pbSrc);
+    }
+    if (m == 2) {
+        return CopyBytes(&ceF38, pbDst, pbSrc);
+    }
+    if (m == 3) {
+        return CopyBytes(&ceF3A, pbDst, pbSrc);
+    }
+    return Invalid(&ceInvalid, pbDst, pbSrc);   // any other map value
+}
+
+PBYTE CDetourDis::CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc)
+// Common tail for both VEX forms; pbSrc points just past the VEX prefix bytes.
+// m comes first in the hope that pbDst/pbSrc stay in the registers they arrived in.
+{
+    PBYTE const pbLastPrefix = pbSrc - 1;   // p is the low two bits of the last prefix byte
+    m_bVex = TRUE;
+    return this->CopyVexEvexCommon(m, pbDst, pbSrc, (BYTE)(*pbLastPrefix & 3));
+}
+
+
+PBYTE CDetourDis::CopyVex3(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+// 3 byte VEX prefix 0xC4; on 32bit x86 the same opcode is LES unless the top two bits of the next byte are both set.
+{
+#ifdef DETOURS_X86
+    const static COPYENTRY ceLES = { 0xC4, ENTRY_CopyBytes2Mod };
+    if ((pbSrc[1] & 0xC0) != 0xC0) {            // not a VEX prefix: copy as LES
+        REFCOPYENTRY pEntry = &ceLES;
+        return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc);
+    }
+#endif
+    pbDst[0] = pbSrc[0];                        // copy the three prefix bytes verbatim
+    pbDst[1] = pbSrc[1];
+    pbDst[2] = pbSrc[2];
+#ifdef DETOURS_X64
+    m_bRaxOverride |= !!(pbSrc[2] & 0x80); // w in last byte, see CopyBytesRax
+#else
+    //
+    // TODO
+    //
+    // Usually the VEX.W bit changes the size of a general purpose register and is ignored for 32bit.
+    // Sometimes it is an opcode extension.
+    // Look in the Intel manual, in the instruction-by-instruction reference, for ".W1",
+    // without nearby wording saying it is ignored for 32bit.
+    // For example: "VFMADD132PD/VFMADD213PD/VFMADD231PD Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
+    //
+    // Then, go through each such case and determine if W0 vs. W1 affect the size of the instruction. Probably not.
+    // Look for the same encoding but with "W1" changed to "W0".
+    // Here is one such pairing:
+    // VFMADD132PD/VFMADD213PD/VFMADD231PD Fused Multiply-Add of Packed Double-Precision Floating-Point Values
+    //
+    // VEX.DDS.128.66.0F38.W1 98 /r A V/V FMA Multiply packed double-precision floating-point values
+    // from xmm0 and xmm2/mem, add to xmm1 and
+    // put result in xmm0.
+    // VFMADD132PD xmm0, xmm1, xmm2/m128
+    //
+    // VFMADD132PS/VFMADD213PS/VFMADD231PS Fused Multiply-Add of Packed Single-Precision Floating-Point Values
+    // VEX.DDS.128.66.0F38.W0 98 /r A V/V FMA Multiply packed single-precision floating-point values
+    // from xmm0 and xmm2/mem, add to xmm1 and put
+    // result in xmm0.
+    // VFMADD132PS xmm0, xmm1, xmm2/m128
+    //
+#endif
+    return CopyVexCommon(pbSrc[1] & 0x1F, pbDst + 3, pbSrc + 3);    // low 5 bits of byte 1 are passed on as m
+}
+
+PBYTE CDetourDis::CopyVex2(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+// Opcode 0xC5: the 2 byte VEX prefix, or LDS on 32bit x86 when the top two bits of the next byte are not both set.
+{
+#ifdef DETOURS_X86
+    static const COPYENTRY ceLDS = { 0xC5, ENTRY_CopyBytes2Mod };
+    BYTE const bTopBits = (BYTE)(pbSrc[1] & 0xC0);
+    if (bTopBits != 0xC0) {
+        return (this->*ceLDS.pfCopy)(&ceLDS, pbDst, pbSrc);
+    }
+#endif
+    pbDst[0] = pbSrc[0];
+    pbDst[1] = pbSrc[1];
+    return CopyVexCommon(1, &pbDst[2], &pbSrc[2]);  // the 2 byte form always passes m == 1
+}
+
+PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+// Opcode 0x62: EVEX prefix (0x62 plus a 3 byte payload) with implied prefixes like VEX.
+// On 32bit x86 the byte is BOUND /r unless the top two bits of the first payload byte are both set.
+{
+    // NOTE: Intel and Wikipedia number the payload bytes differently.
+    // Intel says 0-2, Wikipedia says 1-3. The names below count from 0.
+    static const COPYENTRY ceInvalid = { 0x62, ENTRY_Invalid };
+#ifdef DETOURS_X86
+    static const COPYENTRY ceBound = { 0x62, ENTRY_CopyBytes2Mod };
+#endif
+
+    BYTE const bPayload0 = pbSrc[1];
+#ifdef DETOURS_X86
+    if ((bPayload0 & 0xC0) != 0xC0) {
+        return CopyBytes(&ceBound, pbDst, pbSrc);
+    }
+#endif
+
+    // Bits 2 and 3 of payload byte 0 must be clear.
+    if ((bPayload0 & 0x0C) != 0) {
+        return Invalid(&ceInvalid, pbDst, pbSrc);
+    }
+
+    // Bit 2 of payload byte 1 must be set.
+    BYTE const bPayload1 = pbSrc[2];
+    if ((bPayload1 & 0x04) == 0) {
+        return Invalid(&ceInvalid, pbDst, pbSrc);
+    }
+
+    // Copy the 4 byte prefix with one unaligned store and note that EVEX is active.
+    *(UNALIGNED ULONG *)pbDst = *(UNALIGNED ULONG*)pbSrc;
+    m_bEvex = TRUE;
+#ifdef DETOURS_X64
+    m_bRaxOverride |= !!(bPayload1 & 0x80); // w
+#endif
+    return CopyVexEvexCommon(bPayload0 & 3u, pbDst + 4, pbSrc + 4, bPayload1 & 3u);
+}
+
+PBYTE CDetourDis::CopyXop(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
+/* Opcode 0x8F: POP /0, or the 3 byte AMD XOP prefix.
+byte0: 0x8F
+byte1: RXBmmmmm
+byte2: WvvvvLpp
+byte3: opcode
+mmmmm >= 8 for XOP, else pop
+mmmmm only otherwise defined for 8, 9, A.
+pp is like VEX but only instructions with 0 are defined
+*/
+{
+    static const COPYENTRY cePop = { 0x8F, ENTRY_CopyBytes2Mod };
+    static const COPYENTRY ceXop = { 0x8F, ENTRY_CopyBytesXop };
+    static const COPYENTRY ceXop1 = { 0x8F, ENTRY_CopyBytesXop1 };
+    static const COPYENTRY ceXop4 = { 0x8F, ENTRY_CopyBytesXop4 };
+
+    BYTE const m = (BYTE)(pbSrc[1] & 0x1F);
+    ASSERT(m <= 10);
+
+    // Any value other than 8, 9 or 10 is copied as pop.
+    REFCOPYENTRY pChosen = &cePop;
+    if (m == 8) {
+        pChosen = &ceXop1;      // modrm with 8bit immediate
+    }
+    else if (m == 9) {
+        pChosen = &ceXop;       // modrm with no immediate
+    }
+    else if (m == 10) {
+        pChosen = &ceXop4;      // modrm with 32bit immediate
+    }
+
+    return CopyBytes(pChosen, pbDst, pbSrc);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+PBYTE CDetourDis::s_pbModuleBeg = NULL;                     // start of the module range (set by SetCodeModule)
+PBYTE CDetourDis::s_pbModuleEnd = (PBYTE)~(ULONG_PTR)0;     // end of the module range; initially all address bits set
+BOOL CDetourDis::s_fLimitReferencesToModule = FALSE;        // when TRUE, CopyFF does not read pointers outside the range
+
+BOOL CDetourDis::SetCodeModule(PBYTE pbBeg, PBYTE pbEnd, BOOL fLimitReferencesToModule)
+{   // Records pbBeg/pbEnd and the limit flag; returns FALSE and changes nothing if pbEnd is below pbBeg.
+    BOOL const fRangeOk = !(pbEnd < pbBeg);
+
+    if (fRangeOk) {
+        s_pbModuleBeg = pbBeg;
+        s_pbModuleEnd = pbEnd;
+        s_fLimitReferencesToModule = fLimitReferencesToModule;
+    }
+
+    return fRangeOk;
+}
+
+///////////////////////////////////////////////////////// Disassembler Tables.
+//
+const BYTE CDetourDis::s_rbModRm[256] = {               // indexed by ModR/M byte; read by CopyBytes (NOTSIB count, SIB/RIP flags)
+    0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 0x  (top two bits 00)
+    0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 1x
+    0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 2x
+    0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 3x
+    1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1,                 // 4x  (top two bits 01)
+    1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1,                 // 5x
+    1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1,                 // 6x
+    1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1,                 // 7x
+    4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4,                 // 8x  (top two bits 10)
+    4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4,                 // 9x
+    4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4,                 // Ax
+    4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4,                 // Bx
+    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,                 // Cx  (top two bits 11)
+    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,                 // Dx
+    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,                 // Ex
+    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0                  // Fx
+};
+
+const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable[257] =
+{
+    { 0x00, ENTRY_CopyBytes2Mod },                      // ADD /r
+    { 0x01, ENTRY_CopyBytes2Mod },                      // ADD /r
+    { 0x02, ENTRY_CopyBytes2Mod },                      // ADD /r
+    { 0x03, ENTRY_CopyBytes2Mod },                      // ADD /r
+    { 0x04, ENTRY_CopyBytes2 },                         // ADD ib
+    { 0x05, ENTRY_CopyBytes3Or5 },                      // ADD iw
+#ifdef DETOURS_X64
+    { 0x06, ENTRY_Invalid },                            // Invalid
+    { 0x07, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x06, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x07, ENTRY_CopyBytes1 },                         // POP
+#endif
+    { 0x08, ENTRY_CopyBytes2Mod },                      // OR /r
+    { 0x09, ENTRY_CopyBytes2Mod },                      // OR /r
+    { 0x0A, ENTRY_CopyBytes2Mod },                      // OR /r
+    { 0x0B, ENTRY_CopyBytes2Mod },                      // OR /r
+    { 0x0C, ENTRY_CopyBytes2 },                         // OR ib
+    { 0x0D, ENTRY_CopyBytes3Or5 },                      // OR iw
+#ifdef DETOURS_X64
+    { 0x0E, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x0E, ENTRY_CopyBytes1 },                         // PUSH
+#endif
+    { 0x0F, ENTRY_Copy0F },                             // Extension Ops
+    { 0x10, ENTRY_CopyBytes2Mod },                      // ADC /r
+    { 0x11, ENTRY_CopyBytes2Mod },                      // ADC /r
+    { 0x12, ENTRY_CopyBytes2Mod },                      // ADC /r
+    { 0x13, ENTRY_CopyBytes2Mod },                      // ADC /r
+    { 0x14, ENTRY_CopyBytes2 },                         // ADC ib
+    { 0x15, ENTRY_CopyBytes3Or5 },                      // ADC id
+#ifdef DETOURS_X64
+    { 0x16, ENTRY_Invalid },                            // Invalid
+    { 0x17, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x16, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x17, ENTRY_CopyBytes1 },                         // POP
+#endif
+    { 0x18, ENTRY_CopyBytes2Mod },                      // SBB /r
+    { 0x19, ENTRY_CopyBytes2Mod },                      // SBB /r
+    { 0x1A, ENTRY_CopyBytes2Mod },                      // SBB /r
+    { 0x1B, ENTRY_CopyBytes2Mod },                      // SBB /r
+    { 0x1C, ENTRY_CopyBytes2 },                         // SBB ib
+    { 0x1D, ENTRY_CopyBytes3Or5 },                      // SBB id
+#ifdef DETOURS_X64
+    { 0x1E, ENTRY_Invalid },                            // Invalid
+    { 0x1F, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x1E, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x1F, ENTRY_CopyBytes1 },                         // POP
+#endif
+    { 0x20, ENTRY_CopyBytes2Mod },                      // AND /r
+    { 0x21, ENTRY_CopyBytes2Mod },                      // AND /r
+    { 0x22, ENTRY_CopyBytes2Mod },                      // AND /r
+    { 0x23, ENTRY_CopyBytes2Mod },                      // AND /r
+    { 0x24, ENTRY_CopyBytes2 },                         // AND ib
+    { 0x25, ENTRY_CopyBytes3Or5 },                      // AND id
+    { 0x26, ENTRY_CopyBytesSegment },                   // ES prefix
+#ifdef DETOURS_X64
+    { 0x27, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x27, ENTRY_CopyBytes1 },                         // DAA
+#endif
+    { 0x28, ENTRY_CopyBytes2Mod },                      // SUB /r
+    { 0x29, ENTRY_CopyBytes2Mod },                      // SUB /r
+    { 0x2A, ENTRY_CopyBytes2Mod },                      // SUB /r
+    { 0x2B, ENTRY_CopyBytes2Mod },                      // SUB /r
+    { 0x2C, ENTRY_CopyBytes2 },                         // SUB ib
+    { 0x2D, ENTRY_CopyBytes3Or5 },                      // SUB id
+    { 0x2E, ENTRY_CopyBytesSegment },                   // CS prefix
+#ifdef DETOURS_X64
+    { 0x2F, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x2F, ENTRY_CopyBytes1 },                         // DAS
+#endif
+    { 0x30, ENTRY_CopyBytes2Mod },                      // XOR /r
+    { 0x31, ENTRY_CopyBytes2Mod },                      // XOR /r
+    { 0x32, ENTRY_CopyBytes2Mod },                      // XOR /r
+    { 0x33, ENTRY_CopyBytes2Mod },                      // XOR /r
+    { 0x34, ENTRY_CopyBytes2 },                         // XOR ib
+    { 0x35, ENTRY_CopyBytes3Or5 },                      // XOR id
+    { 0x36, ENTRY_CopyBytesSegment },                   // SS prefix
+#ifdef DETOURS_X64
+    { 0x37, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x37, ENTRY_CopyBytes1 },                         // AAA
+#endif
+    { 0x38, ENTRY_CopyBytes2Mod },                      // CMP /r
+    { 0x39, ENTRY_CopyBytes2Mod },                      // CMP /r
+    { 0x3A, ENTRY_CopyBytes2Mod },                      // CMP /r
+    { 0x3B, ENTRY_CopyBytes2Mod },                      // CMP /r
+    { 0x3C, ENTRY_CopyBytes2 },                         // CMP ib
+    { 0x3D, ENTRY_CopyBytes3Or5 },                      // CMP id
+    { 0x3E, ENTRY_CopyBytesSegment },                   // DS prefix
+#ifdef DETOURS_X64
+    { 0x3F, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x3F, ENTRY_CopyBytes1 },                         // AAS
+#endif
+#ifdef DETOURS_X64 // For Rax Prefix
+    { 0x40, ENTRY_CopyBytesRax },                       // Rax
+    { 0x41, ENTRY_CopyBytesRax },                       // Rax
+    { 0x42, ENTRY_CopyBytesRax },                       // Rax
+    { 0x43, ENTRY_CopyBytesRax },                       // Rax
+    { 0x44, ENTRY_CopyBytesRax },                       // Rax
+    { 0x45, ENTRY_CopyBytesRax },                       // Rax
+    { 0x46, ENTRY_CopyBytesRax },                       // Rax
+    { 0x47, ENTRY_CopyBytesRax },                       // Rax
+    { 0x48, ENTRY_CopyBytesRax },                       // Rax
+    { 0x49, ENTRY_CopyBytesRax },                       // Rax
+    { 0x4A, ENTRY_CopyBytesRax },                       // Rax
+    { 0x4B, ENTRY_CopyBytesRax },                       // Rax
+    { 0x4C, ENTRY_CopyBytesRax },                       // Rax
+    { 0x4D, ENTRY_CopyBytesRax },                       // Rax
+    { 0x4E, ENTRY_CopyBytesRax },                       // Rax
+    { 0x4F, ENTRY_CopyBytesRax },                       // Rax
+#else
+    { 0x40, ENTRY_CopyBytes1 },                         // INC
+    { 0x41, ENTRY_CopyBytes1 },                         // INC
+    { 0x42, ENTRY_CopyBytes1 },                         // INC
+    { 0x43, ENTRY_CopyBytes1 },                         // INC
+    { 0x44, ENTRY_CopyBytes1 },                         // INC
+    { 0x45, ENTRY_CopyBytes1 },                         // INC
+    { 0x46, ENTRY_CopyBytes1 },                         // INC
+    { 0x47, ENTRY_CopyBytes1 },                         // INC
+    { 0x48, ENTRY_CopyBytes1 },                         // DEC
+    { 0x49, ENTRY_CopyBytes1 },                         // DEC
+    { 0x4A, ENTRY_CopyBytes1 },                         // DEC
+    { 0x4B, ENTRY_CopyBytes1 },                         // DEC
+    { 0x4C, ENTRY_CopyBytes1 },                         // DEC
+    { 0x4D, ENTRY_CopyBytes1 },                         // DEC
+    { 0x4E, ENTRY_CopyBytes1 },                         // DEC
+    { 0x4F, ENTRY_CopyBytes1 },                         // DEC
+#endif
+    { 0x50, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x51, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x52, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x53, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x54, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x55, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x56, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x57, ENTRY_CopyBytes1 },                         // PUSH
+    { 0x58, ENTRY_CopyBytes1 },                         // POP
+    { 0x59, ENTRY_CopyBytes1 },                         // POP
+    { 0x5A, ENTRY_CopyBytes1 },                         // POP
+    { 0x5B, ENTRY_CopyBytes1 },                         // POP
+    { 0x5C, ENTRY_CopyBytes1 },                         // POP
+    { 0x5D, ENTRY_CopyBytes1 },                         // POP
+    { 0x5E, ENTRY_CopyBytes1 },                         // POP
+    { 0x5F, ENTRY_CopyBytes1 },                         // POP
+#ifdef DETOURS_X64
+    { 0x60, ENTRY_Invalid },                            // Invalid
+    { 0x61, ENTRY_Invalid },                            // Invalid
+    { 0x62, ENTRY_CopyEvex },                           // EVEX / AVX512
+#else
+    { 0x60, ENTRY_CopyBytes1 },                         // PUSHAD
+    { 0x61, ENTRY_CopyBytes1 },                         // POPAD
+    { 0x62, ENTRY_CopyEvex },                           // BOUND /r and EVEX / AVX512
+#endif
+    { 0x63, ENTRY_CopyBytes2Mod },                      // 32bit ARPL /r, 64bit MOVSXD
+    { 0x64, ENTRY_CopyBytesSegment },                   // FS prefix
+    { 0x65, ENTRY_CopyBytesSegment },                   // GS prefix
+    { 0x66, ENTRY_Copy66 },                             // Operand Prefix
+    { 0x67, ENTRY_Copy67 },                             // Address Prefix
+    { 0x68, ENTRY_CopyBytes3Or5 },                      // PUSH
+    { 0x69, ENTRY_CopyBytes2ModOperand },               // IMUL /r iz
+    { 0x6A, ENTRY_CopyBytes2 },                         // PUSH
+    { 0x6B, ENTRY_CopyBytes2Mod1 },                     // IMUL /r ib
+    { 0x6C, ENTRY_CopyBytes1 },                         // INS
+    { 0x6D, ENTRY_CopyBytes1 },                         // INS
+    { 0x6E, ENTRY_CopyBytes1 },                         // OUTS/OUTSB
+    { 0x6F, ENTRY_CopyBytes1 },                         // OUTS/OUTSW
+    { 0x70, ENTRY_CopyBytes2Jump },                     // JO           // 0f80
+    { 0x71, ENTRY_CopyBytes2Jump },                     // JNO          // 0f81
+    { 0x72, ENTRY_CopyBytes2Jump },                     // JB/JC/JNAE   // 0f82
+    { 0x73, ENTRY_CopyBytes2Jump },                     // JAE/JNB/JNC  // 0f83
+    { 0x74, ENTRY_CopyBytes2Jump },                     // JE/JZ        // 0f84
+    { 0x75, ENTRY_CopyBytes2Jump },                     // JNE/JNZ      // 0f85
+    { 0x76, ENTRY_CopyBytes2Jump },                     // JBE/JNA      // 0f86
+    { 0x77, ENTRY_CopyBytes2Jump },                     // JA/JNBE      // 0f87
+    { 0x78, ENTRY_CopyBytes2Jump },                     // JS           // 0f88
+    { 0x79, ENTRY_CopyBytes2Jump },                     // JNS          // 0f89
+    { 0x7A, ENTRY_CopyBytes2Jump },                     // JP/JPE       // 0f8a
+    { 0x7B, ENTRY_CopyBytes2Jump },                     // JNP/JPO      // 0f8b
+    { 0x7C, ENTRY_CopyBytes2Jump },                     // JL/JNGE      // 0f8c
+    { 0x7D, ENTRY_CopyBytes2Jump },                     // JGE/JNL      // 0f8d
+    { 0x7E, ENTRY_CopyBytes2Jump },                     // JLE/JNG      // 0f8e
+    { 0x7F, ENTRY_CopyBytes2Jump },                     // JG/JNLE      // 0f8f
+    { 0x80, ENTRY_CopyBytes2Mod1 },                     // ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 byte reg, immediate byte
+    { 0x81, ENTRY_CopyBytes2ModOperand },               // ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 byte reg, immediate word or dword
+#ifdef DETOURS_X64
+    { 0x82, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x82, ENTRY_CopyBytes2Mod1 },                     // MOV al,x
+#endif
+    { 0x83, ENTRY_CopyBytes2Mod1 },                     // ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 reg, immediate byte
+    { 0x84, ENTRY_CopyBytes2Mod },                      // TEST /r
+    { 0x85, ENTRY_CopyBytes2Mod },                      // TEST /r
+    { 0x86, ENTRY_CopyBytes2Mod },                      // XCHG /r @todo
+    { 0x87, ENTRY_CopyBytes2Mod },                      // XCHG /r @todo
+    { 0x88, ENTRY_CopyBytes2Mod },                      // MOV /r
+    { 0x89, ENTRY_CopyBytes2Mod },                      // MOV /r
+    { 0x8A, ENTRY_CopyBytes2Mod },                      // MOV /r
+    { 0x8B, ENTRY_CopyBytes2Mod },                      // MOV /r
+    { 0x8C, ENTRY_CopyBytes2Mod },                      // MOV /r
+    { 0x8D, ENTRY_CopyBytes2Mod },                      // LEA /r
+    { 0x8E, ENTRY_CopyBytes2Mod },                      // MOV /r
+    { 0x8F, ENTRY_CopyXop },                            // POP /0 or AMD XOP
+    { 0x90, ENTRY_CopyBytes1 },                         // NOP
+    { 0x91, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x92, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x93, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x94, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x95, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x96, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x97, ENTRY_CopyBytes1 },                         // XCHG
+    { 0x98, ENTRY_CopyBytes1 },                         // CWDE
+    { 0x99, ENTRY_CopyBytes1 },                         // CDQ
+#ifdef DETOURS_X64
+    { 0x9A, ENTRY_Invalid },                            // Invalid
+#else
+    { 0x9A, ENTRY_CopyBytes5Or7Dynamic },               // CALL cp
+#endif
+    { 0x9B, ENTRY_CopyBytes1 },                         // WAIT/FWAIT
+    { 0x9C, ENTRY_CopyBytes1 },                         // PUSHFD
+    { 0x9D, ENTRY_CopyBytes1 },                         // POPFD
+    { 0x9E, ENTRY_CopyBytes1 },                         // SAHF
+    { 0x9F, ENTRY_CopyBytes1 },                         // LAHF
+    { 0xA0, ENTRY_CopyBytes1Address },                  // MOV
+    { 0xA1, ENTRY_CopyBytes1Address },                  // MOV
+    { 0xA2, ENTRY_CopyBytes1Address },                  // MOV
+    { 0xA3, ENTRY_CopyBytes1Address },                  // MOV
+    { 0xA4, ENTRY_CopyBytes1 },                         // MOVS
+    { 0xA5, ENTRY_CopyBytes1 },                         // MOVS/MOVSD
+    { 0xA6, ENTRY_CopyBytes1 },                         // CMPS/CMPSB
+    { 0xA7, ENTRY_CopyBytes1 },                         // CMPS/CMPSW
+    { 0xA8, ENTRY_CopyBytes2 },                         // TEST
+    { 0xA9, ENTRY_CopyBytes3Or5 },                      // TEST
+    { 0xAA, ENTRY_CopyBytes1 },                         // STOS/STOSB
+    { 0xAB, ENTRY_CopyBytes1 },                         // STOS/STOSW
+    { 0xAC, ENTRY_CopyBytes1 },                         // LODS/LODSB
+    { 0xAD, ENTRY_CopyBytes1 },                         // LODS/LODSW
+    { 0xAE, ENTRY_CopyBytes1 },                         // SCAS/SCASB
+    { 0xAF, ENTRY_CopyBytes1 },                         // SCAS/SCASD
+    { 0xB0, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB1, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB2, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB3, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB4, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB5, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB6, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB7, ENTRY_CopyBytes2 },                         // MOV B0+rb
+    { 0xB8, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xB9, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xBA, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xBB, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xBC, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xBD, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xBE, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xBF, ENTRY_CopyBytes3Or5Rax },                   // MOV B8+rb
+    { 0xC0, ENTRY_CopyBytes2Mod1 },                     // RCL/2 ib, etc.
+    { 0xC1, ENTRY_CopyBytes2Mod1 },                     // RCL/2 ib, etc.
+    { 0xC2, ENTRY_CopyBytes3 },                         // RET
+    { 0xC3, ENTRY_CopyBytes1 },                         // RET
+    { 0xC4, ENTRY_CopyVex3 },                           // LES, VEX 3-byte opcodes.
+    { 0xC5, ENTRY_CopyVex2 },                           // LDS, VEX 2-byte opcodes.
+    { 0xC6, ENTRY_CopyBytes2Mod1 },                     // MOV
+    { 0xC7, ENTRY_CopyBytes2ModOperand },               // MOV/0 XBEGIN/7
+    { 0xC8, ENTRY_CopyBytes4 },                         // ENTER
+    { 0xC9, ENTRY_CopyBytes1 },                         // LEAVE
+    { 0xCA, ENTRY_CopyBytes3Dynamic },                  // RET
+    { 0xCB, ENTRY_CopyBytes1Dynamic },                  // RET
+    { 0xCC, ENTRY_CopyBytes1Dynamic },                  // INT 3
+    { 0xCD, ENTRY_CopyBytes2Dynamic },                  // INT ib
+#ifdef DETOURS_X64
+    { 0xCE, ENTRY_Invalid },                            // Invalid
+#else
+    { 0xCE, ENTRY_CopyBytes1Dynamic },                  // INTO
+#endif
+    { 0xCF, ENTRY_CopyBytes1Dynamic },                  // IRET
+    { 0xD0, ENTRY_CopyBytes2Mod },                      // RCL/2, etc.
+    { 0xD1, ENTRY_CopyBytes2Mod },                      // RCL/2, etc.
+    { 0xD2, ENTRY_CopyBytes2Mod },                      // RCL/2, etc.
+    { 0xD3, ENTRY_CopyBytes2Mod },                      // RCL/2, etc.
+#ifdef DETOURS_X64
+    { 0xD4, ENTRY_Invalid },                            // Invalid
+    { 0xD5, ENTRY_Invalid },                            // Invalid
+#else
+    { 0xD4, ENTRY_CopyBytes2 },                         // AAM
+    { 0xD5, ENTRY_CopyBytes2 },                         // AAD
+#endif
+    { 0xD6, ENTRY_Invalid },                            // Invalid
+    { 0xD7, ENTRY_CopyBytes1 },                         // XLAT/XLATB
+    { 0xD8, ENTRY_CopyBytes2Mod },                      // FADD, etc.
+    { 0xD9, ENTRY_CopyBytes2Mod },                      // F2XM1, etc.
+    { 0xDA, ENTRY_CopyBytes2Mod },                      // FLADD, etc.
+    { 0xDB, ENTRY_CopyBytes2Mod },                      // FCLEX, etc.
+    { 0xDC, ENTRY_CopyBytes2Mod },                      // FADD/0, etc.
+    { 0xDD, ENTRY_CopyBytes2Mod },                      // FFREE, etc.
+    { 0xDE, ENTRY_CopyBytes2Mod },                      // FADDP, etc.
+    { 0xDF, ENTRY_CopyBytes2Mod },                      // FBLD/4, etc.
+    { 0xE0, ENTRY_CopyBytes2CantJump },                 // LOOPNE cb
+    { 0xE1, ENTRY_CopyBytes2CantJump },                 // LOOPE cb
+    { 0xE2, ENTRY_CopyBytes2CantJump },                 // LOOP cb
+    { 0xE3, ENTRY_CopyBytes2CantJump },                 // JCXZ/JECXZ
+    { 0xE4, ENTRY_CopyBytes2 },                         // IN ib
+    { 0xE5, ENTRY_CopyBytes2 },                         // IN id
+    { 0xE6, ENTRY_CopyBytes2 },                         // OUT ib
+    { 0xE7, ENTRY_CopyBytes2 },                         // OUT ib
+    { 0xE8, ENTRY_CopyBytes3Or5Target },                // CALL cd
+    { 0xE9, ENTRY_CopyBytes3Or5Target },                // JMP cd
+#ifdef DETOURS_X64
+    { 0xEA, ENTRY_Invalid },                            // Invalid
+#else
+    { 0xEA, ENTRY_CopyBytes5Or7Dynamic },               // JMP cp
+#endif
+    { 0xEB, ENTRY_CopyBytes2Jump },                     // JMP cb
+    { 0xEC, ENTRY_CopyBytes1 },                         // IN ib
+    { 0xED, ENTRY_CopyBytes1 },                         // IN id
+    { 0xEE, ENTRY_CopyBytes1 },                         // OUT
+    { 0xEF, ENTRY_CopyBytes1 },                         // OUT
+    { 0xF0, ENTRY_CopyBytesPrefix },                    // LOCK prefix
+    { 0xF1, ENTRY_CopyBytes1Dynamic },                  // INT1 / ICEBP somewhat documented by AMD, not by Intel
+    { 0xF2, ENTRY_CopyF2 },                             // REPNE prefix
+//#ifdef DETOURS_X86
+    { 0xF3, ENTRY_CopyF3 },                             // REPE prefix
+//#else
+// This does presently suffice for AMD64 but it requires tracing
+// through a bunch of code to verify and seems not worth maintaining.
+//  { 0xF3, ENTRY_CopyBytesPrefix },                    // REPE prefix
+//#endif
+    { 0xF4, ENTRY_CopyBytes1 },                         // HLT
+    { 0xF5, ENTRY_CopyBytes1 },                         // CMC
+    { 0xF6, ENTRY_CopyF6 },                             // TEST/0, DIV/6
+    { 0xF7, ENTRY_CopyF7 },                             // TEST/0, DIV/6
+    { 0xF8, ENTRY_CopyBytes1 },                         // CLC
+    { 0xF9, ENTRY_CopyBytes1 },                         // STC
+    { 0xFA, ENTRY_CopyBytes1 },                         // CLI
+    { 0xFB, ENTRY_CopyBytes1 },                         // STI
+    { 0xFC, ENTRY_CopyBytes1 },                         // CLD
+    { 0xFD, ENTRY_CopyBytes1 },                         // STD
+    { 0xFE, ENTRY_CopyBytes2Mod },                      // DEC/1,INC/0
+    { 0xFF, ENTRY_CopyFF },                             // CALL/2
+    { 0, ENTRY_End },
+};
+
+const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable0F[257] =
+{   // 256 entries for the opcode byte that follows 0F (nOpcode must equal the index), then one ENTRY_End marker.
+#ifdef DETOURS_X86
+    { 0x00, ENTRY_Copy0F00 },                           // sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6/dynamic invalid/7
+#else
+    { 0x00, ENTRY_CopyBytes2Mod },                      // sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6/dynamic invalid/7
+#endif
+    { 0x01, ENTRY_CopyBytes2Mod },                      // INVLPG/7, etc.
+    { 0x02, ENTRY_CopyBytes2Mod },                      // LAR/r
+    { 0x03, ENTRY_CopyBytes2Mod },                      // LSL/r
+    { 0x04, ENTRY_Invalid },                            // _04
+    { 0x05, ENTRY_CopyBytes1 },                         // SYSCALL
+    { 0x06, ENTRY_CopyBytes1 },                         // CLTS
+    { 0x07, ENTRY_CopyBytes1 },                         // SYSRET
+    { 0x08, ENTRY_CopyBytes1 },                         // INVD
+    { 0x09, ENTRY_CopyBytes1 },                         // WBINVD
+    { 0x0A, ENTRY_Invalid },                            // _0A
+    { 0x0B, ENTRY_CopyBytes1 },                         // UD2
+    { 0x0C, ENTRY_Invalid },                            // _0C
+    { 0x0D, ENTRY_CopyBytes2Mod },                      // PREFETCH
+    { 0x0E, ENTRY_CopyBytes1 },                         // FEMMS (3DNow -- not in Intel documentation)
+    { 0x0F, ENTRY_CopyBytes2Mod1 },                     // 3DNow Opcodes
+    { 0x10, ENTRY_CopyBytes2Mod },                      // MOVSS MOVUPD MOVSD
+    { 0x11, ENTRY_CopyBytes2Mod },                      // MOVSS MOVUPD MOVSD
+    { 0x12, ENTRY_CopyBytes2Mod },                      // MOVLPD
+    { 0x13, ENTRY_CopyBytes2Mod },                      // MOVLPD
+    { 0x14, ENTRY_CopyBytes2Mod },                      // UNPCKLPD
+    { 0x15, ENTRY_CopyBytes2Mod },                      // UNPCKHPD
+    { 0x16, ENTRY_CopyBytes2Mod },                      // MOVHPD
+    { 0x17, ENTRY_CopyBytes2Mod },                      // MOVHPD
+    { 0x18, ENTRY_CopyBytes2Mod },                      // PREFETCHNTA...
+    { 0x19, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop, not documented by Intel, documented by AMD
+    { 0x1A, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop, not documented by Intel, documented by AMD
+    { 0x1B, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop, not documented by Intel, documented by AMD
+    { 0x1C, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop, not documented by Intel, documented by AMD
+    { 0x1D, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop, not documented by Intel, documented by AMD
+    { 0x1E, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop, not documented by Intel, documented by AMD
+    { 0x1F, ENTRY_CopyBytes2Mod },                      // NOP/r multi byte nop
+    { 0x20, ENTRY_CopyBytes2Mod },                      // MOV/r
+    { 0x21, ENTRY_CopyBytes2Mod },                      // MOV/r
+    { 0x22, ENTRY_CopyBytes2Mod },                      // MOV/r
+    { 0x23, ENTRY_CopyBytes2Mod },                      // MOV/r
+#ifdef DETOURS_X64
+    { 0x24, ENTRY_Invalid },                            // _24
+#else
+    { 0x24, ENTRY_CopyBytes2Mod },                      // MOV/r,TR TR is test register on 80386 and 80486, removed in Pentium
+#endif
+    { 0x25, ENTRY_Invalid },                            // _25
+#ifdef DETOURS_X64
+    { 0x26, ENTRY_Invalid },                            // _26
+#else
+    { 0x26, ENTRY_CopyBytes2Mod },                      // MOV TR/r TR is test register on 80386 and 80486, removed in Pentium
+#endif
+    { 0x27, ENTRY_Invalid },                            // _27
+    { 0x28, ENTRY_CopyBytes2Mod },                      // MOVAPS MOVAPD
+    { 0x29, ENTRY_CopyBytes2Mod },                      // MOVAPS MOVAPD
+    { 0x2A, ENTRY_CopyBytes2Mod },                      // CVTPI2PS &
+    { 0x2B, ENTRY_CopyBytes2Mod },                      // MOVNTPS MOVNTPD
+    { 0x2C, ENTRY_CopyBytes2Mod },                      // CVTTPS2PI &
+    { 0x2D, ENTRY_CopyBytes2Mod },                      // CVTPS2PI &
+    { 0x2E, ENTRY_CopyBytes2Mod },                      // UCOMISS UCOMISD
+    { 0x2F, ENTRY_CopyBytes2Mod },                      // COMISS COMISD
+    { 0x30, ENTRY_CopyBytes1 },                         // WRMSR
+    { 0x31, ENTRY_CopyBytes1 },                         // RDTSC
+    { 0x32, ENTRY_CopyBytes1 },                         // RDMSR
+    { 0x33, ENTRY_CopyBytes1 },                         // RDPMC
+    { 0x34, ENTRY_CopyBytes1 },                         // SYSENTER
+    { 0x35, ENTRY_CopyBytes1 },                         // SYSEXIT
+    { 0x36, ENTRY_Invalid },                            // _36
+    { 0x37, ENTRY_CopyBytes1 },                         // GETSEC
+    { 0x38, ENTRY_CopyBytes3Mod },                      // SSSE3/SSE4 opcodes (three-byte 0F 38 xx map)
+    { 0x39, ENTRY_Invalid },                            // _39
+    { 0x3A, ENTRY_CopyBytes3Mod1 },                      // SSSE3/SSE4 opcodes with ib (three-byte 0F 3A xx map)
+    { 0x3B, ENTRY_Invalid },                            // _3B
+    { 0x3C, ENTRY_Invalid },                            // _3C
+    { 0x3D, ENTRY_Invalid },                            // _3D
+    { 0x3E, ENTRY_Invalid },                            // _3E
+    { 0x3F, ENTRY_Invalid },                            // _3F
+    { 0x40, ENTRY_CopyBytes2Mod },                      // CMOVO (0F 40)
+    { 0x41, ENTRY_CopyBytes2Mod },                      // CMOVNO (0F 41)
+    { 0x42, ENTRY_CopyBytes2Mod },                      // CMOVB & CMOVNAE (0F 42)
+    { 0x43, ENTRY_CopyBytes2Mod },                      // CMOVAE & CMOVNB (0F 43)
+    { 0x44, ENTRY_CopyBytes2Mod },                      // CMOVE & CMOVZ (0F 44)
+    { 0x45, ENTRY_CopyBytes2Mod },                      // CMOVNE & CMOVNZ (0F 45)
+    { 0x46, ENTRY_CopyBytes2Mod },                      // CMOVBE & CMOVNA (0F 46)
+    { 0x47, ENTRY_CopyBytes2Mod },                      // CMOVA & CMOVNBE (0F 47)
+    { 0x48, ENTRY_CopyBytes2Mod },                      // CMOVS (0F 48)
+    { 0x49, ENTRY_CopyBytes2Mod },                      // CMOVNS (0F 49)
+    { 0x4A, ENTRY_CopyBytes2Mod },                      // CMOVP & CMOVPE (0F 4A)
+    { 0x4B, ENTRY_CopyBytes2Mod },                      // CMOVNP & CMOVPO (0F 4B)
+    { 0x4C, ENTRY_CopyBytes2Mod },                      // CMOVL & CMOVNGE (0F 4C)
+    { 0x4D, ENTRY_CopyBytes2Mod },                      // CMOVGE & CMOVNL (0F 4D)
+    { 0x4E, ENTRY_CopyBytes2Mod },                      // CMOVLE & CMOVNG (0F 4E)
+    { 0x4F, ENTRY_CopyBytes2Mod },                      // CMOVG & CMOVNLE (0F 4F)
+    { 0x50, ENTRY_CopyBytes2Mod },                      // MOVMSKPS MOVMSKPD
+    { 0x51, ENTRY_CopyBytes2Mod },                      // SQRTPS &
+    { 0x52, ENTRY_CopyBytes2Mod },                      // RSQRTSS RSQRTPS
+    { 0x53, ENTRY_CopyBytes2Mod },                      // RCPPS RCPSS
+    { 0x54, ENTRY_CopyBytes2Mod },                      // ANDPS ANDPD
+    { 0x55, ENTRY_CopyBytes2Mod },                      // ANDNPS ANDNPD
+    { 0x56, ENTRY_CopyBytes2Mod },                      // ORPS ORPD
+    { 0x57, ENTRY_CopyBytes2Mod },                      // XORPS XORPD
+    { 0x58, ENTRY_CopyBytes2Mod },                      // ADDPS &
+    { 0x59, ENTRY_CopyBytes2Mod },                      // MULPS &
+    { 0x5A, ENTRY_CopyBytes2Mod },                      // CVTPS2PD &
+    { 0x5B, ENTRY_CopyBytes2Mod },                      // CVTDQ2PS &
+    { 0x5C, ENTRY_CopyBytes2Mod },                      // SUBPS &
+    { 0x5D, ENTRY_CopyBytes2Mod },                      // MINPS &
+    { 0x5E, ENTRY_CopyBytes2Mod },                      // DIVPS &
+    { 0x5F, ENTRY_CopyBytes2Mod },                      // MAXPS &
+    { 0x60, ENTRY_CopyBytes2Mod },                      // PUNPCKLBW/r
+    { 0x61, ENTRY_CopyBytes2Mod },                      // PUNPCKLWD/r
+    { 0x62, ENTRY_CopyBytes2Mod },                      // PUNPCKLDQ/r
+    { 0x63, ENTRY_CopyBytes2Mod },                      // PACKSSWB/r
+    { 0x64, ENTRY_CopyBytes2Mod },                      // PCMPGTB/r
+    { 0x65, ENTRY_CopyBytes2Mod },                      // PCMPGTW/r
+    { 0x66, ENTRY_CopyBytes2Mod },                      // PCMPGTD/r
+    { 0x67, ENTRY_CopyBytes2Mod },                      // PACKUSWB/r
+    { 0x68, ENTRY_CopyBytes2Mod },                      // PUNPCKHBW/r
+    { 0x69, ENTRY_CopyBytes2Mod },                      // PUNPCKHWD/r
+    { 0x6A, ENTRY_CopyBytes2Mod },                      // PUNPCKHDQ/r
+    { 0x6B, ENTRY_CopyBytes2Mod },                      // PACKSSDW/r
+    { 0x6C, ENTRY_CopyBytes2Mod },                      // PUNPCKLQDQ
+    { 0x6D, ENTRY_CopyBytes2Mod },                      // PUNPCKHQDQ
+    { 0x6E, ENTRY_CopyBytes2Mod },                      // MOVD/r
+    { 0x6F, ENTRY_CopyBytes2Mod },                      // MOV/r
+    { 0x70, ENTRY_CopyBytes2Mod1 },                     // PSHUFW/r ib
+    { 0x71, ENTRY_CopyBytes2Mod1 },                     // PSLLW/6 ib,PSRAW/4 ib,PSRLW/2 ib
+    { 0x72, ENTRY_CopyBytes2Mod1 },                     // PSLLD/6 ib,PSRAD/4 ib,PSRLD/2 ib
+    { 0x73, ENTRY_CopyBytes2Mod1 },                     // PSLLQ/6 ib,PSRLQ/2 ib
+    { 0x74, ENTRY_CopyBytes2Mod },                      // PCMPEQB/r
+    { 0x75, ENTRY_CopyBytes2Mod },                      // PCMPEQW/r
+    { 0x76, ENTRY_CopyBytes2Mod },                      // PCMPEQD/r
+    { 0x77, ENTRY_CopyBytes1 },                         // EMMS
+    // extrq/insertq require mod=3 and are followed by two immediate bytes
+    { 0x78, ENTRY_Copy0F78 },                           // VMREAD/r, 66/EXTRQ/r/ib/ib, F2/INSERTQ/r/ib/ib
+    // extrq/insertq require mod=3, therefore ENTRY_CopyBytes2, but it ends up the same
+    { 0x79, ENTRY_CopyBytes2Mod },                      // VMWRITE/r, 66/EXTRQ/r, F2/INSERTQ/r
+    { 0x7A, ENTRY_Invalid },                            // _7A
+    { 0x7B, ENTRY_Invalid },                            // _7B
+    { 0x7C, ENTRY_CopyBytes2Mod },                      // HADDPS
+    { 0x7D, ENTRY_CopyBytes2Mod },                      // HSUBPS
+    { 0x7E, ENTRY_CopyBytes2Mod },                      // MOVD/r
+    { 0x7F, ENTRY_CopyBytes2Mod },                      // MOV/r
+    { 0x80, ENTRY_CopyBytes3Or5Target },                // JO
+    { 0x81, ENTRY_CopyBytes3Or5Target },                // JNO
+    { 0x82, ENTRY_CopyBytes3Or5Target },                // JB,JC,JNAE
+    { 0x83, ENTRY_CopyBytes3Or5Target },                // JAE,JNB,JNC
+    { 0x84, ENTRY_CopyBytes3Or5Target },                // JE,JZ
+    { 0x85, ENTRY_CopyBytes3Or5Target },                // JNE,JNZ
+    { 0x86, ENTRY_CopyBytes3Or5Target },                // JBE,JNA
+    { 0x87, ENTRY_CopyBytes3Or5Target },                // JA,JNBE
+    { 0x88, ENTRY_CopyBytes3Or5Target },                // JS
+    { 0x89, ENTRY_CopyBytes3Or5Target },                // JNS
+    { 0x8A, ENTRY_CopyBytes3Or5Target },                // JP,JPE
+    { 0x8B, ENTRY_CopyBytes3Or5Target },                // JNP,JPO
+    { 0x8C, ENTRY_CopyBytes3Or5Target },                // JL,JNGE
+    { 0x8D, ENTRY_CopyBytes3Or5Target },                // JGE,JNL
+    { 0x8E, ENTRY_CopyBytes3Or5Target },                // JLE,JNG
+    { 0x8F, ENTRY_CopyBytes3Or5Target },                // JG,JNLE
+    { 0x90, ENTRY_CopyBytes2Mod },                      // SETO (0F 90)
+    { 0x91, ENTRY_CopyBytes2Mod },                      // SETNO (0F 91)
+    { 0x92, ENTRY_CopyBytes2Mod },                      // SETB & SETC & SETNAE (0F 92)
+    { 0x93, ENTRY_CopyBytes2Mod },                      // SETAE & SETNB & SETNC (0F 93)
+    { 0x94, ENTRY_CopyBytes2Mod },                      // SETE & SETZ (0F 94)
+    { 0x95, ENTRY_CopyBytes2Mod },                      // SETNE & SETNZ (0F 95)
+    { 0x96, ENTRY_CopyBytes2Mod },                      // SETBE & SETNA (0F 96)
+    { 0x97, ENTRY_CopyBytes2Mod },                      // SETA & SETNBE (0F 97)
+    { 0x98, ENTRY_CopyBytes2Mod },                      // SETS (0F 98)
+    { 0x99, ENTRY_CopyBytes2Mod },                      // SETNS (0F 99)
+    { 0x9A, ENTRY_CopyBytes2Mod },                      // SETP & SETPE (0F 9A)
+    { 0x9B, ENTRY_CopyBytes2Mod },                      // SETNP & SETPO (0F 9B)
+    { 0x9C, ENTRY_CopyBytes2Mod },                      // SETL & SETNGE (0F 9C)
+    { 0x9D, ENTRY_CopyBytes2Mod },                      // SETGE & SETNL (0F 9D)
+    { 0x9E, ENTRY_CopyBytes2Mod },                      // SETLE & SETNG (0F 9E)
+    { 0x9F, ENTRY_CopyBytes2Mod },                      // SETG & SETNLE (0F 9F)
+    { 0xA0, ENTRY_CopyBytes1 },                         // PUSH
+    { 0xA1, ENTRY_CopyBytes1 },                         // POP
+    { 0xA2, ENTRY_CopyBytes1 },                         // CPUID
+    { 0xA3, ENTRY_CopyBytes2Mod },                      // BT  (0F A3)
+    { 0xA4, ENTRY_CopyBytes2Mod1 },                     // SHLD
+    { 0xA5, ENTRY_CopyBytes2Mod },                      // SHLD
+    { 0xA6, ENTRY_CopyBytes2Mod },                      // XBTS
+    { 0xA7, ENTRY_CopyBytes2Mod },                      // IBTS
+    { 0xA8, ENTRY_CopyBytes1 },                         // PUSH
+    { 0xA9, ENTRY_CopyBytes1 },                         // POP
+    { 0xAA, ENTRY_CopyBytes1 },                         // RSM
+    { 0xAB, ENTRY_CopyBytes2Mod },                      // BTS (0F AB)
+    { 0xAC, ENTRY_CopyBytes2Mod1 },                     // SHRD
+    { 0xAD, ENTRY_CopyBytes2Mod },                      // SHRD
+
+    // 0F AE mod76=mem mod543=0 fxsave
+    // 0F AE mod76=mem mod543=1 fxrstor
+    // 0F AE mod76=mem mod543=2 ldmxcsr
+    // 0F AE mod76=mem mod543=3 stmxcsr
+    // 0F AE mod76=mem mod543=4 xsave
+    // 0F AE mod76=mem mod543=5 xrstor
+    // 0F AE mod76=mem mod543=6 saveopt
+    // 0F AE mod76=mem mod543=7 clflush
+    // 0F AE mod76=11b mod543=5 lfence
+    // 0F AE mod76=11b mod543=6 mfence
+    // 0F AE mod76=11b mod543=7 sfence
+    // F3 0F AE mod76=11b mod543=0 rdfsbase
+    // F3 0F AE mod76=11b mod543=1 rdgsbase
+    // F3 0F AE mod76=11b mod543=2 wrfsbase
+    // F3 0F AE mod76=11b mod543=3 wrgsbase
+    { 0xAE, ENTRY_CopyBytes2Mod },                      // fxsave fxrstor ldmxcsr stmxcsr xsave xrstor saveopt clflush lfence mfence sfence rdfsbase rdgsbase wrfsbase wrgsbase
+    { 0xAF, ENTRY_CopyBytes2Mod },                      // IMUL (0F AF)
+    { 0xB0, ENTRY_CopyBytes2Mod },                      // CMPXCHG (0F B0)
+    { 0xB1, ENTRY_CopyBytes2Mod },                      // CMPXCHG (0F B1)
+    { 0xB2, ENTRY_CopyBytes2Mod },                      // LSS/r
+    { 0xB3, ENTRY_CopyBytes2Mod },                      // BTR (0F B3)
+    { 0xB4, ENTRY_CopyBytes2Mod },                      // LFS/r
+    { 0xB5, ENTRY_CopyBytes2Mod },                      // LGS/r
+    { 0xB6, ENTRY_CopyBytes2Mod },                      // MOVZX/r
+    { 0xB7, ENTRY_CopyBytes2Mod },                      // MOVZX/r
+#ifdef DETOURS_X86
+    { 0xB8, ENTRY_Copy0FB8 },                           // jmpe f3/popcnt
+#else
+    { 0xB8, ENTRY_CopyBytes2Mod },                      // f3/popcnt
+#endif
+    { 0xB9, ENTRY_Invalid },                            // _B9
+    { 0xBA, ENTRY_CopyBytes2Mod1 },                     // BT & BTC & BTR & BTS (0F BA)
+    { 0xBB, ENTRY_CopyBytes2Mod },                      // BTC (0F BB)
+    { 0xBC, ENTRY_CopyBytes2Mod },                      // BSF (0F BC)
+    { 0xBD, ENTRY_CopyBytes2Mod },                      // BSR (0F BD)
+    { 0xBE, ENTRY_CopyBytes2Mod },                      // MOVSX/r
+    { 0xBF, ENTRY_CopyBytes2Mod },                      // MOVSX/r
+    { 0xC0, ENTRY_CopyBytes2Mod },                      // XADD/r
+    { 0xC1, ENTRY_CopyBytes2Mod },                      // XADD/r
+    { 0xC2, ENTRY_CopyBytes2Mod1 },                     // CMPPS &
+    { 0xC3, ENTRY_CopyBytes2Mod },                      // MOVNTI
+    { 0xC4, ENTRY_CopyBytes2Mod1 },                     // PINSRW /r ib
+    { 0xC5, ENTRY_CopyBytes2Mod1 },                     // PEXTRW /r ib
+    { 0xC6, ENTRY_CopyBytes2Mod1 },                     // SHUFPS & SHUFPD
+    { 0xC7, ENTRY_CopyBytes2Mod },                      // CMPXCHG8B (0F C7)
+    { 0xC8, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xC9, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xCA, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xCB, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xCC, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xCD, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xCE, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xCF, ENTRY_CopyBytes1 },                         // BSWAP 0F C8 + rd
+    { 0xD0, ENTRY_CopyBytes2Mod },                      // ADDSUBPS (untested)
+    { 0xD1, ENTRY_CopyBytes2Mod },                      // PSRLW/r
+    { 0xD2, ENTRY_CopyBytes2Mod },                      // PSRLD/r
+    { 0xD3, ENTRY_CopyBytes2Mod },                      // PSRLQ/r
+    { 0xD4, ENTRY_CopyBytes2Mod },                      // PADDQ
+    { 0xD5, ENTRY_CopyBytes2Mod },                      // PMULLW/r
+    { 0xD6, ENTRY_CopyBytes2Mod },                      // MOVDQ2Q / MOVQ2DQ
+    { 0xD7, ENTRY_CopyBytes2Mod },                      // PMOVMSKB/r
+    { 0xD8, ENTRY_CopyBytes2Mod },                      // PSUBUSB/r
+    { 0xD9, ENTRY_CopyBytes2Mod },                      // PSUBUSW/r
+    { 0xDA, ENTRY_CopyBytes2Mod },                      // PMINUB/r
+    { 0xDB, ENTRY_CopyBytes2Mod },                      // PAND/r
+    { 0xDC, ENTRY_CopyBytes2Mod },                      // PADDUSB/r
+    { 0xDD, ENTRY_CopyBytes2Mod },                      // PADDUSW/r
+    { 0xDE, ENTRY_CopyBytes2Mod },                      // PMAXUB/r
+    { 0xDF, ENTRY_CopyBytes2Mod },                      // PANDN/r
+    { 0xE0, ENTRY_CopyBytes2Mod  },                     // PAVGB
+    { 0xE1, ENTRY_CopyBytes2Mod },                      // PSRAW/r
+    { 0xE2, ENTRY_CopyBytes2Mod },                      // PSRAD/r
+    { 0xE3, ENTRY_CopyBytes2Mod },                      // PAVGW
+    { 0xE4, ENTRY_CopyBytes2Mod },                      // PMULHUW/r
+    { 0xE5, ENTRY_CopyBytes2Mod },                      // PMULHW/r
+    { 0xE6, ENTRY_CopyBytes2Mod },                      // CVTDQ2PD &
+    { 0xE7, ENTRY_CopyBytes2Mod },                      // MOVNTQ
+    { 0xE8, ENTRY_CopyBytes2Mod },                      // PSUBSB/r
+    { 0xE9, ENTRY_CopyBytes2Mod },                      // PSUBSW/r
+    { 0xEA, ENTRY_CopyBytes2Mod },                      // PMINSW/r
+    { 0xEB, ENTRY_CopyBytes2Mod },                      // POR/r
+    { 0xEC, ENTRY_CopyBytes2Mod },                      // PADDSB/r
+    { 0xED, ENTRY_CopyBytes2Mod },                      // PADDSW/r
+    { 0xEE, ENTRY_CopyBytes2Mod },                      // PMAXSW /r
+    { 0xEF, ENTRY_CopyBytes2Mod },                      // PXOR/r
+    { 0xF0, ENTRY_CopyBytes2Mod },                      // LDDQU
+    { 0xF1, ENTRY_CopyBytes2Mod },                      // PSLLW/r
+    { 0xF2, ENTRY_CopyBytes2Mod },                      // PSLLD/r
+    { 0xF3, ENTRY_CopyBytes2Mod },                      // PSLLQ/r
+    { 0xF4, ENTRY_CopyBytes2Mod },                      // PMULUDQ/r
+    { 0xF5, ENTRY_CopyBytes2Mod },                      // PMADDWD/r
+    { 0xF6, ENTRY_CopyBytes2Mod },                      // PSADBW/r
+    { 0xF7, ENTRY_CopyBytes2Mod },                      // MASKMOVQ
+    { 0xF8, ENTRY_CopyBytes2Mod },                      // PSUBB/r
+    { 0xF9, ENTRY_CopyBytes2Mod },                      // PSUBW/r
+    { 0xFA, ENTRY_CopyBytes2Mod },                      // PSUBD/r
+    { 0xFB, ENTRY_CopyBytes2Mod },                      // PSUBQ/r
+    { 0xFC, ENTRY_CopyBytes2Mod },                      // PADDB/r
+    { 0xFD, ENTRY_CopyBytes2Mod },                      // PADDW/r
+    { 0xFE, ENTRY_CopyBytes2Mod },                      // PADDD/r
+    { 0xFF, ENTRY_Invalid },                            // _FF
+    { 0, ENTRY_End },                                   // end marker; SanityCheckSystem requires its pfCopy to be NULL
+};
+
+BOOL CDetourDis::SanityCheckSystem()    // Checks both opcode tables; returns TRUE only if every check passes.
+{
+    ULONG n = 0;
+    for (; n < 256; n++) {                          // s_rceCopyTable: entry n must carry nOpcode == n
+        REFCOPYENTRY pEntry = &s_rceCopyTable[n];
+
+        if (n != pEntry->nOpcode) {
+            ASSERT(n == pEntry->nOpcode);           // condition is false on this path; reports the mismatch
+            return FALSE;
+        }
+    }
+    if (s_rceCopyTable[256].pfCopy != NULL) {       // the 257th entry must be the end marker (NULL pfCopy)
+        ASSERT(!"Missing end marker.");
+        return FALSE;
+    }
+
+    for (n = 0; n < 256; n++) {                     // same index check for s_rceCopyTable0F
+        REFCOPYENTRY pEntry = &s_rceCopyTable0F[n];
+
+        if (n != pEntry->nOpcode) {
+            ASSERT(n == pEntry->nOpcode);           // condition is false on this path; reports the mismatch
+            return FALSE;
+        }
+    }
+    if (s_rceCopyTable0F[256].pfCopy != NULL) {     // and the same end-marker check
+        ASSERT(!"Missing end marker.");
+        return FALSE;
+    }
+
+    return TRUE;                                    // both tables are ordered and terminated
+}
+#endif // defined(DETOURS_X64) || defined(DETOURS_X86)
+
+/////////////////////////////////////////////////////////// IA64 Disassembler.
+//
+#ifdef DETOURS_IA64
+
+#if defined(_IA64_) != defined(DETOURS_IA64_OFFLINE_LIBRARY)
+// Compile DETOUR_IA64_BUNDLE for native IA64 or cross, but not both -- we get duplicates otherwise.
+const DETOUR_IA64_BUNDLE::DETOUR_IA64_METADATA DETOUR_IA64_BUNDLE::s_rceCopyTable[33] =
+{   // Row i is looked up by template value i; the last three columns are read as nUnit0, nUnit1, nUnit2.
+    { 0x00, M_UNIT,      I_UNIT,      I_UNIT,   },
+    { 0x01, M_UNIT,      I_UNIT,      I_UNIT,   },
+    { 0x02, M_UNIT,      I_UNIT,      I_UNIT,   },
+    { 0x03, M_UNIT,      I_UNIT,      I_UNIT,   },
+    { 0x04, M_UNIT,      L_UNIT,      X_UNIT,   },
+    { 0x05, M_UNIT,      L_UNIT,      X_UNIT,   },
+    { 0x06, 0,           0,           0,        },    // all-zero row: presumably an unassigned template -- TODO confirm
+    { 0x07, 0,           0,           0,        },    // all-zero row
+    { 0x08, M_UNIT,      M_UNIT,      I_UNIT,   },
+    { 0x09, M_UNIT,      M_UNIT,      I_UNIT,   },
+    { 0x0a, M_UNIT,      M_UNIT,      I_UNIT,   },
+    { 0x0b, M_UNIT,      M_UNIT,      I_UNIT,   },
+    { 0x0c, M_UNIT,      F_UNIT,      I_UNIT,   },
+    { 0x0d, M_UNIT,      F_UNIT,      I_UNIT,   },
+    { 0x0e, M_UNIT,      M_UNIT,      F_UNIT,   },
+    { 0x0f, M_UNIT,      M_UNIT,      F_UNIT,   },
+    { 0x10, M_UNIT,      I_UNIT,      B_UNIT,   },
+    { 0x11, M_UNIT,      I_UNIT,      B_UNIT,   },
+    { 0x12, M_UNIT,      B_UNIT,      B_UNIT,   },
+    { 0x13, M_UNIT,      B_UNIT,      B_UNIT,   },
+    { 0x14, 0,           0,           0,        },    // all-zero row
+    { 0x15, 0,           0,           0,        },    // all-zero row
+    { 0x16, B_UNIT,      B_UNIT,      B_UNIT,   },
+    { 0x17, B_UNIT,      B_UNIT,      B_UNIT,   },
+    { 0x18, M_UNIT,      M_UNIT,      B_UNIT,   },
+    { 0x19, M_UNIT,      M_UNIT,      B_UNIT,   },
+    { 0x1a, 0,           0,           0,        },    // all-zero row
+    { 0x1b, 0,           0,           0,        },    // all-zero row
+    { 0x1c, M_UNIT,      F_UNIT,      B_UNIT,   },
+    { 0x1d, M_UNIT,      F_UNIT,      B_UNIT,   },
+    { 0x1e, 0,           0,           0,        },    // all-zero row
+    { 0x1f, 0,           0,           0,        },    // all-zero row
+    { 0x00, 0,           0,           0,        },    // 33rd row; GetUnit0..2 mask the index with 0x1f and never reach it
+};
+
+// 120 112 104 96 88 80 72 64 56 48 40 32 24 16  8  0
+//  f.  e.  d. c. b. a. 9. 8. 7. 6. 5. 4. 3. 2. 1. 0.
+
+//                                      00
+// f.e. d.c. b.a. 9.8. 7.6. 5.4. 3.2. 1.0.
+// 0000 0000 0000 0000 0000 0000 0000 001f : Template [4..0]
+// 0000 0000 0000 0000 0000 03ff ffff ffe0 : Zero [ 41..  5]
+// 0000 0000 0000 0000 0000 3c00 0000 0000 : Zero [ 45.. 42]
+// 0000 0000 0007 ffff ffff c000 0000 0000 : One  [ 82.. 46]
+// 0000 0000 0078 0000 0000 0000 0000 0000 : One  [ 86.. 83]
+// 0fff ffff ff80 0000 0000 0000 0000 0000 : Two  [123.. 87]
+// f000 0000 0000 0000 0000 0000 0000 0000 : Two  [127..124]
+BYTE DETOUR_IA64_BUNDLE::GetTemplate() const
+{   // Template field: the low five bits of the first bundle byte.
+    return (BYTE)(data[0] & 0x1f);
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetInst0() const
+{   // Four-bit field held in bits 2..5 of data[5] (bundle bits [45..42] in the layout comment).
+    return (BYTE)((data[5] >> 2) & 0x0f);
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetInst1() const
+{   // Four-bit field held in bits 3..6 of data[10] (bundle bits [86..83] in the layout comment).
+    return (BYTE)((data[10] >> 3) & 0x0f);
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetInst2() const
+{   // Four-bit field held in the top nibble of data[15] (bundle bits [127..124] in the layout comment).
+    return (BYTE)((data[15] >> 4) & 0x0f);
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetUnit(BYTE slot) const
+{
+    // Dispatch on the slot index; only 0, 1 and 2 select an accessor.
+    if (slot == 0) { return GetUnit0(); }
+    if (slot == 1) { return GetUnit1(); }
+    if (slot == 2) { return GetUnit2(); }
+    // Any other value traps via __debugbreak() and then yields 0.
+    __debugbreak();
+    return 0;
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetUnit0() const
+{   // nUnit0 of the metadata row selected by this bundle's template value.
+    return s_rceCopyTable[GetTemplate()].nUnit0;
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetUnit1() const
+{   // nUnit1 of the metadata row selected by this bundle's template value.
+    return s_rceCopyTable[GetTemplate()].nUnit1;
+}
+
+BYTE DETOUR_IA64_BUNDLE::GetUnit2() const
+{   // nUnit2 of the metadata row selected by this bundle's template value.
+    return s_rceCopyTable[GetTemplate()].nUnit2;
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetData0() const
+{   // The 37 bits at positions [41..5] of wide[0], moved down to bit 0.
+    return (wide[0] >> 5) & 0x0000001fffffffff;
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetData1() const
+{
+    const UINT64 fromLow = (wide[0] >> 46) & 0x000000000003ffff;    // bits [63..46] of wide[0] -> result bits [17..0]
+    return fromLow | ((wide[1] & 0x000000000007ffff) << 18);        // bits [18..0] of wide[1] -> result bits [36..18]
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetData2() const
+{   // The 37 bits at positions [59..23] of wide[1], moved down to bit 0.
+    return (wide[1] >> 23) & 0x0000001fffffffff;
+}
+
+VOID DETOUR_IA64_BUNDLE::SetInst(BYTE slot, BYTE nInst)
+{
+    // Forward nInst to the setter of the chosen slot (0, 1 or 2).
+    if (slot == 0) { SetInst0(nInst); return; }
+    if (slot == 1) { SetInst1(nInst); return; }
+    if (slot == 2) { SetInst2(nInst); return; }
+
+    // Any other slot value traps via __debugbreak().
+    __debugbreak();
+}
+
+VOID DETOUR_IA64_BUNDLE::SetInst0(BYTE nInst)
+{   // Stores the low four bits of nInst in bits 2..5 of data[5]; the other bits are kept.
+    data[5] = (BYTE)((data[5] & 0xc3) | ((nInst & 0x0f) << 2));
+}
+
+VOID DETOUR_IA64_BUNDLE::SetInst1(BYTE nInst)
+{   // Stores the low four bits of nInst in bits 3..6 of data[10]; the other bits are kept.
+    data[10] = (BYTE)((data[10] & 0x87) | ((nInst & 0x0f) << 3));
+}
+
+VOID DETOUR_IA64_BUNDLE::SetInst2(BYTE nInst)
+{   // Stores the low four bits of nInst in the top nibble of data[15]; the low nibble is kept.
+    data[15] = (BYTE)((data[15] & 0x0f) | ((nInst & 0x0f) << 4));
+}
+
+VOID DETOUR_IA64_BUNDLE::SetData(BYTE slot, UINT64 nData)
+{
+    // Forward to the per-slot setter; slot must be 0, 1 or 2.
+    if (slot == 0) { SetData0(nData); return; }
+    if (slot == 1) { SetData1(nData); return; }
+    if (slot == 2) { SetData2(nData); return; }
+
+    // Unknown slot index: trap into the debugger.
+    __debugbreak();
+}
+
+VOID DETOUR_IA64_BUNDLE::SetData0(UINT64 nData)
+{   // Overwrite bits 5..41 of wide[0] with the low 37 bits of nData.
+    wide[0] = SetBits(wide[0], 5, 37, nData);
+}
+
+VOID DETOUR_IA64_BUNDLE::SetData1(UINT64 nData)
+{   // Slot 1 data is split: 18 bits go to the top of wide[0], 19 to the bottom of wide[1].
+    wide[0] = SetBits(wide[0], 46, 18, nData);
+    wide[1] = SetBits(wide[1], 0, 19, nData >> 18);
+}
+
+VOID DETOUR_IA64_BUNDLE::SetData2(UINT64 nData)
+{   // Overwrite bits 23..59 of wide[1] with the low 37 bits of nData.
+    wide[1] = SetBits(wide[1], 23, 37, nData);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetInstruction(BYTE slot) const
+{
+    // Pick the per-slot reader; slot must be 0, 1 or 2.
+    if (slot == 0) return GetInstruction0();
+    if (slot == 1) return GetInstruction1();
+    if (slot == 2) return GetInstruction2();
+
+    __debugbreak();     // any other slot index traps into the debugger
+    return 0;
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetInstruction0() const
+{   // Slot 0 lies wholly inside wide[0], right after the template bits.
+    UINT64 const slot0 = GetBits(wide[0], DETOUR_IA64_INSTRUCTION0_OFFSET, DETOUR_IA64_INSTRUCTION_SIZE);
+    return slot0;
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetInstruction1() const
+{
+    // Slot 1 straddles the two 64-bit halves of the bundle.
+    UINT const nLowBits = 64 - DETOUR_IA64_INSTRUCTION1_OFFSET;     // bits supplied by wide[0]
+    UINT64 const low = GetBits(wide[0], DETOUR_IA64_INSTRUCTION1_OFFSET, nLowBits);
+    UINT64 const high = GetBits(wide[1], 0, DETOUR_IA64_INSTRUCTION_SIZE - nLowBits);
+    return low | (high << nLowBits);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetInstruction2() const
+{   // Slot 2 is the uppermost DETOUR_IA64_INSTRUCTION_SIZE bits of wide[1].
+    UINT const nShift = 64 - DETOUR_IA64_INSTRUCTION_SIZE;
+    return wide[1] >> nShift;
+}
+
+void DETOUR_IA64_BUNDLE::SetInstruction(BYTE slot, UINT64 instruction)
+{
+    // Forward to the per-slot writer; slot must be 0, 1 or 2.
+    if (slot == 0) { SetInstruction0(instruction); return; }
+    if (slot == 1) { SetInstruction1(instruction); return; }
+    if (slot == 2) { SetInstruction2(instruction); return; }
+
+    __debugbreak();     // any other slot index traps into the debugger
+}
+
+void DETOUR_IA64_BUNDLE::SetInstruction0(UINT64 instruction)
+{   // Overwrite the slot 0 field of wide[0]; SetBits leaves the bits outside the field alone.
+    wide[0] = SetBits(wide[0], DETOUR_IA64_INSTRUCTION0_OFFSET, DETOUR_IA64_INSTRUCTION_SIZE, instruction);
+}
+
+void DETOUR_IA64_BUNDLE::SetInstruction1(UINT64 instruction)
+{
+    // Slot 1 is split: its low bits finish wide[0], the remainder starts wide[1].
+    UINT const nLowBits = 64 - DETOUR_IA64_INSTRUCTION1_OFFSET;
+    UINT const nHighBits = DETOUR_IA64_INSTRUCTION_SIZE - nLowBits;
+    // Each store reads only its own word, so the two updates are independent.
+    wide[0] = SetBits(wide[0], DETOUR_IA64_INSTRUCTION1_OFFSET, nLowBits, instruction);
+    wide[1] = SetBits(wide[1], 0, nHighBits, instruction >> nLowBits);
+}
+
+void DETOUR_IA64_BUNDLE::SetInstruction2(UINT64 instruction)
+{   // Slot 2 occupies the uppermost DETOUR_IA64_INSTRUCTION_SIZE bits of wide[1].
+    UINT const nOffset = 64 - DETOUR_IA64_INSTRUCTION_SIZE;
+    wide[1] = SetBits(wide[1], nOffset, DETOUR_IA64_INSTRUCTION_SIZE, instruction);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SignExtend(UINT64 Value, UINT64 Offset)
+// Sign-extend the low Offset bits of Value to 64 bits (definition taken from the IA64 manual).
+{
+    // Bit (Offset - 1) is the sign; when it is set, every bit from Offset upward becomes one.
+    UINT64 const signMask = ((UINT64)1) << (Offset - 1);
+    bool const negative = (Value & signMask) != 0;
+    return negative ? (Value | ((~(UINT64)0) << Offset)) : Value;
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetBits(UINT64 Value, UINT64 Offset, UINT64 Count)
+{   // Return the Count-bit field of Value that starts at bit Offset, right-aligned.
+    UINT64 const lowMask = ~(~((UINT64)0) << Count);    // Count low-order one bits
+    return (Value >> Offset) & lowMask;
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SetBits(UINT64 Value, UINT64 Offset, UINT64 Count, UINT64 Field)
+{   // Return Value with its Count-bit field at bit Offset replaced by Field.
+    UINT64 const fieldMask = (~((~(UINT64)0) << Count)) << Offset;
+    UINT64 const cleared = Value & ~fieldMask;              // everything outside the field
+    return cleared | ((Field << Offset) & fieldMask);       // Field truncated to Count bits
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetOpcode(UINT64 instruction)
+// Return the 4-bit primary opcode, i.e. the top four bits of the instruction.
+{
+    UINT64 const nFirstBit = DETOUR_IA64_INSTRUCTION_SIZE - 4;
+    return GetBits(instruction, nFirstBit, 4);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetX(UINT64 instruction)
+// Return the 1-bit opcode extension.
+{
+    // The extension is the single bit at position 33.
+    return GetBits(instruction, 33, 1);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetX3(UINT64 instruction)
+// Return the 3-bit opcode extension.
+{
+    // The extension occupies bits 33..35.
+    return GetBits(instruction, 33, 3);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetX6(UINT64 instruction)
+// Return the 6-bit opcode extension.
+{
+    // The extension occupies bits 27..32.
+    return GetBits(instruction, 27, 6);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetImm7a(UINT64 instruction)
+{
+    // imm7a: the 7 bits starting at bit 6.
+    return GetBits(instruction, 6, 7);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SetImm7a(UINT64 instruction, UINT64 imm7a)
+{
+    // Returns a copy of instruction whose bits 6..12 hold the low 7 bits of imm7a.
+    return SetBits(instruction, 6, 7, imm7a);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetImm13c(UINT64 instruction)
+{
+    // imm13c: the 13 bits starting at bit 20.
+    return GetBits(instruction, 20, 13);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SetImm13c(UINT64 instruction, UINT64 imm13c)
+{
+    // Returns a copy of instruction whose bits 20..32 hold the low 13 bits of imm13c.
+    return SetBits(instruction, 20, 13, imm13c);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetSignBit(UINT64 instruction)
+{
+    // The sign of the immediate is the single bit at position 36.
+    return GetBits(instruction, 36, 1);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SetSignBit(UINT64 instruction, UINT64 signBit)
+{
+    // Returns a copy of instruction whose bit 36 holds the low bit of signBit.
+    return SetBits(instruction, 36, 1, signBit);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetImm20a(UINT64 instruction)
+{
+    // imm20a: the 20 bits starting at bit 6.
+    return GetBits(instruction, 6, 20);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SetImm20a(UINT64 instruction, UINT64 imm20a)
+{
+    // Returns a copy of instruction whose bits 6..25 hold the low 20 bits of imm20a.
+    return SetBits(instruction, 6, 20, imm20a);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetImm20b(UINT64 instruction)
+{
+    // imm20b: the 20 bits starting at bit 13.
+    return GetBits(instruction, 13, 20);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::SetImm20b(UINT64 instruction, UINT64 imm20b)
+{
+    // Returns a copy of instruction whose bits 13..32 hold the low 20 bits of imm20b.
+    return SetBits(instruction, 13, 20, imm20b);
+}
+
+bool DETOUR_IA64_BUNDLE::RelocateInstruction(_Inout_ DETOUR_IA64_BUNDLE* pDst,
+                                             _In_ BYTE slot,
+                                             _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const
+/*
+    If pBundleExtra is provided and the instruction in `slot` is IP-relative,
+    the copy of that instruction in pDst is retargeted at pBundleExtra,
+    pBundleExtra is set to a brl to the original target, and true is returned.
+
+    [Not used] If pBundleExtra is not provided and instruction is IP-relative, return true.
+
+    Otherwise (the instruction is not one of the forms below) return false.
+
+    The following IP-relative forms are recognized:
+        br and br.call
+        chk.s.m integer and float
+        chk.a.nc integer and float
+        chk.a.clr integer and float
+        chk.s.i
+        fchkf
+
+    Brl is handled elsewhere, because the code was previously written.
+
+    Branch prediction hints are not relocated.
+*/
+{
+    UINT64 const instruction = GetInstruction(slot);
+    UINT64 const opcode = GetOpcode(instruction);
+    size_t const dest = (size_t)pDst;              // address of the copied bundle
+    size_t const extra = (size_t)pBundleExtra;     // address of the extra bundle (0 when absent)
+
+    switch (GetUnit(slot)) {                       // classify by unit, then by opcode/extension
+    case F_UNIT:
+        // F14 fchkf
+        if (opcode == 0 && GetX(instruction) == 0 && GetX6(instruction) == 8) {
+            goto imm20a;
+        }
+        return false;
+
+    case M_UNIT:
+        // M20 x3 == 1 integer chk.s.m
+        // M21 x3 == 3 floating point chk.s
+        if (opcode == 1) {
+            UINT64 const x3 = GetX3(instruction);
+            if (x3 == 1 || x3 == 3) {
+                goto imm13_7;
+            }
+        }
+
+        // M22 x3 == 4 integer chk.a.nc
+        // M22 x3 == 5 integer chk.a.clr
+        // M23 x3 == 6 floating point chk.a.nc
+        // M23 x3 == 7 floating point chk.a.clr
+        if (opcode == 0) {
+            UINT64 const x3 = GetX3(instruction);
+            if (x3 == 4 || x3 == 5 || x3 == 6 || x3 == 7) {
+                goto imm20b;
+            }
+        }
+        return false;
+    case I_UNIT:
+        // I20
+        if (opcode == 0 && GetX3(instruction) == 1) { // chk.s.i
+            goto imm13_7;
+        }
+        return false;
+    case B_UNIT:
+        // B1 B2 B3
+        // 4 br
+        // 5 br.call
+        if (opcode == 4 || opcode == 5) {
+            goto imm20b;
+        }
+        return false;
+    }
+    return false;
+
+    UINT64 imm;                 // decoded byte displacement of the original instruction
+    UINT64 new_instruction;     // the instruction re-encoded to reach pBundleExtra from pDst
+
+imm13_7:    // displacement stored as sign bit + imm13c + imm7a
+    imm = SignExtend((GetSignBit(instruction) << 20) | (GetImm13c(instruction) << 7) | GetImm7a(instruction), 21) << 4;
+    new_instruction = SetSignBit(SetImm13c(SetImm7a(instruction, (extra - dest) >> 4), (extra - dest) >> 11), extra < dest);
+    goto set_brl;
+
+imm20a:     // displacement stored as sign bit + imm20a
+    imm = SignExtend((GetSignBit(instruction) << 20) | GetImm20a(instruction), 21) << 4;
+    new_instruction = SetSignBit(SetImm20a(instruction, (extra - dest) >> 4), extra < dest);
+    goto set_brl;
+
+imm20b:     // displacement stored as sign bit + imm20b
+    imm = SignExtend((GetSignBit(instruction) << 20) | GetImm20b(instruction), 21) << 4;
+    new_instruction = SetSignBit(SetImm20b(instruction, (extra - dest) >> 4), extra < dest);
+    goto set_brl;
+
+set_brl:
+    if (pBundleExtra != NULL) {     // without an extra bundle nothing is written; only true is reported
+        pDst->SetInstruction(slot, new_instruction);
+        pBundleExtra->SetBrl((size_t)this + imm);   // original target = this bundle's address + displacement
+    }
+    return true;
+}
+
+UINT DETOUR_IA64_BUNDLE::RelocateBundle(_Inout_ DETOUR_IA64_BUNDLE* pDst,
+                                        _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const
+/*
+    The bundle has already been copied verbatim into pDst; relocate its slots one by one.
+    Returns the number of extra bytes the relocation needs (one bundle per relocated slot).
+*/
+{
+    UINT nExtraBytes = 0;
+    for (BYTE slot = 0; slot < DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE; ++slot) {
+        if (RelocateInstruction(pDst, slot, pBundleExtra)) {
+            // This slot used one extra bundle; the next one, if any, lies just below it.
+            if (pBundleExtra != NULL) { --pBundleExtra; }
+            nExtraBytes += sizeof(DETOUR_IA64_BUNDLE);
+        }
+    }
+    return nExtraBytes;
+}
+
+BOOL DETOUR_IA64_BUNDLE::IsBrl() const
+{
+    // f.e. d.c. b.a. 9.8. 7.6. 5. 4. 3. 2. 1. 0.
+    // c000 0070 0000 0000 0000 00 01 00 00 00 05 : brl.sptk.few
+    // c8ff fff0 007f fff0 ffff 00 01 00 00 00 05 : brl.sptk.few
+    // c000 0048 0000 0000 0001 00 00 00 00 00 05 : brl.sptk.many
+    return ((wide[1] & 0xe000000000000000) == 0xc000000000000000 && // top nibble c or d.
+            (wide[0] & 0x1e) == 0x04);                              // template 4 or 5.
+}
+
+VOID DETOUR_IA64_BUNDLE::SetBrl()
+{
+    // Overwrite the whole bundle with the fixed brl pattern; every displacement bit is zero.
+    wide[1] = 0xc000000800000000;
+    wide[0] = 0x0000000100000005;   // few (the "many" form would be 0x0000000180000005)
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetBrlImm() const
+{   // Reassemble the 64-bit brl displacement; the comment above each term is its mask in the result.
+    return (
+            //          0x0000000000fffff0
+            ((wide[1] & 0x00fffff000000000) >> 32) |    // all 20 bits of imm20b.
+            //          0x000000ffff000000
+            ((wide[0] & 0xffff000000000000) >> 24) |    // bottom 16 bits of imm39.
+            //          0x7fffff0000000000
+            ((wide[1] & 0x00000000007fffff) << 40) |    // top 23 bits of imm39.
+            //          0x8000000000000000
+            ((wide[1] & 0x0800000000000000) <<  4)      // single bit of i.
+           );   // the low four bits of the result are always zero
+}
+
+VOID DETOUR_IA64_BUNDLE::SetBrlImm(UINT64 imm)
+{   // Scatter imm into the displacement fields (inverse of GetBrlImm); imm bits 0..3 are not stored.
+    wide[0] = ((wide[0] & ~0xffff000000000000) |
+               //      0xffff000000000000   (mask of the field inside wide[0])
+               ((imm & 0x000000ffff000000) << 24)       // bottom 16 bits of imm39.
+              );
+    wide[1] = ((wide[1] & ~0x08fffff0007fffff) |
+               //      0x00fffff000000000   (masks below are positions inside wide[1])
+               ((imm & 0x0000000000fffff0) << 32) |     // all 20 bits of imm20b.
+               //      0x00000000007fffff
+               ((imm & 0x7fffff0000000000) >> 40) |     // top 23 bits of imm39.
+               //      0x0800000000000000
+               ((imm & 0x8000000000000000) >>  4)       // single bit of i.
+              );
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetBrlTarget() const
+{   // The stored displacement is relative to the address of this bundle.
+    return GetBrlImm() + (UINT64)this;
+}
+
+VOID DETOUR_IA64_BUNDLE::SetBrl(UINT64 target)
+{
+    // Lay down the fixed brl pattern, then patch in the displacement from this bundle to target.
+    SetBrl();
+    SetBrlImm(target - (UINT64)this);
+}
+
+VOID DETOUR_IA64_BUNDLE::SetBrlTarget(UINT64 target)
+{
+    // Only the displacement fields change; it is measured from the address of this bundle.
+    SetBrlImm(target - (UINT64)this);
+}
+
+BOOL DETOUR_IA64_BUNDLE::IsMovlGp() const
+{   // True when the fixed (non-immediate) bits match the pattern that SetMovlGp writes.
+    // f.e. d.c. b.a. 9.8. 7.6. 5.4. 3.2. 1.0.
+    // 6fff f7f0 207f ffff ffff c001 0000 0004   (immediate bits all ones)
+    // 6000 0000 2000 0000 0000 0001 0000 0004   (immediate bits all zero)
+    return ((wide[0] & 0x00003ffffffffffe) == 0x0000000100000004 &&    // bit 0 is ignored
+            (wide[1] & 0xf000080fff800000) == 0x6000000020000000);
+}
+
+UINT64 DETOUR_IA64_BUNDLE::GetMovlGp() const
+{   // Reassemble the 64-bit immediate; the comment above each term is its mask in the result.
+    UINT64 raw = (
+                  //          0x000000000000007f
+                  ((wide[1] & 0x000007f000000000) >> 36) |
+                  //          0x000000000000ff80
+                  ((wide[1] & 0x07fc000000000000) >> 43) |
+                  //          0x00000000001f0000
+                  ((wide[1] & 0x0003e00000000000) >> 29) |
+                  //          0x0000000000200000
+                  ((wide[1] & 0x0000100000000000) >> 23) |
+                  //          0x000000ffffc00000
+                  ((wide[0] & 0xffffc00000000000) >> 24) |
+                  //          0x7fffff0000000000
+                  ((wide[1] & 0x00000000007fffff) << 40) |
+                  //          0x8000000000000000
+                  ((wide[1] & 0x0800000000000000) <<  4)
+                 );
+
+    return (INT64)raw;  // converted back to UINT64 by the return type
+}
+
+VOID DETOUR_IA64_BUNDLE::SetMovlGp(UINT64 gp)
+{   // Overwrite the bundle with the fixed pattern IsMovlGp recognizes, carrying gp as immediate.
+    UINT64 raw = (UINT64)gp;
+    // Each comment below is the mask of the field inside the destination word.
+    wide[0] = (0x0000000100000005 |
+               //      0xffffc00000000000
+               ((raw & 0x000000ffffc00000) << 24)
+              );
+    wide[1] = (
+               0x6000000020000000 |
+               //      0x000007f000000000
+               ((raw & 0x000000000000007f) << 36) |     // all seven low bits, as GetMovlGp decodes them
+               //      0x07fc000000000000
+               ((raw & 0x000000000000ff80) << 43) |
+               //      0x0003e00000000000
+               ((raw & 0x00000000001f0000) << 29) |
+               //      0x0000100000000000
+               ((raw & 0x0000000000200000) << 23) |
+               //      0x00000000007fffff
+               ((raw & 0x7fffff0000000000) >> 40) |
+               //      0x0800000000000000
+               ((raw & 0x8000000000000000) >>  4)
+              );
+}
+
+UINT DETOUR_IA64_BUNDLE::Copy(_Out_ DETOUR_IA64_BUNDLE *pDst,
+                              _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const
+{   // Copies this bundle to pDst, fixes up IP-relative contents, returns the extra bytes needed.
+    // Copy the bytes unchanged.
+
+#pragma warning(suppress:6001) // using uninitialized *pDst
+    pDst->wide[0] = wide[0];
+    pDst->wide[1] = wide[1];
+
+    // Relocate if necessary.
+
+    UINT nExtraBytes = RelocateBundle(pDst, pBundleExtra);
+
+    if (GetUnit1() == L_UNIT && IsBrl()) {     // a brl keeps its target: re-encode relative to pDst
+        pDst->SetBrlTarget(GetBrlTarget());
+    }
+
+    return nExtraBytes;
+}
+
+BOOL DETOUR_IA64_BUNDLE::SetNop(BYTE slot)
+{
+    BYTE const nUnit = GetUnit(slot);
+    if (nUnit == I_UNIT || nUnit == M_UNIT || nUnit == F_UNIT) {
+        // These three unit kinds share one opcode/data pattern.
+        SetInst(slot, 0);
+        SetData(slot, 0x8000000);
+        return true;
+    }
+    if (nUnit == B_UNIT) {
+        SetInst(slot, 2);
+        SetData(slot, 0);
+        return true;
+    }
+    DebugBreak();   // no pattern is defined here for any other unit kind
+    return false;
+}
+
+BOOL DETOUR_IA64_BUNDLE::SetNop0()
+{   // Convenience wrapper: SetNop for slot 0.
+    return SetNop(0);
+}
+
+BOOL DETOUR_IA64_BUNDLE::SetNop1()
+{   // Convenience wrapper: SetNop for slot 1.
+    return SetNop(1);
+}
+
+BOOL DETOUR_IA64_BUNDLE::SetNop2()
+{   // Convenience wrapper: SetNop for slot 2.
+    return SetNop(2);
+}
+
+VOID DETOUR_IA64_BUNDLE::SetStop()
+{   // Set the lowest bit of the template byte; every other bit is left as it was.
+    data[0] = (BYTE)(data[0] | 0x01);
+}
+
+#endif // DETOURS_IA64
+
+PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst,
+                                   _Inout_opt_ PVOID *ppDstPool,
+                                   _In_ PVOID pSrc,
+                                   _Out_opt_ PVOID *ppTarget,
+                                   _Out_opt_ LONG *plExtra)
+{
+    // Local stand-ins for the optional outputs the caller left out.
+    LONG nExtra;
+    DETOUR_IA64_BUNDLE bExtra;
+    DETOUR_IA64_BUNDLE *pbSrc = (DETOUR_IA64_BUNDLE *)pSrc;
+    DETOUR_IA64_BUNDLE *pbDst = (pDst != NULL) ? (DETOUR_IA64_BUNDLE *)pDst : &bExtra;
+
+    if (plExtra == NULL) {
+        plExtra = &nExtra;
+    }
+    *plExtra = 0;
+    if (ppTarget != NULL) {
+        // Only a brl bundle is reported as having a target.
+        *ppTarget = pbSrc->IsBrl() ? (PVOID)pbSrc->GetBrlTarget() : DETOUR_INSTRUCTION_TARGET_NONE;
+    }
+
+    DETOUR_IA64_BUNDLE *pbPool = ppDstPool ? ((DETOUR_IA64_BUNDLE*)*ppDstPool) - 1 : (DETOUR_IA64_BUNDLE*)NULL;
+    *plExtra = (LONG)pbSrc->Copy(pbDst, pbPool);    // the first extra bundle is the one just below *ppDstPool
+    return pbSrc + 1;                               // address of the bundle after the source
+}
+
+#endif // DETOURS_IA64
+
+#ifdef DETOURS_ARM
+
+#define DETOURS_PFUNC_TO_PBYTE(p)  ((PBYTE)(((ULONG_PTR)(p)) & ~(ULONG_PTR)1))  // clears bit 0 of the pointer
+#define DETOURS_PBYTE_TO_PFUNC(p)  ((PBYTE)(((ULONG_PTR)(p)) | (ULONG_PTR)1))   // sets bit 0 of the pointer
+
+#define c_PCAdjust  4       // The PC value of an instruction is the PC address plus 4.
+#define c_PC        15      // The register number for the Program Counter
+#define c_LR        14      // The register number for the Link Register
+#define c_SP        13      // The register number for the Stack Pointer
+#define c_NOP       0xbf00  // A nop instruction
+#define c_BREAK     0xdefe  // NOTE(review): by its name a break instruction, not a nop - confirm encoding
+
+class CDetourDis   // Instruction copier/relocator compiled when DETOURS_ARM is defined.
+{
+  public:
+    CDetourDis();
+
+    PBYTE   CopyInstruction(PBYTE pDst,
+                            PBYTE *ppDstPool,
+                            PBYTE pSrc,
+                            PBYTE *ppTarget,
+                            LONG *plExtra);
+
+  public:
+    typedef BYTE (CDetourDis::* COPYFUNC)(PBYTE pbDst, PBYTE pbSrc); // NOTE(review): the copy methods below name these (pSource, pDest); confirm the call order
+
+    struct COPYENTRY {
+        USHORT      nOpcode;
+        COPYFUNC    pfCopy;
+    };
+
+    typedef const COPYENTRY * REFCOPYENTRY;
+
+    // The structs below are bit-field views that the methods lay over an integer by reference
+    // cast (e.g. (Branch5&)opcode), so they depend on the compiler's bit-field layout.
+    struct Branch5
+    {
+        DWORD Register : 3;
+        DWORD Imm5 : 5;
+        DWORD Padding : 1;
+        DWORD I : 1;
+        DWORD OpCode : 6;
+    };
+
+    struct Branch5Target
+    {
+        DWORD Padding : 1;
+        DWORD Imm5 : 5;
+        DWORD I : 1;
+        DWORD Padding2 : 25;
+    };
+
+    struct Branch8
+    {
+        DWORD Imm8 : 8;
+        DWORD Condition : 4;
+        DWORD OpCode : 4;
+    };
+
+    struct Branch8Target
+    {
+        DWORD Padding : 1;
+        DWORD Imm8 : 8;
+        DWORD Padding2 : 23;
+    };
+
+    struct Branch11
+    {
+        DWORD Imm11 : 11;
+        DWORD OpCode : 5;
+    };
+
+    struct Branch11Target
+    {
+        DWORD Padding : 1;
+        DWORD Imm11 : 11;
+        DWORD Padding2 : 20;
+    };
+
+    struct Branch20
+    {
+        DWORD Imm11 : 11;
+        DWORD J2 : 1;
+        DWORD IT : 1;
+        DWORD J1 : 1;
+        DWORD Other : 2;
+        DWORD Imm6 : 6;
+        DWORD Condition : 4;
+        DWORD Sign : 1;
+        DWORD OpCode : 5;
+    };
+
+    struct Branch20Target
+    {
+        DWORD Padding : 1;
+        DWORD Imm11 : 11;
+        DWORD Imm6 : 6;
+        DWORD J1 : 1;
+        DWORD J2 : 1;
+        DWORD Sign : 1;
+        INT32 Padding2 : 11;    // signed so that assigning -1 fills the high bits (sign extension)
+    };
+
+    struct Branch24
+    {
+        DWORD Imm11             : 11;
+        DWORD J2                : 1;
+        DWORD InstructionSet    : 1;
+        DWORD J1                : 1;
+        DWORD Link              : 1;
+        DWORD Branch            : 1;
+        DWORD Imm10             : 10;
+        DWORD Sign              : 1;
+        DWORD OpCode            : 5;
+    };
+
+    struct Branch24Target
+    {
+        DWORD Padding : 1;
+        DWORD Imm11 : 11;
+        DWORD Imm10 : 10;
+        DWORD I2 : 1;
+        DWORD I1 : 1;
+        DWORD Sign : 1;
+        INT32 Padding2 : 7;     // signed so that assigning -1 fills the high bits (sign extension)
+    };
+
+    struct LiteralLoad8
+    {
+        DWORD Imm8 : 8;
+        DWORD Register : 3;
+        DWORD OpCode : 5;
+    };
+
+    struct LiteralLoad8Target
+    {
+        DWORD Padding : 2;
+        DWORD Imm8 : 8;
+        DWORD Padding2 : 22;
+    };
+
+    struct LiteralLoad12
+    {
+        DWORD Imm12 : 12;
+        DWORD Register : 4;
+        DWORD OpCodeSuffix : 7;
+        DWORD Add : 1;
+        DWORD OpCodePrefix : 8;
+    };
+
+    struct LiteralLoad12Target
+    {
+        DWORD Imm12 : 12;
+        DWORD Padding : 20;
+    };
+
+    struct ImmediateRegisterLoad32
+    {
+        DWORD Imm12 : 12;
+        DWORD DestinationRegister : 4;
+        DWORD SourceRegister: 4;
+        DWORD OpCode : 12;
+    };
+
+    struct ImmediateRegisterLoad16
+    {
+        DWORD DestinationRegister : 3;
+        DWORD SourceRegister: 3;
+        DWORD OpCode : 10;
+    };
+
+    struct TableBranch
+    {
+        DWORD IndexRegister : 4;
+        DWORD HalfWord : 1;
+        DWORD OpCodeSuffix : 11;
+        DWORD BaseRegister : 4;
+        DWORD OpCodePrefix : 12;
+    };
+
+    struct Shift    // splits a shift amount into the Imm2/Imm3 pieces used by Add32 and LogicalShiftLeft32
+    {
+        DWORD Imm2 : 2;
+        DWORD Imm3 : 3;
+    };
+
+    struct Add32
+    {
+        DWORD SecondOperandRegister : 4;
+        DWORD Type : 2;
+        DWORD Imm2 : 2;
+        DWORD DestinationRegister : 4;
+        DWORD Imm3 : 3;
+        DWORD Padding : 1;
+        DWORD FirstOperandRegister : 4;
+        DWORD SetFlags : 1;
+        DWORD OpCode : 11;
+    };
+
+    struct LogicalShiftLeft32
+    {
+        DWORD SourceRegister : 4;
+        DWORD Padding : 2;
+        DWORD Imm2 : 2;
+        DWORD DestinationRegister : 4;
+        DWORD Imm3 : 3;
+        DWORD Padding2 : 5;
+        DWORD SetFlags : 1;
+        DWORD OpCode : 11;
+    };
+
+    struct StoreImmediate12
+    {
+        DWORD Imm12 : 12;
+        DWORD SourceRegister : 4;
+        DWORD BaseRegister : 4;
+        DWORD OpCode : 12;
+    };
+
+  protected:
+    BYTE    PureCopy16(BYTE* pSource, BYTE* pDest);
+    BYTE    PureCopy32(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyMiscellaneous16(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyConditionalBranchOrOther16(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyUnConditionalBranch16(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyLiteralLoad16(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyBranchExchangeOrDataProcessing16(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyBranch24(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyBranchOrMiscellaneous32(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyLiteralLoad32(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyLoadAndStoreSingle(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyLoadAndStoreMultipleAndSRS(BYTE* pSource, BYTE* pDest);
+    BYTE    CopyTableBranch(BYTE* pSource, BYTE* pDest);
+    BYTE    BeginCopy32(BYTE* pSource, BYTE* pDest);
+
+    LONG    DecodeBranch5(ULONG opcode);
+    USHORT  EncodeBranch5(ULONG originalOpCode, LONG delta);
+    LONG    DecodeBranch8(ULONG opcode);
+    USHORT  EncodeBranch8(ULONG originalOpCode, LONG delta);
+    LONG    DecodeBranch11(ULONG opcode);
+    USHORT  EncodeBranch11(ULONG originalOpCode, LONG delta);
+    BYTE    EmitBranch11(PUSHORT& pDest, LONG relativeAddress);
+    LONG    DecodeBranch20(ULONG opcode);
+    ULONG   EncodeBranch20(ULONG originalOpCode, LONG delta);
+    LONG    DecodeBranch24(ULONG opcode, BOOL& fLink);
+    ULONG   EncodeBranch24(ULONG originalOpCode, LONG delta, BOOL fLink);
+    LONG    DecodeLiteralLoad8(ULONG instruction);
+    LONG    DecodeLiteralLoad12(ULONG instruction);
+    BYTE    EmitLiteralLoad8(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral);
+    BYTE    EmitLiteralLoad12(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral);
+    BYTE    EmitImmediateRegisterLoad32(PUSHORT& pDest, BYTE reg);
+    BYTE    EmitImmediateRegisterLoad16(PUSHORT& pDest, BYTE reg);
+    BYTE    EmitLongLiteralLoad(PUSHORT& pDest, BYTE reg, PVOID pTarget);
+    BYTE    EmitLongBranch(PUSHORT& pDest, PVOID pTarget);
+    USHORT  CalculateExtra(BYTE sourceLength, BYTE* pDestStart, BYTE* pDestEnd);
+
+  protected:
+    ULONG GetLongInstruction(BYTE* pSource)
+    {   // Joins two consecutive 16-bit halfwords; the first one becomes the high half.
+        return (((PUSHORT)pSource)[0] << 16) | (((PUSHORT)pSource)[1]);
+    }
+
+    BYTE EmitLongInstruction(PUSHORT& pDstInst, ULONG instruction)
+    {   // Writes the high halfword first, advances pDstInst by two halfwords, returns 4.
+        *pDstInst++ = (USHORT)(instruction >> 16);
+        *pDstInst++ = (USHORT)instruction;
+        return sizeof(ULONG);
+    }
+
+    BYTE EmitShortInstruction(PUSHORT& pDstInst, USHORT instruction)
+    {   // Writes one halfword, advances pDstInst past it, returns 2.
+        *pDstInst++ = instruction;
+        return sizeof(USHORT);
+    }
+
+    PBYTE Align4(PBYTE pValue)
+    {   // Rounds the address down to a multiple of 4.
+        return (PBYTE)(((size_t)pValue) & ~(ULONG)3u);
+    }
+
+    PBYTE CalculateTarget(PBYTE pSource, LONG delta)
+    {   // Target = instruction address + delta + c_PCAdjust.
+        return (pSource + delta + c_PCAdjust);
+    }
+
+    LONG CalculateNewDelta(PBYTE pTarget, BYTE* pDest)
+    {   // Inverse of CalculateTarget for an instruction placed at pDest.
+        return (LONG)(pTarget - (pDest + c_PCAdjust));
+    }
+
+    BYTE    EmitAdd32(PUSHORT& pDstInst, BYTE op1Reg, BYTE op2Reg, BYTE dstReg, BYTE shiftAmount)
+    {
+        Shift& shift = (Shift&)(shiftAmount);
+        const BYTE shiftType = 0x00; // LSL
+        Add32 add = { op2Reg, shiftType, shift.Imm2, dstReg, shift.Imm3,
+                      0x0, op1Reg, 0x0, 0x758 };
+        return EmitLongInstruction(pDstInst, (ULONG&)add);
+    }
+
+    BYTE    EmitLogicalShiftLeft32(PUSHORT& pDstInst, BYTE srcReg, BYTE dstReg, BYTE shiftAmount)
+    {
+        Shift& shift = (Shift&)(shiftAmount);
+        LogicalShiftLeft32 shiftLeft = { srcReg, 0x00, shift.Imm2, dstReg, shift.Imm3, 0x1E,
+                                         0x00, 0x752 };
+        return EmitLongInstruction(pDstInst, (ULONG&)shiftLeft);
+    }
+
+    BYTE    EmitStoreImmediate12(PUSHORT& pDstInst, BYTE srcReg, BYTE baseReg, USHORT offset)
+    {
+        StoreImmediate12 store = { offset, srcReg, baseReg, 0xF8C };
+        return EmitLongInstruction(pDstInst, (ULONG&)store);
+    }
+
+  protected:
+    PBYTE   m_pbTarget;
+    PBYTE   m_pbPool;       // EmitLongLiteralLoad pre-decrements this before storing each literal
+    LONG    m_lExtra;
+
+    BYTE    m_rbScratchDst[64]; // matches or exceeds rbCode
+
+    static const COPYENTRY s_rceCopyTable[33];
+};
+
+LONG CDetourDis::DecodeBranch5(ULONG opcode)
+{
+    // View the instruction bits through the Branch5 bit-field layout.
+    Branch5& insn = (Branch5&)(opcode);
+    Branch5Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+
+    // Move each immediate field to its position in the byte offset.
+    offset.I = insn.I;
+    offset.Imm5 = insn.Imm5;
+    // No sign extension: the untouched high bits stay zero.
+    return (LONG&)offset;
+}
+
+USHORT CDetourDis::EncodeBranch5(ULONG originalOpCode, LONG delta)
+{
+    // The I bit, the five immediate bits and the implicit trailing zero give a 7-bit,
+    // non-negative offset; a delta outside that range cannot be encoded and yields 0.
+    bool const fEncodable = (delta >= 0 && delta <= 0x7F);
+    if (!fEncodable) {
+        return 0;
+    }
+
+    Branch5& insn = (Branch5&)(originalOpCode);
+    Branch5Target& offset = (Branch5Target&)(delta);
+    insn.I = offset.I;
+    insn.Imm5 = offset.Imm5;
+    return (USHORT&)insn;
+}
+
+LONG CDetourDis::DecodeBranch8(ULONG opcode)
+{
+    Branch8& insn = (Branch8&)(opcode);
+    Branch8Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+    offset.Imm8 = insn.Imm8;
+
+    // Imm8 now sits in bits 1..8; shifting up and back down by 23 sign-extends from bit 8.
+    LONG const raw = (LONG&)offset;
+    return (raw << 23) >> 23;
+}
+
+USHORT CDetourDis::EncodeBranch8(ULONG originalOpCode, LONG delta)
+{
+    // Eight immediate bits plus the implicit trailing zero give a signed 9-bit range;
+    // a delta outside that range cannot be encoded and yields 0.
+    bool const fEncodable = (delta >= (-(int)0x100) && delta <= 0xFF);
+    if (!fEncodable) {
+        return 0;
+    }
+
+    Branch8& insn = (Branch8&)(originalOpCode);
+    Branch8Target& offset = (Branch8Target&)(delta);
+    insn.Imm8 = offset.Imm8;
+    return (USHORT&)insn;
+}
+
+LONG CDetourDis::DecodeBranch11(ULONG opcode)
+{
+    Branch11& insn = (Branch11&)(opcode);
+    Branch11Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+    offset.Imm11 = insn.Imm11;
+
+    // Imm11 now sits in bits 1..11; shifting up and back down by 20 sign-extends from bit 11.
+    LONG const raw = (LONG&)offset;
+    return (raw << 20) >> 20;
+}
+
+USHORT CDetourDis::EncodeBranch11(ULONG originalOpCode, LONG delta)
+{
+    // Eleven immediate bits plus the implicit trailing zero give a signed 12-bit range;
+    // a delta outside that range cannot be encoded and yields 0.
+    bool const fEncodable = (delta >= (-(int)0x800) && delta <= 0x7FF);
+    if (!fEncodable) {
+        return 0;
+    }
+
+    Branch11& insn = (Branch11&)(originalOpCode);
+    Branch11Target& offset = (Branch11Target&)(delta);
+    insn.Imm11 = offset.Imm11;
+    return (USHORT&)insn;
+}
+
+BYTE CDetourDis::EmitBranch11(PUSHORT& pDest, LONG relativeAddress)
+{
+    // Build a Branch11 instruction (opcode 0x1C) carrying the Imm11 field of the offset.
+    Branch11Target& offset = (Branch11Target&)(relativeAddress);
+    Branch11 insn = { offset.Imm11, 0x1C };
+
+    return EmitShortInstruction(pDest, (USHORT&)insn);
+}
+
+LONG CDetourDis::DecodeBranch20(ULONG opcode)
+{
+    Branch20& insn = (Branch20&)(opcode);
+    Branch20Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+
+    // Gather the scattered immediate fields into their positions in the offset.
+    offset.Sign = insn.Sign;
+    offset.J2 = insn.J2;
+    offset.J1 = insn.J1;
+    offset.Imm6 = insn.Imm6;
+    offset.Imm11 = insn.Imm11;
+
+    // A negative offset also needs every bit above Sign set.
+    if (offset.Sign) {
+        offset.Padding2 = -1;
+    }
+    return (LONG&)offset;
+}
+
+ULONG CDetourDis::EncodeBranch20(ULONG originalOpCode, LONG delta)
+{
+    // Twenty immediate bits plus the implicit trailing zero give a signed 21-bit range;
+    // a delta outside that range cannot be encoded and yields 0.
+    bool const fEncodable = (delta >= (-(int)0x100000) && delta <= 0xFFFFF);
+    if (!fEncodable) {
+        return 0;
+    }
+
+    Branch20& insn = (Branch20&)(originalOpCode);
+    Branch20Target& offset = (Branch20Target&)(delta);
+    insn.Sign = offset.Sign;
+    insn.J2 = offset.J2;
+    insn.J1 = offset.J1;
+    insn.Imm6 = offset.Imm6;
+    insn.Imm11 = offset.Imm11;
+    return (ULONG&)insn;
+}
+
+LONG CDetourDis::DecodeBranch24(ULONG opcode, BOOL& fLink)
+{
+    Branch24& insn = (Branch24&)(opcode);
+    Branch24Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+
+    // I1 and I2 are stored in the instruction as J1 and J2, each combined with the sign bit.
+    offset.Sign = insn.Sign;
+    offset.I1 = ~(insn.J1 ^ offset.Sign);
+    offset.I2 = ~(insn.J2 ^ offset.Sign);
+    offset.Imm10 = insn.Imm10;
+    offset.Imm11 = insn.Imm11;
+    fLink = insn.Link;
+
+    // A negative offset also needs every bit above Sign set.
+    if (offset.Sign) {
+        offset.Padding2 = -1;
+    }
+    return (LONG&)offset;
+}
+
+ULONG CDetourDis::EncodeBranch24(ULONG originalOpCode, LONG delta, BOOL fLink)
+{
+    // Twenty-four immediate bits plus the implicit trailing zero give a signed 25-bit range;
+    // a delta outside that range cannot be encoded and yields 0.
+    bool const fEncodable = (delta >= static_cast<int>(0xFF000000) && delta <= static_cast<int>(0xFFFFFF));
+    if (!fEncodable) {
+        return 0;
+    }
+
+    Branch24& insn = (Branch24&)(originalOpCode);
+    Branch24Target& offset = (Branch24Target&)(delta);
+    insn.Link = fLink;
+    insn.Sign = offset.Sign;
+    insn.J1 = ~(offset.I1 ^ insn.Sign);
+    insn.J2 = ~(offset.I2 ^ insn.Sign);
+    insn.Imm10 = offset.Imm10;
+    insn.Imm11 = offset.Imm11;
+    return (ULONG&)insn;
+}
+
+LONG CDetourDis::DecodeLiteralLoad8(ULONG instruction)
+{
+    // Imm8 is moved up two bits (LiteralLoad8Target has two low padding bits).
+    LiteralLoad8& insn = (LiteralLoad8&)(instruction);
+    LiteralLoad8Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+
+    offset.Imm8 = insn.Imm8;
+    return (LONG&)offset;
+}
+
+BYTE CDetourDis::EmitLiteralLoad8(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral)
+{
+    // Literals must be 32-bit aligned while the ldr itself may be only 16-bit aligned,
+    // so 2 is added to the literal address (it gets rounded down again).
+    LONG const delta = CalculateNewDelta((PBYTE)pLiteral + 2, (PBYTE)pDest);
+    LONG magnitude = (delta > 0) ? delta : -delta;
+    magnitude &= 0x3FF;
+
+    LiteralLoad8Target& offset = (LiteralLoad8Target&)(magnitude);
+    LiteralLoad8 ldr = { offset.Imm8, targetRegister, 0x9 };
+    return EmitShortInstruction(pDest, (USHORT&)ldr);
+}
+
+LONG CDetourDis::DecodeLiteralLoad12(ULONG instruction)
+{
+    // Imm12 is used as-is (LiteralLoad12Target has no low padding bits).
+    LiteralLoad12& insn = (LiteralLoad12&)(instruction);
+    LiteralLoad12Target offset;
+    ZeroMemory(&offset, sizeof(offset));
+
+    offset.Imm12 = insn.Imm12;
+    return (LONG&)offset;
+}
+
+BYTE CDetourDis::EmitLiteralLoad12(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral)
+{
+    // Literals must be 32-bit aligned while the ldr itself may be only 16-bit aligned,
+    // so 2 is added to the literal address and the low two offset bits are dropped below.
+    LONG const delta = CalculateNewDelta((PBYTE)pLiteral + 2, (PBYTE)pDest);
+    LONG magnitude = (delta > 0) ? delta : -delta;
+    magnitude &= 0xFFF;
+
+    LiteralLoad12Target& offset = (LiteralLoad12Target&)(magnitude);
+    offset.Imm12 -= offset.Imm12 & 3;
+    LiteralLoad12 ldr = { offset.Imm12, targetRegister, 0x5F, (DWORD)(delta > 0),  0xF8 };
+    return EmitLongInstruction(pDest, (ULONG&)ldr);     // Add is set only for a positive delta
+}
+
+BYTE CDetourDis::EmitImmediateRegisterLoad32(PUSHORT& pDest, BYTE reg)
+{   // 32-bit load with offset 0 in which reg is both source and destination register.
+    ImmediateRegisterLoad32 ldr = { 0, reg, reg, 0xF8D };
+    return EmitLongInstruction(pDest, (ULONG&)ldr);
+}
+
+// Emits the 16-bit form of "ldr reg, [reg]": the same register is used as
+// both operands.
+// Returns whatever EmitShortInstruction reports for the emitted instruction.
+BYTE CDetourDis::EmitImmediateRegisterLoad16(PUSHORT& pDest, BYTE reg)
+{
+    ImmediateRegisterLoad16 encoding = { reg, reg, 0x680 >> 2 };
+
+    return EmitShortInstruction(pDest, (USHORT&)encoding);
+}
+
+// Stores pTarget in the constant pool and emits code at pDest that loads
+// targetRegister through that pool slot.
+//
+// The pool grows downwards: m_pbPool is moved back by one ULONG and the
+// 32-bit value of pTarget is written there. When targetRegister is PC only
+// the literal load is emitted; for any other register a second
+// "ldr rn, [rn]" follows. Returns the accumulated size reported by the
+// emit helpers.
+BYTE CDetourDis::EmitLongLiteralLoad(PUSHORT& pDest, BYTE targetRegister, PVOID pTarget)
+{
+    // Reserve a 32-bit pool slot and place the target value in it.
+    m_pbPool -= sizeof(ULONG);
+    *(PULONG)m_pbPool = (ULONG)(size_t)pTarget;
+
+    // ldr rn, target.
+    const BYTE literalLoadSize = EmitLiteralLoad12(pDest, targetRegister, m_pbPool);
+
+    // Loading into PC already branched; a follow-up dereference would make
+    // no sense.
+    if (targetRegister == c_PC) {
+        return literalLoadSize;
+    }
+
+    // ldr rn, [rn] -- low registers (r0-r7) can use the 16-bit encoding.
+    const BYTE dereferenceSize = (targetRegister <= 7)
+        ? EmitImmediateRegisterLoad16(pDest, targetRegister)
+        : EmitImmediateRegisterLoad32(pDest, targetRegister);
+
+    return (BYTE)(literalLoadSize + dereferenceSize);
+}
+
+// Emits a 'long branch' to pTarget at pDest, implemented as a long literal
+// load into PC. pTarget is passed through DETOURS_PBYTE_TO_PFUNC first.
+// Returns the size reported by EmitLongLiteralLoad.
+BYTE CDetourDis::EmitLongBranch(PUSHORT& pDest, PVOID pTarget)
+{
+    return EmitLongLiteralLoad(pDest, c_PC, DETOURS_PBYTE_TO_PFUNC(pTarget));
+}
+
+// Copies one 16-bit instruction from pSource to pDest unchanged.
+// Returns the number of source bytes consumed (2).
+BYTE CDetourDis::PureCopy16(BYTE* pSource, BYTE* pDest)
+{
+    const USHORT halfword = *(USHORT *)pSource;
+    *(USHORT *)pDest = halfword;
+
+    return sizeof(USHORT);
+}
+
+// Copies one 32-bit instruction from pSource to pDest unchanged, using
+// UNALIGNED accesses on both sides.
+// Returns the number of source bytes consumed (4).
+BYTE CDetourDis::PureCopy32(BYTE* pSource, BYTE* pDest)
+{
+    UNALIGNED ULONG *pFrom = (UNALIGNED ULONG *)pSource;
+    UNALIGNED ULONG *pTo = (UNALIGNED ULONG *)pDest;
+
+    *pTo = *pFrom;
+
+    return sizeof(DWORD);
+}
+
+// Returns how many bytes the emitted range [pDestStart, pDestEnd) exceeds
+// sourceLength by, or 0 when the emitted code is not longer than the source.
+USHORT CDetourDis::CalculateExtra(BYTE sourceLength, BYTE* pDestStart, BYTE* pDestEnd)
+{
+    const ULONG emittedLength = (ULONG)(pDestEnd - pDestStart);
+
+    if (emittedLength > sourceLength) {
+        return static_cast<USHORT>(emittedLength - sourceLength);
+    }
+
+    return 0;
+}
+
+// Copies one 16-bit instruction from the 'miscellaneous' opcode group
+// (copy-table rows 0b10110 / 0b10111) from pSource to pDest.
+//
+// CBZ/CBNZ are re-targeted: the branch target is recorded in m_pbTarget and
+// the instruction is re-encoded for its new location. When the new
+// displacement cannot be encoded, an inverted CBZ/CBNZ followed by a
+// 'long branch' is emitted instead and m_lExtra receives the number of
+// bytes by which that sequence exceeds the 2-byte source instruction.
+// IT instructions are not handled (ASSERT). Everything else is copied as is.
+//
+// Always returns sizeof(USHORT), the size of the source instruction.
+BYTE CDetourDis::CopyMiscellaneous16(BYTE* pSource, BYTE* pDest)
+{
+    USHORT instruction = *(PUSHORT)(pSource);
+
+    // Compare and branch imm5 (CBZ, CBNZ)
+    if ((instruction & 0x100) && !(instruction & 0x400)) { // (1011x0x1xxxxxxxx)
+        LONG oldDelta = DecodeBranch5(instruction);
+        PBYTE pTarget = CalculateTarget(pSource, oldDelta);
+        m_pbTarget = pTarget;
+
+        // Try to keep the 16-bit form, re-encoded relative to the destination
+        LONG newDelta = CalculateNewDelta(pTarget, pDest);
+        instruction = EncodeBranch5(instruction, newDelta);
+
+        // A zero result from EncodeBranch5 is treated as 'could not encode'
+        if (instruction) {
+            // Copy the 16 bit instruction over
+            *(PUSHORT)(pDest) = instruction;
+            return sizeof(USHORT); // The source instruction was 16 bits
+        }
+
+        // If that fails, re-encode with 'conditional branch' logic, without using the condition flags
+        // For example, cbz r2,+0x56 (0x90432) becomes:
+        //
+        //  001df73a b92a     cbnz        r2,001df748
+        //  001df73c e002     b           001df744
+        //  001df73e bf00     nop
+        //  001df740 0432     dc.h        0432
+        //  001df742 0009     dc.h        0009
+        //  001df744 f85ff008 ldr         pc,=0x90432
+        //
+
+        // Store where we will be writing our conditional branch, and move past it so we can emit a long branch
+        PUSHORT pDstInst = (PUSHORT)(pDest);
+        PUSHORT pConditionalBranchInstruction = pDstInst++;
+
+        // Emit the long branch instruction
+        BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget);
+
+        // Invert the CBZ/CBNZ instruction to move past our 'long branch' if the inverse comparison succeeds
+        // Write the CBZ/CBNZ instruction *before* the long branch we emitted above
+        // This had to be done out of order, since the size of a long branch can vary due to alignment restrictions
+        instruction = EncodeBranch5(*(PUSHORT)(pSource), longBranchSize - c_PCAdjust + sizeof(USHORT));
+        Branch5& branch = (Branch5&)(instruction);
+        branch.OpCode = (branch.OpCode & 0x02) ? 0x2C : 0x2E; // Invert the CBZ/CBNZ comparison
+        *pConditionalBranchInstruction = instruction;
+
+        // Compute the extra space needed for the branch sequence
+        m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst));
+        return sizeof(USHORT); // The source instruction was 16 bits
+    }
+
+    // If-Then Instruction (IT)
+    if ((instruction >> 8 == 0xBF) && (instruction & 0xF)) { //(10111111xxxx(mask != 0b0000))
+        // ToDo: Implement IT handler
+        // Nothing is written to pDest on this path; only the source size is reported
+        ASSERT(false);
+        return sizeof(USHORT);
+    }
+
+    // ADD/SUB, SXTH, SXTB, UXTH, UXTB, CBZ, CBNZ, PUSH, POP, REV, REV16, REVSH, NOP, YIELD, WFE, WFI, SEV, etc.
+    return PureCopy16(pSource, pDest);
+}
+
+// Copies one 16-bit instruction from the conditional-branch opcode group
+// (copy-table rows 0b11010 / 0b11011) from pSource to pDest.
+//
+// Conditional branches are re-targeted: the branch target is recorded in
+// m_pbTarget and the instruction is re-encoded for its new location. When
+// the new displacement cannot be encoded, a three-part sequence (conditional
+// branch, unconditional skip, 'long branch') is emitted instead and m_lExtra
+// receives the number of bytes by which it exceeds the 2-byte source.
+// The remaining encodings in this group are copied unchanged.
+//
+// Always returns the size of the source instruction (2 bytes).
+BYTE CDetourDis::CopyConditionalBranchOrOther16(BYTE* pSource, BYTE* pDest)
+{
+    USHORT instruction = *(PUSHORT)(pSource);
+
+    // Could be a conditional branch, an Undefined instruction or a Service System Call
+    // Only the former needs special logic
+    if ((instruction & 0xE00) != 0xE00) { // 1101(!=111x)xxxxxxxx
+        LONG oldDelta = DecodeBranch8(instruction);
+        PBYTE pTarget = CalculateTarget(pSource, oldDelta);
+        m_pbTarget = pTarget;
+
+        // Try to keep the 16-bit form, re-encoded relative to the destination
+        LONG newDelta = CalculateNewDelta(pTarget, pDest);
+        instruction = EncodeBranch8(instruction, newDelta);
+        // A zero result from EncodeBranch8 is treated as 'could not encode'
+        if (instruction) {
+            // Copy the 16 bit instruction over
+            *(PUSHORT)(pDest) = instruction;
+            return sizeof(USHORT); // The source instruction was 16 bits
+        }
+
+        // If that fails, re-encode as a sequence of branches
+        // For example, bne +0x6E (0x90452) becomes:
+        //
+        // 001df758 d100     bne         001df75c
+        // 001df75a e005     b           001df768
+        // 001df75c e002     b           001df764
+        // 001df75e bf00     nop
+        // 001df760 0452     dc.h        0452
+        // 001df762 0009     dc.h        0009
+        // 001df764 f85ff008 ldr         pc,=0x90452
+        //
+
+        // First, reuse the existing conditional branch to, if successful, branch down to a 'long branch' that we will emit below
+        USHORT newInstruction = EncodeBranch8(*(PUSHORT)(pSource), 0); // Due to the size of c_PCAdjust a zero-length branch moves 4 bytes forward, past the following unconditional branch
+        ASSERT(newInstruction);
+        PUSHORT pDstInst = (PUSHORT)(pDest);
+        *pDstInst++ = newInstruction;
+
+        // Next, prepare to insert an unconditional branch that will be hit if the condition above is not met.  This branch will branch over the following 'long branch'
+        // We can't actually encode this branch yet though, because 'long branches' can vary in size
+        PUSHORT pUnconditionalBranchInstruction = pDstInst++;
+
+        // Then, emit a 'long branch' that will be hit if the original condition is met
+        BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget);
+
+        // Finally, encode and emit the unconditional branch that will be used to branch past the 'long branch' if the initial condition was not met
+        // The template passed to EncodeBranch11 carries 0x1C, the copy-table row for 'unconditional branch'
+        Branch11 branch11 = { 0x00, 0x1C };
+        newInstruction = EncodeBranch11(*(DWORD*)(&branch11), longBranchSize - c_PCAdjust + sizeof(USHORT));
+        ASSERT(newInstruction);
+        *pUnconditionalBranchInstruction = newInstruction;
+
+        // Compute the extra space needed for the branch sequence
+        m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst));
+        return sizeof(USHORT); // The source instruction was 16 bits
+    }
+
+    // Undefined instruction / service call encodings: nothing to fix up
+    return PureCopy16(pSource, pDest);
+}
+
+// Copies a 16-bit unconditional branch from pSource to pDest, re-targeting
+// it for its new location. The branch target is recorded in m_pbTarget.
+//
+// Three encodings are tried in order of increasing size:
+//   1. the same 16-bit form with a recomputed displacement,
+//   2. a 32-bit branch (m_lExtra = 2),
+//   3. a 'long branch' through the literal pool (m_lExtra computed from the
+//      emitted length).
+//
+// Always returns sizeof(USHORT), the size of the source instruction.
+BYTE CDetourDis::CopyUnConditionalBranch16(BYTE* pSource, BYTE* pDest)
+{
+    ULONG encoding = *(PUSHORT)(pSource);
+
+    // Work out where the original branch goes and publish it
+    LONG oldDelta = DecodeBranch11(encoding);
+    PBYTE pTarget = CalculateTarget(pSource, oldDelta);
+    m_pbTarget = pTarget;
+
+    LONG newDelta = CalculateNewDelta(pTarget, pDest);
+
+    // 1. Same-size re-encoding; zero means the displacement did not fit
+    encoding = EncodeBranch11(encoding, newDelta);
+    if (encoding) {
+        *(PUSHORT)(pDest) = (USHORT)encoding;
+        return sizeof(USHORT);
+    }
+
+    PUSHORT pDstInst = (PUSHORT)(pDest);
+
+    // 2. Widen to a 32-bit branch
+    encoding = EncodeBranch24(0xf0009000, newDelta, FALSE);
+    if (encoding) {
+        // Write both halfwords of the instruction
+        EmitLongInstruction(pDstInst, encoding);
+
+        // Destination is 32 bits, source was 16 bits
+        m_lExtra = sizeof(DWORD) - sizeof(USHORT);
+        return sizeof(USHORT);
+    }
+
+    // 3. Fall back to a 'long branch'
+    // For example, b +0x7FE (00090be6) becomes:
+    // 003f6d02 e001     b           003f6d08
+    // 003f6d04 0be6     dc.h        0be6
+    // 003f6d06 0009     dc.h        0009
+    // 003f6d08 f85ff008 ldr         pc,=0x90BE6
+    EmitLongBranch(pDstInst, pTarget);
+
+    // Report how much larger the emitted sequence is than the source
+    m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst));
+    return sizeof(USHORT);
+}
+
+// Copies a 16-bit PC-relative literal load from pSource to pDest by
+// re-encoding it as a 'long literal load' of the original literal's address.
+// m_lExtra receives the emitted length minus the 2-byte source size.
+//
+// Always returns sizeof(USHORT), the size of the source instruction.
+BYTE CDetourDis::CopyLiteralLoad16(BYTE* pSource, BYTE* pDest)
+{
+    PBYTE pbEmitStart = pDest;
+    USHORT encoding = *(PUSHORT)(pSource);
+
+    // The literal's address is relative to the 4-byte aligned source address
+    LONG literalOffset = DecodeLiteralLoad8(encoding);
+    PBYTE pLiteral = CalculateTarget(Align4(pSource), literalOffset);
+
+    // Re-encode as a 'long literal load'
+    // For example, ldr r0, [PC + 1E0] (0x905B4) becomes:
+    //
+    // 001df72c f85f0008 ldr         r0,=0x905B4
+    // 001df730 f8d00000 ldr.w       r0,[r0]
+    LiteralLoad8& fields = (LiteralLoad8&)(encoding);
+    EmitLongLiteralLoad((PUSHORT&)pDest, fields.Register, pLiteral);
+
+    // pDest was advanced by the emit call above
+    m_lExtra = (LONG)(pDest - pbEmitStart - sizeof(USHORT));
+    return sizeof(USHORT);
+}
+
+// Copies one 16-bit instruction from copy-table row 0b01000 (data
+// processing / branch-exchange) unchanged. For BX the target lives in a
+// register, so m_pbTarget is set to DETOUR_INSTRUCTION_TARGET_DYNAMIC.
+//
+// Returns the size reported by PureCopy16.
+BYTE CDetourDis::CopyBranchExchangeOrDataProcessing16(BYTE* pSource, BYTE* pDest)
+{
+    const ULONG encoding = *(PUSHORT)(pSource);
+    const bool isBranchExchange = ((encoding & 0xff80) == 0x4700);
+
+    if (isBranchExchange) {
+        // BX: the destination is only known at run time
+        m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+    }
+
+    // AND, LSR, TST, ADD, CMP, MOV (and BX itself) are copied verbatim
+    return PureCopy16(pSource, pDest);
+}
+
+// Dispatch table used by CopyInstruction. It is indexed by the upper five
+// bits of the second source byte (pSrc[1] >> 3), so there is one row per
+// 5-bit opcode prefix (0x00-0x1f) followed by a { 0, NULL } terminator.
+// Each row pairs that prefix with the member function that copies the
+// instruction.
+const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable[33] =
+{
+    // Shift by immediate, move register
+    // ToDo: Not handling moves from PC
+    /* 0b00000 */ { 0x00, &CDetourDis::PureCopy16 },
+    /* 0b00001 */ { 0x01, &CDetourDis::PureCopy16 },
+    /* 0b00010 */ { 0x02, &CDetourDis::PureCopy16 },
+
+    // Add/subtract register
+    // Add/subtract immediate
+    /* 0b00011 */ { 0x03, &CDetourDis::PureCopy16},
+
+    // Add/subtract/compare/move immediate
+    /* 0b00100 */ { 0x04, &CDetourDis::PureCopy16 },
+    /* 0b00101 */ { 0x05, &CDetourDis::PureCopy16 },
+    /* 0b00110 */ { 0x06, &CDetourDis::PureCopy16 },
+    /* 0b00111 */ { 0x07, &CDetourDis::PureCopy16 },
+
+    // Data-processing register
+    // Special data processing
+    // Branch/exchange instruction set
+    /* 0b01000 */ { 0x08, &CDetourDis::CopyBranchExchangeOrDataProcessing16 },
+
+    // Load from literal pool
+    /* 0b01001 */ { 0x09, &CDetourDis::CopyLiteralLoad16 },
+
+    // Load/store register offset
+    /* 0b01010 */ { 0x0a, &CDetourDis::PureCopy16 },
+    /* 0b01011 */ { 0x0b, &CDetourDis::PureCopy16 },
+
+    //  Load/store word/byte immediate offset.
+    /* 0b01100 */ { 0x0c, &CDetourDis::PureCopy16 },
+    /* 0b01101 */ { 0x0d, &CDetourDis::PureCopy16 },
+    /* 0b01110 */ { 0x0e, &CDetourDis::PureCopy16 },
+    /* 0b01111 */ { 0x0f, &CDetourDis::PureCopy16 },
+
+    //  Load/store halfword immediate offset.
+    /* 0b10000 */ { 0x10, &CDetourDis::PureCopy16 },
+    /* 0b10001 */ { 0x11, &CDetourDis::PureCopy16 },
+
+    // Load from or store to stack
+    /* 0b10010 */ { 0x12, &CDetourDis::PureCopy16 },
+    /* 0b10011 */ { 0x13, &CDetourDis::PureCopy16 },
+
+    // Add to SP or PC
+    /* 0b10100 */ { 0x14, &CDetourDis::PureCopy16 },
+    //   ToDo: Is ADR (T1) blitt-able?
+    //     It adds a value to PC and stores the result in a register.
+    //     Does this count as a 'target' for detours?
+    //   NOTE(review): ADR (T1) appears to be the 0b10100 row rather than
+    //     0b10101 -- confirm against the ARM ARM which row this ToDo is about.
+    /* 0b10101 */ { 0x15, &CDetourDis::PureCopy16 },
+
+    // Miscellaneous
+    /* 0b10110 */ { 0x16, &CDetourDis::CopyMiscellaneous16 },
+    /* 0b10111 */ { 0x17, &CDetourDis::CopyMiscellaneous16 },
+
+    // Load/store multiple
+    /* 0b11000 */ { 0x18, &CDetourDis::PureCopy16 },
+    /* 0b11001 */ { 0x19, &CDetourDis::PureCopy16 },
+    //   ToDo: Are we sure these are all safe?
+    //     LDMIA, for example, can include an 'embedded' branch.
+    //     Does this count as a 'target' for detours?
+
+    // Conditional branch
+    /* 0b11010 */ { 0x1a, &CDetourDis::CopyConditionalBranchOrOther16 },
+
+    // Conditional branch
+    // Undefined instruction
+    // Service (system) call
+    /* 0b11011 */ { 0x1b, &CDetourDis::CopyConditionalBranchOrOther16 },
+
+    // Unconditional branch
+    /* 0b11100 */ { 0x1c, &CDetourDis::CopyUnConditionalBranch16 },
+
+    // 32-bit instruction
+    /* 0b11101 */ { 0x1d, &CDetourDis::BeginCopy32 },
+    /* 0b11110 */ { 0x1e, &CDetourDis::BeginCopy32 },
+    /* 0b11111 */ { 0x1f, &CDetourDis::BeginCopy32 },
+
+    // Terminator entry
+    { 0, NULL }
+};
+
+// Copies a 32-bit branch with a 24-bit displacement from pSource to pDest,
+// re-targeting it for its new location. The branch target is recorded in
+// m_pbTarget. When the recomputed displacement cannot be encoded, a
+// 'long branch' is emitted instead and m_lExtra receives the number of bytes
+// by which it exceeds the 4-byte source instruction.
+//
+// Always returns sizeof(DWORD), the size of the source instruction.
+BYTE CDetourDis::CopyBranch24(BYTE* pSource, BYTE* pDest)
+{
+    ULONG encoding = GetLongInstruction(pSource);
+
+    // Decode the original destination (fLink is filled in by the decoder)
+    BOOL fLink;
+    LONG oldDelta = DecodeBranch24(encoding, fLink);
+    PBYTE pTarget = CalculateTarget(pSource, oldDelta);
+    m_pbTarget = pTarget;
+
+    // Attempt a same-size re-encoding relative to the destination
+    PUSHORT pDstInst = (PUSHORT)(pDest);
+    LONG newDelta = CalculateNewDelta(pTarget, pDest);
+    encoding = EncodeBranch24(encoding, newDelta, fLink);
+
+    if (!encoding) {
+        // Displacement did not fit: go through the literal pool instead
+        EmitLongBranch(pDstInst, pTarget);
+
+        // Report how much larger the emitted sequence is than the source
+        m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst));
+        return sizeof(DWORD);
+    }
+
+    // Write both halfwords of the re-encoded instruction
+    EmitLongInstruction(pDstInst, encoding);
+    return sizeof(DWORD);
+}
+
+// Copies one 32-bit instruction from the 'branches and miscellaneous
+// control' group from pSource to pDest.
+//
+//   B<c>.W  re-encoded in place when the new displacement fits; otherwise
+//           replaced by a conditional branch + unconditional skip +
+//           'long branch' sequence.
+//   B.W     delegated to CopyBranch24.
+//   BL.W    replaced by a literal load into LR followed by "blx lr".
+//   BXJ     not supported (ASSERT), then copied as is.
+//   SUBS PC, LR  copied as is, target marked dynamic.
+//   Anything else is copied as is.
+//
+// For the branch forms m_pbTarget receives the branch target, and m_lExtra
+// receives the number of bytes by which an expanded sequence exceeds the
+// 4-byte source. Always returns the size of the source instruction (4).
+BYTE CDetourDis::CopyBranchOrMiscellaneous32(BYTE* pSource, BYTE* pDest)
+{
+    ULONG instruction = GetLongInstruction(pSource);
+    if ((instruction & 0xf800d000) == 0xf0008000) { // B<c>.W <label>
+        LONG oldDelta = DecodeBranch20(instruction);
+        PBYTE pTarget = CalculateTarget(pSource, oldDelta);
+        m_pbTarget = pTarget;
+
+        // Re-encode as 32-bit
+        PUSHORT pDstInst = (PUSHORT)(pDest);
+        LONG newDelta = CalculateNewDelta(pTarget, pDest);
+        instruction = EncodeBranch20(instruction, newDelta);
+        // A zero result from EncodeBranch20 is treated as 'could not encode'
+        if (instruction) {
+            // Copy both halfwords of the instruction
+            EmitLongInstruction(pDstInst, instruction);
+            return sizeof(DWORD);
+        }
+
+        // If that fails, re-encode as a sequence of branches
+        // For example, bls.w +0x86 (00090480) becomes:
+        //
+        // 001df788 f2408001 bls.w       001df78e
+        // 001df78c e004     b           001df798
+        // 001df78e e001     b           001df794
+        // 001df790 0480     dc.h        0480
+        // 001df792 0009     dc.h        0009
+        // 001df794 f85ff008 ldr         pc,=0x90480
+        //
+
+        // First, reuse the existing conditional branch to, if successful,
+        // branch down to a 'long branch' that we will emit below
+        instruction = EncodeBranch20(GetLongInstruction(pSource), 2);
+        // Due to the size of c_PCAdjust a two-length branch moves 6 bytes forward,
+        // past the following unconditional branch
+        ASSERT(instruction);
+        EmitLongInstruction(pDstInst, instruction);
+
+        // Next, prepare to insert an unconditional branch that will be hit
+        // if the condition above is not met.  This branch will branch over
+        // the following 'long branch'
+        // We can't actually encode this branch yet though, because
+        // 'long branches' can vary in size
+        PUSHORT pUnconditionalBranchInstruction = pDstInst++;
+
+        // Then, emit a 'long branch' that will be hit if the original condition is met
+        BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget);
+
+        // Finally, encode and emit the unconditional branch that will be used
+        // to branch past the 'long branch' if the initial condition was not met
+        Branch11 branch11 = { 0x00, 0x1C };
+        instruction = EncodeBranch11(*(DWORD*)(&branch11), longBranchSize - c_PCAdjust + sizeof(USHORT));
+        ASSERT(instruction);
+        *pUnconditionalBranchInstruction = static_cast<USHORT>(instruction);
+
+        // Compute the extra space needed for the instruction
+        m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst));
+        return sizeof(DWORD); // The source instruction was 32 bits
+    }
+
+    if ((instruction & 0xf800d000) == 0xf0009000) { // B.W <label>
+        // B <label>  11110xxxxxxxxxxx10x1xxxxxxxxxxxx
+        return CopyBranch24(pSource, pDest);
+    }
+
+    if ((instruction & 0xf800d000) == 0xf000d000) { // BL.W <label>
+        // BL <label>  11110xxxxxxxxxxx11x1xxxxxxxxxxxx
+
+        PUSHORT pDstInst = (PUSHORT)(pDest);
+        BOOL fLink;
+        LONG oldDelta = DecodeBranch24(instruction, fLink);
+        PBYTE pTarget = CalculateTarget(pSource, oldDelta);
+        m_pbTarget = pTarget;
+
+        // Reserve a 32-bit slot in the downward-growing pool for the call target
+        *--((PULONG&)m_pbPool) = (ULONG)(size_t)DETOURS_PBYTE_TO_PFUNC(pTarget);
+
+        // ldr lr, target.
+        EmitLiteralLoad12(pDstInst, c_LR, m_pbPool);
+        // blx lr
+        EmitShortInstruction(pDstInst, 0x47f0);
+
+        // Compute the extra space needed for the instruction
+        m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst));
+        return sizeof(DWORD); // The source instruction was 32 bits
+    }
+
+    if ((instruction & 0xFFF0FFFF) == 0xF3C08F00) {
+        // BXJ 111100111100xxxx1000111100000000
+        // BXJ switches to Jazelle mode, which is not supported
+        // (falls through to the plain copy below after asserting)
+        ASSERT(false);
+    }
+
+    if ((instruction & 0xFFFFFF00) == 0xF3DE8F00) {
+        // SUBS PC, LR 111100111101111010001111xxxxxxxx
+        m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+    }
+
+    // Everything else should be blitt-able
+    return PureCopy32(pSource, pDest);
+}
+
+// Copies a 32-bit PC-relative literal load from pSource to pDest by
+// re-encoding it as a 'long literal load' of the original literal's address.
+// m_lExtra receives the emitted length minus the 4-byte source size.
+//
+// Always returns sizeof(DWORD), the size of the source instruction.
+BYTE CDetourDis::CopyLiteralLoad32(BYTE* pSource, BYTE* pDest)
+{
+    BYTE* pbEmitStart = pDest;
+    ULONG encoding = GetLongInstruction(pSource);
+
+    // The literal's address is relative to the 4-byte aligned source address
+    LONG literalOffset = DecodeLiteralLoad12(encoding);
+    PBYTE pLiteral = CalculateTarget(Align4(pSource), literalOffset);
+
+    // Load the same register the original instruction targeted
+    LiteralLoad12& fields = (LiteralLoad12&)(encoding);
+    EmitLongLiteralLoad((PUSHORT&)pDest, fields.Register, pLiteral);
+
+    // pDest was advanced by the emit call above
+    m_lExtra = (LONG)(pDest - pbEmitStart - sizeof(DWORD));
+
+    return sizeof(DWORD);
+}
+
+// Copies one 32-bit 'load and store single data item / memory hint'
+// instruction from pSource to pDest.
+//
+//   Stores                      copied unchanged.
+//   PLD/PLI, PC-relative        replaced by two 16-bit NOPs (a relocated
+//                               prefetch hint would address the wrong data).
+//   PLD/PLI, other base         copied unchanged.
+//   Loads with PC as the base   re-encoded through CopyLiteralLoad32.
+//   Loads that write PC         copied unchanged, target marked dynamic.
+//   Everything else             copied unchanged.
+//
+// The PLD/PLI test has to run before the PC-relative literal-load test: the
+// literal-load mask (0xF81F0000) also matches PC-relative PLD/PLI, which
+// name PC in the destination field and would otherwise be re-encoded as a
+// literal load into PC.
+//
+// Returns the size of the source instruction (4 bytes).
+BYTE CDetourDis::CopyLoadAndStoreSingle(BYTE* pSource, BYTE* pDest)
+{
+    ULONG instruction = GetLongInstruction(pSource);
+
+    // Note: The following masks only look at the interesting bits
+    // (not the opCode prefix, since that check was performed in
+    // order to get to this function)
+    if (!(instruction & 0x100000)) {
+        // 1111 100x xxx0 xxxxxxxxxxxxxxxxxxxx : STR, STRB, STRH, etc.
+        return PureCopy32(pSource, pDest);
+    }
+
+    if ((instruction & 0xFE70F000) == 0xF810F000) {
+        // 1111100xx001xxxx1111xxxxxxxxxxxx : PLD, PLI
+        // (the base register bits 16-19 are deliberately not part of this
+        // comparison; they are examined below)
+
+        // Convert PC-Relative PLD/PLI instructions to noops (1111100xx00111111111xxxxxxxxxxxx)
+        if ((instruction & 0xFE7FF000) == 0xF81FF000) {
+            PUSHORT pDstInst = (PUSHORT)(pDest);
+            *pDstInst++ = c_NOP;
+            *pDstInst++ = c_NOP;
+            return sizeof(DWORD);  // The source instruction was 32 bits
+        }
+
+        // All other PLD/PLI instructions are blitt-able
+        return PureCopy32(pSource, pDest);
+    }
+
+    if ((instruction & 0xF81F0000) == 0xF81F0000) {
+        // 1111100xxxx11111xxxxxxxxxxxxxxxx : PC +/- Imm12
+        return CopyLiteralLoad32(pSource, pDest);
+    }
+
+    // If the load is writing to PC
+    if ((instruction & 0xF950F000) == 0xF850F000) {
+        m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+    }
+
+    // All other loads LDR (immediate), etc.
+    return PureCopy32(pSource, pDest);
+}
+
+// Copies a 32-bit load/store-multiple or SRS instruction unchanged.
+// Returns the size reported by PureCopy32.
+BYTE CDetourDis::CopyLoadAndStoreMultipleAndSRS(BYTE* pSource, BYTE* pDest)
+{
+    // These are believed to be blitt-able, but that is not certain: some of
+    // them (LDMIA, POP, etc.) can load PC and therefore act as a branch.
+    return PureCopy32(pSource, pDest);
+}
+
+// Copies a 32-bit table branch (TBB/TBH) from pSource to pDest.
+//
+// The target is always reported as DETOUR_INSTRUCTION_TARGET_DYNAMIC. When
+// the base register is not PC the instruction is copied unchanged. When the
+// base register is PC the branch table follows the original instruction, so
+// the lookup is expanded into an explicit sequence that reads the table
+// address from the literal pool; m_lExtra then receives the number of bytes
+// by which that sequence exceeds the 4-byte source instruction.
+//
+// NOTE(review): the PC-based expansion is guarded by __debugbreak() and
+// looks unexercised; only the pool layout and the m_lExtra accounting have
+// been aligned with the rest of this file -- verify the emitted sequence
+// on hardware before relying on it.
+//
+// Returns sizeof(DWORD), the size of the source instruction.
+BYTE CDetourDis::CopyTableBranch(BYTE* pSource, BYTE* pDest)
+{
+    m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+    ULONG instruction = GetLongInstruction(pSource);
+    TableBranch& tableBranch = (TableBranch&)(instruction);
+
+    // If the base register is anything other than PC, we can simply copy the instruction
+    if (tableBranch.BaseRegister != c_PC) {
+        return PureCopy32(pSource, pDest);
+    }
+
+    __debugbreak();
+
+    // If the base register is PC, we need to manually perform the table lookup
+    // For example, this:
+    //
+    //        7ef40000 e8dff002 tbb         [pc,r2]
+    //
+    // becomes this:
+    //
+    //        7ef40404 b401     push        {r0}            ; pushed as a placeholder for the target address
+    //        7ef40406 e92d0005 push.w      {r0,r2}         ; scratch register and another register are pushed; there's a minimum of two registers in the list for push.w
+    //        7ef40410 4820     ldr         r0,=0x7EF40004  ; load the table address from the literal pool
+    //        7ef40414 eb000042 add         r0,r0,r2,lsl #1 ; add the index value to the address of the table to get the table entry; lsl only used if it's a TBH instruction
+    //        7ef40418 f8d00000 ldr.w       r0,[r0]         ; dereference the table entry to get the value of the target
+    //        7ef4041c ea4f0040 lsl         r0,r0,#1        ; multiply the offset by 2 (per the spec)
+    //        7ef40420 eb00000f add.w       r0,r0,pc        ; Add the offset to pc to get the target address
+    //        7ef40424 f8cd000c str.w       r0,[sp,#0xC]    ; store the target address on the stack (into the first push)
+    //        7ef40428 e8bd0005 pop.w       {r0,r2}         ; scratch register and another register are popped; there's a minimum of two registers in the list for pop.w
+    //        7ef4042c bd00     pop         {pc}            ; pop the address into pc
+    //
+
+    // Push r0 to make room for our jump address on the stack
+    PUSHORT pDstInst = (PUSHORT)(pDest);
+    *pDstInst++ = 0xb401;
+
+    // Locate a scratch register: the lowest-numbered register that is not the index register
+    BYTE scrReg = 0;
+    while (scrReg == tableBranch.IndexRegister) {
+        ++scrReg;
+    }
+
+    // Push scrReg and tableBranch.IndexRegister (push.w doesn't support pushing just 1 register)
+    DWORD pushInstruction = 0xe92d0000;
+    pushInstruction |= 1 << scrReg;
+    pushInstruction |= 1 << tableBranch.IndexRegister;
+    EmitLongInstruction(pDstInst, pushInstruction);
+
+    // Write the target address out to the 'literal pool';
+    // when the base register of a TBB/TBH is PC,
+    // the branch table immediately follows the instruction.
+    // The address is stored as a single 32-bit word, exactly as
+    // EmitLongLiteralLoad does, so that the word load below reads it back
+    // with its halfwords in the right order.
+    BYTE* pTarget = CalculateTarget(pSource, 0);
+    *--((PULONG&)m_pbPool) = (ULONG)(size_t)pTarget;
+
+    // Load the literal pool value into our scratch register (this contains the address of the branch table)
+    // ldr rn, target
+    EmitLiteralLoad8(pDstInst, scrReg, m_pbPool);
+
+    // Add the index offset to the address of the branch table; the result will be the value within the table that contains the branch offset
+    // We need to multiply the index by two if we are using halfword indexing
+    // Will shift tableBranch.IndexRegister by 1 (multiply by 2) if using a TBH
+    EmitAdd32(pDstInst, scrReg, tableBranch.IndexRegister, scrReg, tableBranch.HalfWord);
+
+    // Dereference rn into rn, to load the value within the table
+    // ldr rn, [rn]
+    if (scrReg < 0x7) {
+        EmitImmediateRegisterLoad16(pDstInst, scrReg);
+    }
+    else {
+        EmitImmediateRegisterLoad32(pDstInst, scrReg);
+    }
+
+    // Multiply the offset by two to get the true offset value (as per the spec)
+    EmitLogicalShiftLeft32(pDstInst, scrReg, scrReg, 1);
+
+    // Add the offset to PC to get the target
+    EmitAdd32(pDstInst, scrReg, c_PC, scrReg, 0);
+
+    // Now write the contents of scrReg to the stack, so we can pop it into PC
+    // (the slot reserved by the very first push sits above the two registers pushed by push.w)
+    EmitStoreImmediate12(pDstInst, scrReg, c_SP, sizeof(DWORD) * 3);
+
+    // Pop scrReg and tableBranch.IndexRegister (pop.w doesn't support popping just 1 register)
+    DWORD popInstruction = 0xe8bd0000;
+    popInstruction |= 1 << scrReg;
+    popInstruction |= 1 << tableBranch.IndexRegister;
+    EmitLongInstruction(pDstInst, popInstruction);
+
+    // Pop PC
+    *pDstInst++ = 0xbd00;
+
+    // Compute the extra space needed for the branch sequence.
+    // The source instruction was 32 bits, matching the size returned below.
+    m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst));
+    return sizeof(DWORD);
+}
+
+// Entry point for every 32-bit instruction (copy-table rows 0b11101 to
+// 0b11111). Classifies the instruction by its major opcode bits and either
+// copies it verbatim or hands it to the specialised copy routine for that
+// group. Loads that can write PC mark the target as dynamic.
+//
+// Returns the value of whichever copy routine handled the instruction.
+BYTE CDetourDis::BeginCopy32(BYTE* pSource, BYTE* pDest)
+{
+    const ULONG op = GetLongInstruction(pSource);
+
+    // 11110xxxxxxxxxxx0xxxxxxxxxxxxxxx
+    // Immediate data processing: ADD, SUB, MOV, MOVN, ADR, MOVT, BFC, SSAT16, etc.
+    // Should all be blitt-able
+    // ToDo: What about ADR?  Is it safe to do a straight-copy?
+    // ToDo: Not handling moves to or from PC
+    const bool isImmediateDataProcessing = ((op & 0xF8008000) == 0xF0000000);
+    if (isImmediateDataProcessing) {
+        return PureCopy32(pSource, pDest);
+    }
+
+    // 111x101xxxxxxxxxxxxxxxxxxxxxxxxx
+    // Non-immediate data processing: ADD, EOR, TST, etc.
+    // Should all be blitt-able
+    const bool isRegisterDataProcessing = ((op & 0xEE000000) == 0xEA000000);
+    if (isRegisterDataProcessing) {
+        return PureCopy32(pSource, pDest);
+    }
+
+    // 1111100xxxxxxxxxxxxxxxxxxxxxxxxx
+    // Load and store single data item, memory hints
+    const bool isLoadStoreSingle = ((op & 0xFE000000) == 0xF8000000);
+    if (isLoadStoreSingle) {
+        return CopyLoadAndStoreSingle(pSource, pDest);
+    }
+
+    // 1110100xx1xxxxxxxxxxxxxxxxxxxxxx
+    // Load and store, double and exclusive, and table branch
+    const bool isDoubleExclusiveOrTable = ((op & 0xFE400000) == 0xE8400000);
+    if (isDoubleExclusiveOrTable) {
+        // LDRD, STRD (immediate) : xxxxxxxPxxWxxxxxxxxxxxxxxxxxxxxx where PW != 0b00
+        const bool isLoadStoreDouble = ((op & 0x1200000) != 0);
+        if (isLoadStoreDouble) {
+            // The source register is PC
+            // ToDo: If the source register is PC, what should we do?
+            if ((op & 0xF0000) == 0xF0000) {
+                ASSERT(false);
+            }
+
+            // Either destination register field names PC
+            const bool firstIsPc = ((op & 0xF000) == 0xF000);
+            const bool secondIsPc = ((op & 0xF00) == 0xF00);
+            if (firstIsPc || secondIsPc) {
+                m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+            }
+
+            return PureCopy32(pSource, pDest);
+        }
+
+        // LDREX, STREX : xxxxxxxx0xxxxxxxxxxxxxxxxxxxxxxx
+        const bool isLoadStoreExclusive = ((op & 0x800000) == 0);
+        if (isLoadStoreExclusive) {
+            // xxxxxxxxxxxxxxxxxxxx1111xxxxxxxxxxxx
+            if ((op & 0xF000) == 0xF000) {
+                m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
+            }
+            return PureCopy32(pSource, pDest);
+        }
+
+        // TBB : xxxxxxxxxxx1xxxxxxxxxxxx0000xxxx
+        // TBH : xxxxxxxxxxx1xxxxxxxxxxxx0001xxxx
+        const ULONG tableBranchBits = op & 0x1000F0;
+        if (tableBranchBits == 0x100000 || tableBranchBits == 0x100010) {
+            return CopyTableBranch(pSource, pDest);
+        }
+
+        // Load and store exclusive byte, halfword, doubleword (LDREXB, LDREXH, LDREXD, STREXB, STREXH, STREXD, etc.)
+        return PureCopy32(pSource, pDest);
+    }
+
+    // 1110100xx0xxxxxxxxxxxxxxxxxxxxxx
+    // Load and store multiple, RFE and SRS
+    const bool isLoadStoreMultiple = ((op & 0xFE400000) == 0xE8000000);
+    if (isLoadStoreMultiple) {
+        // Return from exception (RFE)
+        //   1110100110x1xxxxxxxxxxxxxxxxxxxx
+        //   1110100000x1xxxxxxxxxxxxxxxxxxxx
+        const bool matchesRfeHigh = ((op & 0xE9900000) == 0xE9900000);
+        const bool matchesRfeLow = ((op & 0xE8100000) == 0xE8100000);
+        if (matchesRfeHigh || matchesRfeLow) {
+            return PureCopy32(pSource, pDest);
+        }
+
+        return CopyLoadAndStoreMultipleAndSRS(pSource, pDest);
+    }
+
+    // 11110xxxxxxxxxxx1xxxxxxxxxxxxxxx
+    // Branches, miscellaneous control
+    const bool isBranchOrMiscControl = ((op & 0xF8008000) == 0xF0008000);
+    if (isBranchOrMiscControl) {
+        return CopyBranchOrMiscellaneous32(pSource, pDest);
+    }
+
+    // 111x11xxxxxxxxxxxxxxxxxxxxxxxxxx
+    // Coprocessor instructions
+    const bool isCoprocessor = ((op & 0xEC000000) == 0xEC000000);
+    if (isCoprocessor) {
+        return PureCopy32(pSource, pDest);
+    }
+
+    // Unhandled instruction; should never make it this far
+    ASSERT(false);
+    return PureCopy32(pSource, pDest);
+}
+
+/////////////////////////////////////////////////////////// Disassembler Code.
+//
+// Starts with no constant pool, no extra bytes and no branch target.
+CDetourDis::CDetourDis()
+{
+    m_pbPool = NULL;
+    m_lExtra = 0;
+    m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_NONE;
+}
+
+// Copies the single instruction at pSrc, fixing it up for execution at pDst.
+//
+//   pDst       destination for the copied instruction; when it or ppDstPool
+//              is NULL the internal scratch buffer is used instead.
+//   ppDstPool  in/out pointer to the constant pool (the pool is consumed
+//              downwards); updated on return when non-NULL.
+//   pSrc       instruction to copy.
+//   ppTarget   optional; receives m_pbTarget.
+//   plExtra    optional; receives m_lExtra.
+//
+// Returns the address of the instruction following pSrc.
+//
+// NOTE(review): the original condition also tested "ppDstPool != NULL" a
+// second time, which is redundant; "*ppDstPool != NULL" may have been
+// intended -- confirm with the callers before changing the behaviour.
+PBYTE CDetourDis::CopyInstruction(PBYTE pDst,
+                                  PBYTE *ppDstPool,
+                                  PBYTE pSrc,
+                                  PBYTE *ppTarget,
+                                  LONG *plExtra)
+{
+    const bool haveCallerBuffers = (pDst != NULL) && (ppDstPool != NULL);
+
+    if (!haveCallerBuffers) {
+        // Dry run: emit into the scratch buffer, with the pool at its end
+        pDst = m_rbScratchDst;
+        m_pbPool = m_rbScratchDst + sizeof(m_rbScratchDst);
+    }
+    else {
+        m_pbPool = (PBYTE)*ppDstPool;
+    }
+
+    // Make sure the constant pool is 32-bit aligned.
+    m_pbPool -= ((ULONG_PTR)m_pbPool) & 3;
+
+    // Dispatch on the upper five bits of the second source byte
+    REFCOPYENTRY pEntry = &s_rceCopyTable[pSrc[1] >> 3];
+    const ULONG cbConsumed = (this->*pEntry->pfCopy)(pSrc, pDst);
+
+    // Hand the results back through whichever out-parameters were supplied
+    if (ppTarget) {
+        *ppTarget = m_pbTarget;
+    }
+    if (plExtra) {
+        *plExtra = m_lExtra;
+    }
+    if (ppDstPool) {
+        *ppDstPool = m_pbPool;
+    }
+
+    return pSrc + cbConsumed;
+}
+
+
+// Public entry point: copies the instruction at pSrc using a fresh
+// CDetourDis instance and forwards every argument to
+// CDetourDis::CopyInstruction. Returns the pointer that call returns.
+PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst,
+                                   _Inout_opt_ PVOID *ppDstPool,
+                                   _In_ PVOID pSrc,
+                                   _Out_opt_ PVOID *ppTarget,
+                                   _Out_opt_ LONG *plExtra)
+{
+    CDetourDis disassembler;
+
+    PBYTE pbNext = disassembler.CopyInstruction((PBYTE)pDst,
+                                                (PBYTE*)ppDstPool,
+                                                (PBYTE)pSrc,
+                                                (PBYTE*)ppTarget,
+                                                plExtra);
+    return (PVOID)pbNext;
+}
+
+#endif // DETOURS_ARM
+
+#ifdef DETOURS_ARM64
+
+// Register numbers and fixed instruction encodings for the ARM64 code below.
+#define c_LR        30          // The register number for the Link Register
+#define c_SP        31          // The register number for the Stack Pointer
+#define c_NOP       0xd503201f  // A nop instruction
+// The value 0xf000 is placed at bit 5 of the base encoding -- presumably the
+// immediate operand of the break instruction; confirm against the ARM ARM.
+#define c_BREAK     (0xd4200000 | (0xf000 << 5)) // A break instruction
+
+//
+// Problematic instructions:
+//
+// ADR     0ll10000 hhhhhhhh hhhhhhhh hhhddddd  & 0x9f000000 == 0x10000000  (l = low, h = high, d = Rd)
+// ADRP    1ll10000 hhhhhhhh hhhhhhhh hhhddddd  & 0x9f000000 == 0x90000000  (l = low, h = high, d = Rd)
+//
+// B.cond  01010100 iiiiiiii iiiiiiii iii0cccc  & 0xff000010 == 0x54000000  (i = delta = SignExtend(imm19:00, 64), c = cond)
+//
+// B       000101ii iiiiiiii iiiiiiii iiiiiiii  & 0xfc000000 == 0x14000000  (i = delta = SignExtend(imm26:00, 64))
+// BL      100101ii iiiiiiii iiiiiiii iiiiiiii  & 0xfc000000 == 0x94000000  (i = delta = SignExtend(imm26:00, 64))
+//
+// CBNZ    z0110101 iiiiiiii iiiiiiii iiittttt  & 0x7f000000 == 0x35000000  (z = size, i = delta = SignExtend(imm19:00, 64), t = Rt)
+// CBZ     z0110100 iiiiiiii iiiiiiii iiittttt  & 0x7f000000 == 0x34000000  (z = size, i = delta = SignExtend(imm19:00, 64), t = Rt)
+//
+// LDR Wt  00011000 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0x18000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// LDR Xt  01011000 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0x58000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// LDRSW   10011000 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0x98000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// PRFM    11011000 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0xd8000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// LDR St  00011100 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0x1c000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// LDR Dt  01011100 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0x5c000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// LDR Qt  10011100 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0x9c000000  (i = SignExtend(imm19:00, 64), t = Rt)
+// LDR inv 11011100 iiiiiiii iiiiiiii iiittttt  & 0xff000000 == 0xdc000000  (i = SignExtend(imm19:00, 64), t = Rt)
+//
+// TBNZ    z0110111 bbbbbiii iiiiiiii iiittttt  & 0x7f000000 == 0x37000000  (z = size, b = bitnum, i = SignExtend(imm14:00, 64), t = Rt)
+// TBZ     z0110110 bbbbbiii iiiiiiii iiittttt  & 0x7f000000 == 0x36000000  (z = size, b = bitnum, i = SignExtend(imm14:00, 64), t = Rt)
+//
+
+//
+// ARM64 instruction copier.
+//
+// CopyInstruction() copies one 4-byte instruction from pSrc to pDst.  The
+// PC-relative forms listed in the table above are re-encoded by the Copy*
+// helpers so that they still refer to their original target; everything else
+// is copied verbatim by PureCopy32().
+//
+// The nested unions overlay a DWORD instruction word ("Assembled") with a
+// bit-field view ("s") used both to decode fields and to assemble new
+// instructions.  The field comments describe the layout assuming the compiler
+// allocates bit-fields starting from the least-significant bit.
+//
+class CDetourDis
+{
+  public:
+    CDetourDis();
+
+    // Copies the instruction at pSrc to pDst (or to the internal scratch
+    // buffer when pDst is NULL).  *ppTarget, if non-NULL, receives m_pbTarget;
+    // *plExtra, if non-NULL, receives the emitted size minus 4.  Returns
+    // pSrc + 4.
+    PBYTE   CopyInstruction(PBYTE pDst,
+                            PBYTE pSrc,
+                            PBYTE *ppTarget,
+                            LONG *plExtra);
+
+  public:
+    typedef BYTE (CDetourDis::* COPYFUNC)(PBYTE pbDst, PBYTE pbSrc);
+
+    // ADD (immediate) encoding.
+    union AddImm12
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rd : 5;           // Destination register
+            DWORD Rn : 5;           // Source register
+            DWORD Imm12 : 12;       // 12-bit immediate
+            DWORD Shift : 2;        // shift (must be 0 or 1)
+            DWORD Opcode1 : 7;      // Must be 0010001 == 0x11
+            DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
+        } s;
+        // Builds an ADD; imm is masked to its low 12 bits.
+        static DWORD Assemble(DWORD size, DWORD rd, DWORD rn, ULONG imm, DWORD shift)
+        {
+            AddImm12 temp;
+            temp.s.Rd = rd;
+            temp.s.Rn = rn;
+            temp.s.Imm12 = imm & 0xfff;
+            temp.s.Shift = shift;
+            temp.s.Opcode1 = 0x11;
+            temp.s.Size = size;
+            return temp.Assembled;
+        }
+        static DWORD AssembleAdd32(DWORD rd, DWORD rn, ULONG imm, DWORD shift) { return Assemble(0, rd, rn, imm, shift); }
+        static DWORD AssembleAdd64(DWORD rd, DWORD rn, ULONG imm, DWORD shift) { return Assemble(1, rd, rn, imm, shift); }
+    };
+
+    // ADR / ADRP encoding.  The 21-bit immediate is split into Imm19 (high
+    // part) and Imm2 (low part).
+    union Adr19
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rd : 5;           // Destination register
+            DWORD Imm19 : 19;       // 19-bit upper immediate
+            DWORD Opcode1 : 5;      // Must be 10000 == 0x10
+            DWORD Imm2 : 2;         // 2-bit lower immediate
+            DWORD Type : 1;         // 0 = ADR, 1 = ADRP
+        } s;
+        // Rejoins Imm19:Imm2 and sign-extends the 21-bit result by shifting it
+        // to the top of a 32-bit value and back down (relies on an arithmetic
+        // right shift of a signed LONG).  The value is not scaled here.
+        inline LONG Imm() const { DWORD Imm = (s.Imm19 << 2) | s.Imm2; return (LONG)(Imm << 11) >> 11; }
+        // delta is split back into Imm19 (delta >> 2) and Imm2 (delta & 3).
+        static DWORD Assemble(DWORD type, DWORD rd, LONG delta)
+        {
+            Adr19 temp;
+            temp.s.Rd = rd;
+            temp.s.Imm19 = (delta >> 2) & 0x7ffff;
+            temp.s.Opcode1 = 0x10;
+            temp.s.Imm2 = delta & 3;
+            temp.s.Type = type;
+            return temp.Assembled;
+        }
+        static DWORD AssembleAdr(DWORD rd, LONG delta) { return Assemble(0, rd, delta); }
+        static DWORD AssembleAdrp(DWORD rd, LONG delta) { return Assemble(1, rd, delta); }
+    };
+
+    // B.cond encoding.
+    union Bcc19
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Condition : 4;    // Condition
+            DWORD Opcode1 : 1;      // Must be 0
+            DWORD Imm19 : 19;       // 19-bit immediate
+            DWORD Opcode2 : 8;      // Must be 01010100 == 0x54
+        } s;
+        // Sign-extends Imm19 and multiplies it by 4 in one step (<< 13 puts
+        // the field's top bit in bit 31, >> 11 leaves it shifted left by 2).
+        inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; }
+        // delta is a byte offset; it is stored as delta >> 2.
+        static DWORD AssembleBcc(DWORD condition, LONG delta)
+        {
+            Bcc19 temp;
+            temp.s.Condition = condition;
+            temp.s.Opcode1 = 0;
+            temp.s.Imm19 = delta >> 2;
+            temp.s.Opcode2 = 0x54;
+            return temp.Assembled;
+        }
+    };
+
+    // B / BL encoding.
+    union Branch26
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Imm26 : 26;       // 26-bit immediate
+            DWORD Opcode1 : 5;      // Must be 00101 == 0x5
+            DWORD Link : 1;         // 0 = B, 1 = BL
+        } s;
+        // Sign-extends Imm26 and multiplies it by 4 (<< 6 then >> 4).
+        inline LONG Imm() const { return (LONG)(s.Imm26 << 6) >> 4; }
+        // delta is a byte offset; it is stored as delta >> 2.
+        static DWORD Assemble(DWORD link, LONG delta)
+        {
+            Branch26 temp;
+            temp.s.Imm26 = delta >> 2;
+            temp.s.Opcode1 = 0x5;
+            temp.s.Link = link;
+            return temp.Assembled;
+        }
+        static DWORD AssembleB(LONG delta) { return Assemble(0, delta); }
+        static DWORD AssembleBl(LONG delta) { return Assemble(1, delta); }
+    };
+
+    // Branch-to-register encoding (with or without link).
+    union Br
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Opcode1 : 5;      // Must be 00000 == 0
+            DWORD Rn : 5;           // Register number
+            DWORD Opcode2 : 22;     // Must be 1101011000011111000000 == 0x3587c0 for Br
+                                    //                                   0x358fc0 for Brl
+        } s;
+        static DWORD Assemble(DWORD rn, bool link)
+        {
+            Br temp;
+            temp.s.Opcode1 = 0;
+            temp.s.Rn = rn;
+            temp.s.Opcode2 = 0x3587c0;
+            // Bit 21 of the word is the only difference between the two
+            // Opcode2 values above (0x3587c0 vs 0x358fc0, shifted left by 10).
+            if (link)
+                temp.Assembled |= 0x00200000;
+            return temp.Assembled;
+        }
+        static DWORD AssembleBr(DWORD rn)
+        {
+            return Assemble(rn, false);
+        }
+        static DWORD AssembleBrl(DWORD rn)
+        {
+            return Assemble(rn, true);
+        }
+    };
+
+    // CBZ / CBNZ encoding.
+    union Cbz19
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rt : 5;           // Register to test
+            DWORD Imm19 : 19;       // 19-bit immediate
+            DWORD Nz : 1;           // 0 = CBZ, 1 = CBNZ
+            DWORD Opcode1 : 6;      // Must be 011010 == 0x1a
+            DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
+        } s;
+        // Sign-extends Imm19 and multiplies it by 4 (<< 13 then >> 11).
+        inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; }
+        // delta is a byte offset; it is stored as delta >> 2.
+        static DWORD Assemble(DWORD size, DWORD nz, DWORD rt, LONG delta)
+        {
+            Cbz19 temp;
+            temp.s.Rt = rt;
+            temp.s.Imm19 = delta >> 2;
+            temp.s.Nz = nz;
+            temp.s.Opcode1 = 0x1a;
+            temp.s.Size = size;
+            return temp.Assembled;
+        }
+    };
+
+    // Load-literal (PC-relative LDR / LDRSW / PRFM) encoding.
+    union LdrLit19
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rt : 5;           // Destination register
+            DWORD Imm19 : 19;       // 19-bit immediate
+            DWORD Opcode1 : 2;      // Must be 0
+            DWORD FpNeon : 1;       // 0 = LDR Wt/LDR Xt/LDRSW/PRFM, 1 = LDR St/LDR Dt/LDR Qt
+            DWORD Opcode2 : 3;      // Must be 011 = 3
+            DWORD Size : 2;         // 00 = LDR Wt/LDR St, 01 = LDR Xt/LDR Dt, 10 = LDRSW/LDR Qt, 11 = PRFM/invalid
+        } s;
+        // Sign-extends Imm19 and multiplies it by 4 (<< 13 then >> 11).
+        inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; }
+        // delta is a byte offset; it is stored as delta >> 2.
+        static DWORD Assemble(DWORD size, DWORD fpneon, DWORD rt, LONG delta)
+        {
+            LdrLit19 temp;
+            temp.s.Rt = rt;
+            temp.s.Imm19 = delta >> 2;
+            temp.s.Opcode1 = 0;
+            temp.s.FpNeon = fpneon;
+            temp.s.Opcode2 = 3;
+            temp.s.Size = size;
+            return temp.Assembled;
+        }
+    };
+
+    // FP/NEON load from [Rn + immediate].  Despite the "Imm9" in the name the
+    // immediate field declared here is 12 bits wide.
+    union LdrFpNeonImm9
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rt : 5;           // Destination register
+            DWORD Rn : 5;           // Base register
+            DWORD Imm12 : 12;       // 12-bit immediate
+            DWORD Opcode1 : 1;      // Must be 1 == 1
+            DWORD Opc : 1;          // Part of size
+            DWORD Opcode2 : 6;      // Must be 111101 == 0x3d
+            DWORD Size : 2;         // Size (0=8-bit, 1=16-bit, 2=32-bit, 3=64-bit, 4=128-bit)
+        } s;
+        // size takes the 0..4 values listed on the Size field: its low two
+        // bits go to Size and bit 2 goes to Opc.
+        static DWORD Assemble(DWORD size, DWORD rt, DWORD rn, ULONG imm)
+        {
+            LdrFpNeonImm9 temp;
+            temp.s.Rt = rt;
+            temp.s.Rn = rn;
+            temp.s.Imm12 = imm;
+            temp.s.Opcode1 = 1;
+            temp.s.Opc = size >> 2;
+            temp.s.Opcode2 = 0x3d;
+            temp.s.Size = size & 3;
+            return temp.Assembled;
+        }
+    };
+
+    // MOVN / MOVZ / MOVK (16-bit immediate move) encoding.
+    union Mov16
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rd : 5;           // Destination register
+            DWORD Imm16 : 16;       // Immediate
+            DWORD Shift : 2;        // Shift amount (0=0, 1=16, 2=32, 3=48)
+            DWORD Opcode : 6;       // Must be 100101 == 0x25
+            DWORD Type : 2;         // 0 = MOVN, 1 = reserved, 2 = MOVZ, 3 = MOVK
+            DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
+        } s;
+        static DWORD Assemble(DWORD size, DWORD type, DWORD rd, DWORD imm, DWORD shift)
+        {
+            Mov16 temp;
+            temp.s.Rd = rd;
+            temp.s.Imm16 = imm;
+            temp.s.Shift = shift;
+            temp.s.Opcode = 0x25;
+            temp.s.Type = type;
+            temp.s.Size = size;
+            return temp.Assembled;
+        }
+        // Convenience wrappers: the first two Assemble() arguments are
+        // (size, type) as described on the Size and Type fields.
+        static DWORD AssembleMovn32(DWORD rd, DWORD imm, DWORD shift) { return Assemble(0, 0, rd, imm, shift); }
+        static DWORD AssembleMovn64(DWORD rd, DWORD imm, DWORD shift) { return Assemble(1, 0, rd, imm, shift); }
+        static DWORD AssembleMovz32(DWORD rd, DWORD imm, DWORD shift) { return Assemble(0, 2, rd, imm, shift); }
+        static DWORD AssembleMovz64(DWORD rd, DWORD imm, DWORD shift) { return Assemble(1, 2, rd, imm, shift); }
+        static DWORD AssembleMovk32(DWORD rd, DWORD imm, DWORD shift) { return Assemble(0, 3, rd, imm, shift); }
+        static DWORD AssembleMovk64(DWORD rd, DWORD imm, DWORD shift) { return Assemble(1, 3, rd, imm, shift); }
+    };
+
+    // TBZ / TBNZ encoding.
+    union Tbz14
+    {
+        DWORD Assembled;
+        struct
+        {
+            DWORD Rt : 5;           // Register to test
+            DWORD Imm14 : 14;       // 14-bit immediate
+            DWORD Bit : 5;          // 5-bit index
+            DWORD Nz : 1;           // 0 = TBZ, 1 = TBNZ
+            DWORD Opcode1 : 6;      // Must be 011011 == 0x1b
+            DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
+        } s;
+        // Sign-extends Imm14 and multiplies it by 4 (<< 18 then >> 16).
+        inline LONG Imm() const { return (LONG)(s.Imm14 << 18) >> 16; }
+        // delta is a byte offset; it is stored as delta >> 2.
+        static DWORD Assemble(DWORD size, DWORD nz, DWORD rt, DWORD bit, LONG delta)
+        {
+            Tbz14 temp;
+            temp.s.Rt = rt;
+            temp.s.Imm14 = delta >> 2;
+            temp.s.Bit = bit;
+            temp.s.Nz = nz;
+            temp.s.Opcode1 = 0x1b;
+            temp.s.Size = size;
+            return temp.Assembled;
+        }
+    };
+
+
+  protected:
+    // Each helper below writes its output starting at pDest and returns the
+    // number of bytes it wrote.  PureCopy32 copies the word unchanged; the
+    // Copy* helpers re-encode `instruction` (the word read from pSource) for
+    // its new location.  EmitMovImmediate emits a move sequence that loads
+    // `immediate` into register rd, advancing pDstInst.
+    BYTE    PureCopy32(BYTE* pSource, BYTE* pDest);
+    BYTE    EmitMovImmediate(PULONG& pDstInst, BYTE rd, UINT64 immediate);
+    BYTE    CopyAdr(BYTE* pSource, BYTE* pDest, ULONG instruction);
+    BYTE    CopyBcc(BYTE* pSource, BYTE* pDest, ULONG instruction);
+    BYTE    CopyB(BYTE* pSource, BYTE* pDest, ULONG instruction);
+    BYTE    CopyBl(BYTE* pSource, BYTE* pDest, ULONG instruction);
+    BYTE    CopyB_or_Bl(BYTE* pSource, BYTE* pDest, ULONG instruction, bool link);
+    BYTE    CopyCbz(BYTE* pSource, BYTE* pDest, ULONG instruction);
+    BYTE    CopyTbz(BYTE* pSource, BYTE* pDest, ULONG instruction);
+    BYTE    CopyLdrLiteral(BYTE* pSource, BYTE* pDest, ULONG instruction);
+
+  protected:
+    // Reads the 32-bit instruction word at pSource.
+    ULONG GetInstruction(BYTE* pSource)
+    {
+        return ((PULONG)pSource)[0];
+    }
+
+    // Stores one instruction word at pDstInst, advances pDstInst past it and
+    // returns the size of the word in bytes.
+    BYTE EmitInstruction(PULONG& pDstInst, ULONG instruction)
+    {
+        *pDstInst++ = instruction;
+        return sizeof(ULONG);
+    }
+
+  protected:
+    // Branch target recorded by the branch-copying helpers; initialised to
+    // DETOUR_INSTRUCTION_TARGET_NONE by the constructor.
+    PBYTE   m_pbTarget;
+    // Output buffer used by CopyInstruction when the caller passes no pDst.
+    BYTE    m_rbScratchDst[128]; // matches or exceeds rbCode
+};
+
+// Copies one 32-bit instruction word from pSource to pDest unchanged and
+// returns the number of bytes written.
+BYTE CDetourDis::PureCopy32(BYTE* pSource, BYTE* pDest)
+{
+    const ULONG word = *(ULONG*)pSource;
+
+    *(ULONG *)pDest = word;
+    return sizeof(DWORD);
+}
+
+/////////////////////////////////////////////////////////// Disassembler Code.
+//
+// Starts with "no branch target" recorded; the scratch buffer is left
+// uninitialised.
+CDetourDis::CDetourDis()
+    : m_pbTarget((PBYTE)DETOUR_INSTRUCTION_TARGET_NONE)
+{
+}
+
+//
+// Copies the single instruction at pSrc, re-encoding it when it is one of the
+// PC-relative forms handled by the Copy* helpers.
+//
+//   pDst     - where to write the copy; NULL selects the internal scratch buffer.
+//   pSrc     - address of the instruction to copy.
+//   ppTarget - optional; receives m_pbTarget.
+//   plExtra  - optional; receives the number of bytes emitted beyond the
+//              original 4.
+//
+// Returns the address of the next source instruction (pSrc + 4).
+//
+PBYTE CDetourDis::CopyInstruction(PBYTE pDst,
+                                  PBYTE pSrc,
+                                  PBYTE *ppTarget,
+                                  LONG *plExtra)
+{
+    PBYTE const pbOut = (pDst == NULL) ? m_rbScratchDst : pDst;
+    const DWORD opcode = GetInstruction(pSrc);
+
+    // Classify by opcode bits; the order of the tests matches the table of
+    // problematic instructions at the top of this section.
+    ULONG cbEmitted;
+    if ((opcode & 0x1f000000) == 0x10000000) {
+        // ADR / ADRP
+        cbEmitted = CopyAdr(pSrc, pbOut, opcode);
+    }
+    else if ((opcode & 0xff000010) == 0x54000000) {
+        // B.cond
+        cbEmitted = CopyBcc(pSrc, pbOut, opcode);
+    }
+    else if ((opcode & 0x7c000000) == 0x14000000) {
+        // B / BL: the top bit selects the linking form
+        const bool fLink = (opcode & 0x80000000) != 0;
+        cbEmitted = CopyB_or_Bl(pSrc, pbOut, opcode, fLink);
+    }
+    else if ((opcode & 0x7e000000) == 0x34000000) {
+        // CBZ / CBNZ
+        cbEmitted = CopyCbz(pSrc, pbOut, opcode);
+    }
+    else if ((opcode & 0x7e000000) == 0x36000000) {
+        // TBZ / TBNZ
+        cbEmitted = CopyTbz(pSrc, pbOut, opcode);
+    }
+    else if ((opcode & 0x3b000000) == 0x18000000) {
+        // LDR (literal) family
+        cbEmitted = CopyLdrLiteral(pSrc, pbOut, opcode);
+    }
+    else {
+        // Position-independent: copy the word as is
+        cbEmitted = PureCopy32(pSrc, pbOut);
+    }
+
+    // Report the optional outputs
+    if (ppTarget) {
+        *ppTarget = m_pbTarget;
+    }
+    if (plExtra) {
+        *plExtra = cbEmitted - sizeof(DWORD);
+    }
+
+    return pSrc + 4;
+}
+
+//
+// Emits a MOVN/MOVZ (+ MOVK) sequence at pDstInst that loads `immediate` into
+// register rd, advancing pDstInst past what was written.
+//
+// Returns the number of bytes emitted.
+//
+BYTE CDetourDis::EmitMovImmediate(PULONG& pDstInst, BYTE rd, UINT64 immediate)
+{
+    // Split the constant into 16-bit chunks; chunk[0] is the least significant
+    DWORD chunk[4];
+    for (int i = 0; i < 4; i++)
+    {
+        chunk[i] = (DWORD)((immediate >> (16 * i)) & 0xffff);
+    }
+
+    // special case: a value of the form 0x00000000ffffXXXX fits in a single
+    // MOVN with a 32-bit destination
+    if (chunk[3] == 0 && chunk[2] == 0 && chunk[1] == 0xffff)
+    {
+        EmitInstruction(pDstInst, Mov16::AssembleMovn32(rd, chunk[0] ^ 0xffff, 0));
+        return (BYTE)(1 * sizeof(DWORD));
+    }
+
+    // General case with a 64-bit destination: pick whichever of 0x0000/0xffff
+    // is the more common chunk as the background, so those chunks cost nothing
+    int nZero = 0;
+    int nOnes = 0;
+    for (int i = 3; i >= 0; i--)
+    {
+        nZero += (chunk[i] == 0x0000);
+        nOnes += (chunk[i] == 0xffff);
+    }
+    const DWORD background = (nOnes > nZero) ? 0xffff : 0x0000;
+
+    int emitted = 0;
+    for (int idx = 3; idx >= 0; idx--)
+    {
+        const DWORD value = chunk[idx];
+        const bool isFirst = (emitted == 0);
+
+        // Skip background chunks, but always emit at least one instruction
+        if (value == background && !(idx == 0 && isFirst))
+        {
+            continue;
+        }
+
+        DWORD encoded;
+        if (!isFirst)
+        {
+            // later chunks are patched in without disturbing the rest
+            encoded = Mov16::AssembleMovk64(rd, value, idx);
+        }
+        else if (background == 0xffff)
+        {
+            // first instruction sets all other chunks to 0xffff
+            encoded = Mov16::AssembleMovn64(rd, value ^ 0xffff, idx);
+        }
+        else
+        {
+            // first instruction sets all other chunks to 0x0000
+            encoded = Mov16::AssembleMovz64(rd, value, idx);
+        }
+        EmitInstruction(pDstInst, encoded);
+        emitted++;
+    }
+
+    return (BYTE)(emitted * sizeof(DWORD));
+}
+
+//
+// Relocates an ADR or ADRP instruction so that the destination register still
+// receives the address the original computed relative to pSource.
+//
+//   pSource     - address of the original instruction.
+//   pDest       - address at which the replacement code is written.
+//   instruction - the 32-bit instruction word read from pSource.
+//
+// Returns the number of bytes written at pDest.
+//
+BYTE CDetourDis::CopyAdr(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    Adr19& decoded = (Adr19&)(instruction);
+    PULONG pDstInst = (PULONG)(pDest);
+
+    // ADR case
+    if (decoded.s.Type == 0)
+    {
+        BYTE* pTarget = pSource + decoded.Imm();
+        LONG64 delta = pTarget - pDest;
+        LONG64 deltaPage = ((ULONG_PTR)pTarget >> 12) - ((ULONG_PTR)pDest >> 12);
+
+        // output as ADR
+        if (delta >= -(1 << 20) && delta < (1 << 20))
+        {
+            EmitInstruction(pDstInst, Adr19::AssembleAdr(decoded.s.Rd, (LONG)delta));
+        }
+
+        // output as ADRP; ADD
+        else if (deltaPage >= -(1 << 20) && (deltaPage < (1 << 20)))
+        {
+            EmitInstruction(pDstInst, Adr19::AssembleAdrp(decoded.s.Rd, (LONG)deltaPage));
+            // The page offset must be added with the 64-bit ADD: the 32-bit
+            // form writes Wd, which clears the upper 32 bits of the page
+            // address that ADRP just placed in Xd.
+            EmitInstruction(pDstInst, AddImm12::AssembleAdd64(decoded.s.Rd, decoded.s.Rd, ((ULONG)(ULONG_PTR)pTarget) & 0xfff, 0));
+        }
+
+        // output as immediate move
+        else
+        {
+            EmitMovImmediate(pDstInst, decoded.s.Rd, (ULONG_PTR)pTarget);
+        }
+    }
+
+    // ADRP case: the immediate counts 4KB pages relative to the source page
+    else
+    {
+        BYTE* pTarget = (BYTE*)((((ULONG_PTR)pSource >> 12) + decoded.Imm()) << 12);
+        LONG64 deltaPage = ((ULONG_PTR)pTarget >> 12) - ((ULONG_PTR)pDest >> 12);
+
+        // output as ADRP
+        if (deltaPage >= -(1 << 20) && (deltaPage < (1 << 20)))
+        {
+            EmitInstruction(pDstInst, Adr19::AssembleAdrp(decoded.s.Rd, (LONG)deltaPage));
+        }
+
+        // output as immediate move
+        else
+        {
+            EmitMovImmediate(pDstInst, decoded.s.Rd, (ULONG_PTR)pTarget);
+        }
+    }
+
+    return (BYTE)((BYTE*)pDstInst - pDest);
+}
+
+//
+// Relocates a conditional branch (B.cond) and records its target in
+// m_pbTarget.  Returns the number of bytes written at pDest.
+//
+BYTE CDetourDis::CopyBcc(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    Bcc19& bcc = (Bcc19&)(instruction);
+    PULONG pOut = (PULONG)(pDest);
+
+    BYTE* const pbBranchTo = pSource + bcc.Imm();
+    m_pbTarget = pbBranchTo;
+
+    // Distance from the first emitted instruction, and from the second one
+    // (where the unconditional B of the two-instruction form would sit)
+    const LONG64 fromFirst = pbBranchTo - pDest;
+    const LONG64 fromSecond = pbBranchTo - (pDest + 4);
+
+    const bool fitsBcc = (fromFirst >= -(1 << 20)) && (fromFirst < (1 << 20));
+    const bool fitsB = (fromSecond >= -(1 << 27)) && (fromSecond < (1 << 27));
+
+    if (fitsBcc)
+    {
+        // Still in range: a single B.cond
+        EmitInstruction(pOut, Bcc19::AssembleBcc(bcc.s.Condition, (LONG)fromFirst));
+    }
+    else
+    {
+        // Both long forms use the inverted condition to hop over the
+        // instruction that follows it (8 bytes ahead)
+        const DWORD skipNext = Bcc19::AssembleBcc(bcc.s.Condition ^ 1, 8);
+
+        if (fitsB)
+        {
+            // inverted B.cond <skip>; B target
+            EmitInstruction(pOut, skipNext);
+            EmitInstruction(pOut, Branch26::AssembleB((LONG)fromSecond));
+        }
+        else
+        {
+            // MOV x17, target; inverted B.cond <skip>; BR x17
+            // (BIG assumption that x17 isn't being used for anything!!)
+            EmitMovImmediate(pOut, 17, (ULONG_PTR)pbBranchTo);
+            EmitInstruction(pOut, skipNext);
+            EmitInstruction(pOut, Br::AssembleBr(17));
+        }
+    }
+
+    return (BYTE)((BYTE*)pOut - pDest);
+}
+
+//
+// Relocates an unconditional branch (B, or BL when `link` is true) and
+// records its target in m_pbTarget.  Returns the number of bytes written at
+// pDest.
+//
+BYTE CDetourDis::CopyB_or_Bl(BYTE* pSource, BYTE* pDest, ULONG instruction, bool link)
+{
+    Branch26& branch = (Branch26&)(instruction);
+    PULONG pOut = (PULONG)(pDest);
+
+    BYTE* const pbBranchTo = pSource + branch.Imm();
+    m_pbTarget = pbBranchTo;
+
+    const LONG64 offset = pbBranchTo - pDest;
+    const bool inRange = (offset >= -(1 << 27)) && (offset < (1 << 27));
+
+    if (!inRange)
+    {
+        // Too far for a direct branch: MOV x17, target; then branch through
+        // x17, with or without link
+        // (BIG assumption that x17 isn't being used for anything!!)
+        EmitMovImmediate(pOut, 17, (ULONG_PTR)pbBranchTo);
+        EmitInstruction(pOut, Br::Assemble(17, link));
+    }
+    else
+    {
+        // Still reachable: re-encode as B or BL with the new offset
+        EmitInstruction(pOut, Branch26::Assemble(link, (LONG)offset));
+    }
+
+    return (BYTE)((BYTE*)pOut - pDest);
+}
+
+// Relocates a plain B: forwards to CopyB_or_Bl with linking disabled.
+BYTE CDetourDis::CopyB(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    const bool fLink = false;
+
+    return CopyB_or_Bl(pSource, pDest, instruction, fLink);
+}
+
+// Relocates a BL: forwards to CopyB_or_Bl with linking enabled.
+BYTE CDetourDis::CopyBl(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    const bool fLink = true;
+
+    return CopyB_or_Bl(pSource, pDest, instruction, fLink);
+}
+
+//
+// Relocates a CBZ/CBNZ and records its target in m_pbTarget.  Returns the
+// number of bytes written at pDest.
+//
+BYTE CDetourDis::CopyCbz(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    Cbz19& cbz = (Cbz19&)(instruction);
+    PULONG pOut = (PULONG)(pDest);
+
+    BYTE* const pbBranchTo = pSource + cbz.Imm();
+    m_pbTarget = pbBranchTo;
+
+    // Distance from the first emitted instruction, and from the second one
+    // (where the unconditional B of the two-instruction form would sit)
+    const LONG64 fromFirst = pbBranchTo - pDest;
+    const LONG64 fromSecond = pbBranchTo - (pDest + 4);
+
+    const bool fitsCbz = (fromFirst >= -(1 << 20)) && (fromFirst < (1 << 20));
+    const bool fitsB = (fromSecond >= -(1 << 27)) && (fromSecond < (1 << 27));
+
+    if (fitsCbz)
+    {
+        // Still in range: a single CBZ/CBNZ
+        EmitInstruction(pOut, Cbz19::Assemble(cbz.s.Size, cbz.s.Nz, cbz.s.Rt, (LONG)fromFirst));
+    }
+    else
+    {
+        // Both long forms use the opposite test to hop over the instruction
+        // that follows it (8 bytes ahead)
+        const DWORD skipNext = Cbz19::Assemble(cbz.s.Size, cbz.s.Nz ^ 1, cbz.s.Rt, 8);
+
+        if (fitsB)
+        {
+            // inverted CBZ/CBNZ <skip>; B target
+            EmitInstruction(pOut, skipNext);
+            EmitInstruction(pOut, Branch26::AssembleB((LONG)fromSecond));
+        }
+        else
+        {
+            // MOV x17, target; inverted CBZ/CBNZ <skip>; BR x17
+            // (BIG assumption that x17 isn't being used for anything!!)
+            EmitMovImmediate(pOut, 17, (ULONG_PTR)pbBranchTo);
+            EmitInstruction(pOut, skipNext);
+            EmitInstruction(pOut, Br::AssembleBr(17));
+        }
+    }
+
+    return (BYTE)((BYTE*)pOut - pDest);
+}
+
+//
+// Relocates a TBZ/TBNZ and records its target in m_pbTarget.  Returns the
+// number of bytes written at pDest.
+//
+BYTE CDetourDis::CopyTbz(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    Tbz14& tbz = (Tbz14&)(instruction);
+    PULONG pOut = (PULONG)(pDest);
+
+    BYTE* const pbBranchTo = pSource + tbz.Imm();
+    m_pbTarget = pbBranchTo;
+
+    // Distance from the first emitted instruction, and from the second one
+    // (where the unconditional B of the two-instruction form would sit)
+    const LONG64 fromFirst = pbBranchTo - pDest;
+    const LONG64 fromSecond = pbBranchTo - (pDest + 4);
+
+    const bool fitsTbz = (fromFirst >= -(1 << 13)) && (fromFirst < (1 << 13));
+    const bool fitsB = (fromSecond >= -(1 << 27)) && (fromSecond < (1 << 27));
+
+    if (fitsTbz)
+    {
+        // Still in range: a single TBZ/TBNZ
+        EmitInstruction(pOut, Tbz14::Assemble(tbz.s.Size, tbz.s.Nz, tbz.s.Rt, tbz.s.Bit, (LONG)fromFirst));
+    }
+    else
+    {
+        // Both long forms use the opposite test to hop over the instruction
+        // that follows it (8 bytes ahead)
+        const DWORD skipNext = Tbz14::Assemble(tbz.s.Size, tbz.s.Nz ^ 1, tbz.s.Rt, tbz.s.Bit, 8);
+
+        if (fitsB)
+        {
+            // inverted TBZ/TBNZ <skip>; B target
+            EmitInstruction(pOut, skipNext);
+            EmitInstruction(pOut, Branch26::AssembleB((LONG)fromSecond));
+        }
+        else
+        {
+            // MOV x17, target; inverted TBZ/TBNZ <skip>; BR x17
+            // (BIG assumption that x17 isn't being used for anything!!)
+            EmitMovImmediate(pOut, 17, (ULONG_PTR)pbBranchTo);
+            EmitInstruction(pOut, skipNext);
+            EmitInstruction(pOut, Br::AssembleBr(17));
+        }
+    }
+
+    return (BYTE)((BYTE*)pOut - pDest);
+}
+
+//
+// Relocates a PC-relative literal load (LDR Wt/Xt, LDRSW, PRFM, LDR St/Dt/Qt).
+//
+//   pSource     - address of the original instruction.
+//   pDest       - address at which the replacement code is written.
+//   instruction - the 32-bit instruction word read from pSource.
+//
+// When the literal is still reachable the instruction is re-encoded with the
+// new offset.  Otherwise an integer load is replaced by a move of the value
+// found at the literal address at copy time, a prefetch is replaced by a NOP,
+// and an FP/NEON load goes through x17.
+//
+// Returns the number of bytes written at pDest.
+//
+BYTE CDetourDis::CopyLdrLiteral(BYTE* pSource, BYTE* pDest, ULONG instruction)
+{
+    LdrLit19& decoded = (LdrLit19&)(instruction);
+    PULONG pDstInst = (PULONG)(pDest);
+
+    BYTE* pTarget = pSource + decoded.Imm();
+    LONG64 delta = pTarget - pDest;
+
+    // output as LDR.  The offset is a signed imm19 scaled by 4, so the reach
+    // is [-(1 << 20), (1 << 20)); anything wider would be truncated by
+    // LdrLit19::Assemble and point at the wrong literal.
+    if (delta >= -(1 << 20) && delta < (1 << 20))
+    {
+        EmitInstruction(pDstInst, LdrLit19::Assemble(decoded.s.Size, decoded.s.FpNeon, decoded.s.Rt, (LONG)delta));
+    }
+
+    // PRFM: Rt encodes a prefetch operation, not a register, and the
+    // instruction is only a hint, so an unreachable prefetch becomes a NOP
+    // rather than a move that would clobber a general-purpose register.
+    else if (decoded.s.FpNeon == 0 && decoded.s.Size == 3)
+    {
+        EmitInstruction(pDstInst, c_NOP);
+    }
+
+    // output as move immediate (the literal is read now, at copy time)
+    else if (decoded.s.FpNeon == 0)
+    {
+        UINT64 value = 0;
+        switch (decoded.s.Size)
+        {
+            case 0: value = *(ULONG*)pTarget;       break;  // LDR Wt: zero-extended
+            case 1: value = *(UINT64*)pTarget;   break;     // LDR Xt
+            case 2: value = *(LONG*)pTarget;        break;  // LDRSW: sign-extended
+            default:                                break;  // Size 3 handled above
+        }
+        EmitMovImmediate(pDstInst, decoded.s.Rt, value);
+    }
+
+    // FP/NEON register: compute address in x17 and load from there (BIG assumption that x17 isn't being used for anything!!)
+    else
+    {
+        EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget);
+        EmitInstruction(pDstInst, LdrFpNeonImm9::Assemble(2 + decoded.s.Size, decoded.s.Rt, 17, 0));
+    }
+
+    return (BYTE)((BYTE*)pDstInst - pDest);
+}
+
+
+// Public entry point (ARM64 build): copies the single instruction at pSrc via
+// a temporary CDetourDis.  ppDstPool is accepted for signature compatibility
+// and is not used on this architecture.
+PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst,
+                                   _Inout_opt_ PVOID *ppDstPool,
+                                   _In_ PVOID pSrc,
+                                   _Out_opt_ PVOID *ppTarget,
+                                   _Out_opt_ LONG *plExtra)
+{
+    UNREFERENCED_PARAMETER(ppDstPool);
+
+    CDetourDis disassembler;
+    PBYTE const pbNext = disassembler.CopyInstruction(
+        (PBYTE)pDst, (PBYTE)pSrc, (PBYTE*)ppTarget, plExtra);
+
+    return (PVOID)pbNext;
+}
+
+#endif // DETOURS_ARM64
+
+//
+// On x86/x64, passes the address range of hModule (or the whole address space
+// when hModule is NULL) together with fLimitReferencesToModule to
+// CDetourDis::SetCodeModule and returns its result.  On ARM, ARM64 and IA64
+// both arguments are ignored and TRUE is returned.
+//
+BOOL WINAPI DetourSetCodeModule(_In_ HMODULE hModule,
+                                _In_ BOOL fLimitReferencesToModule)
+{
+#if defined(DETOURS_X64) || defined(DETOURS_X86)
+    // Default range: everything
+    PBYTE pbFirst = NULL;
+    PBYTE pbLimit = (PBYTE)~(ULONG_PTR)0;
+
+    if (hModule != NULL) {
+        // Narrow the range to [module base, module base + module size)
+        ULONG cbImage = DetourGetModuleSize(hModule);
+
+        pbFirst = (PBYTE)hModule;
+        pbLimit = pbFirst + cbImage;
+    }
+
+    return CDetourDis::SetCodeModule(pbFirst, pbLimit, fLimitReferencesToModule);
+#elif defined(DETOURS_ARM) || defined(DETOURS_ARM64) || defined(DETOURS_IA64)
+    // Nothing to configure on these architectures
+    (void)fLimitReferencesToModule;
+    (void)hModule;
+    return TRUE;
+#else
+#error unknown architecture (x86, x64, arm, arm64, ia64)
+#endif
+}
+
+//
+///////////////////////////////////////////////////////////////// End of File.
diff --git a/src/detours/disolarm.cpp b/src/detours/disolarm.cpp
new file mode 100644
index 0000000..57e3a2c
--- /dev/null
+++ b/src/detours/disolarm.cpp
@@ -0,0 +1,2 @@
+#define DETOURS_ARM_OFFLINE_LIBRARY
+#include "disasm.cpp"
diff --git a/src/detours/disolarm64.cpp b/src/detours/disolarm64.cpp
new file mode 100644
index 0000000..f3a6aeb
--- /dev/null
+++ b/src/detours/disolarm64.cpp
@@ -0,0 +1,2 @@
+#define DETOURS_ARM64_OFFLINE_LIBRARY
+#include "disasm.cpp"
diff --git a/src/detours/disolia64.cpp b/src/detours/disolia64.cpp
new file mode 100644
index 0000000..9dd2410
--- /dev/null
+++ b/src/detours/disolia64.cpp
@@ -0,0 +1,2 @@
+#define DETOURS_IA64_OFFLINE_LIBRARY
+#include "disasm.cpp"
diff --git a/src/detours/disolx64.cpp b/src/detours/disolx64.cpp
new file mode 100644
index 0000000..cd05a00
--- /dev/null
+++ b/src/detours/disolx64.cpp
@@ -0,0 +1,2 @@
+#define DETOURS_X64_OFFLINE_LIBRARY
+#include "disasm.cpp"
diff --git a/src/detours/disolx86.cpp b/src/detours/disolx86.cpp
new file mode 100644
index 0000000..91ff7d9
--- /dev/null
+++ b/src/detours/disolx86.cpp
@@ -0,0 +1,2 @@
+#define DETOURS_X86_OFFLINE_LIBRARY
+#include "disasm.cpp"
diff --git a/src/detours/image.cpp b/src/detours/image.cpp
new file mode 100644
index 0000000..0219b72
--- /dev/null
+++ b/src/detours/image.cpp
@@ -0,0 +1,2247 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Image manipulation functions (image.cpp of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+//  Used for payloads, byways, and imports.
+//
+
+#if _MSC_VER >= 1900
+#pragma warning(push)
+#pragma warning(disable:4091) // empty typedef
+#endif
+#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1
+#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1
+#include <windows.h>
+#if _MSC_VER >= 1310
+#pragma warning(push)
+#if _MSC_VER > 1400
+#pragma warning(disable:6102 6103) // /analyze warnings
+#endif
+#include <strsafe.h>
+#pragma warning(pop)
+#endif
+
+#if (_MSC_VER < 1299)
+#pragma warning(disable: 4710)
+#endif
+
+// #define DETOUR_DEBUG 1
+#define DETOURS_INTERNAL
+
+#include "detours.h"
+
+#if DETOURS_VERSION != 0x4c0c1   // 0xMAJORcMINORcPATCH
+#error detours.h version mismatch
+#endif
+
+#if _MSC_VER >= 1900
+#pragma warning(pop)
+#endif
+
+namespace Detour
+{
+//////////////////////////////////////////////////////////////////////////////
+//
+#ifndef _STRSAFE_H_INCLUDED_
+//
+// Fallback for StringCchLengthA when <strsafe.h> is not available.
+//
+//   psz    - string to measure.
+//   cchMax - maximum number of characters to examine, including the
+//            terminator; must not exceed 2147483647.
+//   pcch   - optional; on success receives the length, excluding the
+//            terminator.
+//
+// Returns S_OK on success.  If cchMax is too large, or no terminator is found
+// within cchMax characters, returns HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER)
+// and leaves *pcch untouched.  The error is returned as a real failure
+// HRESULT: a bare Win32 code is a positive number, which SUCCEEDED() treats as
+// success and FAILED() never reports.
+//
+_Must_inspect_result_
+static inline HRESULT StringCchLengthA(
+    _In_reads_or_z_(cchMax) LPCSTR psz,
+    _In_
+    _In_range_(1, STRSAFE_MAX_CCH) size_t cchMax,
+    _Out_opt_
+    _Deref_out_range_(<, cchMax)
+    _Deref_out_range_(<=, _String_length_(psz))
+    _Out_ size_t* pcch)
+{
+    size_t cchRemaining = cchMax;
+
+    if (cchMax > 2147483647) {
+        return HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
+    }
+
+    while (cchRemaining && (*psz != '\0')) {
+        psz++;
+        cchRemaining--;
+    }
+
+    if (cchRemaining == 0) {
+        // the string is longer than cchMax
+        return HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
+    }
+
+    if (pcch) {
+        *pcch = cchMax - cchRemaining;
+    }
+
+    return S_OK;
+}
+
+//
+// Fallback for StringCchCopyA when <strsafe.h> is not available.
+//
+//   pszDest - destination buffer.
+//   cchDest - size of the destination buffer in characters, including room
+//             for the terminator.
+//   pszSrc  - string to copy.
+//
+// Returns S_OK when the whole string fit.  If cchDest is 0 nothing is written;
+// if the source does not fit it is truncated and the destination is still
+// null-terminated.  Both cases return
+// HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), a real failure HRESULT: a bare
+// Win32 code is a positive number, which SUCCEEDED() treats as success.
+//
+_Must_inspect_result_
+static inline HRESULT StringCchCopyA(
+    _Out_writes_(cchDest) _Always_(_Post_z_) LPSTR pszDest,
+    _In_ size_t cchDest,
+    _In_ LPCSTR pszSrc)
+{
+    HRESULT hr = S_OK;
+
+    if (cchDest == 0) {
+        // can not null terminate a zero-byte dest buffer
+        return HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
+    }
+
+    while (cchDest && (*pszSrc != '\0')) {
+        *pszDest++ = *pszSrc++;
+        cchDest--;
+    }
+
+    if (cchDest == 0) {
+        // we are going to truncate pszDest: step back so the terminator
+        // overwrites the last copied character
+        pszDest--;
+        hr = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
+    }
+
+    *pszDest = '\0';
+
+    return hr;
+}
+
+//
+// Fallback for StringCchCatA when <strsafe.h> is not available.
+//
+//   pszDest - destination buffer holding a null-terminated string.
+//   cchDest - total size of the destination buffer in characters; must not
+//             exceed 2147483647.
+//   pszSrc  - string to append.
+//
+// Measures the current destination string with StringCchLengthA and appends
+// pszSrc with StringCchCopyA, returning whatever those report.  An oversized
+// cchDest is rejected with HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), a real
+// failure HRESULT: a bare Win32 code is a positive number, which SUCCEEDED()
+// treats as success.
+//
+_Must_inspect_result_
+static inline HRESULT StringCchCatA(
+    _Out_writes_(cchDest) _Always_(_Post_z_) LPSTR pszDest,
+    _In_ size_t cchDest,
+    _In_ LPCSTR pszSrc)
+{
+    HRESULT hr;
+    size_t cchDestCurrent = 0;
+
+    if (cchDest > 2147483647){
+        return HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
+    }
+
+    hr = StringCchLengthA(pszDest, cchDest, &cchDestCurrent);
+
+    if (SUCCEEDED(hr) && cchDestCurrent < cchDest) {
+        // append starting at the existing terminator
+        hr = StringCchCopyA(pszDest + cchDestCurrent,
+                            cchDest - cchDestCurrent,
+                            pszSrc);
+    }
+
+    return hr;
+}
+
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Byte buffer (m_pbData) with a used size (m_cbData) and an allocated size
+// (m_cbAlloc), plus GUID-addressed accessors.  Only the declaration is
+// visible here; CImage is a friend and can reach the protected members.
+// NOTE(review): presumably this stores the payload records keyed by GUID --
+// confirm against the member function definitions.
+class CImageData
+{
+    friend class CImage;
+
+public:
+    CImageData(PBYTE pbData, DWORD cbData);
+    ~CImageData();
+
+    // GUID-based access to the stored data (definitions not shown here).
+    PBYTE                   Enumerate(GUID *pGuid, DWORD *pcbData, DWORD *pnIterator);
+    PBYTE                   Find(REFGUID rguid, DWORD *pcbData);
+    PBYTE                   Set(REFGUID rguid, PBYTE pbData, DWORD cbData);
+
+    BOOL                    Delete(REFGUID rguid);
+    BOOL                    Purge();
+
+    // TRUE when no bytes are in use (m_cbData == 0).
+    BOOL                    IsEmpty()           { return m_cbData == 0; }
+    BOOL                    IsValid();
+
+protected:
+    BOOL                    SizeTo(DWORD cbData);
+
+protected:
+    // The SAL annotation declares m_pbData as a buffer of m_cbAlloc bytes.
+    _Field_size_(m_cbAlloc)
+    PBYTE                   m_pbData;
+    DWORD                   m_cbData;
+    DWORD                   m_cbAlloc;
+};
+
+// Node in a singly linked list (m_pNextFile) describing one imported file and
+// its array of CImageImportName entries.  All data members are public; CImage
+// and CImageImportName are friends.
+// NOTE(review): the m_rva* and m_nForwarderChain members look like they mirror
+// fields of a PE import descriptor -- confirm against the code that fills
+// them in.
+class CImageImportFile
+{
+    friend class CImage;
+    friend class CImageImportName;
+
+public:
+    CImageImportFile();
+    ~CImageImportFile();
+
+public:
+    CImageImportFile *      m_pNextFile;
+    BOOL                    m_fByway;
+
+    // The SAL annotation declares m_pImportNames as an array of
+    // m_nImportNames elements.
+    _Field_size_(m_nImportNames)
+    CImageImportName *      m_pImportNames;
+    DWORD                   m_nImportNames;
+
+    DWORD                   m_rvaOriginalFirstThunk;
+    DWORD                   m_rvaFirstThunk;
+
+    DWORD                   m_nForwarderChain;
+    // Two name strings: m_pszOrig and m_pszName.
+    // NOTE(review): presumably the original and the possibly edited file
+    // name -- confirm.
+    LPCSTR                  m_pszOrig;
+    LPCSTR                  m_pszName;
+};
+
+// One imported symbol belonging to a CImageImportFile.  All data members are
+// public; CImage and CImageImportFile are friends.
+// NOTE(review): the Orig/non-Orig pairs presumably hold the original and the
+// possibly edited ordinal and name -- confirm against the code that fills
+// them in.
+class CImageImportName
+{
+    friend class CImage;
+    friend class CImageImportFile;
+
+public:
+    CImageImportName();
+    ~CImageImportName();
+
+public:
+    WORD        m_nHint;
+    ULONG       m_nOrig;
+    ULONG       m_nOrdinal;
+    LPCSTR      m_pszOrig;
+    LPCSTR      m_pszName;
+};
+
+class CImage
+{
+    friend class CImageThunks;
+    friend class CImageChars;
+    friend class CImageImportFile;
+    friend class CImageImportName;
+    // A PE file opened for editing: mapped input, parsed imports, detour payload records.
+public:
+    CImage();
+    ~CImage();
+
+    static CImage *         IsValid(PDETOUR_BINARY pBinary);    // handle -> CImage*, or NULL if the signature is wrong
+
+public:                                                 // File Functions
+    BOOL                    Read(HANDLE hFile);
+    BOOL                    Write(HANDLE hFile);
+    BOOL                    Close();
+
+public:                                                 // Manipulation Functions
+    PBYTE                   DataEnum(GUID *pGuid, DWORD *pcbData, DWORD *pnIterator);
+    PBYTE                   DataFind(REFGUID rguid, DWORD *pcbData);
+    PBYTE                   DataSet(REFGUID rguid, PBYTE pbData, DWORD cbData);
+    BOOL                    DataDelete(REFGUID rguid);
+    BOOL                    DataPurge();
+
+    BOOL                    EditImports(PVOID pContext,
+                                        PF_DETOUR_BINARY_BYWAY_CALLBACK pfBywayCallback,
+                                        PF_DETOUR_BINARY_FILE_CALLBACK pfFileCallback,
+                                        PF_DETOUR_BINARY_SYMBOL_CALLBACK pfSymbolCallback,
+                                        PF_DETOUR_BINARY_COMMIT_CALLBACK pfCommitCallback);
+
+protected:
+    BOOL                    WriteFile(HANDLE hFile,
+                                      LPCVOID lpBuffer,
+                                      DWORD nNumberOfBytesToWrite,
+                                      LPDWORD lpNumberOfBytesWritten);
+    BOOL                    CopyFileData(HANDLE hFile, DWORD nOldPos, DWORD cbData);
+    BOOL                    ZeroFileData(HANDLE hFile, DWORD cbData);
+    BOOL                    AlignFileData(HANDLE hFile);
+
+    BOOL                    SizeOutputBuffer(DWORD cbData);
+    PBYTE                   AllocateOutput(DWORD cbData, DWORD *pnVirtAddr);
+
+    PVOID                   RvaToVa(ULONG_PTR nRva);
+    DWORD                   RvaToFileOffset(DWORD nRva);
+
+    DWORD                   FileAlign(DWORD nAddr);
+    DWORD                   SectionAlign(DWORD nAddr);
+
+    BOOL                    CheckImportsNeeded(DWORD *pnTables,
+                                               DWORD *pnThunks,
+                                               DWORD *pnChars);
+
+    CImageImportFile *      NewByway(_In_ LPCSTR pszName);
+
+private:
+    DWORD                   m_dwValidSignature;         // DETOUR_IMAGE_VALID_SIGNATURE while the object is alive
+    CImageData *            m_pImageData;               // Read & Write
+
+    HANDLE                  m_hMap;                     // Read & Write
+    PBYTE                   m_pMap;                     // Read & Write
+
+    DWORD                   m_nNextFileAddr;            // Write
+    DWORD                   m_nNextVirtAddr;            // Write
+
+    IMAGE_DOS_HEADER        m_DosHeader;                // Read & Write
+    IMAGE_NT_HEADERS        m_NtHeader;                 // Read & Write
+    IMAGE_SECTION_HEADER    m_SectionHeaders[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; // Read() rejects images with more sections than fit
+
+    DWORD                   m_nPrePE;                   // file offset of the pre-PE bytes (set by Read)
+    DWORD                   m_cbPrePE;                  // size of the pre-PE bytes (set by Read)
+    DWORD                   m_cbPostPE;
+
+    DWORD                   m_nPeOffset;                // e_lfanew of the input file
+    DWORD                   m_nSectionsOffset;          // file offset of the section header table
+    DWORD                   m_nExtraOffset;             // highest end of raw section data seen by Read()
+    DWORD                   m_nFileSize;                // low 32 bits of the input file size
+
+    DWORD                   m_nOutputVirtAddr;          // virtual address matching the start of m_pbOutputBuffer
+    DWORD                   m_nOutputVirtSize;          // bytes handed out so far by AllocateOutput()
+    DWORD                   m_nOutputFileAddr;
+
+    _Field_size_(m_cbOutputBuffer)
+    PBYTE                   m_pbOutputBuffer;           // grown by SizeOutputBuffer(); freed by Close()
+    DWORD                   m_cbOutputBuffer;
+
+    CImageImportFile *      m_pImportFiles;             // head of the import file list built by Read()
+    DWORD                   m_nImportFiles;
+
+    BOOL                    m_fHadDetourSection;        // set by Read() when a ".detour" section is found
+
+private:
+    enum {
+        DETOUR_IMAGE_VALID_SIGNATURE = 0xfedcba01,      // stamped by the ctor, cleared by the dtor, tested by IsValid()
+    };
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+static BYTE s_rbDosCode[0x10] = {                  // 16 bytes: 14 opcode bytes followed by two '*' characters
+    0x0E,0x1F,0xBA,0x0E,0x00,0xB4,0x09,0xCD,        // NOTE(review): reads as 16-bit x86 DOS stub code --
+    0x21,0xB8,0x01,0x4C,0xCD,0x21,'*','*'           // print a '$' string via int 21h/AH=09, then exit via 4Ch -- confirm
+};
+
+static inline DWORD Max(DWORD a, DWORD b)      // larger of the two values
+{
+    return (b < a) ? a : b;
+}
+
+static inline DWORD Align(DWORD a, DWORD size)
+{
+    const DWORD mask = size - 1;    // low bits to clear; only meaningful when size is a power of two
+    return (a + mask) & ~mask;      // round a up to the next multiple of size
+}
+
+static inline DWORD QuadAlign(DWORD a)         // round a up to a multiple of 8
+{
+    return (a + 7) & ~(DWORD)7;
+}
+
+static LPCSTR DuplicateString(_In_ LPCSTR pszIn)
+{
+    // Returns a new[]-allocated copy of pszIn, or NULL when pszIn is NULL,
+    // is not terminated within 8192 characters, or memory runs out.
+    // The caller owns the copy and frees it with delete[].
+    if (!pszIn) {
+        return NULL;
+    }
+
+    size_t cchIn = 0;
+    if (FAILED(StringCchLengthA(pszIn, 8192, &cchIn))) {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return NULL;
+    }
+
+    const size_t cchBuf = cchIn + 1;            // room for the terminator
+    PCHAR pszCopy = new NOTHROW CHAR [cchBuf];
+    if (!pszCopy) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return NULL;
+    }
+    if (FAILED(StringCchCopyA(pszCopy, cchBuf, pszIn))) {
+        delete[] pszCopy;
+        pszCopy = NULL;
+    }
+    return pszCopy;
+}
+
+static VOID ReleaseString(_In_opt_ LPCSTR psz)
+{
+    // Frees a string produced by DuplicateString(). delete[] on a null
+    // pointer does nothing, so NULL is accepted without a guard.
+    delete[] psz;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+CImageImportFile::CImageImportFile()
+    // Starts as an empty, unlinked, non-byway entry that owns nothing.
+    : m_pNextFile(NULL),
+      m_fByway(FALSE),
+      m_pImportNames(NULL),
+      m_nImportNames(0),
+      m_rvaOriginalFirstThunk(0),
+      m_rvaFirstThunk(0),
+      m_nForwarderChain(0),
+      m_pszOrig(NULL),
+      m_pszName(NULL)
+{
+    // Everything is set in the initializer list above, which follows the
+    // members' declaration order.
+}
+
+CImageImportFile::~CImageImportFile()
+{
+    // Destroys everything this entry owns. The files form a singly linked
+    // list, so deleting the successor tears down the rest of the chain
+    // (recursively) before this node's own members are released.
+    //
+    // delete / delete[] on a null pointer does nothing, so the individual
+    // releases need no guards.
+    delete m_pNextFile;
+    m_pNextFile = NULL;
+
+    delete[] m_pImportNames;
+    m_pImportNames = NULL;
+    m_nImportNames = 0;
+
+    delete[] m_pszName;
+    m_pszName = NULL;
+    delete[] m_pszOrig;
+    m_pszOrig = NULL;
+}
+
+CImageImportName::CImageImportName()
+    : m_nHint(0),
+      m_nOrig(0),
+      m_nOrdinal(0),
+      m_pszOrig(NULL),
+      m_pszName(NULL)
+{   // Empty import: no hint, no ordinals, no names owned yet.
+}
+
+CImageImportName::~CImageImportName()
+{
+    // Both names are released with delete[]; since delete[] on a null
+    // pointer does nothing, no guards are needed.
+    delete[] m_pszName;
+    m_pszName = NULL;
+
+    delete[] m_pszOrig;
+    m_pszOrig = NULL;
+    // The hint and ordinals are plain values; nothing else to clean up.
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+CImageData::CImageData(PBYTE pbData, DWORD cbData)
+    : m_pbData(pbData),     // pointer is stored as given; nothing is copied
+      m_cbData(cbData),
+      m_cbAlloc(0)          // 0 = buffer not allocated by this object (see SizeTo)
+{
+}
+
+CImageData::~CImageData()
+{
+    IsValid();
+
+    // A zero m_cbAlloc means the bytes were handed to the constructor and
+    // never reallocated by SizeTo(), so they are not ours to free.
+    const BOOL fOwned = (m_cbAlloc != 0);
+    if (fOwned && m_pbData != NULL) {
+        delete[] m_pbData;
+    }
+    m_pbData = NULL;
+    m_cbData = 0;
+    m_cbAlloc = 0;
+}
+
+BOOL CImageData::SizeTo(DWORD cbData)
+{
+    // Ensures at least cbData bytes of owned storage, carrying over the
+    // current m_cbData bytes of records. Returns FALSE (ERROR_OUTOFMEMORY)
+    // when the larger buffer cannot be allocated.
+    IsValid();
+    if (m_cbAlloc >= cbData) {
+        return TRUE;                            // already big enough
+    }
+
+    PBYTE pbGrown = new NOTHROW BYTE [cbData];
+    if (!pbGrown) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return FALSE;
+    }
+
+    PBYTE pbOld = m_pbData;
+    if (pbOld != NULL) {
+        CopyMemory(pbGrown, pbOld, m_cbData);
+        if (m_cbAlloc != 0) {                   // only free what this object allocated
+            delete[] pbOld;
+        }
+    }
+    m_pbData = pbGrown;
+    m_cbAlloc = cbData;
+    IsValid();
+    return TRUE;
+}
+
+BOOL CImageData::Purge()
+{
+    m_cbData = 0;       // forget every record; the buffer and m_cbAlloc are left as they are
+    // Result of the consistency walk is not used.
+    IsValid();
+    // There is no failure path: always TRUE.
+    return TRUE;
+}
+
+BOOL CImageData::IsValid()
+{
+    // Walks the record chain in [m_pbData, m_pbData + m_cbData) and reports
+    // whether every record header is plausible: cbBytes must cover at least
+    // the header itself and nReserved must be zero. An absent buffer counts
+    // as valid.
+    if (!m_pbData) {
+        return TRUE;
+    }
+
+    PBYTE const pbLimit = m_pbData + m_cbData;
+    PBYTE pbCursor = m_pbData;
+
+    while (pbCursor < pbLimit) {
+        const DETOUR_SECTION_RECORD *pEntry = (PDETOUR_SECTION_RECORD)pbCursor;
+
+        if (pEntry->cbBytes < sizeof(DETOUR_SECTION_RECORD) || pEntry->nReserved != 0) {
+            return FALSE;
+        }
+        pbCursor += pEntry->cbBytes;
+    }
+    return TRUE;
+}
+
+PBYTE CImageData::Enumerate(GUID *pGuid, DWORD *pcbData, DWORD *pnIterator)
+{
+    // Returns the payload of the record at byte offset *pnIterator and
+    // advances *pnIterator past that record. *pGuid / *pcbData (both
+    // optional) receive the record's GUID and payload size. Without an
+    // iterator, or once no complete record header remains, the outputs
+    // are zeroed and NULL is returned.
+    IsValid();
+
+    const BOOL fHaveRecord = (pnIterator != NULL) &&
+        !(m_cbData < *pnIterator + sizeof(DETOUR_SECTION_RECORD));
+    if (!fHaveRecord) {
+        if (pcbData != NULL) {
+            *pcbData = 0;
+        }
+        if (pGuid != NULL) {
+            ZeroMemory(pGuid, sizeof(*pGuid));
+        }
+        return NULL;
+    }
+
+    const DWORD nStart = *pnIterator;
+    PDETOUR_SECTION_RECORD pEntry = (PDETOUR_SECTION_RECORD)(m_pbData + nStart);
+    if (pGuid != NULL) { *pGuid = pEntry->guid; }
+    if (pcbData != NULL) { *pcbData = pEntry->cbBytes - sizeof(DETOUR_SECTION_RECORD); }
+    *pnIterator = nStart + pEntry->cbBytes;
+    return (PBYTE)(pEntry + 1);
+}
+
+PBYTE CImageData::Find(REFGUID rguid, DWORD *pcbData)
+{
+    // Returns the payload of the record tagged with rguid, or NULL when no
+    // such record exists. *pcbData (optional) gets the payload size, or 0
+    // on a miss. The walk stops at the first malformed record: a length
+    // below the header size (zero would never advance and spin forever) or
+    // a header/length that does not fit in the bytes that remain.
+    IsValid();
+
+    for (DWORD nOffset = 0; nOffset < m_cbData;) {
+        DWORD cbLeft = m_cbData - nOffset;
+        if (cbLeft < sizeof(DETOUR_SECTION_RECORD)) {
+            break;
+        }
+
+        PDETOUR_SECTION_RECORD pRecord = (PDETOUR_SECTION_RECORD)(m_pbData + nOffset);
+        DWORD cbBytes = pRecord->cbBytes;
+        if (cbBytes < sizeof(DETOUR_SECTION_RECORD) || cbBytes > cbLeft) {
+            break;
+        }
+
+        const GUID &guid = pRecord->guid;
+        if (guid.Data1 == rguid.Data1 && guid.Data2 == rguid.Data2 && guid.Data3 == rguid.Data3 &&
+            guid.Data4[0] == rguid.Data4[0] && guid.Data4[1] == rguid.Data4[1] &&
+            guid.Data4[2] == rguid.Data4[2] && guid.Data4[3] == rguid.Data4[3] &&
+            guid.Data4[4] == rguid.Data4[4] && guid.Data4[5] == rguid.Data4[5] &&
+            guid.Data4[6] == rguid.Data4[6] && guid.Data4[7] == rguid.Data4[7]) {
+            if (pcbData) {
+                *pcbData = cbBytes - (DWORD)sizeof(DETOUR_SECTION_RECORD);
+            }
+            return (PBYTE)(pRecord + 1);
+        }
+        nOffset += cbBytes;
+    }
+
+    if (pcbData) { *pcbData = 0; }
+    return NULL;
+}
+
+BOOL CImageData::Delete(REFGUID rguid)
+{
+    // Removes the record tagged with rguid by sliding every later record
+    // down over it. Returns FALSE with ERROR_MOD_NOT_FOUND when no such
+    // record exists.
+    IsValid();
+
+    DWORD cbPayload = 0;
+    PBYTE pbPayload = Find(rguid, &cbPayload);
+    if (!pbPayload) {
+        SetLastError(ERROR_MOD_NOT_FOUND);
+        return FALSE;
+    }
+
+    // Widen the span from the payload to the whole record, header included.
+    PBYTE pbRecord = pbPayload - sizeof(DETOUR_SECTION_RECORD);
+    DWORD cbRecord = cbPayload + (DWORD)sizeof(DETOUR_SECTION_RECORD);
+
+    PBYTE pbTail = pbRecord + cbRecord;
+    DWORD cbTail = m_cbData - (LONG)(pbTail - m_pbData);
+    if (cbTail != 0) {
+        MoveMemory(pbRecord, pbTail, cbTail);
+    }
+    m_cbData -= cbRecord;
+    IsValid();
+    return TRUE;
+}
+
+PBYTE CImageData::Set(REFGUID rguid, PBYTE pbData, DWORD cbData)
+{
+    // Replaces (or adds) the record tagged with rguid. The payload is
+    // padded with zeros to a multiple of 8 bytes and appended after the
+    // existing records; a NULL pbData yields an all-zero payload. Returns
+    // the payload's address inside the buffer, or NULL when it cannot grow.
+    IsValid();
+    Delete(rguid);              // drop any previous copy; the result is ignored
+
+    const DWORD cbPayload = QuadAlign(cbData);
+    const DWORD cbRecord = cbPayload + (DWORD)sizeof(DETOUR_SECTION_RECORD);
+
+    if (!SizeTo(m_cbData + cbPayload + sizeof(DETOUR_SECTION_RECORD))) {
+        return NULL;
+    }
+
+    PDETOUR_SECTION_RECORD pEntry = (PDETOUR_SECTION_RECORD)(m_pbData + m_cbData);
+    pEntry->cbBytes = cbRecord;
+    pEntry->nReserved = 0;
+    pEntry->guid = rguid;
+
+    PBYTE pbPayload = (PBYTE)(pEntry + 1);
+    DWORD cbCopied = 0;
+    if (pbData != NULL) {
+        CopyMemory(pbPayload, pbData, cbData);
+        cbCopied = cbData;
+    }
+    if (cbCopied < cbPayload) {
+        ZeroMemory(pbPayload + cbCopied, cbPayload - cbCopied);
+    }
+    m_cbData += cbRecord;
+    IsValid();
+    return pbPayload;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+class CImageThunks
+{
+    // Hands out consecutive IMAGE_THUNK_DATA slots from one region of the
+    // owning CImage's output buffer, tracking each slot's virtual address.
+private:
+    CImage *            m_pImage;
+    PIMAGE_THUNK_DATA   m_pThunks;          // next free slot
+    DWORD               m_nThunks;          // slots handed out so far
+    DWORD               m_nThunksMax;       // slots reserved
+    DWORD               m_nThunkVirtAddr;   // virtual address of the next free slot
+public:
+    // Reserves room for nThunksMax thunks; *pnAddr gets the region's address.
+    CImageThunks(CImage *pImage, DWORD nThunksMax, DWORD *pnAddr)
+    {
+        m_pImage = pImage;
+        m_nThunksMax = nThunksMax;
+        m_nThunks = 0;
+        m_pThunks = (PIMAGE_THUNK_DATA)pImage->AllocateOutput(
+            sizeof(IMAGE_THUNK_DATA) * nThunksMax, &m_nThunkVirtAddr);
+        *pnAddr = m_nThunkVirtAddr;
+    }
+
+    // Peeks at the next free slot; NULL (address 0) unless more than one was reserved.
+    PIMAGE_THUNK_DATA Current(DWORD *pnVirtAddr)
+    {
+        const BOOL fUsable = (m_nThunksMax > 1);
+        *pnVirtAddr = fUsable ? m_nThunkVirtAddr : 0;
+        return fUsable ? m_pThunks : NULL;
+    }
+
+    // Consumes the next slot, stores nData in it and reports its address.
+    PIMAGE_THUNK_DATA Allocate(ULONG_PTR nData, DWORD *pnVirtAddr)
+    {
+        if (m_nThunks >= m_nThunksMax) {
+            *pnVirtAddr = 0;
+            return NULL;
+        }
+        PIMAGE_THUNK_DATA pSlot = m_pThunks++;
+        *pnVirtAddr = m_nThunkVirtAddr;
+        m_nThunkVirtAddr += sizeof(IMAGE_THUNK_DATA);
+        m_nThunks++;
+        pSlot->u1.Ordinal = nData;
+        return pSlot;
+    }
+
+    DWORD   Size()
+    {
+        return m_nThunksMax * sizeof(IMAGE_THUNK_DATA);
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+class CImageChars
+{
+private:
+    CImage *        m_pImage;
+    PCHAR           m_pChars;           // next free byte of the region
+    DWORD           m_nChars;           // bytes handed out so far
+    DWORD           m_nCharsMax;        // bytes requested for the region
+    DWORD           m_nCharVirtAddr;    // virtual address matching m_pChars
+
+public:
+    CImageChars(CImage *pImage, _In_ DWORD nCharsMax, _Out_ DWORD *pnAddr)
+    {
+        m_pImage = pImage;
+        m_nChars = 0;
+        m_nCharsMax = nCharsMax;
+        m_pChars = (PCHAR)m_pImage->AllocateOutput(nCharsMax, &m_nCharVirtAddr);
+        *pnAddr = m_nCharVirtAddr;
+    }
+    // Copies pszString (terminator included, padded to an even length) into the
+    // region. Returns the copy, or NULL with *pnVirtAddr == 0 when it cannot.
+    LPCSTR Allocate(_In_ LPCSTR pszString, _Out_ DWORD *pnVirtAddr)
+    {
+        DWORD nLen = (DWORD)strlen(pszString) + 1;
+        nLen += (nLen & 1);
+
+        if (m_nChars + nLen > m_nCharsMax) {
+            *pnVirtAddr = 0;
+            return NULL;
+        }
+
+        // Bound the copy by the space that is left, not by the region's total size.
+        HRESULT hrRet = StringCchCopyA(m_pChars, m_nCharsMax - m_nChars, pszString);
+        if (FAILED(hrRet)) {
+            *pnVirtAddr = 0;
+            return NULL;
+        }
+        *pnVirtAddr = m_nCharVirtAddr;
+        pszString = m_pChars;
+
+        m_pChars += nLen;
+        m_nChars += nLen;
+        m_nCharVirtAddr += nLen;
+        return pszString;
+    }
+    // Same, but stores nHint as a 16-bit value directly ahead of the string;
+    // *pnVirtAddr then refers to the hint, the returned pointer to the string.
+    LPCSTR Allocate(_In_ LPCSTR pszString, _In_ DWORD nHint, _Out_ DWORD *pnVirtAddr)
+    {
+        DWORD nLen = (DWORD)strlen(pszString) + 1 + sizeof(USHORT);
+        nLen += (nLen & 1);
+
+        if (m_nChars + nLen > m_nCharsMax) {
+            *pnVirtAddr = 0;
+            return NULL;
+        }
+
+        *(USHORT *)m_pChars = (USHORT)nHint;
+        HRESULT hrRet = StringCchCopyA(m_pChars + sizeof(USHORT),
+                                       m_nCharsMax - m_nChars - sizeof(USHORT), pszString);
+        if (FAILED(hrRet)) {
+            *pnVirtAddr = 0;
+            return NULL;
+        }
+        *pnVirtAddr = m_nCharVirtAddr;
+        pszString = m_pChars + sizeof(USHORT);
+
+        m_pChars += nLen;
+        m_nChars += nLen;
+        m_nCharVirtAddr += nLen;
+        return pszString;
+    }
+
+    DWORD Size()
+    {
+        return m_nChars;
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+CImage * CImage::IsValid(PDETOUR_BINARY pBinary)
+{
+    // Converts an opaque PDETOUR_BINARY back into its CImage, accepting it
+    // only if the signature stamped by the constructor is still in place.
+    // Anything else yields NULL with ERROR_INVALID_HANDLE.
+    CImage *pImage = (CImage *)pBinary;
+    if (pImage == NULL || pImage->m_dwValidSignature != DETOUR_IMAGE_VALID_SIGNATURE) {
+        SetLastError(ERROR_INVALID_HANDLE);
+        return NULL;
+    }
+    return pImage;
+}
+
+CImage::CImage()
+    // Stamps the handle signature, clears every resource pointer that
+    // Close() looks at, and zeroes a few offsets and counters.
+    : m_dwValidSignature((DWORD)DETOUR_IMAGE_VALID_SIGNATURE),
+      m_pImageData(NULL),
+      m_hMap(NULL),
+      m_pMap(NULL),
+      m_nPeOffset(0),
+      m_nSectionsOffset(0),
+      m_pbOutputBuffer(NULL),
+      m_cbOutputBuffer(0),
+      m_pImportFiles(NULL),
+      m_nImportFiles(0),
+      m_fHadDetourSection(FALSE)
+{
+    // The initializer list follows the members' declaration order.
+    // Members that are not listed (header copies, the remaining offsets
+    // and sizes, the output address bookkeeping) are left untouched here,
+    // exactly as before.
+}
+
+CImage::~CImage()
+{
+    Close();                    // release the import list, payload data, file mapping and output buffer
+    m_dwValidSignature = 0;     // a stale pointer to this object no longer passes IsValid()
+}
+
+BOOL CImage::Close()
+{
+    // Releases everything the image holds -- import list, detour payload
+    // records, the file view and its mapping handle, and the output
+    // buffer -- leaving each member NULL/0. Always returns TRUE.
+    if (m_pImportFiles != NULL) {
+        delete m_pImportFiles;              // the head's destructor frees the chain
+        m_pImportFiles = NULL;
+        m_nImportFiles = 0;
+    }
+    if (m_pImageData != NULL) {
+        delete m_pImageData;
+        m_pImageData = NULL;
+    }
+    // Unmap the view before closing the mapping object behind it.
+    if (m_pMap != NULL) {
+        UnmapViewOfFile(m_pMap);
+        m_pMap = NULL;
+    }
+    if (m_hMap != NULL) {
+        CloseHandle(m_hMap);
+        m_hMap = NULL;
+    }
+    if (m_pbOutputBuffer != NULL) {
+        delete[] m_pbOutputBuffer;
+        m_pbOutputBuffer = NULL;
+        m_cbOutputBuffer = 0;
+    }
+    return TRUE;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+PBYTE CImage::DataEnum(GUID *pGuid, DWORD *pcbData, DWORD *pnIterator)
+{
+    if (m_pImageData != NULL) {         // payload store exists: forward to it
+        return m_pImageData->Enumerate(pGuid, pcbData, pnIterator);
+    }
+    return NULL;                        // no store; the outputs are left untouched
+}
+
+PBYTE CImage::DataFind(REFGUID rguid, DWORD *pcbData)
+{
+    if (m_pImageData != NULL) {         // payload store exists: forward to it
+        return m_pImageData->Find(rguid, pcbData);
+    }
+    return NULL;                        // no store; *pcbData is left untouched
+}
+
+PBYTE CImage::DataSet(REFGUID rguid, PBYTE pbData, DWORD cbData)
+{
+    if (m_pImageData != NULL) {         // payload store exists: forward to it
+        return m_pImageData->Set(rguid, pbData, cbData);
+    }
+    return NULL;                        // no store to add the record to
+}
+
+BOOL CImage::DataDelete(REFGUID rguid)
+{
+    if (m_pImageData != NULL) {         // payload store exists: forward to it
+        return m_pImageData->Delete(rguid);
+    }
+    return FALSE;                       // no store, so nothing could be deleted
+}
+
+BOOL CImage::DataPurge()
+{
+    if (m_pImageData != NULL) {         // payload store exists: forward to it
+        return m_pImageData->Purge();
+    }
+    return TRUE;                        // no store means there is nothing to purge
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+BOOL CImage::SizeOutputBuffer(DWORD cbData)
+{
+    // Grows the output buffer to hold at least cbData bytes (minimum 1024,
+    // rounded up to the file alignment), keeping the existing contents and
+    // zero-filling the new tail. It never shrinks. Returns FALSE with
+    // ERROR_OUTOFMEMORY if the allocation fails.
+    if (m_cbOutputBuffer >= cbData) {
+        return TRUE;
+    }
+    if (cbData < 1024) {
+        cbData = 1024;
+    }
+    cbData = FileAlign(cbData);
+
+    PBYTE pOutput = new NOTHROW BYTE [cbData];
+    if (pOutput == NULL) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return FALSE;
+    }
+    if (m_pbOutputBuffer) {
+        CopyMemory(pOutput, m_pbOutputBuffer, m_cbOutputBuffer);
+        delete[] m_pbOutputBuffer;
+    }
+    ZeroMemory(pOutput + m_cbOutputBuffer, cbData - m_cbOutputBuffer);
+    m_pbOutputBuffer = pOutput;
+    m_cbOutputBuffer = cbData;
+    return TRUE;
+}
+
+PBYTE CImage::AllocateOutput(DWORD cbData, DWORD *pnVirtAddr)
+{
+    // Carves the next cbData bytes (rounded up to a multiple of 8) out of
+    // the output buffer and zeroes them. *pnVirtAddr is always set. On
+    // overflow returns NULL (ERROR_OUTOFMEMORY) with the used size advanced.
+    const DWORD cbRounded = QuadAlign(cbData);
+    const DWORD nOffset = m_nOutputVirtSize;
+
+    *pnVirtAddr = m_nOutputVirtAddr + nOffset;
+    m_nOutputVirtSize = nOffset + cbRounded;
+    if (m_nOutputVirtSize > m_cbOutputBuffer) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return NULL;
+    }
+    PBYTE pbData = m_pbOutputBuffer + nOffset;
+    ZeroMemory(pbData, cbRounded);
+    return pbData;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+DWORD CImage::FileAlign(DWORD nAddr)      // rounds nAddr up using the FileAlignment stored in the NT header copy
+{
+    return Align(nAddr, m_NtHeader.OptionalHeader.FileAlignment);
+}
+
+DWORD CImage::SectionAlign(DWORD nAddr)   // rounds nAddr up using the SectionAlignment stored in the NT header copy
+{
+    return Align(nAddr, m_NtHeader.OptionalHeader.SectionAlignment);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+PVOID CImage::RvaToVa(ULONG_PTR nRva)
+{
+    // Translates an RVA into a pointer inside the mapped file via the section
+    // whose raw data covers it; NULL for RVA 0 or when no section does.
+    if (nRva == 0) {
+        return NULL;
+    }
+    const IMAGE_SECTION_HEADER *pSection = m_SectionHeaders;
+    const IMAGE_SECTION_HEADER *pEnd = pSection + m_NtHeader.FileHeader.NumberOfSections;
+    for (; pSection != pEnd; pSection++) {
+        DWORD rvaFirst = pSection->VirtualAddress;
+        DWORD rvaLimit = rvaFirst + pSection->SizeOfRawData;
+        if (rvaFirst <= nRva && nRva < rvaLimit) {
+            return m_pMap + pSection->PointerToRawData + (nRva - rvaFirst);
+        }
+    }
+    return NULL;
+}
+
+DWORD CImage::RvaToFileOffset(DWORD nRva)
+{
+    // Maps an RVA to its offset in the file via the section whose raw data
+    // covers it; yields 0 when no section does.
+    const DWORD nSections = m_NtHeader.FileHeader.NumberOfSections;
+    for (DWORD i = 0; i < nSections; i++) {
+        const IMAGE_SECTION_HEADER &section = m_SectionHeaders[i];
+        DWORD rvaFirst = section.VirtualAddress;
+        if (nRva >= rvaFirst && nRva < rvaFirst + section.SizeOfRawData) {
+            return section.PointerToRawData + (nRva - rvaFirst);
+        }
+    }
+    return 0;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+BOOL CImage::WriteFile(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,
+                       LPDWORD lpNumberOfBytesWritten)
+{
+    // Forwards to the global Win32 ::WriteFile; the final argument is the
+    // OVERLAPPED pointer, which is always NULL here.
+    LPOVERLAPPED const pNoOverlapped = NULL;
+    return ::WriteFile(hFile, lpBuffer, nNumberOfBytesToWrite,
+                       lpNumberOfBytesWritten, pNoOverlapped);
+}
+
+
+BOOL CImage::CopyFileData(HANDLE hFile, DWORD nOldPos, DWORD cbData)
+{
+    DWORD cbDone = 0;   // receives the byte count from WriteFile; not inspected afterwards
+    return WriteFile(hFile, m_pMap + nOldPos, cbData, &cbDone);     // write cbData bytes of the mapped input, starting at nOldPos
+}
+
+BOOL CImage::ZeroFileData(HANDLE hFile, DWORD cbData)
+{
+    // Writes cbData zero bytes at the file's current position, using the
+    // first cbChunk bytes of the output buffer (zeroed here) as the source.
+    // Returns FALSE if the buffer cannot be sized or a write fails.
+    const DWORD cbChunk = 4096;
+    if (!SizeOutputBuffer(cbChunk)) {
+        return FALSE;
+    }
+    ZeroMemory(m_pbOutputBuffer, cbChunk);
+    for (DWORD cbLeft = cbData; cbLeft > 0;) {
+        // sizeof(m_pbOutputBuffer) would be the pointer's size, not the chunk's.
+        DWORD cbStep = cbLeft > cbChunk ? cbChunk : cbLeft;
+        DWORD cbDone = 0;
+        if (!WriteFile(hFile, m_pbOutputBuffer, cbStep, &cbDone)) {
+            return FALSE;
+        }
+        if (cbDone == 0) {
+            break;
+        }
+        cbLeft -= cbDone;
+    }
+    return TRUE;
+}
+
+BOOL CImage::AlignFileData(HANDLE hFile)
+{
+    // Rounds the next file/virtual write addresses up to the image's file
+    // and section alignment. With a real file handle, the gap opened up in
+    // the file is filled with zeros starting at the old file address.
+    const DWORD nGapStart = m_nNextFileAddr;
+    m_nNextFileAddr = FileAlign(nGapStart);
+    m_nNextVirtAddr = SectionAlign(m_nNextVirtAddr);
+
+    if (hFile == INVALID_HANDLE_VALUE || m_nNextFileAddr <= nGapStart) {
+        return TRUE;
+    }
+    if (SetFilePointer(hFile, nGapStart, NULL, FILE_BEGIN) == ~0u) {
+        return FALSE;
+    }
+    return ZeroFileData(hFile, m_nNextFileAddr - nGapStart);
+}
+
+BOOL CImage::Read(HANDLE hFile)
+{
+    // Maps hFile read-only and parses it as a PE image: DOS and NT headers,
+    // section table, the import directory (as a list of CImageImportFile
+    // entries) and, when present, the .detour section, whose saved values
+    // are used to restore the original header fields. Returns FALSE on
+    // failure; whatever was acquired up to that point stays owned by the
+    // object and is released by Close().
+    DWORD n;
+    PBYTE pbData = NULL;
+    DWORD cbData = 0;
+
+    if (hFile == INVALID_HANDLE_VALUE) {
+        SetLastError(ERROR_INVALID_HANDLE);
+        return FALSE;
+    }
+
+    ///////////////////////////////////////////////////////// Create mapping.
+    //
+    m_nFileSize = GetFileSize(hFile, NULL);
+    if (m_nFileSize == (DWORD)-1) {
+        return FALSE;
+    }
+
+    m_hMap = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (m_hMap == NULL) {
+        return FALSE;
+    }
+
+    m_pMap = (PBYTE)MapViewOfFileEx(m_hMap, FILE_MAP_READ, 0, 0, 0, NULL);
+    if (m_pMap == NULL) {
+        return FALSE;
+    }
+
+    ////////////////////////////////////////////////////// Process DOS Header.
+    //
+    PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)m_pMap;
+    if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+        SetLastError(ERROR_BAD_EXE_FORMAT);
+        return FALSE;
+    }
+    m_nPeOffset = pDosHeader->e_lfanew;
+    m_nPrePE = 0;
+    m_cbPrePE = QuadAlign(pDosHeader->e_lfanew);
+
+    if (m_nPeOffset > m_nFileSize ||
+        m_nPeOffset + sizeof(m_NtHeader) > m_nFileSize) {
+
+        SetLastError(ERROR_BAD_EXE_FORMAT);
+        return FALSE;
+    }
+
+    CopyMemory(&m_DosHeader, m_pMap + m_nPrePE, sizeof(m_DosHeader));
+
+    /////////////////////////////////////////////////////// Process PE Header.
+    //
+    CopyMemory(&m_NtHeader, m_pMap + m_nPeOffset, sizeof(m_NtHeader));
+    if (m_NtHeader.Signature != IMAGE_NT_SIGNATURE) {
+        SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+        return FALSE;
+    }
+    if (m_NtHeader.FileHeader.SizeOfOptionalHeader == 0) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);
+        return FALSE;
+    }
+    m_nSectionsOffset = m_nPeOffset
+        + sizeof(m_NtHeader.Signature)
+        + sizeof(m_NtHeader.FileHeader)
+        + m_NtHeader.FileHeader.SizeOfOptionalHeader;
+
+    ///////////////////////////////////////////////// Process Section Headers.
+    //
+    if (m_NtHeader.FileHeader.NumberOfSections > ARRAYSIZE(m_SectionHeaders)) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);
+        return FALSE;
+    }
+    CopyMemory(&m_SectionHeaders,
+               m_pMap + m_nSectionsOffset,
+               sizeof(m_SectionHeaders[0]) * m_NtHeader.FileHeader.NumberOfSections);
+
+    /////////////////////////////////////////////////// Parse .detour Section.
+    //
+    DWORD rvaOriginalImageDirectory = 0;
+    DWORD rvaDetourBeg = 0;
+    DWORD rvaDetourEnd = 0;
+
+    _Analysis_assume_(m_NtHeader.FileHeader.NumberOfSections <= ARRAYSIZE(m_SectionHeaders));
+
+    for (n = 0; n < m_NtHeader.FileHeader.NumberOfSections; n++) {
+        if (strcmp((PCHAR)m_SectionHeaders[n].Name, ".detour") == 0) {
+            DETOUR_SECTION_HEADER dh;
+            CopyMemory(&dh,
+                       m_pMap + m_SectionHeaders[n].PointerToRawData,
+                       sizeof(dh));
+
+            rvaOriginalImageDirectory = dh.nOriginalImportVirtualAddress;
+            if (dh.cbPrePE != 0) {
+                m_nPrePE = m_SectionHeaders[n].PointerToRawData + sizeof(dh);
+                m_cbPrePE = dh.cbPrePE;
+            }
+            rvaDetourBeg = m_SectionHeaders[n].VirtualAddress;
+            rvaDetourEnd = rvaDetourBeg + m_SectionHeaders[n].SizeOfRawData;
+        }
+    }
+
+    //////////////////////////////////////////////////////// Get Import Table.
+    //
+    DWORD rvaImageDirectory = m_NtHeader.OptionalHeader
+        .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress;
+    PIMAGE_IMPORT_DESCRIPTOR iidp
+        = (PIMAGE_IMPORT_DESCRIPTOR)RvaToVa(rvaImageDirectory);
+    PIMAGE_IMPORT_DESCRIPTOR oidp
+        = (PIMAGE_IMPORT_DESCRIPTOR)RvaToVa(rvaOriginalImageDirectory);
+
+    if (oidp == NULL) {
+        oidp = iidp;
+    }
+    if (iidp == NULL || oidp == NULL) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);
+        return FALSE;
+    }
+
+    DWORD nFiles = 0;
+    for (; iidp[nFiles].OriginalFirstThunk != 0 || iidp[nFiles].FirstThunk != 0; nFiles++) {
+    }
+
+    CImageImportFile **ppLastFile = &m_pImportFiles;
+    m_pImportFiles = NULL;
+
+    for (n = 0; n < nFiles; n++, iidp++) {
+        ULONG_PTR rvaName = iidp->Name;
+        PCHAR pszName = (PCHAR)RvaToVa(rvaName);
+        if (pszName == NULL) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            goto fail;
+        }
+
+        CImageImportFile *pImportFile = new NOTHROW CImageImportFile;
+        if (pImportFile == NULL) {
+            SetLastError(ERROR_OUTOFMEMORY);
+            goto fail;
+        }
+
+        *ppLastFile = pImportFile;
+        ppLastFile = &pImportFile->m_pNextFile;
+        m_nImportFiles++;
+
+        pImportFile->m_pszName = DuplicateString(pszName);
+        if (pImportFile->m_pszName == NULL) {
+            goto fail;
+        }
+
+        pImportFile->m_rvaOriginalFirstThunk = iidp->OriginalFirstThunk;
+        pImportFile->m_rvaFirstThunk = iidp->FirstThunk;
+        pImportFile->m_nForwarderChain = iidp->ForwarderChain;
+
+        if ((ULONG)iidp->FirstThunk >= rvaDetourBeg &&
+            (ULONG)iidp->FirstThunk < rvaDetourEnd) {
+
+            pImportFile->m_fByway = TRUE;
+            continue;
+        }
+
+        rvaName = oidp->Name;
+        pszName = (PCHAR)RvaToVa(rvaName);
+        if (pszName == NULL) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            goto fail;
+        }
+        pImportFile->m_pszOrig = DuplicateString(pszName);
+        if (pImportFile->m_pszOrig == NULL) {
+            goto fail;
+        }
+
+        DWORD rvaThunk = iidp->OriginalFirstThunk;
+        if( !rvaThunk ) {
+            rvaThunk = iidp->FirstThunk;
+        }
+        PIMAGE_THUNK_DATA pAddrThunk = (PIMAGE_THUNK_DATA)RvaToVa(rvaThunk);
+        rvaThunk = oidp->OriginalFirstThunk;
+        if( !rvaThunk ) {
+            rvaThunk = oidp->FirstThunk;
+        }
+        PIMAGE_THUNK_DATA pLookThunk = (PIMAGE_THUNK_DATA)RvaToVa(rvaThunk);
+
+        DWORD nNames = 0;
+        if (pAddrThunk) {
+            for (; pAddrThunk[nNames].u1.Ordinal; nNames++) {
+            }
+        }
+
+        if (pAddrThunk && nNames) {
+            pImportFile->m_nImportNames = nNames;
+            pImportFile->m_pImportNames = new NOTHROW CImageImportName [nNames];
+            if (pImportFile->m_pImportNames == NULL) {
+                SetLastError(ERROR_OUTOFMEMORY);
+                goto fail;
+            }
+
+            CImageImportName *pImportName = &pImportFile->m_pImportNames[0];
+
+            for (DWORD f = 0; f < nNames; f++, pImportName++) {
+                // Each entry starts zeroed/NULL (set by the CImageImportName constructor).
+
+                rvaName = pAddrThunk[f].u1.Ordinal;
+                if (rvaName & IMAGE_ORDINAL_FLAG) {
+                    pImportName->m_nOrig = (ULONG)IMAGE_ORDINAL(rvaName);
+                    pImportName->m_nOrdinal = pImportName->m_nOrig;
+                }
+                else {
+                    PIMAGE_IMPORT_BY_NAME pName
+                        = (PIMAGE_IMPORT_BY_NAME)RvaToVa(rvaName);
+                    if (pName) {
+                        pImportName->m_nHint = pName->Hint;
+                        pImportName->m_pszName = DuplicateString((PCHAR)pName->Name);
+                        if (pImportName->m_pszName == NULL) {
+                            goto fail;
+                        }
+                    }
+
+                    // The original thunk table must be mapped before it is read.
+                    if (pLookThunk == NULL) {
+                        SetLastError(ERROR_EXE_MARKED_INVALID);
+                        goto fail;
+                    }
+                    rvaName = pLookThunk[f].u1.Ordinal;
+                    if (rvaName & IMAGE_ORDINAL_FLAG) {
+                        pImportName->m_nOrig = (ULONG)IMAGE_ORDINAL(rvaName);
+                        pImportName->m_nOrdinal = (ULONG)IMAGE_ORDINAL(rvaName);
+                    }
+                    else {
+                        pName = (PIMAGE_IMPORT_BY_NAME)RvaToVa(rvaName);
+                        if (pName) {
+                            pImportName->m_pszOrig
+                                = DuplicateString((PCHAR)pName->Name);
+                            if (pImportName->m_pszOrig == NULL) {
+                                goto fail;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        oidp++;
+    }
+
+    ////////////////////////////////////////////////////////// Parse Sections.
+    //
+    m_nExtraOffset = 0;
+    for (n = 0; n < m_NtHeader.FileHeader.NumberOfSections; n++) {
+        m_nExtraOffset = Max(m_SectionHeaders[n].PointerToRawData +
+                             m_SectionHeaders[n].SizeOfRawData,
+                             m_nExtraOffset);
+
+        if (strcmp((PCHAR)m_SectionHeaders[n].Name, ".detour") == 0) {
+            DETOUR_SECTION_HEADER dh;
+            CopyMemory(&dh,
+                       m_pMap + m_SectionHeaders[n].PointerToRawData,
+                       sizeof(dh));
+
+            if (dh.nDataOffset == 0) {
+                dh.nDataOffset = dh.cbHeaderSize;
+            }
+
+            cbData = dh.cbDataSize - dh.nDataOffset;
+            pbData = (m_pMap +
+                      m_SectionHeaders[n].PointerToRawData +
+                      dh.nDataOffset);
+
+            m_NtHeader.FileHeader.NumberOfSections--;
+
+            m_NtHeader.OptionalHeader
+                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress
+                = dh.nOriginalImportVirtualAddress;
+            m_NtHeader.OptionalHeader
+                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size
+                = dh.nOriginalImportSize;
+
+            m_NtHeader.OptionalHeader
+                .DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT].VirtualAddress
+                = dh.nOriginalBoundImportVirtualAddress;
+            m_NtHeader.OptionalHeader
+                .DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT].Size
+                = dh.nOriginalBoundImportSize;
+
+            m_NtHeader.OptionalHeader
+                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress
+                = dh.nOriginalIatVirtualAddress;
+            m_NtHeader.OptionalHeader
+                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size
+                = dh.nOriginalIatSize;
+
+            m_NtHeader.OptionalHeader.CheckSum = 0;
+            m_NtHeader.OptionalHeader.SizeOfImage
+                = dh.nOriginalSizeOfImage;
+
+            m_fHadDetourSection = TRUE;
+        }
+    }
+
+    m_pImageData = new NOTHROW CImageData(pbData, cbData);
+    if (m_pImageData == NULL) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return FALSE;
+    }
+    return TRUE;
+
+fail:
+    return FALSE;
+}
+
+static inline BOOL strneq(_In_ LPCSTR pszOne, _In_ LPCSTR pszTwo)
+{
+    // "Strings not equal": TRUE when the two differ. Identical pointers
+    // (including two NULLs) compare equal; a lone NULL differs from
+    // any string.
+    if (pszOne != pszTwo) {
+        return (!pszOne || !pszTwo || strcmp(pszOne, pszTwo) != 0);
+    }
+    return FALSE;
+}
+
+BOOL CImage::CheckImportsNeeded(DWORD *pnTables, DWORD *pnThunks, DWORD *pnChars)
+{
+    // Tallies the import list:
+    //   *pnTables - one per import file, plus one extra
+    //   *pnThunks - per file: one per import name (one for a byway), plus one extra
+    //   *pnChars  - bytes for the file and symbol names (terminator, 2-byte
+    //               hint for symbols), keeping the running total even
+    // Returns TRUE when any file is a byway or any file/symbol name differs
+    // from its original (compared with strneq).
+    DWORD cTables = 0;
+    DWORD cThunks = 0;
+    DWORD cbChars = 0;
+    BOOL fNeedDetourSection = FALSE;
+
+    CImageImportFile *pFile = m_pImportFiles;
+    while (pFile != NULL) {
+        cbChars += (int)strlen(pFile->m_pszName) + 1;
+        cbChars += cbChars & 1;
+
+        if (pFile->m_fByway) {
+            fNeedDetourSection = TRUE;
+            cThunks++;
+        }
+        else {
+            fNeedDetourSection = fNeedDetourSection ||
+                strneq(pFile->m_pszName, pFile->m_pszOrig);
+
+            for (DWORD i = 0; i < pFile->m_nImportNames; i++) {
+                const CImageImportName &name = pFile->m_pImportNames[i];
+                fNeedDetourSection = fNeedDetourSection ||
+                    strneq(name.m_pszName, name.m_pszOrig);
+
+                if (name.m_pszName != NULL) {
+                    cbChars += sizeof(WORD);            // Hint
+                    cbChars += (int)strlen(name.m_pszName) + 1;
+                    cbChars += cbChars & 1;
+                }
+                cThunks++;
+            }
+        }
+        cThunks++;
+        cTables++;
+        pFile = pFile->m_pNextFile;
+    }
+    cTables++;
+
+    *pnTables = cTables;
+    *pnThunks = cThunks;
+    *pnChars = cbChars;
+    return fNeedDetourSection;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Creates an unlinked byway entry named pszName and counts it in
+// m_nImportFiles; the caller splices it into the import list.
+// Returns NULL if the entry or the copy of its name cannot be allocated.
+CImageImportFile * CImage::NewByway(_In_ LPCSTR pszName)
+{
+    CImageImportFile *pByway = new NOTHROW CImageImportFile;
+    if (pByway == NULL) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return NULL;
+    }
+
+    pByway->m_fByway = TRUE;
+    pByway->m_pNextFile = NULL;
+
+    // Store a duplicate of the caller's string.
+    pByway->m_pszName = DuplicateString(pszName);
+    if (pByway->m_pszName == NULL) {
+        delete pByway;                          // Discard the half-built entry.
+        return NULL;
+    }
+
+    // A new byway starts with no per-symbol import list.
+    pByway->m_nImportNames = 0;
+    pByway->m_pImportNames = NULL;
+    pByway->m_nForwarderChain = (UINT)0;
+    pByway->m_rvaFirstThunk = 0;
+    pByway->m_rvaOriginalFirstThunk = 0;
+
+    // Only a fully built entry is counted.
+    m_nImportFiles++;
+    return pByway;
+}
+
+// Walks the import list, letting pfBywayCallback insert/rename/delete byways,
+// pfFileCallback rename files and pfSymbolCallback rename or re-ordinal symbols.
+// Returns FALSE as soon as a callback or an allocation fails, else TRUE.
+BOOL CImage::EditImports(PVOID pContext,
+                         PF_DETOUR_BINARY_BYWAY_CALLBACK pfBywayCallback,
+                         PF_DETOUR_BINARY_FILE_CALLBACK pfFileCallback,
+                         PF_DETOUR_BINARY_SYMBOL_CALLBACK pfSymbolCallback,
+                         PF_DETOUR_BINARY_COMMIT_CALLBACK pfCommitCallback)
+{
+    CImageImportFile *pImportFile = NULL;
+    CImageImportFile **ppLastFile = &m_pImportFiles;
+
+    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);   // Provisional; NO_ERROR is set on success.
+
+    while ((pImportFile = *ppLastFile) != NULL) {
+        // A NULL name asks whether a byway should go in front of this entry.
+        if (pfBywayCallback != NULL) {
+            LPCSTR pszFile = NULL;
+            if (!(*pfBywayCallback)(pContext, NULL, &pszFile)) {
+                goto fail;
+            }
+
+            if (pszFile != NULL) {
+                // Insert a new Byway.
+                CImageImportFile *pByway = NewByway(pszFile);
+                if (pByway == NULL) {
+                    return FALSE;
+                }
+
+                pByway->m_pNextFile = pImportFile;
+                *ppLastFile = pByway;
+                ppLastFile = &pByway->m_pNextFile;
+                continue;                               // Retry after Byway.
+            }
+        }
+        // No insertion requested: edit the current entry itself.
+        if (pImportFile->m_fByway) {
+            if (pfBywayCallback != NULL) {
+                LPCSTR pszFile = NULL;
+                if (!(*pfBywayCallback)(pContext, pImportFile->m_pszName, &pszFile)) {
+                    goto fail;
+                }
+
+                if (pszFile != NULL) {
+                    // Replace? Byway
+                    if (pszFile != pImportFile->m_pszName) {    // Same pointer back: keep the name.
+                        LPCSTR pszLast = pImportFile->m_pszName;
+                        pImportFile->m_pszName = DuplicateString(pszFile);
+                        ReleaseString(pszLast);
+
+                        if (pImportFile->m_pszName == NULL) {
+                            goto fail;
+                        }
+                    }
+                }
+                else {                                  // Delete Byway
+                    *ppLastFile = pImportFile->m_pNextFile;
+                    pImportFile->m_pNextFile = NULL;
+                    delete pImportFile;
+                    m_nImportFiles--;
+                    continue;                           // Retry after delete.
+                }
+            }
+        }
+        else {
+            if (pfFileCallback != NULL) {
+                LPCSTR pszFile = NULL;
+                if (!(*pfFileCallback)(pContext,
+                                       pImportFile->m_pszOrig,
+                                       pImportFile->m_pszName,
+                                       &pszFile)) {
+                    goto fail;
+                }
+
+                if (pszFile != NULL) {
+                    if (pszFile != pImportFile->m_pszName) {    // Same pointer back: keep the name.
+                        LPCSTR pszLast = pImportFile->m_pszName;
+                        pImportFile->m_pszName = DuplicateString(pszFile);
+                        ReleaseString(pszLast);
+
+                        if (pImportFile->m_pszName == NULL) {
+                            goto fail;
+                        }
+                    }
+                }
+            }
+
+            if (pfSymbolCallback != NULL) {
+                for (DWORD n = 0; n < pImportFile->m_nImportNames; n++) {
+                    CImageImportName *pImportName = &pImportFile->m_pImportNames[n];
+
+                    LPCSTR pszName = NULL;
+                    ULONG nOrdinal = 0;
+                    if (!(*pfSymbolCallback)(pContext,
+                                             pImportName->m_nOrig,
+                                             pImportName->m_nOrdinal,
+                                             &nOrdinal,
+                                             pImportName->m_pszOrig,
+                                             pImportName->m_pszName,
+                                             &pszName)) {
+                        goto fail;
+                    }
+
+                    if (pszName != NULL) {
+                        if (pszName != pImportName->m_pszName) {
+                            pImportName->m_nOrdinal = 0;        // Renamed: clear the ordinal.
+
+                            LPCSTR pszLast = pImportName->m_pszName;
+                            pImportName->m_pszName = DuplicateString(pszName);
+                            ReleaseString(pszLast);
+
+                            if (pImportName->m_pszName == NULL) {
+                                goto fail;
+                            }
+                        }
+                    }
+                    else if (nOrdinal != 0) {           // No name returned: import by ordinal.
+                        pImportName->m_nOrdinal = nOrdinal;
+                        if (pImportName->m_pszName != NULL) {
+                            delete[] pImportName->m_pszName;
+                            pImportName->m_pszName = NULL;
+                        }
+                    }
+                }
+            }
+        }
+
+        ppLastFile = &pImportFile->m_pNextFile;
+        pImportFile = pImportFile->m_pNextFile;     // Reassigned by the loop condition.
+    }
+
+    for (;;) {                                      // Offer byways to append after the last entry.
+        if (pfBywayCallback != NULL) {
+            LPCSTR pszFile = NULL;
+            if (!(*pfBywayCallback)(pContext, NULL, &pszFile)) {
+                goto fail;
+            }
+            if (pszFile != NULL) {
+                // Insert a new Byway.
+                CImageImportFile *pByway = NewByway(pszFile);
+                if (pByway == NULL) {
+                    return FALSE;
+                }
+
+                pByway->m_pNextFile = pImportFile;
+                *ppLastFile = pByway;
+                ppLastFile = &pByway->m_pNextFile;
+                continue;                               // Retry after Byway.
+            }
+        }
+        break;
+    }
+
+    if (pfCommitCallback != NULL) {                 // Last callback; a FALSE result fails the edit.
+        if (!(*pfCommitCallback)(pContext)) {
+            goto fail;
+        }
+    }
+
+    SetLastError(NO_ERROR);
+    return TRUE;
+
+  fail:
+    return FALSE;
+}
+
+// Writes the image to hFile: headers, the existing sections, an optional
+// .detour section (payload data plus rebuilt import tables), then left-over
+// file data and the final NT/section headers. Returns FALSE on any failure.
+BOOL CImage::Write(HANDLE hFile)
+{
+    DWORD cbDone;
+
+    if (hFile == INVALID_HANDLE_VALUE) {
+        SetLastError(ERROR_INVALID_HANDLE);
+        return FALSE;
+    }
+
+    m_nNextFileAddr = 0;
+    m_nNextVirtAddr = 0;
+
+    DWORD nTables = 0;
+    DWORD nThunks = 0;
+    DWORD nChars = 0;
+    BOOL fNeedDetourSection = CheckImportsNeeded(&nTables, &nThunks, &nChars);
+
+    //////////////////////////////////////////////////////////// Copy Headers.
+    //
+    if (SetFilePointer(hFile, 0, NULL, FILE_BEGIN) == ~0u) {
+        return FALSE;
+    }
+    if (!CopyFileData(hFile, 0, m_NtHeader.OptionalHeader.SizeOfHeaders)) {
+        return FALSE;
+    }
+
+    if (fNeedDetourSection || !m_pImageData->IsEmpty()) {
+        // Replace the file's DOS header with our own.
+        m_nPeOffset = sizeof(m_DosHeader) + sizeof(s_rbDosCode);
+        m_nSectionsOffset = m_nPeOffset
+            + sizeof(m_NtHeader.Signature)
+            + sizeof(m_NtHeader.FileHeader)
+            + m_NtHeader.FileHeader.SizeOfOptionalHeader;
+        m_DosHeader.e_lfanew = m_nPeOffset;
+
+        if (SetFilePointer(hFile, 0, NULL, FILE_BEGIN) == ~0u) {
+            return FALSE;
+        }
+        if (!WriteFile(hFile, &m_DosHeader, sizeof(m_DosHeader), &cbDone)) {
+            return FALSE;
+        }
+        if (!WriteFile(hFile, &s_rbDosCode, sizeof(s_rbDosCode), &cbDone)) {
+            return FALSE;
+        }
+    }
+    else {
+        // Restore the file's original DOS header.
+        if (m_nPrePE != 0) {
+            m_nPeOffset = m_cbPrePE;
+            m_nSectionsOffset = m_nPeOffset
+                + sizeof(m_NtHeader.Signature)
+                + sizeof(m_NtHeader.FileHeader)
+                + m_NtHeader.FileHeader.SizeOfOptionalHeader;
+            m_DosHeader.e_lfanew = m_nPeOffset;
+
+            if (SetFilePointer(hFile, 0, NULL, FILE_BEGIN) == ~0u) {
+                return FALSE;
+            }
+            if (!CopyFileData(hFile, m_nPrePE, m_cbPrePE)) {
+                return FALSE;
+            }
+        }
+    }
+
+    m_nNextFileAddr = m_NtHeader.OptionalHeader.SizeOfHeaders;
+    m_nNextVirtAddr = 0;
+    if (!AlignFileData(hFile)) {
+        return FALSE;
+    }
+
+    /////////////////////////////////////////////////////////// Copy Sections.
+    //
+    DWORD n = 0;
+    for (; n < m_NtHeader.FileHeader.NumberOfSections; n++) {
+        if (m_SectionHeaders[n].SizeOfRawData) {
+            if (SetFilePointer(hFile,
+                               m_SectionHeaders[n].PointerToRawData,
+                               NULL, FILE_BEGIN) == ~0u) {
+                return FALSE;
+            }
+            if (!CopyFileData(hFile,
+                              m_SectionHeaders[n].PointerToRawData,
+                              m_SectionHeaders[n].SizeOfRawData)) {
+                return FALSE;
+            }
+        }
+        m_nNextFileAddr = Max(m_SectionHeaders[n].PointerToRawData +
+                              m_SectionHeaders[n].SizeOfRawData,
+                              m_nNextFileAddr);
+        // Old images have VirtualSize == 0 as a matter of course, e.g. NT 3.1.
+        // In which case, use SizeOfRawData instead.
+        m_nNextVirtAddr = Max(m_SectionHeaders[n].VirtualAddress +
+                              (m_SectionHeaders[n].Misc.VirtualSize
+                               ? m_SectionHeaders[n].Misc.VirtualSize
+                               : SectionAlign(m_SectionHeaders[n].SizeOfRawData)),
+                              m_nNextVirtAddr);
+
+        m_nExtraOffset = Max(m_nNextFileAddr, m_nExtraOffset);
+
+        if (!AlignFileData(hFile)) {
+            return FALSE;
+        }
+    }
+
+    if (fNeedDetourSection || !m_pImageData->IsEmpty()) {
+        if (m_NtHeader.FileHeader.NumberOfSections >= ARRAYSIZE(m_SectionHeaders)) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return FALSE;
+        }
+
+        ////////////////////////////////////////////// Insert .detour Section.
+        //
+        DWORD nSection = m_NtHeader.FileHeader.NumberOfSections++;
+        DETOUR_SECTION_HEADER dh;
+
+        ZeroMemory(&dh, sizeof(dh));
+        ZeroMemory(&m_SectionHeaders[nSection], sizeof(m_SectionHeaders[nSection]));
+
+        dh.cbHeaderSize = sizeof(DETOUR_SECTION_HEADER);
+        dh.nSignature = DETOUR_SECTION_HEADER_SIGNATURE;
+
+        dh.nOriginalImportVirtualAddress = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress;
+        dh.nOriginalImportSize = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size;
+
+        dh.nOriginalBoundImportVirtualAddress
+            = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT].VirtualAddress;
+        dh.nOriginalBoundImportSize = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT].Size;
+
+        dh.nOriginalIatVirtualAddress = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress;
+        dh.nOriginalIatSize = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size;
+
+        dh.nOriginalSizeOfImage = m_NtHeader.OptionalHeader.SizeOfImage;
+
+        DWORD clrAddr = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR].VirtualAddress;
+        DWORD clrSize = m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR].Size;
+        if (clrAddr && clrSize) {
+            PDETOUR_CLR_HEADER pHdr = (PDETOUR_CLR_HEADER)RvaToVa(clrAddr);
+            if (pHdr != NULL) {
+                DETOUR_CLR_HEADER hdr;
+                hdr = *pHdr;
+
+                dh.nOriginalClrFlags = hdr.Flags;
+            }
+        }
+
+        HRESULT hrRet = StringCchCopyA((PCHAR)m_SectionHeaders[nSection].Name, IMAGE_SIZEOF_SHORT_NAME , ".detour");
+        if (FAILED(hrRet))
+            return FALSE;
+
+        m_SectionHeaders[nSection].Characteristics
+            = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE;
+
+        m_nOutputVirtAddr = m_nNextVirtAddr;
+        m_nOutputVirtSize = 0;
+        m_nOutputFileAddr = m_nNextFileAddr;
+
+        dh.nDataOffset = 0;                     // pbData
+        dh.cbDataSize = m_pImageData->m_cbData;
+        dh.cbPrePE = m_cbPrePE;
+
+        //////////////////////////////////////////////////////////////////////////
+        //
+
+        DWORD rvaImportTable = 0;
+        DWORD rvaLookupTable = 0;
+        DWORD rvaBoundTable = 0;
+        DWORD rvaNameTable = 0;
+        DWORD nImportTableSize = nTables * sizeof(IMAGE_IMPORT_DESCRIPTOR);
+
+        if (!SizeOutputBuffer(QuadAlign(sizeof(dh))
+                              + m_cbPrePE
+                              + QuadAlign(m_pImageData->m_cbData)
+                              + QuadAlign(sizeof(IMAGE_THUNK_DATA) * nThunks)
+                              + QuadAlign(sizeof(IMAGE_THUNK_DATA) * nThunks)
+                              + QuadAlign(nChars)
+                              + QuadAlign(nImportTableSize))) {
+            return FALSE;
+        }
+
+        DWORD vaHead = 0;
+        PBYTE pbHead = NULL;
+        DWORD vaPrePE = 0;
+        PBYTE pbPrePE = NULL;
+        DWORD vaData = 0;
+        PBYTE pbData = NULL;
+
+        if ((pbHead = AllocateOutput(sizeof(dh), &vaHead)) == NULL) {
+            return FALSE;
+        }
+
+        if ((pbPrePE = AllocateOutput(m_cbPrePE, &vaPrePE)) == NULL) {
+            return FALSE;
+        }
+
+        CImageThunks lookupTable(this, nThunks, &rvaLookupTable);
+        CImageThunks boundTable(this, nThunks, &rvaBoundTable);
+        CImageChars nameTable(this, nChars, &rvaNameTable);
+
+        if ((pbData = AllocateOutput(m_pImageData->m_cbData, &vaData)) == NULL) {
+            return FALSE;
+        }
+
+        dh.nDataOffset = vaData - vaHead;
+        dh.cbDataSize = dh.nDataOffset + m_pImageData->m_cbData;
+        CopyMemory(pbHead, &dh, sizeof(dh));
+        CopyMemory(pbPrePE, m_pMap + m_nPrePE, m_cbPrePE);
+        CopyMemory(pbData, m_pImageData->m_pbData, m_pImageData->m_cbData);
+
+        PIMAGE_IMPORT_DESCRIPTOR piidDst = (PIMAGE_IMPORT_DESCRIPTOR)
+            AllocateOutput(nImportTableSize, &rvaImportTable);
+        if (piidDst == NULL) {
+            return FALSE;
+        }
+
+        //////////////////////////////////////////////// Step Through Imports.
+        //
+        for (CImageImportFile *pImportFile = m_pImportFiles;
+             pImportFile != NULL; pImportFile = pImportFile->m_pNextFile) {
+
+            ZeroMemory(piidDst, sizeof(*piidDst));      // Whole descriptor, not pointer size.
+            nameTable.Allocate(pImportFile->m_pszName, (DWORD *)&piidDst->Name);
+            piidDst->TimeDateStamp = 0;
+            piidDst->ForwarderChain = pImportFile->m_nForwarderChain;
+
+            if (pImportFile->m_fByway) {
+                ULONG rvaIgnored;
+
+                lookupTable.Allocate(IMAGE_ORDINAL_FLAG+1,
+                                     (DWORD *)&piidDst->OriginalFirstThunk);
+                boundTable.Allocate(IMAGE_ORDINAL_FLAG+1,
+                                    (DWORD *)&piidDst->FirstThunk);
+
+                lookupTable.Allocate(0, &rvaIgnored);
+                boundTable.Allocate(0, &rvaIgnored);
+            }
+            else {
+                ULONG rvaIgnored;
+
+                piidDst->FirstThunk = (ULONG)pImportFile->m_rvaFirstThunk;
+                lookupTable.Current((DWORD *)&piidDst->OriginalFirstThunk);
+
+                for (n = 0; n < pImportFile->m_nImportNames; n++) {
+                    CImageImportName *pImportName = &pImportFile->m_pImportNames[n];
+
+                    if (pImportName->m_pszName) {
+                        ULONG nDstName = 0;
+
+                        nameTable.Allocate(pImportName->m_pszName,
+                                           pImportName->m_nHint,
+                                           &nDstName);
+                        lookupTable.Allocate(nDstName, &rvaIgnored);
+                    }
+                    else {
+                        lookupTable.Allocate(IMAGE_ORDINAL_FLAG + pImportName->m_nOrdinal,
+                                             &rvaIgnored);
+                    }
+                }
+                lookupTable.Allocate(0, &rvaIgnored);
+            }
+            piidDst++;
+        }
+        ZeroMemory(piidDst, sizeof(*piidDst));          // Fully zeroed terminating descriptor.
+
+        //////////////////////////////////////////////////////////////////////////
+        //
+        m_nNextVirtAddr += m_nOutputVirtSize;
+        m_nNextFileAddr += FileAlign(m_nOutputVirtSize);
+
+        if (!AlignFileData(hFile)) {
+            return FALSE;
+        }
+
+        //////////////////////////////////////////////////////////////////////////
+        //
+        m_SectionHeaders[nSection].VirtualAddress = m_nOutputVirtAddr;
+        m_SectionHeaders[nSection].Misc.VirtualSize = m_nOutputVirtSize;
+        m_SectionHeaders[nSection].PointerToRawData = m_nOutputFileAddr;
+        m_SectionHeaders[nSection].SizeOfRawData = FileAlign(m_nOutputVirtSize);
+
+        m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress
+            = rvaImportTable;
+        m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size
+            = nImportTableSize;
+
+        m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT].VirtualAddress = 0;
+        m_NtHeader.OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT].Size = 0;
+
+        //////////////////////////////////////////////////////////////////////////
+        //
+        if (SetFilePointer(hFile, m_SectionHeaders[nSection].PointerToRawData,
+                           NULL, FILE_BEGIN) == ~0u) {
+            return FALSE;
+        }
+        if (!WriteFile(hFile, m_pbOutputBuffer, m_SectionHeaders[nSection].SizeOfRawData,
+                       &cbDone)) {
+            return FALSE;
+        }
+    }
+
+    ///////////////////////////////////////////////////// Adjust Extra Data.
+    //
+    LONG nExtraAdjust = m_nNextFileAddr - m_nExtraOffset;
+    for (n = 0; n < m_NtHeader.FileHeader.NumberOfSections; n++) {
+        if (m_SectionHeaders[n].PointerToRawData > m_nExtraOffset) {
+            m_SectionHeaders[n].PointerToRawData += nExtraAdjust;
+        }
+        if (m_SectionHeaders[n].PointerToRelocations > m_nExtraOffset) {
+            m_SectionHeaders[n].PointerToRelocations += nExtraAdjust;
+        }
+        if (m_SectionHeaders[n].PointerToLinenumbers > m_nExtraOffset) {
+            m_SectionHeaders[n].PointerToLinenumbers += nExtraAdjust;
+        }
+    }
+    if (m_NtHeader.FileHeader.PointerToSymbolTable > m_nExtraOffset) {
+        m_NtHeader.FileHeader.PointerToSymbolTable += nExtraAdjust;
+    }
+
+    m_NtHeader.OptionalHeader.CheckSum = 0;
+    m_NtHeader.OptionalHeader.SizeOfImage = m_nNextVirtAddr;
+
+    ////////////////////////////////////////////////// Adjust Debug Directory.
+    //
+    DWORD debugAddr = m_NtHeader.OptionalHeader
+        .DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress;
+    DWORD debugSize = m_NtHeader.OptionalHeader
+        .DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].Size;
+    if (debugAddr && debugSize) {
+        DWORD nFileOffset = RvaToFileOffset(debugAddr);
+        if (SetFilePointer(hFile, nFileOffset, NULL, FILE_BEGIN) == ~0u) {
+            return FALSE;
+        }
+
+        PIMAGE_DEBUG_DIRECTORY pDir = (PIMAGE_DEBUG_DIRECTORY)RvaToVa(debugAddr);
+        if (pDir == NULL) {
+            return FALSE;
+        }
+
+        DWORD nEntries = debugSize / sizeof(*pDir);
+        for (n = 0; n < nEntries; n++) {
+            IMAGE_DEBUG_DIRECTORY dir = pDir[n];
+
+            if (dir.PointerToRawData > m_nExtraOffset) {
+                dir.PointerToRawData += nExtraAdjust;
+            }
+            if (!WriteFile(hFile, &dir, sizeof(dir), &cbDone)) {
+                return FALSE;
+            }
+        }
+    }
+
+    /////////////////////////////////////////////////////// Adjust CLR Header.
+    //
+    DWORD clrAddr = m_NtHeader.OptionalHeader
+        .DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR].VirtualAddress;
+    DWORD clrSize = m_NtHeader.OptionalHeader
+        .DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR].Size;
+    if (clrAddr && clrSize && fNeedDetourSection) {
+        DWORD nFileOffset = RvaToFileOffset(clrAddr);
+        if (SetFilePointer(hFile, nFileOffset, NULL, FILE_BEGIN) == ~0u) {
+            return FALSE;
+        }
+
+        PDETOUR_CLR_HEADER pHdr = (PDETOUR_CLR_HEADER)RvaToVa(clrAddr);
+        if (pHdr == NULL) {
+            return FALSE;
+        }
+
+        DETOUR_CLR_HEADER hdr;
+        hdr = *pHdr;
+        hdr.Flags &= 0xfffffffe;    // Clear the IL_ONLY flag.
+
+        if (!WriteFile(hFile, &hdr, sizeof(hdr), &cbDone)) {
+            return FALSE;
+        }
+    }
+
+    ///////////////////////////////////////////////// Copy Left-over Data.
+    //
+    if (m_nFileSize > m_nExtraOffset) {
+        if (SetFilePointer(hFile, m_nNextFileAddr, NULL, FILE_BEGIN) == ~0u) {
+            return FALSE;
+        }
+        if (!CopyFileData(hFile, m_nExtraOffset, m_nFileSize - m_nExtraOffset)) {
+            return FALSE;
+        }
+    }
+
+    //////////////////////////////////////////////////// Finalize Headers.
+    //
+
+    if (SetFilePointer(hFile, m_nPeOffset, NULL, FILE_BEGIN) == ~0u) {
+        return FALSE;
+    }
+    if (!WriteFile(hFile, &m_NtHeader, sizeof(m_NtHeader), &cbDone)) {
+        return FALSE;
+    }
+
+    if (SetFilePointer(hFile, m_nSectionsOffset, NULL, FILE_BEGIN) == ~0u) {
+        return FALSE;
+    }
+    if (!WriteFile(hFile, &m_SectionHeaders,
+                   sizeof(m_SectionHeaders[0])
+                   * m_NtHeader.FileHeader.NumberOfSections,
+                   &cbDone)) {
+        return FALSE;
+    }
+
+    m_cbPostPE = SetFilePointer(hFile, 0, NULL, FILE_CURRENT);
+    if (m_cbPostPE == ~0u) {
+        return FALSE;
+    }
+    m_cbPostPE = m_NtHeader.OptionalHeader.SizeOfHeaders - m_cbPostPE;
+
+    return TRUE;
+}
+
+};                                                      // namespace Detour
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Reads the binary behind hFile into a new CImage and returns it as an
+// opaque handle; returns NULL if allocation or CImage::Read fails.
+PDETOUR_BINARY WINAPI DetourBinaryOpen(_In_ HANDLE hFile)
+{
+    Detour::CImage *pImage = new NOTHROW Detour::CImage;
+    if (pImage == NULL) {
+        SetLastError(ERROR_OUTOFMEMORY);
+        return NULL;                            // Pointer result: NULL, not FALSE.
+    }
+
+    if (!pImage->Read(hFile)) {
+        delete pImage;                          // Don't leak the unread image.
+        return NULL;
+    }
+    return (PDETOUR_BINARY)pImage;
+}
+
+// Forwards to CImage::Write; returns FALSE if pdi fails CImage::IsValid.
+BOOL WINAPI DetourBinaryWrite(_In_ PDETOUR_BINARY pdi,
+                              _In_ HANDLE hFile)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pdi);
+    if (pImage != NULL) {
+        return pImage->Write(hFile);
+    }
+    return FALSE;
+}
+
+// Forwards to CImage::DataEnum; returns NULL if pBinary fails IsValid.
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourBinaryEnumeratePayloads(_In_ PDETOUR_BINARY pBinary,
+                                           _Out_opt_ GUID *pGuid,
+                                           _Out_ DWORD *pcbData,
+                                           _Inout_ DWORD *pnIterator)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage == NULL) {
+        return NULL;                            // Pointer result: NULL, not FALSE.
+    }
+    return pImage->DataEnum(pGuid, pcbData, pnIterator);
+}
+
+// Forwards to CImage::DataFind; returns NULL if pBinary fails IsValid.
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourBinaryFindPayload(_In_ PDETOUR_BINARY pBinary,
+                                     _In_ REFGUID rguid,
+                                     _Out_ DWORD *pcbData)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage == NULL) {
+        return NULL;                            // Pointer result: NULL, not FALSE.
+    }
+    return pImage->DataFind(rguid, pcbData);
+}
+
+// Forwards to CImage::DataSet; returns NULL if pBinary fails IsValid.
+PVOID WINAPI DetourBinarySetPayload(_In_ PDETOUR_BINARY pBinary,
+                                    _In_ REFGUID rguid,
+                                    _In_reads_opt_(cbData) PVOID pvData,
+                                    _In_ DWORD cbData)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage != NULL) {
+        return pImage->DataSet(rguid, (PBYTE)pvData, cbData);
+    }
+    return NULL;
+}
+
+// Forwards to CImage::DataDelete; returns FALSE if pBinary fails IsValid.
+BOOL WINAPI DetourBinaryDeletePayload(_In_ PDETOUR_BINARY pBinary,
+                                      _In_ REFGUID rguid)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage != NULL) {
+        return pImage->DataDelete(rguid);
+    }
+    return FALSE;
+}
+
+// Forwards to CImage::DataPurge; returns FALSE if pBinary fails IsValid.
+BOOL WINAPI DetourBinaryPurgePayloads(_In_ PDETOUR_BINARY pBinary)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage != NULL) {
+        return pImage->DataPurge();
+    }
+    return FALSE;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Byway callback for DetourBinaryResetImports: always answers "no byway".
+static BOOL CALLBACK ResetBywayCallback(_In_opt_ PVOID pContext,
+                                        _In_opt_ LPCSTR pszFile,
+                                        _Outptr_result_maybenull_ LPCSTR *ppszOutFile)
+{
+    UNREFERENCED_PARAMETER(pszFile);
+    UNREFERENCED_PARAMETER(pContext);
+    *ppszOutFile = NULL;                    // NULL output: insert none, delete existing ones.
+    return TRUE;
+}
+
+// File callback for DetourBinaryResetImports: hands back the original name.
+static BOOL CALLBACK ResetFileCallback(_In_opt_ PVOID pContext,
+                                       _In_ LPCSTR pszOrigFile,
+                                       _In_ LPCSTR pszFile,
+                                       _Outptr_result_maybenull_ LPCSTR *ppszOutFile)
+{
+    UNREFERENCED_PARAMETER(pszFile);
+    UNREFERENCED_PARAMETER(pContext);
+    *ppszOutFile = pszOrigFile;
+    return TRUE;
+}
+
+// Symbol callback for DetourBinaryResetImports: echoes the original ordinal and name.
+static BOOL CALLBACK ResetSymbolCallback(_In_opt_ PVOID pContext,
+                                         _In_ ULONG nOrigOrdinal,
+                                         _In_ ULONG nOrdinal,
+                                         _Out_ ULONG *pnOutOrdinal,
+                                         _In_opt_ LPCSTR pszOrigSymbol,
+                                         _In_opt_ LPCSTR pszSymbol,
+                                         _Outptr_result_maybenull_ LPCSTR *ppszOutSymbol)
+{
+    UNREFERENCED_PARAMETER(pszSymbol);
+    UNREFERENCED_PARAMETER(nOrdinal);
+    UNREFERENCED_PARAMETER(pContext);
+    *ppszOutSymbol = pszOrigSymbol;
+    *pnOutOrdinal = nOrigOrdinal;
+    return TRUE;
+}
+
+// Runs CImage::EditImports with the Reset* callbacks, which drop every byway
+// and hand back the original file and symbol names (or ordinals).
+BOOL WINAPI DetourBinaryResetImports(_In_ PDETOUR_BINARY pBinary)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage != NULL) {
+        // No context and no commit callback are passed for a reset.
+        return pImage->EditImports(NULL,
+                                   ResetBywayCallback, ResetFileCallback,
+                                   ResetSymbolCallback, NULL);
+    }
+    return FALSE;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Public entry point for import editing: validates pBinary and forwards
+// the context and callbacks to CImage::EditImports unchanged.
+// Returns FALSE if pBinary fails IsValid or the edit itself fails.
+BOOL WINAPI DetourBinaryEditImports(_In_ PDETOUR_BINARY pBinary,
+                                    _In_opt_ PVOID pContext,
+                                    _In_opt_ PF_DETOUR_BINARY_BYWAY_CALLBACK pfByway,
+                                    _In_opt_ PF_DETOUR_BINARY_FILE_CALLBACK pfFile,
+                                    _In_opt_ PF_DETOUR_BINARY_SYMBOL_CALLBACK pfSymbol,
+                                    _In_opt_ PF_DETOUR_BINARY_COMMIT_CALLBACK pfCommit)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage != NULL) {
+        return pImage->EditImports(pContext,
+                                   pfByway, pfFile, pfSymbol,
+                                   pfCommit);
+    }
+    return FALSE;
+}
+
+// Closes and destroys the image behind pBinary. Returns the result of
+// CImage::Close, or FALSE if pBinary fails IsValid (nothing is freed then).
+BOOL WINAPI DetourBinaryClose(_In_ PDETOUR_BINARY pBinary)
+{
+    Detour::CImage *pImage = Detour::CImage::IsValid(pBinary);
+    if (pImage == NULL) {
+        return FALSE;
+    }
+    // The object is deleted even when Close() reports failure.
+    const BOOL fClosed = pImage->Close();
+    delete pImage;
+    return fClosed;
+}
+
+//
+///////////////////////////////////////////////////////////////// End of File.
diff --git a/src/detours/modules.cpp b/src/detours/modules.cpp
new file mode 100644
index 0000000..3416730
--- /dev/null
+++ b/src/detours/modules.cpp
@@ -0,0 +1,929 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Module Enumeration Functions (modules.cpp of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+//  Module enumeration functions.
+//
+
+#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1
+
+#pragma warning(disable:4068) // unknown pragma (suppress)
+
+#if _MSC_VER >= 1900
+#pragma warning(push)
+#pragma warning(disable:4091) // empty typedef
+#endif
+
+#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1
+#include <windows.h>
+#if (_MSC_VER < 1310)
+#else
+#pragma warning(push)
+#if _MSC_VER > 1400
+#pragma warning(disable:6102 6103) // /analyze warnings
+#endif
+#include <strsafe.h>
+#pragma warning(pop)
+#endif
+
+// #define DETOUR_DEBUG 1
+#define DETOURS_INTERNAL
+#include "detours.h"
+
+#if DETOURS_VERSION != 0x4c0c1   // 0xMAJORcMINORcPATCH
+#error detours.h version mismatch
+#endif
+
+#if _MSC_VER >= 1900
+#pragma warning(pop)
+#endif
+
+#define CLR_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR]
+#define IAT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT]
+
+//////////////////////////////////////////////////////////////////////////////
+// GUID of the payload that DetourRestoreAfterWith() looks up via DetourFindPayloadEx().
+const GUID DETOUR_EXE_RESTORE_GUID = {
+    0x2ed7a3ff, 0x3339, 0x4a8d,
+    { 0x80, 0x5c, 0xd4, 0x98, 0x15, 0x3f, 0xc2, 0x8f }};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+PDETOUR_SYM_INFO DetourLoadImageHlp(VOID)
+{
+    static DETOUR_SYM_INFO symInfo;             // dbghelp handle and entry points, filled in below
+    static PDETOUR_SYM_INFO pSymInfo = NULL;    // set to &symInfo once initialization succeeded
+    static BOOL failed = false;                 // sticky: once set, every later call returns NULL
+    // Loads dbghelp.dll and resolves its entry points on first use; later calls reuse the outcome.
+    if (failed) {
+        return NULL;
+    }
+    if (pSymInfo != NULL) {
+        return pSymInfo;
+    }
+    // NOTE(review): the statics above are read and written without locking -- confirm callers serialize.
+    ZeroMemory(&symInfo, sizeof(symInfo));
+    // Create a real handle to the process (disabled; the GetCurrentProcess() value is stored as-is).
+#if 0
+    DuplicateHandle(GetCurrentProcess(),
+                    GetCurrentProcess(),
+                    GetCurrentProcess(),
+                    &symInfo.hProcess,
+                    0,
+                    FALSE,
+                    DUPLICATE_SAME_ACCESS);
+#else
+    symInfo.hProcess = GetCurrentProcess();
+#endif
+
+    symInfo.hDbgHelp = LoadLibraryExW(L"dbghelp.dll", NULL, 0);
+    if (symInfo.hDbgHelp == NULL) {
+      abort:    // shared failure path, also reached by the "goto abort" statements below
+        failed = true;
+        if (symInfo.hDbgHelp != NULL) {
+            FreeLibrary(symInfo.hDbgHelp);      // only non-NULL when entered through goto
+        }
+        symInfo.pfImagehlpApiVersionEx = NULL;
+        symInfo.pfSymInitialize = NULL;
+        symInfo.pfSymSetOptions = NULL;
+        symInfo.pfSymGetOptions = NULL;
+        symInfo.pfSymLoadModule64 = NULL;
+        symInfo.pfSymGetModuleInfo64 = NULL;
+        symInfo.pfSymFromName = NULL;
+        return NULL;
+    }
+    // Resolve every entry point first; the results are checked together further down.
+    symInfo.pfImagehlpApiVersionEx
+        = (PF_ImagehlpApiVersionEx)GetProcAddress(symInfo.hDbgHelp,
+                                                  "ImagehlpApiVersionEx");
+    symInfo.pfSymInitialize
+        = (PF_SymInitialize)GetProcAddress(symInfo.hDbgHelp, "SymInitialize");
+    symInfo.pfSymSetOptions
+        = (PF_SymSetOptions)GetProcAddress(symInfo.hDbgHelp, "SymSetOptions");
+    symInfo.pfSymGetOptions
+        = (PF_SymGetOptions)GetProcAddress(symInfo.hDbgHelp, "SymGetOptions");
+    symInfo.pfSymLoadModule64
+        = (PF_SymLoadModule64)GetProcAddress(symInfo.hDbgHelp, "SymLoadModule64");
+    symInfo.pfSymGetModuleInfo64
+        = (PF_SymGetModuleInfo64)GetProcAddress(symInfo.hDbgHelp, "SymGetModuleInfo64");
+    symInfo.pfSymFromName
+        = (PF_SymFromName)GetProcAddress(symInfo.hDbgHelp, "SymFromName");
+
+    API_VERSION av;
+    ZeroMemory(&av, sizeof(av));
+    av.MajorVersion = API_VERSION_NUMBER;
+    // These five are mandatory; SymGetOptions/SymSetOptions are optional and checked separately.
+    if (symInfo.pfImagehlpApiVersionEx == NULL ||
+        symInfo.pfSymInitialize == NULL ||
+        symInfo.pfSymLoadModule64 == NULL ||
+        symInfo.pfSymGetModuleInfo64 == NULL ||
+        symInfo.pfSymFromName == NULL) {
+        goto abort;
+    }
+    // Give up if the major version left in av after the call is below API_VERSION_NUMBER.
+    symInfo.pfImagehlpApiVersionEx(&av);
+    if (av.MajorVersion < API_VERSION_NUMBER) {
+        goto abort;
+    }
+
+    if (!symInfo.pfSymInitialize(symInfo.hProcess, NULL, FALSE)) {
+        // We won't retry the initialize if it fails.
+        goto abort;
+    }
+
+    if (symInfo.pfSymGetOptions != NULL && symInfo.pfSymSetOptions != NULL) {
+        DWORD dw = symInfo.pfSymGetOptions();
+        // Note: SYMOPT_DEFERRED_LOADS is cleared here and then set again just below.
+        dw &= ~(SYMOPT_CASE_INSENSITIVE |
+                SYMOPT_UNDNAME |
+                SYMOPT_DEFERRED_LOADS |
+                0);
+        dw |= (
+#if defined(SYMOPT_EXACT_SYMBOLS)
+               SYMOPT_EXACT_SYMBOLS |
+#endif
+#if defined(SYMOPT_NO_UNQUALIFIED_LOADS)
+               SYMOPT_NO_UNQUALIFIED_LOADS |
+#endif
+               SYMOPT_DEFERRED_LOADS |
+#if defined(SYMOPT_FAIL_CRITICAL_ERRORS)
+               SYMOPT_FAIL_CRITICAL_ERRORS |
+#endif
+#if defined(SYMOPT_INCLUDE_32BIT_MODULES)
+               SYMOPT_INCLUDE_32BIT_MODULES |
+#endif
+               0);
+        symInfo.pfSymSetOptions(dw);
+    }
+    // Publish the table; from now on the early-out at the top returns it directly.
+    pSymInfo = &symInfo;
+    return pSymInfo;
+}
+
+PVOID WINAPI DetourFindFunction(_In_ PCSTR pszModule,
+                                _In_ PCSTR pszFunction)
+{
+    /////////////////////////////////////////////// First, try GetProcAddress.
+    // Returns the address of pszFunction in pszModule, or NULL if neither lookup finds it.
+#pragma prefast(suppress:28752, "We don't do the unicode conversion for LoadLibraryExA.")
+    HMODULE hModule = LoadLibraryExA(pszModule, NULL, 0);   // never freed in this function
+    if (hModule == NULL) {
+        return NULL;
+    }
+
+    PBYTE pbCode = (PBYTE)GetProcAddress(hModule, pszFunction);
+    if (pbCode) {
+        return pbCode;
+    }
+
+    ////////////////////////////////////////////////////// Then try ImageHelp.
+    // The export lookup failed, so fall back to a symbol lookup of "module!function".
+    DETOUR_TRACE(("DetourFindFunction(%hs, %hs)\n", pszModule, pszFunction));
+    PDETOUR_SYM_INFO pSymInfo = DetourLoadImageHlp();
+    if (pSymInfo == NULL) {
+        DETOUR_TRACE(("DetourLoadImageHlp failed: %d\n",
+                      GetLastError()));
+        return NULL;
+    }
+
+    if (pSymInfo->pfSymLoadModule64(pSymInfo->hProcess, NULL,
+                                    (PCHAR)pszModule, NULL,
+                                    (DWORD64)hModule, 0) == 0) {
+        if (ERROR_SUCCESS != GetLastError()) {     // a 0 result with ERROR_SUCCESS is not treated as failure
+            DETOUR_TRACE(("SymLoadModule64(%p) failed: %d\n",
+                          pSymInfo->hProcess, GetLastError()));
+            return NULL;
+        }
+    }
+
+    HRESULT hrRet;
+    CHAR szFullName[512];                           // receives "<ModuleName>!<pszFunction>"
+    IMAGEHLP_MODULE64 modinfo;
+    ZeroMemory(&modinfo, sizeof(modinfo));
+    modinfo.SizeOfStruct = sizeof(modinfo);
+    if (!pSymInfo->pfSymGetModuleInfo64(pSymInfo->hProcess, (DWORD64)hModule, &modinfo)) {
+        DETOUR_TRACE(("SymGetModuleInfo64(%p, %p) failed: %d\n",
+                      pSymInfo->hProcess, hModule, GetLastError()));
+        return NULL;
+    }
+    // Build the qualified name; any copy/append failure aborts the lookup.
+    hrRet = StringCchCopyA(szFullName, sizeof(szFullName)/sizeof(CHAR), modinfo.ModuleName);
+    if (FAILED(hrRet)) {
+        DETOUR_TRACE(("StringCchCopyA failed: %08x\n", hrRet));
+        return NULL;
+    }
+    hrRet = StringCchCatA(szFullName, sizeof(szFullName)/sizeof(CHAR), "!");
+    if (FAILED(hrRet)) {
+        DETOUR_TRACE(("StringCchCatA failed: %08x\n", hrRet));
+        return NULL;
+    }
+    hrRet = StringCchCatA(szFullName, sizeof(szFullName)/sizeof(CHAR), pszFunction);
+    if (FAILED(hrRet)) {
+        DETOUR_TRACE(("StringCchCatA failed: %08x\n", hrRet));
+        return NULL;
+    }
+    // SYMBOL_INFO followed by extra storage; its size is reported through MaxNameLen(gth) below.
+    struct CFullSymbol : SYMBOL_INFO {
+        CHAR szRestOfName[512];
+    } symbol;
+    ZeroMemory(&symbol, sizeof(symbol));
+    //symbol.ModBase = (ULONG64)hModule;
+    symbol.SizeOfStruct = sizeof(SYMBOL_INFO);
+#ifdef DBHLPAPI
+    symbol.MaxNameLen = sizeof(symbol.szRestOfName)/sizeof(symbol.szRestOfName[0]);
+#else
+    symbol.MaxNameLength = sizeof(symbol.szRestOfName)/sizeof(symbol.szRestOfName[0]);
+#endif
+
+    if (!pSymInfo->pfSymFromName(pSymInfo->hProcess, szFullName, &symbol)) {
+        DETOUR_TRACE(("SymFromName(%hs) failed: %d\n", szFullName, GetLastError()));
+        return NULL;
+    }
+
+#if defined(DETOURS_IA64)
+    // On the IA64, we get a raw code pointer from the symbol engine
+    // and have to convert it to a wrapped [code pointer, global pointer].
+    // The descriptor allocated below is returned to the caller and not freed here.
+    PPLABEL_DESCRIPTOR pldEntry = (PPLABEL_DESCRIPTOR)DetourGetEntryPoint(hModule);
+    PPLABEL_DESCRIPTOR pldSymbol = new PLABEL_DESCRIPTOR;
+
+    pldSymbol->EntryPoint = symbol.Address;
+    pldSymbol->GlobalPointer = pldEntry->GlobalPointer;
+    return (PBYTE)pldSymbol;
+#elif defined(DETOURS_ARM)
+    // On the ARM, we get a raw code pointer, which we must convert into a
+    // valid Thumb2 function pointer.
+    return DETOURS_PBYTE_TO_PFUNC(symbol.Address);
+#else
+    return (PBYTE)symbol.Address;
+#endif
+}
+
+//////////////////////////////////////////////////// Module Image Functions.
+//
+
+HMODULE WINAPI DetourEnumerateModules(_In_opt_ HMODULE hModuleLast)
+{
+    PBYTE pbLast = (PBYTE)hModuleLast + MM_ALLOCATION_GRANULARITY;  // start one granule past the last hit
+    // Returns the next address after hModuleLast that holds a PE header, or NULL when none is left.
+    MEMORY_BASIC_INFORMATION mbi;
+    ZeroMemory(&mbi, sizeof(mbi));
+
+    // Find the next memory region that contains a mapped PE image.
+    // Each iteration advances to the end of the region VirtualQuery just described.
+    for (;; pbLast = (PBYTE)mbi.BaseAddress + mbi.RegionSize) {
+        if (VirtualQuery(pbLast, &mbi, sizeof(mbi)) <= 0) {
+            break;                                  // query failed: enumeration is over
+        }
+
+        // Skip uncommitted regions and guard pages.
+        //
+        if ((mbi.State != MEM_COMMIT) ||
+            ((mbi.Protect & 0xff) == PAGE_NOACCESS) ||
+            (mbi.Protect & PAGE_GUARD)) {
+            continue;
+        }
+
+        __try {
+            PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)pbLast;
+            if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE ||
+                (DWORD)pDosHeader->e_lfanew > mbi.RegionSize ||
+                (DWORD)pDosHeader->e_lfanew < sizeof(*pDosHeader)) {
+                continue;                           // no DOS header here, or e_lfanew out of range
+            }
+
+            PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                              pDosHeader->e_lfanew);
+            if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+                continue;
+            }
+
+            return (HMODULE)pDosHeader;
+        }
+#pragma prefast(suppress:28940, "A bad pointer means this probably isn't a PE header.")
+        __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+                 EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+            continue;                               // access violation while probing: try the next region
+        }
+    }
+    return NULL;
+}
+
+PVOID WINAPI DetourGetEntryPoint(_In_opt_ HMODULE hModule)
+{
+    PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)hModule;
+    if (hModule == NULL) {
+        pDosHeader = (PIMAGE_DOS_HEADER)GetModuleHandleW(NULL);    // NULL selects GetModuleHandleW(NULL)
+    }
+    // Returns the image's entry-point address, or MSCOREE.DLL's _CorExeMain for images with CLR data.
+    __try {
+#pragma warning(suppress:6011) // GetModuleHandleW(NULL) never returns NULL.
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return NULL;
+        }
+
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return NULL;
+        }
+        if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return NULL;
+        }
+        // Locate the CLR directory through the 32- or 64-bit header layout, chosen by the Magic field.
+        PDETOUR_CLR_HEADER pClrHeader = NULL;
+        if (pNtHeader->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
+            if (((PIMAGE_NT_HEADERS32)pNtHeader)->CLR_DIRECTORY.VirtualAddress != 0 &&
+                ((PIMAGE_NT_HEADERS32)pNtHeader)->CLR_DIRECTORY.Size != 0) {
+                pClrHeader = (PDETOUR_CLR_HEADER)
+                    (((PBYTE)pDosHeader)
+                     + ((PIMAGE_NT_HEADERS32)pNtHeader)->CLR_DIRECTORY.VirtualAddress);
+            }
+        }
+        else if (pNtHeader->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
+            if (((PIMAGE_NT_HEADERS64)pNtHeader)->CLR_DIRECTORY.VirtualAddress != 0 &&
+                ((PIMAGE_NT_HEADERS64)pNtHeader)->CLR_DIRECTORY.Size != 0) {
+                pClrHeader = (PDETOUR_CLR_HEADER)
+                    (((PBYTE)pDosHeader)
+                     + ((PIMAGE_NT_HEADERS64)pNtHeader)->CLR_DIRECTORY.VirtualAddress);
+            }
+        }
+
+        if (pClrHeader != NULL) {
+            // For MSIL assemblies, we want to use the _Cor entry points.
+            // The last error is not set explicitly when MSCOREE.DLL is not loaded.
+            HMODULE hClr = GetModuleHandleW(L"MSCOREE.DLL");
+            if (hClr == NULL) {
+                return NULL;
+            }
+
+            SetLastError(NO_ERROR);
+            return GetProcAddress(hClr, "_CorExeMain");
+        }
+
+        SetLastError(NO_ERROR);
+
+        // Pure resource DLLs have neither an entry point nor CLR information
+        // so handle them by returning NULL (LastError is NO_ERROR)
+        if (pNtHeader->OptionalHeader.AddressOfEntryPoint == 0) {
+            return NULL;
+        }
+
+        return ((PBYTE)pDosHeader) +
+            pNtHeader->OptionalHeader.AddressOfEntryPoint;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);     // headers could not be read
+        return NULL;
+    }
+}
+
+ULONG WINAPI DetourGetModuleSize(_In_opt_ HMODULE hModule)
+{
+    PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)hModule;
+    if (hModule == NULL) {
+        pDosHeader = (PIMAGE_DOS_HEADER)GetModuleHandleW(NULL);    // NULL selects GetModuleHandleW(NULL)
+    }
+    // Returns OptionalHeader.SizeOfImage of the module, or 0 with the last error set on failure.
+    __try {
+#pragma warning(suppress:6011) // GetModuleHandleW(NULL) never returns NULL.
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return 0;       // the return type is ULONG, so 0 rather than the pointer constant NULL
+        }
+
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return 0;
+        }
+        if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return 0;
+        }
+        SetLastError(NO_ERROR);
+
+        return (pNtHeader->OptionalHeader.SizeOfImage);
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);     // headers could not be read
+        return 0;
+    }
+}
+
+HMODULE WINAPI DetourGetContainingModule(_In_ PVOID pvAddr)
+{
+    MEMORY_BASIC_INFORMATION mbi;
+    ZeroMemory(&mbi, sizeof(mbi));
+    // Returns the allocation base of the region holding pvAddr if a PE image starts there, else NULL.
+    __try {
+        if (VirtualQuery(pvAddr, &mbi, sizeof(mbi)) <= 0) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return NULL;
+        }
+
+        // Skip uncommitted regions and guard pages.
+        //
+        if ((mbi.State != MEM_COMMIT) ||
+            ((mbi.Protect & 0xff) == PAGE_NOACCESS) ||
+            (mbi.Protect & PAGE_GUARD)) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return NULL;
+        }
+        // The candidate header is at the allocation base, not at the queried address.
+        PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)mbi.AllocationBase;
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return NULL;
+        }
+
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return NULL;
+        }
+        if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return NULL;
+        }
+        SetLastError(NO_ERROR);
+
+        return (HMODULE)pDosHeader;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_INVALID_EXE_SIGNATURE);  // headers could not be read
+        return NULL;
+    }
+}
+
+
+static inline PBYTE RvaAdjust(_Pre_notnull_ PIMAGE_DOS_HEADER pDosHeader, _In_ DWORD raddr)
+{
+    // Turns an offset relative to the image base (pDosHeader) into a pointer.
+    // An offset of 0 yields NULL instead of the image base. raddr is a DWORD,
+    // so it is compared with 0 rather than with the pointer constant NULL.
+    return (raddr != 0) ? ((PBYTE)pDosHeader) + raddr : NULL;
+}
+
+BOOL WINAPI DetourEnumerateExports(_In_ HMODULE hModule,
+                                   _In_opt_ PVOID pContext,
+                                   _In_ PF_DETOUR_ENUMERATE_EXPORT_CALLBACK pfExport)
+{
+    PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)hModule;
+    if (hModule == NULL) {
+        pDosHeader = (PIMAGE_DOS_HEADER)GetModuleHandleW(NULL);    // NULL selects GetModuleHandleW(NULL)
+    }
+    // Calls pfExport once per export-table entry; returns FALSE with the last error set on failure.
+    __try {
+#pragma warning(suppress:6011) // GetModuleHandleW(NULL) never returns NULL.
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return FALSE;   // BOOL result: FALSE, consistent with the other failure paths
+        }
+
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return FALSE;
+        }
+        if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return FALSE;
+        }
+
+        PIMAGE_EXPORT_DIRECTORY pExportDir
+            = (PIMAGE_EXPORT_DIRECTORY)
+            RvaAdjust(pDosHeader,
+                      pNtHeader->OptionalHeader
+                      .DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress);
+
+        if (pExportDir == NULL) {                   // the image has no export directory
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return FALSE;
+        }
+        // pExportDirEnd bounds the export directory; it is used below to recognise forwarders.
+        PBYTE pExportDirEnd = (PBYTE)pExportDir + pNtHeader->OptionalHeader
+            .DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].Size;
+        PDWORD pdwFunctions = (PDWORD)RvaAdjust(pDosHeader, pExportDir->AddressOfFunctions);
+        PDWORD pdwNames = (PDWORD)RvaAdjust(pDosHeader, pExportDir->AddressOfNames);
+        PWORD pwOrdinals = (PWORD)RvaAdjust(pDosHeader, pExportDir->AddressOfNameOrdinals);
+
+        for (DWORD nFunc = 0; nFunc < pExportDir->NumberOfFunctions; nFunc++) {
+            PBYTE pbCode = (pdwFunctions != NULL)
+                ? (PBYTE)RvaAdjust(pDosHeader, pdwFunctions[nFunc]) : NULL;
+            PCHAR pszName = NULL;
+
+            // if the pointer is in the export region, then it is a forwarder.
+            if (pbCode > (PBYTE)pExportDir && pbCode < pExportDirEnd) {
+                pbCode = NULL;                      // forwarders are reported without a code address
+            }
+            // Find the name, if any, whose ordinal-table entry refers to this function index.
+            for (DWORD n = 0; n < pExportDir->NumberOfNames; n++) {
+                if (pwOrdinals[n] == nFunc) {
+                    pszName = (pdwNames != NULL)
+                        ? (PCHAR)RvaAdjust(pDosHeader, pdwNames[n]) : NULL;
+                    break;
+                }
+            }
+            ULONG nOrdinal = pExportDir->Base + nFunc;
+            // A FALSE result from the callback stops the enumeration; the function still returns TRUE.
+            if (!pfExport(pContext, nOrdinal, pszName, pbCode)) {
+                break;
+            }
+        }
+        SetLastError(NO_ERROR);
+        return TRUE;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);     // headers or export tables could not be read
+        return FALSE;
+    }
+}
+
+BOOL WINAPI DetourEnumerateImportsEx(_In_opt_ HMODULE hModule,
+                                     _In_opt_ PVOID pContext,
+                                     _In_opt_ PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile,
+                                     _In_opt_ PF_DETOUR_IMPORT_FUNC_CALLBACK_EX pfImportFunc)
+{
+    PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)hModule;
+    if (hModule == NULL) {
+        pDosHeader = (PIMAGE_DOS_HEADER)GetModuleHandleW(NULL);    // NULL selects GetModuleHandleW(NULL)
+    }
+    // Walks the import descriptors, reporting each imported file and each of its functions.
+    __try {
+#pragma warning(suppress:6011) // GetModuleHandleW(NULL) never returns NULL.
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return FALSE;
+        }
+
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return FALSE;
+        }
+        if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return FALSE;
+        }
+
+        PIMAGE_IMPORT_DESCRIPTOR iidp
+            = (PIMAGE_IMPORT_DESCRIPTOR)
+            RvaAdjust(pDosHeader,
+                      pNtHeader->OptionalHeader
+                      .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
+
+        if (iidp == NULL) {                         // the image has no import directory
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return FALSE;
+        }
+        // The descriptor array ends at the first entry whose OriginalFirstThunk is 0.
+        for (; iidp->OriginalFirstThunk != 0; iidp++) {
+
+            PCSTR pszName = (PCHAR)RvaAdjust(pDosHeader, iidp->Name);
+            if (pszName == NULL) {
+                SetLastError(ERROR_EXE_MARKED_INVALID);
+                return FALSE;
+            }
+            // pThunks describes each import (by name or ordinal); pAddrs holds the matching address slots.
+            PIMAGE_THUNK_DATA pThunks = (PIMAGE_THUNK_DATA)
+                RvaAdjust(pDosHeader, iidp->OriginalFirstThunk);
+            PVOID * pAddrs = (PVOID *)
+                RvaAdjust(pDosHeader, iidp->FirstThunk);
+            // The module handle reported for the file is derived from the first slot's current value.
+            HMODULE hFile = DetourGetContainingModule(pAddrs[0]);
+
+            if (pfImportFile != NULL) {
+                if (!pfImportFile(pContext, hFile, pszName)) {
+                    break;                          // callback asked to stop the whole enumeration
+                }
+            }
+
+            DWORD nNames = 0;
+            if (pThunks) {
+                for (; pThunks[nNames].u1.Ordinal; nNames++) {
+                    DWORD nOrdinal = 0;
+                    PCSTR pszFunc = NULL;
+                    // Each import is reported by ordinal (pszFunc NULL) or by name (nOrdinal 0).
+                    if (IMAGE_SNAP_BY_ORDINAL(pThunks[nNames].u1.Ordinal)) {
+                        nOrdinal = (DWORD)IMAGE_ORDINAL(pThunks[nNames].u1.Ordinal);
+                    }
+                    else {
+                        pszFunc = (PCSTR)RvaAdjust(pDosHeader,
+                                                   (DWORD)pThunks[nNames].u1.AddressOfData + 2);  // +2: presumably skips the WORD Hint of IMAGE_IMPORT_BY_NAME
+                    }
+
+                    if (pfImportFunc != NULL) {
+                        if (!pfImportFunc(pContext,
+                                          nOrdinal,
+                                          pszFunc,
+                                          &pAddrs[nNames])) {
+                            break;                  // leaves only this file's function loop
+                        }
+                    }
+                }
+                if (pfImportFunc != NULL) {
+                    pfImportFunc(pContext, 0, NULL, NULL);  // end-of-file marker call
+                }
+            }
+        }
+        if (pfImportFile != NULL) {
+            pfImportFile(pContext, NULL, NULL);     // end-of-enumeration marker call
+        }
+        SetLastError(NO_ERROR);
+        return TRUE;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);     // an access violation anywhere above ends up here
+        return FALSE;
+    }
+}
+
+// Context for DetourEnumerateImportsThunk, which adapts "regular" callbacks for use with "Ex".
+struct _DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT
+{
+    PVOID pContext;                                 // caller's original context, forwarded unchanged
+    PF_DETOUR_IMPORT_FUNC_CALLBACK pfImportFunc;    // caller's "regular" per-function callback
+};
+
+// Callback for DetourEnumerateImportsEx that adapts DetourEnumerateImportsEx
+// for use with a DetourEnumerateImports callback -- dereference the IAT and pass the value on.
+
+static
+BOOL
+CALLBACK
+DetourEnumerateImportsThunk(_In_ PVOID VoidContext,
+                            _In_ DWORD nOrdinal,
+                            _In_opt_ PCSTR pszFunc,
+                            _In_opt_ PVOID* ppvFunc)
+{
+    const _DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT * const pThunk = (_DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT*)VoidContext;
+    PVOID const pvTarget = (ppvFunc != NULL) ? *ppvFunc : NULL;   // slot value; NULL when no slot was passed
+    return pThunk->pfImportFunc(pThunk->pContext, nOrdinal, pszFunc, pvTarget);
+}
+
+BOOL WINAPI DetourEnumerateImports(_In_opt_ HMODULE hModule,
+                                   _In_opt_ PVOID pContext,
+                                   _In_opt_ PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile,
+                                   _In_opt_ PF_DETOUR_IMPORT_FUNC_CALLBACK pfImportFunc)
+{
+    // Adapts a by-value function callback to DetourEnumerateImportsEx, whose
+    // callback receives a pointer to each address slot. pfImportFunc is optional,
+    // so the thunk is only installed when there is a callback for it to call.
+    _DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT const context = { pContext, pfImportFunc };
+    return DetourEnumerateImportsEx(hModule, (PVOID)&context, pfImportFile,
+                                    (pfImportFunc != NULL) ? &DetourEnumerateImportsThunk : NULL);
+}
+
+static PDETOUR_LOADED_BINARY WINAPI GetPayloadSectionFromModule(HMODULE hModule)
+{
+    PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)hModule;
+    if (hModule == NULL) {
+        pDosHeader = (PIMAGE_DOS_HEADER)GetModuleHandleW(NULL);    // NULL selects GetModuleHandleW(NULL)
+    }
+    // Returns the module's validated ".detour" section header, or NULL with the last error set.
+    __try {
+#pragma warning(suppress:6011) // GetModuleHandleW(NULL) never returns NULL.
+        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+            SetLastError(ERROR_BAD_EXE_FORMAT);
+            return NULL;
+        }
+
+        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
+                                                          pDosHeader->e_lfanew);
+        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return NULL;
+        }
+        if (pNtHeader->FileHeader.SizeOfOptionalHeader == 0) {
+            SetLastError(ERROR_EXE_MARKED_INVALID);
+            return NULL;
+        }
+        // The section table follows the signature, the file header and the optional header.
+        PIMAGE_SECTION_HEADER pSectionHeaders
+            = (PIMAGE_SECTION_HEADER)((PBYTE)pNtHeader
+                                      + sizeof(pNtHeader->Signature)
+                                      + sizeof(pNtHeader->FileHeader)
+                                      + pNtHeader->FileHeader.SizeOfOptionalHeader);
+
+        for (DWORD n = 0; n < pNtHeader->FileHeader.NumberOfSections; n++) {
+            if (strcmp((PCHAR)pSectionHeaders[n].Name, ".detour") == 0) {
+                if (pSectionHeaders[n].VirtualAddress == 0 ||
+                    pSectionHeaders[n].SizeOfRawData == 0) {
+                    // Empty section: leave the loop and report ERROR_EXE_MARKED_INVALID below.
+                    break;
+                }
+
+                PBYTE pbData = (PBYTE)pDosHeader + pSectionHeaders[n].VirtualAddress;
+                DETOUR_SECTION_HEADER *pHeader = (DETOUR_SECTION_HEADER *)pbData;
+                if (pHeader->cbHeaderSize < sizeof(DETOUR_SECTION_HEADER) ||
+                    pHeader->nSignature != DETOUR_SECTION_HEADER_SIGNATURE) {
+                    // Header too small or wrong signature: same failure path as above.
+                    break;
+                }
+                // A zero data offset is replaced, in the mapped header itself, by the header size.
+                if (pHeader->nDataOffset == 0) {
+                    pHeader->nDataOffset = pHeader->cbHeaderSize;
+                }
+                SetLastError(NO_ERROR);
+                return (PBYTE)pHeader;
+            }
+        }
+        SetLastError(ERROR_EXE_MARKED_INVALID);     // no usable ".detour" section found
+        return NULL;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_EXE_MARKED_INVALID);
+        return NULL;
+    }
+}
+
+DWORD WINAPI DetourGetSizeOfPayloads(_In_opt_ HMODULE hModule)
+{
+    // Returns the cbDataSize field of the module's .detour section header, or 0 on failure.
+    PDETOUR_LOADED_BINARY pPayloads = GetPayloadSectionFromModule(hModule);
+    if (pPayloads == NULL) {
+        return 0;   // last error already set by GetPayloadSectionFromModule
+    }
+
+    __try {
+        const DETOUR_SECTION_HEADER *pSection = (const DETOUR_SECTION_HEADER *)pPayloads;
+        // Accept the header only if it is large enough and carries the expected signature.
+        if (pSection->cbHeaderSize >= sizeof(DETOUR_SECTION_HEADER) &&
+            pSection->nSignature == DETOUR_SECTION_HEADER_SIGNATURE) {
+            SetLastError(NO_ERROR);
+            return pSection->cbDataSize;
+        }
+        SetLastError(ERROR_INVALID_HANDLE);
+        return 0;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_INVALID_HANDLE);
+        return 0;
+    }
+}
+
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourFindPayload(_In_opt_ HMODULE hModule,
+                               _In_ REFGUID rguid,
+                               _Out_ DWORD *pcbData)
+{
+    PBYTE pbData = NULL;
+    if (pcbData) {
+        *pcbData = 0;
+    }
+    // Returns a pointer to the payload tagged rguid in hModule's .detour section, or NULL.
+    PDETOUR_LOADED_BINARY pBinary = GetPayloadSectionFromModule(hModule);
+    if (pBinary == NULL) {
+        // Error set by GetPayloadSectionFromModule.
+        return NULL;
+    }
+
+    __try {
+        DETOUR_SECTION_HEADER *pHeader = (DETOUR_SECTION_HEADER *)pBinary;
+        if (pHeader->cbHeaderSize < sizeof(DETOUR_SECTION_HEADER) ||
+            pHeader->nSignature != DETOUR_SECTION_HEADER_SIGNATURE) {
+
+            SetLastError(ERROR_INVALID_EXE_SIGNATURE);
+            return NULL;
+        }
+        // Records start at nDataOffset; the scan stops at cbDataSize (both relative to the header).
+        PBYTE pbBeg = ((PBYTE)pHeader) + pHeader->nDataOffset;
+        PBYTE pbEnd = ((PBYTE)pHeader) + pHeader->cbDataSize;
+
+        for (pbData = pbBeg; pbData < pbEnd;) {
+            DETOUR_SECTION_RECORD *pSection = (DETOUR_SECTION_RECORD *)pbData;
+
+            if (pSection->guid.Data1 == rguid.Data1 &&
+                pSection->guid.Data2 == rguid.Data2 &&
+                pSection->guid.Data3 == rguid.Data3 &&
+                pSection->guid.Data4[0] == rguid.Data4[0] &&
+                pSection->guid.Data4[1] == rguid.Data4[1] &&
+                pSection->guid.Data4[2] == rguid.Data4[2] &&
+                pSection->guid.Data4[3] == rguid.Data4[3] &&
+                pSection->guid.Data4[4] == rguid.Data4[4] &&
+                pSection->guid.Data4[5] == rguid.Data4[5] &&
+                pSection->guid.Data4[6] == rguid.Data4[6] &&
+                pSection->guid.Data4[7] == rguid.Data4[7]) {
+                // The size is optional output; a match is returned even when pcbData is NULL.
+                if (pcbData) {
+                    *pcbData = pSection->cbBytes - sizeof(*pSection);
+                }
+                SetLastError(NO_ERROR);
+                return (PBYTE)(pSection + 1);       // payload bytes follow the record header
+            }
+            // cbBytes is the stride to the next record.
+            pbData = (PBYTE)pSection + pSection->cbBytes;
+        }
+        SetLastError(ERROR_INVALID_HANDLE);         // no record with this GUID
+        return NULL;
+    }
+    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+        SetLastError(ERROR_INVALID_HANDLE);
+        return NULL;
+    }
+}
+
+_Writable_bytes_(*pcbData)
+_Readable_bytes_(*pcbData)
+_Success_(return != NULL)
+PVOID WINAPI DetourFindPayloadEx(_In_ REFGUID rguid,
+                                 _Out_ DWORD * pcbData)
+{
+    // Return the first payload for rguid found in any module DetourEnumerateModules reports.
+    for (HMODULE hCursor = DetourEnumerateModules(NULL); hCursor != NULL;
+         hCursor = DetourEnumerateModules(hCursor)) {
+        PVOID const pvFound = DetourFindPayload(hCursor, rguid, pcbData);
+        if (pvFound != NULL) {
+            return pvFound;
+        }
+    }
+    SetLastError(ERROR_MOD_NOT_FOUND);
+    return NULL;
+}
+
+BOOL WINAPI DetourRestoreAfterWithEx(_In_reads_bytes_(cbData) PVOID pvData,
+                                     _In_ DWORD cbData)
+{
+    PDETOUR_EXE_RESTORE pder = (PDETOUR_EXE_RESTORE)pvData;
+    // Copies the saved idh/inh/clr blocks of the restore record back to the addresses it names.
+    if (pder->cb != sizeof(*pder) || pder->cb > cbData) {
+        SetLastError(ERROR_BAD_EXE_FORMAT);
+        return FALSE;
+    }
+
+    DWORD dwPermIdh = ~0u;      // previous protection of each range, restored after the copy
+    DWORD dwPermInh = ~0u;
+    DWORD dwPermClr = ~0u;
+    DWORD dwIgnore;
+    BOOL fSucceeded = FALSE;
+    BOOL fUpdated32To64 = FALSE;
+
+    if (pder->pclr != NULL && pder->clr.Flags != ((PDETOUR_CLR_HEADER)pder->pclr)->Flags) {
+        // If we had to promote the 32/64-bit agnostic IL to 64-bit, we can't restore
+        // that.
+        fUpdated32To64 = TRUE;
+    }
+    // Both ranges must become writable before either is copied; protections are restored in reverse order.
+    if (DetourVirtualProtectSameExecute(pder->pidh, pder->cbidh,
+                                        PAGE_EXECUTE_READWRITE, &dwPermIdh)) {
+        if (DetourVirtualProtectSameExecute(pder->pinh, pder->cbinh,
+                                            PAGE_EXECUTE_READWRITE, &dwPermInh)) {
+
+            CopyMemory(pder->pidh, &pder->idh, pder->cbidh);
+            CopyMemory(pder->pinh, &pder->inh, pder->cbinh);
+            // The clr block is restored only when present and not flagged above as unrestorable.
+            if (pder->pclr != NULL && !fUpdated32To64) {
+                if (DetourVirtualProtectSameExecute(pder->pclr, pder->cbclr,
+                                                    PAGE_EXECUTE_READWRITE, &dwPermClr)) {
+                    CopyMemory(pder->pclr, &pder->clr, pder->cbclr);
+                    VirtualProtect(pder->pclr, pder->cbclr, dwPermClr, &dwIgnore);
+                    fSucceeded = TRUE;
+                }
+            }
+            else {
+                fSucceeded = TRUE;
+            }
+            VirtualProtect(pder->pinh, pder->cbinh, dwPermInh, &dwIgnore);
+        }
+        VirtualProtect(pder->pidh, pder->cbidh, dwPermIdh, &dwIgnore);
+    }
+    return fSucceeded;
+}
+
+BOOL WINAPI DetourRestoreAfterWith()
+{
+    // Find the DETOUR_EXE_RESTORE_GUID payload in any enumerated module and, if
+    // one with a non-zero size exists, hand it to DetourRestoreAfterWithEx.
+    DWORD cbPayload;
+    PVOID const pvPayload = DetourFindPayloadEx(DETOUR_EXE_RESTORE_GUID, &cbPayload);
+    if (pvPayload == NULL || cbPayload == 0) {
+        SetLastError(ERROR_MOD_NOT_FOUND);
+        return FALSE;
+    }
+
+    return DetourRestoreAfterWithEx(pvPayload, cbPayload);
+}
+
+//  End of File
diff --git a/src/detours/uimports.cpp b/src/detours/uimports.cpp
new file mode 100644
index 0000000..1e8cbd7
--- /dev/null
+++ b/src/detours/uimports.cpp
@@ -0,0 +1,269 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Add DLLs to a module import table (uimports.cpp of detours.lib)
+//
+//  Microsoft Research Detours Package, Version 4.0.1
+//
+//  Copyright (c) Microsoft Corporation.  All rights reserved.
+//
+//  Note that this file is included into creatwth.cpp one or more times
+//  (once for each supported module format).
+//
+
+#if DETOURS_VERSION != 0x4c0c1   // 0xMAJORcMINORcPATCH
+#error detours.h version mismatch
+#endif
+
+// UpdateImports32 aka UpdateImports64: builds a new import table in hProcess that lists plpDlls first, then the module's old descriptors; returns TRUE on success.
+static BOOL UPDATE_IMPORTS_XX(HANDLE hProcess,                    // process whose memory is read and patched
+                              HMODULE hModule,                    // base address of the module image inside hProcess
+                              __in_ecount(nDlls) LPCSTR *plpDlls, // names of the DLLs to add to the import table
+                              DWORD nDlls)                        // number of entries in plpDlls
+{
+    BOOL fSucceeded = FALSE;  // only set to TRUE after every read/write below has succeeded
+    DWORD cbNew = 0;  // total size of the new import block (descriptors + thunks + names)
+
+    BYTE * pbNew = NULL;  // local staging buffer for the new import block; released at `finish`
+    DWORD i;
+    SIZE_T cbRead;
+    DWORD n;
+
+    PBYTE pbModule = (PBYTE)hModule;
+
+    IMAGE_DOS_HEADER idh;
+    ZeroMemory(&idh, sizeof(idh));
+    if (!ReadProcessMemory(hProcess, pbModule, &idh, sizeof(idh), &cbRead)
+        || cbRead < sizeof(idh)) {  // a short read is treated as a failure
+
+        DETOUR_TRACE(("ReadProcessMemory(idh@%p..%p) failed: %d\n",
+                      pbModule, pbModule + sizeof(idh), GetLastError()));
+
+      finish:  // shared exit path: every later `goto finish` (including the success case) jumps back into this branch
+        if (pbNew != NULL) {  // NOTE(review): only pbNew is released here; the block from FindAndAllocateNearBase is not freed on failure -- confirm intended
+            delete[] pbNew;
+            pbNew = NULL;
+        }
+        return fSucceeded;
+    }
+
+    IMAGE_NT_HEADERS_XX inh;
+    ZeroMemory(&inh, sizeof(inh));
+
+    if (!ReadProcessMemory(hProcess, pbModule + idh.e_lfanew, &inh, sizeof(inh), &cbRead)  // NT headers are read from base + e_lfanew
+        || cbRead < sizeof(inh)) {
+        DETOUR_TRACE(("ReadProcessMemory(inh@%p..%p) failed: %d\n",
+                      pbModule + idh.e_lfanew,
+                      pbModule + idh.e_lfanew + sizeof(inh),
+                      GetLastError()));
+        goto finish;
+    }
+
+    if (inh.OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC_XX) {  // image does not match the variant this copy of the function was compiled for
+        DETOUR_TRACE(("Wrong size image (%04x != %04x).\n",
+                      inh.OptionalHeader.Magic, IMAGE_NT_OPTIONAL_HDR_MAGIC_XX));
+        SetLastError(ERROR_INVALID_BLOCK);
+        goto finish;
+    }
+
+    // Zero out the bound table so loader doesn't use it instead of our new table.
+    inh.BOUND_DIRECTORY.VirtualAddress = 0;  // changes the local copy only; it is written back to the process near the end
+    inh.BOUND_DIRECTORY.Size = 0;
+
+    // Find the size of the mapped file.
+    DWORD dwSec = idh.e_lfanew +  // offset from the module base at which the section headers are read below
+        FIELD_OFFSET(IMAGE_NT_HEADERS_XX, OptionalHeader) +
+        inh.FileHeader.SizeOfOptionalHeader;
+
+    for (i = 0; i < inh.FileHeader.NumberOfSections; i++) {  // walk every section header of the remote image
+        IMAGE_SECTION_HEADER ish;
+        ZeroMemory(&ish, sizeof(ish));
+
+        if (!ReadProcessMemory(hProcess, pbModule + dwSec + sizeof(ish) * i, &ish,
+                               sizeof(ish), &cbRead)
+            || cbRead < sizeof(ish)) {
+
+            DETOUR_TRACE(("ReadProcessMemory(ish@%p..%p) failed: %d\n",
+                          pbModule + dwSec + sizeof(ish) * i,
+                          pbModule + dwSec + sizeof(ish) * (i + 1),
+                          GetLastError()));
+            goto finish;
+        }
+
+        DETOUR_TRACE(("ish[%d] : va=%08x sr=%d\n", i, ish.VirtualAddress, ish.SizeOfRawData));
+
+        // If the file didn't have an IAT_DIRECTORY, we assign it...
+        if (inh.IAT_DIRECTORY.VirtualAddress == 0 &&  // ...to the whole section that contains the import directory
+            inh.IMPORT_DIRECTORY.VirtualAddress >= ish.VirtualAddress &&
+            inh.IMPORT_DIRECTORY.VirtualAddress < ish.VirtualAddress + ish.SizeOfRawData) {
+
+            inh.IAT_DIRECTORY.VirtualAddress = ish.VirtualAddress;
+            inh.IAT_DIRECTORY.Size = ish.SizeOfRawData;
+        }
+    }
+
+    DETOUR_TRACE(("     Imports: %p..%p\n",
+                  (DWORD_PTR)pbModule + inh.IMPORT_DIRECTORY.VirtualAddress,
+                  (DWORD_PTR)pbModule + inh.IMPORT_DIRECTORY.VirtualAddress +
+                  inh.IMPORT_DIRECTORY.Size));
+
+    DWORD nOldDlls = inh.IMPORT_DIRECTORY.Size / sizeof(IMAGE_IMPORT_DESCRIPTOR);  // descriptor slots in the existing import directory
+    DWORD obRem = sizeof(IMAGE_IMPORT_DESCRIPTOR) * nDlls;  // offset just past the nDlls new descriptors (where the old ones are copied)
+    DWORD obOld = obRem + sizeof(IMAGE_IMPORT_DESCRIPTOR) * nOldDlls;  // offset just past the copied old descriptors
+    DWORD obTab = PadToDwordPtr(obOld);  // start of the thunk tables for the new DLLs
+    DWORD obDll = obTab + sizeof(DWORD_XX) * 4 * nDlls;  // four DWORD_XX slots are reserved per new DLL
+    DWORD obStr = obDll;  // start of the DLL name strings; advanced as names are copied in
+    cbNew = obStr;
+    for (n = 0; n < nDlls; n++) {  // add the padded length of every DLL name to the total size
+        cbNew += PadToDword((DWORD)strlen(plpDlls[n]) + 1);
+    }
+
+    _Analysis_assume_(cbNew >
+                      sizeof(IMAGE_IMPORT_DESCRIPTOR) * (nDlls + nOldDlls)
+                      + sizeof(DWORD_XX) * 4 * nDlls);
+    pbNew = new BYTE [cbNew];
+    if (pbNew == NULL) {  // NOTE(review): a plain `new` normally throws instead of returning NULL -- confirm how operator new is configured
+        DETOUR_TRACE(("new BYTE [cbNew] failed.\n"));
+        goto finish;
+    }
+    ZeroMemory(pbNew, cbNew);  // unused descriptor fields and padding therefore stay zero
+
+    PBYTE pbBase = pbModule;  // search start for the remote allocation; moved past code + data sizes when that is higher
+    PBYTE pbNext = pbBase
+        + inh.OptionalHeader.BaseOfCode
+        + inh.OptionalHeader.SizeOfCode
+        + inh.OptionalHeader.SizeOfInitializedData
+        + inh.OptionalHeader.SizeOfUninitializedData;
+    if (pbBase < pbNext) {
+        pbBase = pbNext;
+    }
+    DETOUR_TRACE(("pbBase = %p\n", pbBase));
+
+    PBYTE pbNewIid = FindAndAllocateNearBase(hProcess, pbModule, pbBase, cbNew);  // address in hProcess that will receive the new block
+    if (pbNewIid == NULL) {
+        DETOUR_TRACE(("FindAndAllocateNearBase failed.\n"));
+        goto finish;
+    }
+
+    PIMAGE_IMPORT_DESCRIPTOR piid = (PIMAGE_IMPORT_DESCRIPTOR)pbNew;  // descriptor view of the staging buffer
+    DWORD_XX *pt;
+
+    DWORD obBase = (DWORD)(pbNewIid - pbModule);  // offset of the new block from the module base; stored in the headers as an RVA
+    DWORD dwProtect = 0;
+
+    if (inh.IMPORT_DIRECTORY.VirtualAddress != 0) {
+        // Read the old import directory if it exists.
+        DETOUR_TRACE(("IMPORT_DIRECTORY perms=%x\n", dwProtect));  // NOTE(review): dwProtect is still 0 at this point, so this trace always prints 0
+
+        if (!ReadProcessMemory(hProcess,
+                               pbModule + inh.IMPORT_DIRECTORY.VirtualAddress,
+                               &piid[nDlls],
+                               nOldDlls * sizeof(IMAGE_IMPORT_DESCRIPTOR), &cbRead)
+            || cbRead < nOldDlls * sizeof(IMAGE_IMPORT_DESCRIPTOR)) {  // old descriptors land right after the nDlls new ones
+
+            DETOUR_TRACE(("ReadProcessMemory(imports) failed: %d\n", GetLastError()));
+            goto finish;
+        }
+    }
+
+    for (n = 0; n < nDlls; n++) {  // fill in one descriptor, two thunk pairs and one name per new DLL
+        HRESULT hrRet = StringCchCopyA((char*)pbNew + obStr, cbNew - obStr, plpDlls[n]);
+        if (FAILED(hrRet)) {
+            DETOUR_TRACE(("StringCchCopyA failed: %d\n", GetLastError()));
+            goto finish;
+        }
+
+        // After copying the string, we patch up the size "??" bits if any.
+        hrRet = ReplaceOptionalSizeA((char*)pbNew + obStr,
+                                     cbNew - obStr,
+                                     DETOURS_STRINGIFY(DETOURS_BITS_XX));
+        if (FAILED(hrRet)) {
+            DETOUR_TRACE(("ReplaceOptionalSizeA failed: %d\n", GetLastError()));
+            goto finish;
+        }
+
+        DWORD nOffset = obTab + (sizeof(DWORD_XX) * (4 * n));  // slots 0-1 of this DLL's four: the OriginalFirstThunk table
+        piid[n].OriginalFirstThunk = obBase + nOffset;
+        pt = ((DWORD_XX*)(pbNew + nOffset));
+        pt[0] = IMAGE_ORDINAL_FLAG_XX + 1;  // single entry; presumably "import ordinal #1" (assumes IMAGE_ORDINAL_FLAG_XX is the PE ordinal flag)
+        pt[1] = 0;  // zero entry ends the table
+
+        nOffset = obTab + (sizeof(DWORD_XX) * ((4 * n) + 2));  // slots 2-3: the FirstThunk table, filled identically
+        piid[n].FirstThunk = obBase + nOffset;
+        pt = ((DWORD_XX*)(pbNew + nOffset));
+        pt[0] = IMAGE_ORDINAL_FLAG_XX + 1;
+        pt[1] = 0;
+        piid[n].TimeDateStamp = 0;
+        piid[n].ForwarderChain = 0;
+        piid[n].Name = obBase + obStr;  // offset of the name string just copied, relative to the module base
+
+        obStr += PadToDword((DWORD)strlen(plpDlls[n]) + 1);  // same padding as in the size computation, so obStr ends equal to cbNew
+    }
+    _Analysis_assume_(obStr <= cbNew);
+
+#if 0
+    for (i = 0; i < nDlls + nOldDlls; i++) {
+        DETOUR_TRACE(("%8d. Look=%08x Time=%08x Fore=%08x Name=%08x Addr=%08x\n",
+                      i,
+                      piid[i].OriginalFirstThunk,
+                      piid[i].TimeDateStamp,
+                      piid[i].ForwarderChain,
+                      piid[i].Name,
+                      piid[i].FirstThunk));
+        if (piid[i].OriginalFirstThunk == 0 && piid[i].FirstThunk == 0) {
+            break;
+        }
+    }
+#endif
+
+    if (!WriteProcessMemory(hProcess, pbNewIid, pbNew, obStr, NULL)) {  // copy the finished staging buffer into the target process
+        DETOUR_TRACE(("WriteProcessMemory(iid) failed: %d\n", GetLastError()));
+        goto finish;
+    }
+
+    DETOUR_TRACE(("obBaseBef = %08x..%08x\n",
+                  inh.IMPORT_DIRECTORY.VirtualAddress,
+                  inh.IMPORT_DIRECTORY.VirtualAddress + inh.IMPORT_DIRECTORY.Size));
+    DETOUR_TRACE(("obBaseAft = %08x..%08x\n", obBase, obBase + obStr));
+
+    // If the file doesn't have an IAT_DIRECTORY, we create it...
+    if (inh.IAT_DIRECTORY.VirtualAddress == 0) {  // still unset, i.e. the section loop above found no match
+        inh.IAT_DIRECTORY.VirtualAddress = obBase;
+        inh.IAT_DIRECTORY.Size = cbNew;
+    }
+
+    inh.IMPORT_DIRECTORY.VirtualAddress = obBase;  // point the import directory at the new block
+    inh.IMPORT_DIRECTORY.Size = cbNew;
+
+    /////////////////////// Update the NT header for the new import directory.
+    //
+    if (!DetourVirtualProtectSameExecuteEx(hProcess, pbModule, inh.OptionalHeader.SizeOfHeaders,
+                                           PAGE_EXECUTE_READWRITE, &dwProtect)) {  // previous protection is saved in dwProtect for the restore below
+        DETOUR_TRACE(("VirtualProtectEx(inh) write failed: %d\n", GetLastError()));
+        goto finish;
+    }
+
+    inh.OptionalHeader.CheckSum = 0;  // clear the stored checksum before the modified headers are written back
+
+    if (!WriteProcessMemory(hProcess, pbModule, &idh, sizeof(idh), NULL)) {  // idh itself was not modified by this function
+        DETOUR_TRACE(("WriteProcessMemory(idh) failed: %d\n", GetLastError()));
+        goto finish;
+    }
+    DETOUR_TRACE(("WriteProcessMemory(idh:%p..%p)\n", pbModule, pbModule + sizeof(idh)));
+
+    if (!WriteProcessMemory(hProcess, pbModule + idh.e_lfanew, &inh, sizeof(inh), NULL)) {  // publishes the new import/IAT/bound directory entries
+        DETOUR_TRACE(("WriteProcessMemory(inh) failed: %d\n", GetLastError()));
+        goto finish;
+    }
+    DETOUR_TRACE(("WriteProcessMemory(inh:%p..%p)\n",
+                  pbModule + idh.e_lfanew,
+                  pbModule + idh.e_lfanew + sizeof(inh)));
+
+    if (!VirtualProtectEx(hProcess, pbModule, inh.OptionalHeader.SizeOfHeaders,
+                          dwProtect, &dwProtect)) {  // put back the header protection saved above
+        DETOUR_TRACE(("VirtualProtectEx(idh) restore failed: %d\n", GetLastError()));
+        goto finish;
+    }
+
+    fSucceeded = TRUE;
+    goto finish;  // success also leaves through the cleanup label defined in the first error branch
+}
diff --git a/src/dinput.c b/src/dinput.c
index 75579e6..6859120 100644
--- a/src/dinput.c
+++ b/src/dinput.c
@@ -2,13 +2,13 @@
 #include <dinput.h>
 #include "hook.h"
 
-typedef HRESULT (WINAPI *DInputCreateA)(HINSTANCE, DWORD, LPDIRECTINPUTA*, LPUNKNOWN);
-typedef HRESULT (WINAPI *DICreateDevice)(IDirectInputA*, REFGUID, LPDIRECTINPUTDEVICEA *, LPUNKNOWN);
-typedef HRESULT (WINAPI *DIDSetCooperativeLevel)(IDirectInputDeviceA *, HWND, DWORD);
+typedef HRESULT (WINAPI *DIRECTINPUTCREATEAPROC)(HINSTANCE, DWORD, LPDIRECTINPUTA*, LPUNKNOWN);
+typedef HRESULT (WINAPI *DICREATEDEVICEPROC)(IDirectInputA*, REFGUID, LPDIRECTINPUTDEVICEA *, LPUNKNOWN);
+typedef HRESULT (WINAPI *DIDSETCOOPERATIVELEVELPROC)(IDirectInputDeviceA *, HWND, DWORD);
 
-static DInputCreateA DInputCreateA_;
-static DICreateDevice DICreateDevice_;
-static DIDSetCooperativeLevel DIDSetCooperativeLevel_;
+static DIRECTINPUTCREATEAPROC DInputCreateA;
+static DICREATEDEVICEPROC DICreateDevice;
+static DIDSETCOOPERATIVELEVELPROC DIDSetCooperativeLevel;
 
 static PROC HookFunc(PROC *orgFunc, PROC newFunc)
 {
@@ -27,17 +27,17 @@ static PROC HookFunc(PROC *orgFunc, PROC newFunc)
 
 static HRESULT WINAPI fake_DIDSetCooperativeLevel(IDirectInputDeviceA *This, HWND hwnd, DWORD dwFlags)
 {
-    return DIDSetCooperativeLevel_(This, hwnd, DISCL_BACKGROUND | DISCL_NONEXCLUSIVE);
+    return DIDSetCooperativeLevel(This, hwnd, DISCL_BACKGROUND | DISCL_NONEXCLUSIVE);
 }
 
 static HRESULT WINAPI fake_DICreateDevice(IDirectInputA *This, REFGUID rguid, LPDIRECTINPUTDEVICEA * lplpDIDevice, LPUNKNOWN pUnkOuter)
 {
-    HRESULT result = DICreateDevice_(This, rguid, lplpDIDevice, pUnkOuter);
+    HRESULT result = DICreateDevice(This, rguid, lplpDIDevice, pUnkOuter);
 
-    if (SUCCEEDED(result) && !DIDSetCooperativeLevel_)
+    if (SUCCEEDED(result) && !DIDSetCooperativeLevel)
     {
-        DIDSetCooperativeLevel_ = 
-            (DIDSetCooperativeLevel)HookFunc(
+        DIDSetCooperativeLevel = 
+            (DIDSETCOOPERATIVELEVELPROC)HookFunc(
                 (PROC *)&(*lplpDIDevice)->lpVtbl->SetCooperativeLevel, (PROC)fake_DIDSetCooperativeLevel);
     }
 
@@ -46,22 +46,33 @@ static HRESULT WINAPI fake_DICreateDevice(IDirectInputA *This, REFGUID rguid, LP
 
 static HRESULT WINAPI fake_DirectInputCreateA(HINSTANCE hinst, DWORD dwVersion, LPDIRECTINPUTA* lplpDirectInput, LPUNKNOWN punkOuter)
 {
-    DInputCreateA_ = (DInputCreateA)GetProcAddress(GetModuleHandle("dinput.dll"), "DirectInputCreateA");
-    if (!DInputCreateA_)
+    DInputCreateA = 
+        (DIRECTINPUTCREATEAPROC)GetProcAddress(GetModuleHandle("dinput.dll"), "DirectInputCreateA");
+
+    if (!DInputCreateA)
         return DIERR_GENERIC;
 
-    HRESULT result = DInputCreateA_(hinst, dwVersion, lplpDirectInput, punkOuter);
+    HRESULT result = DInputCreateA(hinst, dwVersion, lplpDirectInput, punkOuter);
 
-    if (SUCCEEDED(result) && !DICreateDevice_)
+    if (SUCCEEDED(result) && !DICreateDevice)
     {
-        DICreateDevice_ =
-            (DICreateDevice)HookFunc((PROC *)&(*lplpDirectInput)->lpVtbl->CreateDevice, (PROC)fake_DICreateDevice);
+        DICreateDevice =
+            (DICREATEDEVICEPROC)HookFunc((PROC *)&(*lplpDirectInput)->lpVtbl->CreateDevice, (PROC)fake_DICreateDevice);
     }
 
     return result;
 }
 
-void dinput_init()
+void DInput_Hook()
 {
     Hook_PatchIAT(GetModuleHandle(NULL), "dinput.dll", "DirectInputCreateA", (PROC)fake_DirectInputCreateA);
 }
+
+void DInput_UnHook()  // Counterpart of DInput_Hook: patches the same IAT entry with the address dinput.dll itself exports.
+{
+    Hook_PatchIAT(  // only the import entry is handled here; the vtable entries replaced through HookFunc are not touched
+        GetModuleHandle(NULL), 
+        "dinput.dll", 
+        "DirectInputCreateA", 
+        (PROC)GetProcAddress(GetModuleHandle("dinput.dll"), "DirectInputCreateA"));  // NOTE(review): NULL if dinput.dll is not loaded -- confirm Hook_PatchIAT tolerates that
+}
diff --git a/src/hook.c b/src/hook.c
index cd3f847..8faa796 100644
--- a/src/hook.c
+++ b/src/hook.c
@@ -4,7 +4,12 @@
 #include "mouse.h"
 #include "hook.h"
 
+#ifdef _MSC_VER
+#include "detours.h"
+#endif
+
 BOOL Hook_Active;
+int HookingMethod = 1;
 GETCURSORPOSPROC real_GetCursorPos = GetCursorPos;
 CLIPCURSORPROC real_ClipCursor = ClipCursor;
 SHOWCURSORPROC real_ShowCursor = ShowCursor;
@@ -86,78 +91,41 @@ void Hook_PatchIAT(HMODULE hMod, char *moduleName, char *functionName, PROC newF
     }
 }
 
-PROC Hook_HotPatch(PROC function, PROC newFunction)
+void Hook_Create(char *moduleName, char *functionName, PROC newFunction, PROC *function)  // Installs one hook using the method selected by HookingMethod.
 {
-    PROC result = function;
-
-    if (!function)
-        return result;
-
-    unsigned short *bytes = (unsigned short *)function;
-
-    if (*bytes == 0x25FF) // JMP DWORD PTR
+#ifdef _MSC_VER
+    if (HookingMethod == 2)  // method 2: Detours attach; this branch is only compiled when _MSC_VER is defined
     {
-        char *address = (char *)function;
-        DWORD oldProtect;
-
-        if (VirtualProtect(address, 8, PAGE_EXECUTE_READWRITE, &oldProtect))
-        {
-            if (memcmp(address + 6, (const char[]) { 0xCC, 0xCC }, 2) == 0 ||
-                memcmp(address + 6, (const char[]) { 0x90, 0x90 }, 2) == 0)
-            {
-                memmove(address + 2, address, 6);
-                *((WORD *)(&address[0])) = 0xFF8B; // mov edi, edi
-            }
-
-            VirtualProtect(address, 8, oldProtect, &oldProtect);
-        }
+        DetourTransactionBegin();  // return values of the Detour* calls are not checked
+        DetourUpdateThread(GetCurrentThread());
+        DetourAttach((PVOID *)function, (PVOID)newFunction);  // moduleName and functionName are not used by this method
+        DetourTransactionCommit();
     }
+#endif
 
-    if (*bytes == 0xFF8B) // mov edi, edi
-    {
-        char *address = ((char *)function) - 5;
-        DWORD oldProtect;
-
-        if (VirtualProtect(address, 7, PAGE_EXECUTE_READWRITE, &oldProtect))
-        {
-            if (memcmp(address, (const char[]) { 0xCC, 0xCC, 0xCC, 0xCC, 0xCC }, 5) == 0 ||
-                memcmp(address, (const char[]) { 0x90, 0x90, 0x90, 0x90, 0x90 }, 5) == 0)
-            {
-                address[0] = 0xE9; // long jump
-                *((DWORD *)(&address[1])) = ((char *)newFunction) - address - 5;
-                *((WORD *)(&address[5])) = 0xF9EB; // short jump to our long jump
-
-                result = (PROC)(((char *)function) + 2);
-            }
-
-            VirtualProtect(address, 7, oldProtect, &oldProtect);
-        }
-    }
-
-    return result;
+    if (HookingMethod == 1)  // method 1: patch the import table of the module returned by GetModuleHandle(NULL)
+        Hook_PatchIAT(GetModuleHandle(NULL), moduleName, functionName, newFunction);  // `function` is neither read nor written on this path
 }
 
-void Hook_TryHotPatch(char *moduleName, char *functionName, PROC newFunction, PROC *function)
+void Hook_Revert(char *moduleName, char *functionName, PROC newFunction, PROC *function)  // Removes one hook using the method selected by HookingMethod.
 {
-    FARPROC org = GetProcAddress(GetModuleHandle(moduleName), functionName);
-    if (ddraw->hotPatch && org)
+#ifdef _MSC_VER
+    if (HookingMethod == 2)  // method 2: Detours detach; this branch is only compiled when _MSC_VER is defined
     {
-        *function = Hook_HotPatch(org, newFunction);
-
-        if (*function == org) // hotpatch failed...
-        {
-            Hook_PatchIAT(GetModuleHandle(NULL), moduleName, functionName, newFunction);
-
-            if (ddraw->bnetHack)
-                Hook_PatchIAT(GetModuleHandle("storm.dll"), moduleName, functionName, newFunction);
-        }
+        DetourTransactionBegin();  // return values of the Detour* calls are not checked
+        DetourUpdateThread(GetCurrentThread());
+        DetourDetach((PVOID *)function, (PVOID)newFunction);  // same pointer/detour pair that Hook_Create passes to DetourAttach
+        DetourTransactionCommit();
     }
-    else
-    {
-        Hook_PatchIAT(GetModuleHandle(NULL), moduleName, functionName, newFunction);
+#endif
 
-        if (ddraw->bnetHack)
-            Hook_PatchIAT(GetModuleHandle("storm.dll"), moduleName, functionName, newFunction);
+    if (HookingMethod == 1)  // method 1: patch the import entry again, this time with the module's own export
+    {
+        Hook_PatchIAT(
+            GetModuleHandle(NULL), 
+            moduleName, 
+            functionName, 
+            GetProcAddress(GetModuleHandle(moduleName), functionName));  // newFunction and function are not used on this path
     }
 }
 
@@ -167,27 +135,54 @@ void Hook_Init()
     {
         Hook_Active = TRUE;
 
-        Hook_TryHotPatch("user32.dll", "GetCursorPos", (PROC)fake_GetCursorPos, (PROC *)&real_GetCursorPos);
-        Hook_TryHotPatch("user32.dll", "ClipCursor", (PROC)fake_ClipCursor, (PROC *)&real_ClipCursor);
-        Hook_TryHotPatch("user32.dll", "ShowCursor", (PROC)fake_ShowCursor, (PROC *)&real_ShowCursor);
-        Hook_TryHotPatch("user32.dll", "SetCursor", (PROC)fake_SetCursor, (PROC *)&real_SetCursor);
-        Hook_TryHotPatch("user32.dll", "GetWindowRect", (PROC)fake_GetWindowRect, (PROC *)&real_GetWindowRect);
-        Hook_TryHotPatch("user32.dll", "GetClientRect", (PROC)fake_GetClientRect, (PROC *)&real_GetClientRect);
-        Hook_TryHotPatch("user32.dll", "ClientToScreen", (PROC)fake_ClientToScreen, (PROC *)&real_ClientToScreen);
-        Hook_TryHotPatch("user32.dll", "ScreenToClient", (PROC)fake_ScreenToClient, (PROC *)&real_ScreenToClient);
-        Hook_TryHotPatch("user32.dll", "SetCursorPos", (PROC)fake_SetCursorPos, (PROC *)&real_SetCursorPos);
-        Hook_TryHotPatch("user32.dll", "GetClipCursor", (PROC)fake_GetClipCursor, (PROC *)&real_GetClipCursor);
-        Hook_TryHotPatch("user32.dll", "WindowFromPoint", (PROC)fake_WindowFromPoint, (PROC *)&real_WindowFromPoint);
-        Hook_TryHotPatch("user32.dll", "GetCursorInfo", (PROC)fake_GetCursorInfo, (PROC *)&real_GetCursorInfo);
-        Hook_TryHotPatch("user32.dll", "GetSystemMetrics", (PROC)fake_GetSystemMetrics, (PROC *)&real_GetSystemMetrics);
-        Hook_TryHotPatch("user32.dll", "SetWindowPos", (PROC)fake_SetWindowPos, (PROC *)&real_SetWindowPos);
-        Hook_TryHotPatch("user32.dll", "MoveWindow", (PROC)fake_MoveWindow, (PROC *)&real_MoveWindow);
-        Hook_TryHotPatch("user32.dll", "SendMessageA", (PROC)fake_SendMessageA, (PROC *)&real_SendMessageA);
-        Hook_TryHotPatch("user32.dll", "SetWindowLongA", (PROC)fake_SetWindowLongA, (PROC *)&real_SetWindowLongA);
-        Hook_TryHotPatch("user32.dll", "EnableWindow", (PROC)fake_EnableWindow, (PROC *)&real_EnableWindow);
-        Hook_TryHotPatch("user32.dll", "CreateWindowExA", (PROC)fake_CreateWindowExA, (PROC *)&real_CreateWindowExA);
-        Hook_TryHotPatch("user32.dll", "DestroyWindow", (PROC)fake_DestroyWindow, (PROC *)&real_DestroyWindow);
-
-        //Hook_PatchIAT(GetModuleHandle(NULL), "user32.dll", "GetCursorPos", (PROC)fake_GetCursorPos);
+        Hook_Create("user32.dll", "GetCursorPos", (PROC)fake_GetCursorPos, (PROC *)&real_GetCursorPos);
+        Hook_Create("user32.dll", "ClipCursor", (PROC)fake_ClipCursor, (PROC *)&real_ClipCursor);
+        Hook_Create("user32.dll", "ShowCursor", (PROC)fake_ShowCursor, (PROC *)&real_ShowCursor);
+        Hook_Create("user32.dll", "SetCursor", (PROC)fake_SetCursor, (PROC *)&real_SetCursor);
+        Hook_Create("user32.dll", "GetWindowRect", (PROC)fake_GetWindowRect, (PROC *)&real_GetWindowRect);
+        Hook_Create("user32.dll", "GetClientRect", (PROC)fake_GetClientRect, (PROC *)&real_GetClientRect);
+        Hook_Create("user32.dll", "ClientToScreen", (PROC)fake_ClientToScreen, (PROC *)&real_ClientToScreen);
+        Hook_Create("user32.dll", "ScreenToClient", (PROC)fake_ScreenToClient, (PROC *)&real_ScreenToClient);
+        Hook_Create("user32.dll", "SetCursorPos", (PROC)fake_SetCursorPos, (PROC *)&real_SetCursorPos);
+        Hook_Create("user32.dll", "GetClipCursor", (PROC)fake_GetClipCursor, (PROC *)&real_GetClipCursor);
+        Hook_Create("user32.dll", "WindowFromPoint", (PROC)fake_WindowFromPoint, (PROC *)&real_WindowFromPoint);
+        Hook_Create("user32.dll", "GetCursorInfo", (PROC)fake_GetCursorInfo, (PROC *)&real_GetCursorInfo);
+        Hook_Create("user32.dll", "GetSystemMetrics", (PROC)fake_GetSystemMetrics, (PROC *)&real_GetSystemMetrics);
+        Hook_Create("user32.dll", "SetWindowPos", (PROC)fake_SetWindowPos, (PROC *)&real_SetWindowPos);
+        Hook_Create("user32.dll", "MoveWindow", (PROC)fake_MoveWindow, (PROC *)&real_MoveWindow);
+        Hook_Create("user32.dll", "SendMessageA", (PROC)fake_SendMessageA, (PROC *)&real_SendMessageA);
+        Hook_Create("user32.dll", "SetWindowLongA", (PROC)fake_SetWindowLongA, (PROC *)&real_SetWindowLongA);
+        Hook_Create("user32.dll", "EnableWindow", (PROC)fake_EnableWindow, (PROC *)&real_EnableWindow);
+        Hook_Create("user32.dll", "CreateWindowExA", (PROC)fake_CreateWindowExA, (PROC *)&real_CreateWindowExA);
+        Hook_Create("user32.dll", "DestroyWindow", (PROC)fake_DestroyWindow, (PROC *)&real_DestroyWindow);
+    }
+}
+
+void Hook_Exit()  // Reverts every user32.dll hook in the list below via Hook_Revert; does nothing unless Hook_Active is set.
+{
+    if (Hook_Active)  // guard: skip when hooks are not currently marked active
+    {
+        Hook_Active = FALSE;  // cleared before the reverts, so a repeated call is a no-op
+
+        Hook_Revert("user32.dll", "GetCursorPos", (PROC)fake_GetCursorPos, (PROC *)&real_GetCursorPos);  // each call names the fake_/real_ pair for one function
+        Hook_Revert("user32.dll", "ClipCursor", (PROC)fake_ClipCursor, (PROC *)&real_ClipCursor);
+        Hook_Revert("user32.dll", "ShowCursor", (PROC)fake_ShowCursor, (PROC *)&real_ShowCursor);
+        Hook_Revert("user32.dll", "SetCursor", (PROC)fake_SetCursor, (PROC *)&real_SetCursor);
+        Hook_Revert("user32.dll", "GetWindowRect", (PROC)fake_GetWindowRect, (PROC *)&real_GetWindowRect);
+        Hook_Revert("user32.dll", "GetClientRect", (PROC)fake_GetClientRect, (PROC *)&real_GetClientRect);
+        Hook_Revert("user32.dll", "ClientToScreen", (PROC)fake_ClientToScreen, (PROC *)&real_ClientToScreen);
+        Hook_Revert("user32.dll", "ScreenToClient", (PROC)fake_ScreenToClient, (PROC *)&real_ScreenToClient);
+        Hook_Revert("user32.dll", "SetCursorPos", (PROC)fake_SetCursorPos, (PROC *)&real_SetCursorPos);
+        Hook_Revert("user32.dll", "GetClipCursor", (PROC)fake_GetClipCursor, (PROC *)&real_GetClipCursor);
+        Hook_Revert("user32.dll", "WindowFromPoint", (PROC)fake_WindowFromPoint, (PROC *)&real_WindowFromPoint);
+        Hook_Revert("user32.dll", "GetCursorInfo", (PROC)fake_GetCursorInfo, (PROC *)&real_GetCursorInfo);
+        Hook_Revert("user32.dll", "GetSystemMetrics", (PROC)fake_GetSystemMetrics, (PROC *)&real_GetSystemMetrics);
+        Hook_Revert("user32.dll", "SetWindowPos", (PROC)fake_SetWindowPos, (PROC *)&real_SetWindowPos);
+        Hook_Revert("user32.dll", "MoveWindow", (PROC)fake_MoveWindow, (PROC *)&real_MoveWindow);
+        Hook_Revert("user32.dll", "SendMessageA", (PROC)fake_SendMessageA, (PROC *)&real_SendMessageA);
+        Hook_Revert("user32.dll", "SetWindowLongA", (PROC)fake_SetWindowLongA, (PROC *)&real_SetWindowLongA);
+        Hook_Revert("user32.dll", "EnableWindow", (PROC)fake_EnableWindow, (PROC *)&real_EnableWindow);
+        Hook_Revert("user32.dll", "CreateWindowExA", (PROC)fake_CreateWindowExA, (PROC *)&real_CreateWindowExA);
+        Hook_Revert("user32.dll", "DestroyWindow", (PROC)fake_DestroyWindow, (PROC *)&real_DestroyWindow);  // NOTE(review): list should stay in sync with the Hook_Create calls in Hook_Init
     }
 }
diff --git a/src/main.c b/src/main.c
index a64152d..93a70a8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -36,7 +36,8 @@
 BOOL screenshot(struct IDirectDrawSurfaceImpl *);
 void Settings_Load();
 void Settings_Save(RECT *lpRect, int windowState);
-void dinput_init();
+void DInput_Hook();
+void DInput_UnHook();
 
 IDirectDrawImpl *ddraw = NULL;
 
@@ -55,6 +56,33 @@ BOOL WINAPI DllMain(HANDLE hDll, DWORD dwReason, LPVOID lpReserved)
     {
         case DLL_PROCESS_ATTACH:
         {
+            char buf[1024];
+            if (GetEnvironmentVariable("__COMPAT_LAYER", buf, sizeof(buf)))
+            {
+                char *s = strtok(buf, " ");
+                while (s) 
+                {
+                    if (strcmpi(s, "WIN95") == 0 || strcmpi(s, "WIN98") == 0 || strcmpi(s, "NT4SP5") == 0)
+                    {
+                        char mes[128] = { 0 };
+
+                        _snprintf(
+                            mes, 
+                            sizeof(mes), 
+                            "Please disable the '%s' compatibility mode for all game executables and "
+                                "then try to start the game again.",
+                            s);
+
+                        MessageBoxA(NULL, mes, "Compatibility modes detected - cnc-ddraw", MB_OK);
+
+                        //return FALSE;
+                        break;
+                    }
+
+                    s = strtok(NULL, " ");
+                }
+            }
+
             printf("cnc-ddraw DLL_PROCESS_ATTACH\n");
             
             //SetProcessPriorityBoost(GetCurrentProcess(), TRUE);
@@ -88,7 +116,7 @@ BOOL WINAPI DllMain(HANDLE hDll, DWORD dwReason, LPVOID lpReserved)
             }
             
             timeBeginPeriod(1);
-            dinput_init();
+            DInput_Hook();
             break;
         }
         case DLL_PROCESS_DETACH:
@@ -98,6 +126,8 @@ BOOL WINAPI DllMain(HANDLE hDll, DWORD dwReason, LPVOID lpReserved)
             Settings_Save(&WindowRect, WindowState);
 
             timeEndPeriod(1);
+            Hook_Exit();
+            DInput_UnHook();
             break;
         }
     }
@@ -278,14 +308,14 @@ void UpdateBnetPos(int newX, int newY)
 
 HRESULT __stdcall ddraw_Compact(IDirectDrawImpl *This)
 {
-    printf("DirectDraw::Compact(This=%p) ???\n", This);
+    printf("??? DirectDraw::Compact(This=%p)\n", This);
 
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_DuplicateSurface(IDirectDrawImpl *This, LPDIRECTDRAWSURFACE src, LPDIRECTDRAWSURFACE *dest)
 {
-    printf("DirectDraw::DuplicateSurface(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::DuplicateSurface(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -430,13 +460,13 @@ HRESULT __stdcall ddraw_EnumDisplayModes(IDirectDrawImpl *This, DWORD dwFlags, L
 
 HRESULT __stdcall ddraw_EnumSurfaces(IDirectDrawImpl *This, DWORD a, LPDDSURFACEDESC b, LPVOID c, LPDDENUMSURFACESCALLBACK d)
 {
-    printf("DirectDraw::EnumSurfaces(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::EnumSurfaces(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_FlipToGDISurface(IDirectDrawImpl *This)
 {
-    printf("DirectDraw::FlipToGDISurface(This=%p) ???\n", This);
+    printf("??? DirectDraw::FlipToGDISurface(This=%p)\n", This);
 
     return DD_OK;
 }
@@ -473,44 +503,44 @@ HRESULT __stdcall ddraw_GetCaps(IDirectDrawImpl *This, LPDDCAPS lpDDDriverCaps,
 
 HRESULT __stdcall ddraw_GetDisplayMode(IDirectDrawImpl *This, LPDDSURFACEDESC a)
 {
-    printf("DirectDraw::GetDisplayMode(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::GetDisplayMode(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_GetFourCCCodes(IDirectDrawImpl *This, LPDWORD a, LPDWORD b)
 {
-    printf("DirectDraw::GetFourCCCodes(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::GetFourCCCodes(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_GetGDISurface(IDirectDrawImpl *This, LPDIRECTDRAWSURFACE *a)
 {
-    printf("DirectDraw::GetGDISurface(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::GetGDISurface(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_GetMonitorFrequency(IDirectDrawImpl *This, LPDWORD a)
 {
-    printf("DirectDraw::GetMonitorFrequency(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::GetMonitorFrequency(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_GetScanLine(IDirectDrawImpl *This, LPDWORD a)
 {
-    printf("DirectDraw::GetScanLine(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::GetScanLine(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_GetVerticalBlankStatus(IDirectDrawImpl *This, LPBOOL lpbIsInVB)
 {
-    printf("DirectDraw::GetVerticalBlankStatus(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::GetVerticalBlankStatus(This=%p, ...)\n", This);
     *lpbIsInVB = TRUE;
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_Initialize(IDirectDrawImpl *This, GUID *a)
 {
-    printf("DirectDraw::Initialize(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::Initialize(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -893,8 +923,6 @@ HRESULT __stdcall ddraw_SetDisplayMode(IDirectDrawImpl *This, DWORD width, DWORD
         RedrawWindow(This->hWnd, NULL, NULL, RDW_ERASE | RDW_INVALIDATE);
     }
 
-    InterlockedExchange(&ddraw->minimized, FALSE);
-    
     if(This->render.thread == NULL)
     {
         InterlockedExchange(&ddraw->render.paletteUpdated, TRUE);
@@ -975,6 +1003,7 @@ LRESULT CALLBACK WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
         case WM_NCLBUTTONUP:
         case WM_NCACTIVATE:
         case WM_NCPAINT:
+        case WM_NCHITTEST:
         {
             return DefWindowProc(hWnd, uMsg, wParam, lParam);
         }
@@ -1289,8 +1318,6 @@ LRESULT CALLBACK WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
 
                         mouse_lock();
                     }
-
-                    InterlockedExchange(&ddraw->minimized, FALSE);
                 }
 
                 if (!ddraw->handlemouse)
@@ -1314,8 +1341,6 @@ LRESULT CALLBACK WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
                         ShowWindow(ddraw->hWnd, SW_MINIMIZE);
                         ChangeDisplaySettings(&ddraw->mode, CDS_FULLSCREEN);
                     }
-
-                    InterlockedExchange(&ddraw->minimized, TRUE);
                 }
             }
 
@@ -1541,7 +1566,7 @@ HRESULT __stdcall ddraw_SetCooperativeLevel(IDirectDrawImpl *This, HWND hWnd, DW
 HRESULT __stdcall ddraw_WaitForVerticalBlank(IDirectDrawImpl *This, DWORD a, HANDLE b)
 {
 #if _DEBUG_X
-    printf("DirectDraw::WaitForVerticalBlank(This=%p, ...) ???\n", This);
+    printf("??? DirectDraw::WaitForVerticalBlank(This=%p, ...)\n", This);
 #endif
     return DD_OK;
 }
diff --git a/src/palette.c b/src/palette.c
index d2828be..be87311 100644
--- a/src/palette.c
+++ b/src/palette.c
@@ -72,7 +72,7 @@ HRESULT __stdcall ddraw_palette_SetEntries(IDirectDrawPaletteImpl *This, DWORD d
 
 HRESULT __stdcall ddraw_palette_QueryInterface(IDirectDrawPaletteImpl *This, REFIID riid, void **obj)
 {
-    printf("DirectDrawPalette::QueryInterface(This=%p, riid=%08X, obj=%p) ???\n", This, (unsigned int)riid, obj);
+    printf("??? DirectDrawPalette::QueryInterface(This=%p, riid=%08X, obj=%p)\n", This, (unsigned int)riid, obj);
     return S_OK;
 }
 
@@ -105,13 +105,13 @@ ULONG __stdcall ddraw_palette_Release(IDirectDrawPaletteImpl *This)
 
 HRESULT __stdcall ddraw_palette_GetCaps(IDirectDrawPaletteImpl *This, LPDWORD caps)
 {
-    printf("DirectDrawPalette::GetCaps(This=%p, caps=%p) ???\n", This, caps);
+    printf("??? DirectDrawPalette::GetCaps(This=%p, caps=%p)\n", This, caps);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_palette_Initialize(IDirectDrawPaletteImpl *This, LPDIRECTDRAW lpDD, DWORD dw, LPPALETTEENTRY paent)
 {
-    printf("DirectDrawPalette::Initialize(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawPalette::Initialize(This=%p, ...)\n", This);
     return DD_OK;
 }
 
diff --git a/src/render.c b/src/render.c
index c7218d8..a4d3249 100644
--- a/src/render.c
+++ b/src/render.c
@@ -46,7 +46,7 @@ static GLuint FrameBufferTexId;
 static GLuint ScaleVBOs[3], ScaleVAO;
 static BOOL UseOpenGL;
 static BOOL AdjustAlignment;
-static BOOL UseBilinear;
+static BOOL FilterBilinear;
 
 static HGLRC CreateContext(HDC hdc);
 static void SetMaxFPS();
@@ -178,7 +178,7 @@ static void BuildPrograms()
         else
             OpenGL_GotVersion3 = FALSE;
 
-        UseBilinear = strstr(ddraw->shader, "\\bilinear.glsl") != 0;
+        FilterBilinear = strstr(ddraw->shader, "bilinear.glsl") != 0;
     }
 
     if (OpenGL_GotVersion2 && !MainProgram)
@@ -488,8 +488,8 @@ static void InitScaleProgram()
 
     glGenTextures(1, &FrameBufferTexId);
     glBindTexture(GL_TEXTURE_2D, FrameBufferTexId);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, UseBilinear ? GL_LINEAR : GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, UseBilinear ? GL_LINEAR : GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, FilterBilinear ? GL_LINEAR : GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, FilterBilinear ? GL_LINEAR : GL_NEAREST);
     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, SurfaceTexWidth, SurfaceTexHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
 
     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, FrameBufferTexId, 0);
diff --git a/src/render_d3d9.c b/src/render_d3d9.c
index 361241c..4dd47c4 100644
--- a/src/render_d3d9.c
+++ b/src/render_d3d9.c
@@ -304,12 +304,6 @@ DWORD WINAPI render_d3d9_main(void)
 
     while (ddraw->render.run && WaitForSingleObject(ddraw->render.sem, 200) != WAIT_FAILED)
     {
-        if (InterlockedExchangeAdd(&ddraw->minimized, 0))
-        {
-            Sleep(500);
-            continue;
-        }
-
 #if _DEBUG
         DrawFrameInfoStart();
 #endif
diff --git a/src/settings.c b/src/settings.c
index 9a3ab4b..61b4c71 100644
--- a/src/settings.c
+++ b/src/settings.c
@@ -5,6 +5,7 @@
 #include "main.h"
 #include "opengl.h"
 #include "render_d3d9.h"
+#include "hook.h"
 
 static char SettingsIniPath[MAX_PATH];
 static char ProcessFileName[96];
@@ -41,7 +42,6 @@ void Settings_Load()
     ddraw->noactivateapp = GetBool("noactivateapp", FALSE);
     ddraw->vhack = GetBool("vhack", FALSE);
     ddraw->accurateTimers = GetBool("accuratetimers", FALSE);
-    ddraw->hotPatch = GetBool("hotpatch", FALSE);
     ddraw->bnetHack = GetBool("bnetHack", TRUE);
 
     WindowRect.right = GetInt("width", 0);
@@ -49,6 +49,10 @@ void Settings_Load()
     WindowRect.left = GetInt("posX", -32000);
     WindowRect.top = GetInt("posY", -32000);
 
+#ifndef _DEBUG
+    HookingMethod = GetInt("hook", 1);
+#endif
+    
     ddraw->render.maxfps = GetInt("maxfps", 125);
 
     if (ddraw->accurateTimers || ddraw->vsync)
@@ -276,9 +280,9 @@ static void CreateSettingsIni()
             "; Force CPU0 affinity, avoids crashes/freezing, *might* have a performance impact\n"
             "singlecpu=true\n"
             "\n"
-            "; Use hotpatching rather than IAT hooking\n"
+            "; Windows API Hooking, Possible values: 0 = disabled, 1 = IAT Hooking, 2 = Microsoft Detours\n"
             "; Note: Can be used to fix issues related to new features added by cnc-ddraw such as windowed mode or stretching\n"
-            "hotpatch=false\n"
+            "hook=1\n"
             "\n"
             "; Workaround for battle.net on Diablo and Warcraft 2 BNE\n"
             "bnetHack=true\n"
@@ -323,6 +327,9 @@ static void CreateSettingsIni()
             "[olwin]\n"
             "noactivateapp=true\n"
             "maxgameticks=60\n"
+            "hook=2\n"
+            "handlemouse=false\n"
+            "renderer=gdi\n"
             "\n"
             "; Dark Reign: The Future of War\n"
             "[DKReign]\n"
@@ -356,8 +363,9 @@ static void CreateSettingsIni()
             "maxfps=59\n"
             "accuratetimers=true\n"
             "\n"
-            "; Command & Conquer: Tiberian Sun\n"
+            "; Command & Conquer: Tiberian Sun / Command & Conquer: Red Alert 2\n"
             "[game]\n"
+            "checkfile=.\\blowfish.dll\n"
             "noactivateapp=true\n"
             "handlemouse=false\n"
             "maxfps=60\n"
@@ -398,6 +406,11 @@ static void CreateSettingsIni()
             "handlemouse=false\n"
             "maxfps=60\n"
             "\n"
+            "; Command & Conquer: Red Alert 2: Yuri's Revenge - XWIS\n"
+            "[Yuri's Revenge]\n"
+            "noactivateapp=true\n"
+            "handlemouse=false\n"
+            "maxfps=60\n\n"
             "; Diablo\n"
             "[Diablo]\n"
             "bnetHack=true\n"
@@ -416,7 +429,17 @@ static DWORD GetString(LPCSTR key, LPCSTR defaultValue, LPSTR outString, DWORD o
 {
     DWORD s = GetPrivateProfileStringA(ProcessFileName, key, "", outString, outSize, SettingsIniPath);
     if (s > 0)
-        return s;
+    {
+        /* A section may name a "checkfile": its values only count while that file exists,
+           otherwise the lookup falls through to the global [ddraw] section below. */
+        char buf[MAX_PATH] = { 0 };
+
+        if (GetPrivateProfileStringA(ProcessFileName, "checkfile", "", buf, sizeof(buf), SettingsIniPath) == 0)
+            return s;
+
+        if (GetFileAttributesA(buf) != INVALID_FILE_ATTRIBUTES)
+            return s;
+    }
 
     return GetPrivateProfileStringA("ddraw", key, defaultValue, outString, outSize, SettingsIniPath);
 }
diff --git a/src/surface.c b/src/surface.c
index e0e5932..749792f 100644
--- a/src/surface.c
+++ b/src/surface.c
@@ -34,11 +34,11 @@ void *pvBmpBits;
 
 HRESULT __stdcall ddraw_surface_QueryInterface(IDirectDrawSurfaceImpl *This, REFIID riid, void **obj)
 {
-    printf("DirectDrawSurface::QueryInterface(This=%p, riid=%08X, obj=%p) ???\n", This, (unsigned int)riid, obj);
+    printf("??? DirectDrawSurface::QueryInterface(This=%p, riid=%08X, obj=%p)\n", This, (unsigned int)riid, obj);
 
     if (riid && !IsEqualGUID(&IID_IDirectDrawSurface, riid))
     {
-        printf("  IID_IDirectDrawSurfaceX\n");
+        printf("  GUID = %08X\n", (unsigned int)((const GUID *)riid)->Data1);
 
         IDirectDrawSurface_AddRef(This);
     }
@@ -95,14 +95,14 @@ ULONG __stdcall ddraw_surface_Release(IDirectDrawSurfaceImpl *This)
 
 HRESULT __stdcall ddraw_surface_AddAttachedSurface(IDirectDrawSurfaceImpl *This, LPDIRECTDRAWSURFACE lpDDSurface)
 {
-    printf("DirectDrawSurface::AddAttachedSurface(This=%p, lpDDSurface=%p) ???\n", This, lpDDSurface);
+    printf("??? DirectDrawSurface::AddAttachedSurface(This=%p, lpDDSurface=%p)\n", This, lpDDSurface);
     IDirectDrawSurface_AddRef(lpDDSurface);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_surface_AddOverlayDirtyRect(IDirectDrawSurfaceImpl *This, LPRECT a)
 {
-    printf("DirectDrawSurface::AddOverlayDirtyRect(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawSurface::AddOverlayDirtyRect(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -213,8 +213,18 @@ HRESULT __stdcall ddraw_surface_Blt(IDirectDrawSurfaceImpl *This, LPRECT lpDestR
 
     if (Source)
     {
-        if (dwFlags & DDBLT_KEYSRC)
+        if ((dwFlags & DDBLT_KEYSRC) || (dwFlags & DDBLT_KEYSRCOVERRIDE))
         {
+            /* Without a DDBLTFX, DDBLT_KEYSRCOVERRIDE falls back to the source's own key (no NULL deref). */
+            BOOL overrideKey = (dwFlags & DDBLT_KEYSRCOVERRIDE) && lpDDBltFx;
+            DDCOLORKEY colorKey;
+
+            colorKey.dwColorSpaceLowValue = overrideKey ?
+                lpDDBltFx->ddckSrcColorkey.dwColorSpaceLowValue : Source->colorKey.dwColorSpaceLowValue;
+
+            colorKey.dwColorSpaceHighValue = overrideKey ?
+                lpDDBltFx->ddckSrcColorkey.dwColorSpaceHighValue : Source->colorKey.dwColorSpaceHighValue;
+
             if (!isStretchBlt)
             {
                 int width = dst_w > src_w ? src_w : dst_w;
@@ -232,7 +242,7 @@ HRESULT __stdcall ddraw_surface_Blt(IDirectDrawSurfaceImpl *This, LPRECT lpDestR
                         {
                             unsigned char c = ((unsigned char *)Source->surface)[x1 + src_x + ysrc];
 
-                            if (c != Source->colorKey.dwColorSpaceLowValue)
+                            if (c < colorKey.dwColorSpaceLowValue || c > colorKey.dwColorSpaceHighValue)
                             {
                                 ((unsigned char *)This->surface)[x1 + dst_x + ydst] = c;
                             }
@@ -251,7 +261,7 @@ HRESULT __stdcall ddraw_surface_Blt(IDirectDrawSurfaceImpl *This, LPRECT lpDestR
                         {
                             unsigned short c = ((unsigned short *)Source->surface)[x1 + src_x + ysrc];
 
-                            if (c != Source->colorKey.dwColorSpaceLowValue)
+                            if (c < colorKey.dwColorSpaceLowValue || c > colorKey.dwColorSpaceHighValue)
                             {
                                 ((unsigned short *)This->surface)[x1 + dst_x + ydst] = c;
                             }
@@ -261,7 +271,7 @@ HRESULT __stdcall ddraw_surface_Blt(IDirectDrawSurfaceImpl *This, LPRECT lpDestR
             }
             else
             {
-                printf("   DDBLT_KEYSRC does not support stretching");
+                printf("   DDBLT_KEYSRC / DDBLT_KEYSRCOVERRIDE does not support stretching");
             }
         }
         else
@@ -478,7 +488,7 @@ HRESULT __stdcall ddraw_surface_Blt(IDirectDrawSurfaceImpl *This, LPRECT lpDestR
 
 HRESULT __stdcall ddraw_surface_BltBatch(IDirectDrawSurfaceImpl *This, LPDDBLTBATCH a, DWORD b, DWORD c)
 {
-    printf("IDirectDrawSurface::BltBatch(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::BltBatch(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -548,7 +558,7 @@ HRESULT __stdcall ddraw_surface_BltFast(IDirectDrawSurfaceImpl *This, DWORD dst_
                     {
                         unsigned char c = ((unsigned char *)Source->surface)[x1 + src_x + ysrc];
 
-                        if (c != Source->colorKey.dwColorSpaceLowValue)
+                        if (c < Source->colorKey.dwColorSpaceLowValue || c > Source->colorKey.dwColorSpaceHighValue)
                         {
                             ((unsigned char *)This->surface)[x1 + dst_x + ydst] = c;
                         }
@@ -567,7 +577,7 @@ HRESULT __stdcall ddraw_surface_BltFast(IDirectDrawSurfaceImpl *This, DWORD dst_
                     {
                         unsigned short c = ((unsigned short *)Source->surface)[x1 + src_x + ysrc];
                         
-                        if (c != Source->colorKey.dwColorSpaceLowValue)
+                        if (c < Source->colorKey.dwColorSpaceLowValue || c > Source->colorKey.dwColorSpaceHighValue)
                         {
                             ((unsigned short *)This->surface)[x1 + dst_x + ydst] = c;
                         }
@@ -667,7 +677,7 @@ HRESULT __stdcall ddraw_surface_EnumAttachedSurfaces(IDirectDrawSurfaceImpl *Thi
 
 HRESULT __stdcall ddraw_surface_EnumOverlayZOrders(IDirectDrawSurfaceImpl *This, DWORD a, LPVOID b, LPDDENUMSURFACESCALLBACK c)
 {
-    printf("IDirectDrawSurface::EnumOverlayZOrders(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::EnumOverlayZOrders(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -743,7 +753,7 @@ HRESULT __stdcall ddraw_surface_Flip(IDirectDrawSurfaceImpl *This, LPDIRECTDRAWS
 
 HRESULT __stdcall ddraw_surface_GetAttachedSurface(IDirectDrawSurfaceImpl *This, LPDDSCAPS lpDdsCaps, LPDIRECTDRAWSURFACE FAR *surface)
 {
-    printf("IDirectDrawSurface::GetAttachedSurface(This=%p, dwCaps=%08X, surface=%p) ???\n", This, lpDdsCaps->dwCaps, surface);
+    printf("??? IDirectDrawSurface::GetAttachedSurface(This=%p, dwCaps=%08X, surface=%p)\n", This, lpDdsCaps->dwCaps, surface);
     
     if ((This->caps & DDSCAPS_PRIMARYSURFACE) && (This->caps & DDSCAPS_FLIP) && (lpDdsCaps->dwCaps & DDSCAPS_BACKBUFFER))
     {
@@ -757,7 +767,7 @@ HRESULT __stdcall ddraw_surface_GetAttachedSurface(IDirectDrawSurfaceImpl *This,
 HRESULT __stdcall ddraw_surface_GetBltStatus(IDirectDrawSurfaceImpl *This, DWORD a)
 {
 #if _DEBUG_X
-    printf("IDirectDrawSurface::GetBltStatus(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::GetBltStatus(This=%p, ...)\n", This);
 #endif
     return DD_OK;
 }
@@ -772,7 +782,7 @@ HRESULT __stdcall ddraw_surface_GetCaps(IDirectDrawSurfaceImpl *This, LPDDSCAPS
 HRESULT __stdcall ddraw_surface_GetClipper(IDirectDrawSurfaceImpl *This, LPDIRECTDRAWCLIPPER FAR *a)
 {
 #if _DEBUG_X
-    printf("IDirectDrawSurface::GetClipper(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::GetClipper(This=%p, ...)\n", This);
 #endif
     return DD_OK;
 }
@@ -780,7 +790,7 @@ HRESULT __stdcall ddraw_surface_GetClipper(IDirectDrawSurfaceImpl *This, LPDIREC
 HRESULT __stdcall ddraw_surface_GetColorKey(IDirectDrawSurfaceImpl *This, DWORD flags, LPDDCOLORKEY colorKey)
 {
 #if _DEBUG_X
-    printf("DirectDrawSurface::GetColorKey(This=%p, flags=0x%08X, colorKey=%p) ???\n", This, flags, colorKey);
+    printf("??? DirectDrawSurface::GetColorKey(This=%p, flags=0x%08X, colorKey=%p)\n", This, flags, colorKey);
 #endif
 
     if (colorKey)
@@ -799,7 +809,7 @@ HRESULT __stdcall ddraw_surface_GetDC(IDirectDrawSurfaceImpl *This, HDC FAR *a)
 #endif
     if (This->width % 4)
     {
-        printf("   GetDC: width=%d height=%d ???\n", This->width, This->height);
+        printf("   GetDC: width=%d height=%d\n", This->width, This->height);
     }
 
     RGBQUAD *data = 
@@ -817,14 +827,14 @@ HRESULT __stdcall ddraw_surface_GetDC(IDirectDrawSurfaceImpl *This, HDC FAR *a)
 HRESULT __stdcall ddraw_surface_GetFlipStatus(IDirectDrawSurfaceImpl *This, DWORD a)
 {
 #if _DEBUG_X
-    printf("IDirectDrawSurface::GetFlipStatus(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::GetFlipStatus(This=%p, ...)\n", This);
 #endif
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_surface_GetOverlayPosition(IDirectDrawSurfaceImpl *This, LPLONG a, LPLONG b)
 {
-    printf("IDirectDrawSurface::GetOverlayPosition(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::GetOverlayPosition(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -868,7 +878,7 @@ HRESULT __stdcall ddraw_surface_GetPixelFormat(IDirectDrawSurfaceImpl *This, LPD
 
 HRESULT __stdcall ddraw_surface_Initialize(IDirectDrawSurfaceImpl *This, LPDIRECTDRAW a, LPDDSURFACEDESC b)
 {
-    printf("IDirectDrawSurface::Initialize(This=%p, ...) ???\n", This);
+    printf("??? IDirectDrawSurface::Initialize(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -921,21 +931,21 @@ HRESULT __stdcall ddraw_surface_ReleaseDC(IDirectDrawSurfaceImpl *This, HDC a)
 HRESULT __stdcall ddraw_surface_Restore(IDirectDrawSurfaceImpl *This)
 {
 #if _DEBUG_X
-    printf("DirectDrawSurface::Restore(This=%p) ???\n", This);
+    printf("??? DirectDrawSurface::Restore(This=%p)\n", This);
 #endif
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_surface_SetClipper(IDirectDrawSurfaceImpl *This, LPDIRECTDRAWCLIPPER a)
 {
-    printf("DirectDrawSurface::SetClipper(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawSurface::SetClipper(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_surface_SetColorKey(IDirectDrawSurfaceImpl *This, DWORD flags, LPDDCOLORKEY colorKey)
 {
 #if _DEBUG_X
-    printf("DirectDrawSurface::SetColorKey(This=%p, flags=0x%08X, colorKey=%p) ???\n", This, flags, colorKey);
+    printf("??? DirectDrawSurface::SetColorKey(This=%p, flags=0x%08X, colorKey=%p)\n", This, flags, colorKey);
 
     if (colorKey)
     {
@@ -955,7 +965,7 @@ HRESULT __stdcall ddraw_surface_SetColorKey(IDirectDrawSurfaceImpl *This, DWORD
 
 HRESULT __stdcall ddraw_surface_SetOverlayPosition(IDirectDrawSurfaceImpl *This, LONG a, LONG b)
 {
-    printf("DirectDrawSurface::SetOverlayPosition(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawSurface::SetOverlayPosition(This=%p, ...)\n", This);
     return DD_OK;
 }
 
@@ -1060,19 +1070,19 @@ HRESULT __stdcall ddraw_surface_Unlock(IDirectDrawSurfaceImpl *This, LPVOID lpRe
 
 HRESULT __stdcall ddraw_surface_UpdateOverlay(IDirectDrawSurfaceImpl *This, LPRECT a, LPDIRECTDRAWSURFACE b, LPRECT c, DWORD d, LPDDOVERLAYFX e)
 {
-    printf("DirectDrawSurface::UpdateOverlay(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawSurface::UpdateOverlay(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_surface_UpdateOverlayDisplay(IDirectDrawSurfaceImpl *This, DWORD a)
 {
-    printf("DirectDrawSurface::UpdateOverlayDisplay(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawSurface::UpdateOverlayDisplay(This=%p, ...)\n", This);
     return DD_OK;
 }
 
 HRESULT __stdcall ddraw_surface_UpdateOverlayZOrder(IDirectDrawSurfaceImpl *This, DWORD a, LPDIRECTDRAWSURFACE b)
 {
-    printf("DirectDrawSurface::UpdateOverlayZOrder(This=%p, ...) ???\n", This);
+    printf("??? DirectDrawSurface::UpdateOverlayZOrder(This=%p, ...)\n", This);
     return DD_OK;
 }