diff --git a/README.md b/README.md index cab55b4f76..4df7514a90 100644 --- a/README.md +++ b/README.md @@ -1,316 +1,52 @@ -Introduction ------------- - -**Proton** is a tool for use with the Steam client which allows games which are -exclusive to Windows to run on the Linux operating system. It uses Wine to -facilitate this. - -**Most users should use Proton provided by the Steam Client itself.** See -[this Steam Community post][steam-play-introduction] for more details. - -The source code is provided to enable advanced users the ability to alter -Proton. For example, some users may wish to use a different version of Wine -with a particular title. - -**The changelog** is available on [our wiki][changelog]. - -[steam-play-introduction]: https://steamcommunity.com/games/221410/announcements/detail/1696055855739350561 -[changelog]: https://github.com/ValveSoftware/Proton/wiki/Changelog - - -Obtaining Proton sources ------------------------- - -Acquire Proton's source by cloning -and checking out the branch you desire. - -You can clone the latest Proton to your system with this command: - -```bash -git clone --recurse-submodules https://github.com/ValveSoftware/Proton.git proton -``` - -Be sure to update submodules when switching between branches: - -```bash -git checkout experimental_6.3 -git submodule update --init --recursive -``` - -If you want to change any subcomponent, now is the time to do so. For -example, if you wish to make changes to Wine, you would apply them to the -`wine/` directory. - - -Building Proton ---------------- - -Most of Proton builds inside the Proton SDK container with very few -dependencies on the host side. - -## Preparing the build environment - -You need either a Docker or a Podman setup. We highly recommend [the rootless -Podman setup][rootless-podman]. Please refer to your distribution's -documentation for setup instructions (e.g. 
Arch [Podman][arch-podman] / -[Docker][arch-docker], Debian [Podman][debian-podman] / -[Docker][debian-docker]). - -[rootless-podman]: https://github.com/containers/podman/blob/main/docs/tutorials/rootless_tutorial.md -[arch-podman]: https://wiki.archlinux.org/title/Podman -[arch-docker]: https://wiki.archlinux.org/title/Docker -[debian-podman]: https://wiki.debian.org/Podman -[debian-docker]: https://wiki.debian.org/Docker - - -## The Easy Way - -We provide a top-level Makefile which will execute most of the build commands -for you. - -After checking out the repository and updating its submodules, assuming that -you have a working Docker or Podman setup, you can build and install Proton -with a simple: - -```bash -make install -``` - -If your build system is missing dependencies, it will fail quickly with a clear -error message. - -After the build finishes, you may need to restart the Steam client to see the -new Proton tool. The tool's name in the Steam client will be based on the -currently checked out branch of Proton. You can override this name using the -`build_name` variable. - -See `make help` for other build targets and options. - - - -## Manual building - -### Configuring the build - -```bash -mkdir ../build && cd ../build -../proton/configure.sh --enable-ccache --build-name=my_build -``` - -Running `configure.sh` will create a `Makefile` allowing you to build Proton. -The scripts checks if containers are functional and prompt you if any -host-side dependencies are missing. You should run the command from a -directory created specifically for your build. +Disclaimer +---------- -The configuration script tries to discover a working Docker or Podman setup -to use, but you can force a compatible engine with -`--container-engine=`. +For basic info about Proton, usage and compilation read [Valve's README](https://github.com/ValveSoftware/Proton/blob/bleeding-edge/README.md#install-proton-locally) -You can enable ccache with `--enable-cache` flag. 
This will mount your -`$CCACHE_DIR` or `$HOME/.ccache` inside the container. +Make sure to apply patches before compiling by running -`--proton-sdk-image=registry.gitlab.steamos.cloud/proton/soldier/sdk:` -can be used to build with a custom version of the Proton SDK images. +`./patches/apply-patches.sh` -Check `--help` for other configuration options. -NOTE: If **SELinux** is in use, the Proton build container may fail to access -your user's files. This is caused by [SELinux's filesystem -labels][selinux-labels]. You may pass the `--relabel-volumes` switch to -configure to cause the [container engine to relabel its -bind-mounts][bind-mounts] and allow access to those files from within the -container. This can be dangerous when used with system directories. Proceed -with caution and refer your container engine's manual. - -[selinux-labels]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/security-enhanced_linux/sect-security-enhanced_linux-working_with_selinux-selinux_contexts_labeling_files -[bind-mounts]: https://docs.docker.com/storage/bind-mounts/ - - -### Building - -``` -make -``` - -**Important make targets:** - -`make install` - install Proton into your user's Steam directory, see the [install Proton -locally](#install-proton-locally) section for details. - -`make redist` - create a redistribute build (`redist/`) that can be copied to -`~/.steam/root/compatibilitytools.d/`. - -`make deploy` - create a deployment build (`deploy/`). This is what we use to -deploy Proton to Steam users via Steamworks. - -`make module= module` - build both 32- and 64-bit versions of the -specified wine module. This allows rapid iteration on one module. This target -is only useful after building Proton. - -`make dxvk` / `make vkd3d-proton` - rebuild DXVK / vkd3d-proton. - - -### Debug Builds - -To prevent symbol stripping add `UNSTRIPPED_BUILD=1` to the `make` -invocation. This should be used only with a clean build directory. 
- -E.g.: - -``` -mkdir ../debug-proton-build && cd ../debug-proton-build -../proton/configure.sh --enable-ccache --build-name=debug_build -make UNSTRIPPED_BUILD=1 install -``` - - -Install Proton locally ----------------------- - -Steam ships with several versions of Proton, which games will use by default or -that you can select in Steam Settings' Steam Play page. Steam also supports -running games with local builds of Proton, which you can install on your -machine. - -To install a local build of Proton into Steam, make a new directory in -`~/.steam/root/compatibilitytools.d/` with a tool name of your choosing and -place the directory containing your redistributable build under that path. - -The `make install` target will perform this task for you, installing the -Proton build into the Steam folder for the current user. You will have to -restart the Steam client for it to pick up on a new tool. - -A correct local tool installation should look similar to this: - -``` -compatibilitytools.d/my_proton/ -├── compatibilitytool.vdf -├── filelock.py -├── LICENSE -├── proton -├── proton_dist.tar -├── toolmanifest.vdf -├── user_settings.sample.py -└── version -``` - -To enable your local build in Steam, go to the Steam Play section of the -Settings window. If the build was correctly installed, you should see -"proton-localbuild" in the drop-down list of compatibility tools. - -Each component of this software is used under the terms of their licenses. -See the `LICENSE` files here, as well as the `LICENSE`, `COPYING`, etc files -in each submodule and directory for details. If you distribute a built -version of Proton to other users, you must adhere to the terms of these -licenses. - - -Debugging ---------- - -Proton builds have their symbols stripped by default. You can switch to -"debug" beta branch in Steam (search for Proton in your library, -Properties... -> BETAS -> select "debug") or build without stripping (see -[Debug Builds section](#debug-builds)). 
- -The symbols are provided through the accompanying `.debug` files which may -need to be explicitly loaded by the debugging tools. For GDB there's a helper -script `wine/tools/gdbinit.py` (source it) that provides `load-symbol-files` -(or `lsf` for short) command which loads the symbols for all the mapped files. - -For tips on debugging see [docs/DEBUGGING.md](docs/DEBUGGING.md). +Introduction +------------ +**Proton-LFX2** is a fork of Proton that merged [LatencyFlex2](https://github.com/ishitatsuyuki/LatencyFleX2), a library whose goal is to lower input latency by hooking into Nvidia's Reflex. It also tried to streamline the process. -`compile_commands.json` ------------------------ +Speaking of streamline - Proton-LFX2 also attempts to bypass [Streamline](https://github.com/NVIDIAGameWorks/Streamline/) to allow the usage of upscalers like CyberFSR or CyberXESS in games that are not supported on Windows by those mods. The usage of those upscalers is also made easier in Proton-LFX2. -For use with [clangd](https://clangd.llvm.org/) LSP server and similar tooling. -Projects built using cmake or meson (e.g. vkd3d-proton) automatically come with -`compile_commands.json`. For autotools (e.g. wine) you have to [configure the -build](#configuring-the-build) with `--enable-bear` that uses -[bear](https://github.com/rizsotto/Bear) to create the compilation database. -It's not on by default as it make the build slightly slower. +LatencyFlex 2 +------------ -The build system collects all the created compile_commands.json files in a -build subdirectory named `compile_commands/`. +Main reason why this fork was created. All you need to do is to add -The paths are translated to point to the real source (i.e. not the rsynced -copy). It still may depend on build directory for things like auto-generated -`config.h` though and for wine it may be beneficial to run `tools/make_requests` -in you source directories as those changes are not committed. 
+`LFX2=1` -You can then configure your editor to use that file for clangd in a few ways: +to your environment variables. +Everything else is taken care of **except** `DXVK_NVAPI_USE_LATENCY_MARKERS`. Usually you won't have to care about that but it's sometimes needed. Refer to the [documentation](https://github.com/FakeMichau/latencyflex2/blob/master/docs/shim/installing.md#environment-variables) to read more. +In case of issues you can still enable LatencyFlex 2 manually, without using the `LFX2` environment variable. -1) directly - some editors/plugins allow you to specify the path to `compile_commands.json` -2) via `.clangd` file, e.g. -```bash -cd src/proton/wine/ -cat > .clangd < +That will also effectively disable LFX2 because currently it causes a crash. But you can still use `LFX2=1` to spoof an Nvidia GPU instead of doing that manually. +So for AMD GPUs you want both. +Also remember to apply the registry signature override file. \ No newline at end of file diff --git a/dxvk-nvapi b/dxvk-nvapi index da3bccb2f3..b64831c4ff 160000 --- a/dxvk-nvapi +++ b/dxvk-nvapi @@ -1 +1 @@ -Subproject commit da3bccb2f33af25064782382d462624c32064adc +Subproject commit b64831c4fff7a9bc259dff47cee5f94b7680f55a diff --git a/lfx2-bin/latencyflex2_rust.dll b/lfx2-bin/latencyflex2_rust.dll new file mode 100755 index 0000000000..884934b760 Binary files /dev/null and b/lfx2-bin/latencyflex2_rust.dll differ diff --git a/lfx2-bin/latencyflex2_rust_32.dll b/lfx2-bin/latencyflex2_rust_32.dll new file mode 100755 index 0000000000..53137d13a2 Binary files /dev/null and b/lfx2-bin/latencyflex2_rust_32.dll differ diff --git a/patches/apply-patches.sh b/patches/apply-patches.sh new file mode 100755 index 0000000000..4fe19515f0 --- /dev/null +++ b/patches/apply-patches.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +git checkout -- proton +echo "Proton: Add patches" +patch -Np1 < ./patches/proton.patch +patch -Np1 < ./patches/makefile.patch + +pushd dxvk || exit +git reset --hard HEAD +git clean 
-xdf +echo "DXVK: Add LFX2 support" +patch -Np1 < ../patches/lfx2-dxvk.patch +popd || exit + +pushd vkd3d-proton || exit +git reset --hard HEAD +git clean -xdf +echo "VKD3D-Proton: Add LFX2 support" +patch -Np1 < ../patches/lfx2-vkd3d.patch +popd || exit + +pushd wine || exit +git reset --hard HEAD +git clean -xdf +echo "Wine: Add HAGS spoof" +patch -Np1 < ../patches/wine-hags-spoof.patch +popd || exit + +pushd dxvk-nvapi || exit +git reset --hard HEAD +git clean -xdf +echo "DXVK-NVAPI: Add LFX2 support" +patch -Np1 < ../patches/lfx2-nvapi.patch +popd || exit diff --git a/patches/lfx2-dxvk.patch b/patches/lfx2-dxvk.patch new file mode 100644 index 0000000000..fa9f5b663c --- /dev/null +++ b/patches/lfx2-dxvk.patch @@ -0,0 +1,1171 @@ +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +new file mode 100644 +index 00000000..bda252c5 +--- /dev/null ++++ b/include/latencyflex2.h +@@ -0,0 +1,168 @@ ++#ifndef LATENCYFLEX2_H ++#define LATENCYFLEX2_H ++ ++#define LFX2_VK ++ ++#include ++#include ++#include ++#include ++#include ++#ifdef LFX2_VK ++#include ++#endif ++ ++#ifdef LFX2_DX12 ++#include ++#endif ++ ++#ifdef _WIN32 ++#define LFX2_API __declspec(dllimport) ++#else ++#define LFX2_API ++#endif ++ ++typedef enum lfx2MarkType { ++ lfx2MarkTypeBegin, ++ lfx2MarkTypeEnd, ++} lfx2MarkType; ++ ++typedef struct lfx2Context lfx2Context; ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++typedef struct lfx2Dx12Context lfx2Dx12Context; ++#endif ++ ++/** ++ * A write handle for frame markers. 
++ */ ++typedef struct lfx2Frame lfx2Frame; ++ ++typedef struct lfx2ImplicitContext lfx2ImplicitContext; ++ ++#if defined(LFX2_VK) ++typedef struct lfx2VulkanContext lfx2VulkanContext; ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++typedef struct lfx2Dx12SubmitAux { ++ ID3D12GraphicsCommandList* execute_before; ++ ID3D12GraphicsCommandList* execute_after; ++ ID3D12Fence* signal_fence; ++ uint64_t signal_fence_value; ++} lfx2Dx12SubmitAux; ++#endif ++ ++typedef uint64_t lfx2Timestamp; ++typedef uint64_t lfx2Interval; ++ ++typedef uint32_t lfx2SectionId; ++ ++#if defined(LFX2_VK) ++typedef struct lfx2VulkanSubmitAux { ++ VkCommandBuffer submit_before; ++ VkCommandBuffer submit_after; ++ VkSemaphore signal_sem; ++ uint64_t signal_sem_value; ++} lfx2VulkanSubmitAux; ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif // __cplusplus ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API struct lfx2Dx12Context *lfx2Dx12ContextCreate(ID3D12Device* device); ++ ++LFX2_API void lfx2Dx12ContextAddRef(struct lfx2Dx12Context *context); ++ ++LFX2_API void lfx2Dx12ContextRelease(struct lfx2Dx12Context *context); ++ ++LFX2_API ++struct lfx2Dx12SubmitAux lfx2Dx12ContextBeforeSubmit(struct lfx2Dx12Context *context, ++ ID3D12CommandQueue* queue); ++ ++LFX2_API void lfx2Dx12ContextBeginFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++ ++LFX2_API void lfx2Dx12ContextEndFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ ++LFX2_API lfx2Timestamp lfx2TimestampNow(void); ++ ++#if defined(_WIN32) ++LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); ++#endif ++ ++LFX2_API void lfx2SleepUntil(lfx2Timestamp target); ++ ++LFX2_API struct lfx2Context *lfx2ContextCreate(void); ++ ++LFX2_API void lfx2ContextAddRef(struct lfx2Context *context); ++ ++LFX2_API void lfx2ContextRelease(struct lfx2Context *context); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameCreate(struct lfx2Context *context, ++ lfx2Timestamp *out_timestamp); ++ 
++LFX2_API void lfx2FrameAddRef(struct lfx2Frame *frame); ++ ++LFX2_API void lfx2FrameRelease(struct lfx2Frame *frame); ++ ++LFX2_API ++void lfx2MarkSection(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ enum lfx2MarkType mark_type, ++ lfx2Timestamp timestamp); ++ ++LFX2_API ++void lfx2FrameOverrideQueuingDelay(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ lfx2Interval queueing_delay); ++ ++LFX2_API ++void lfx2FrameOverrideInverseThroughput(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ lfx2Interval inverse_throughput); ++ ++LFX2_API struct lfx2ImplicitContext *lfx2ImplicitContextCreate(void); ++ ++LFX2_API void lfx2ImplicitContextRelease(struct lfx2ImplicitContext *context); ++ ++LFX2_API void lfx2ImplicitContextReset(struct lfx2ImplicitContext *context); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameCreateImplicit(struct lfx2ImplicitContext *context, ++ lfx2Timestamp *out_timestamp); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameDequeueImplicit(struct lfx2ImplicitContext *context, ++ bool critical); ++ ++#if defined(LFX2_VK) ++LFX2_API ++struct lfx2VulkanContext *lfx2VulkanContextCreate(PFN_vkGetInstanceProcAddr gipa, ++ VkInstance instance, ++ VkPhysicalDevice physical_device, ++ VkDevice device, ++ uint32_t queue_family_index); ++ ++LFX2_API void lfx2VulkanContextAddRef(struct lfx2VulkanContext *context); ++ ++LFX2_API void lfx2VulkanContextRelease(struct lfx2VulkanContext *context); ++ ++LFX2_API ++struct lfx2VulkanSubmitAux lfx2VulkanContextBeforeSubmit(struct lfx2VulkanContext *context); ++ ++LFX2_API ++void lfx2VulkanContextBeginFrame(struct lfx2VulkanContext *context, ++ struct lfx2Frame *frame); ++ ++LFX2_API void lfx2VulkanContextEndFrame(struct lfx2VulkanContext *context, struct lfx2Frame *frame); ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif // __cplusplus ++ ++#endif /* LATENCYFLEX2_H */ +diff --git a/include/native/directx b/include/native/directx +--- a/include/native/directx ++++ b/include/native/directx +@@ 
-1 +1 @@ +-Subproject commit 9df86f2341616ef1888ae59919feaa6d4fad693d ++Subproject commit 9df86f2341616ef1888ae59919feaa6d4fad693d-dirty +diff --git a/include/spirv b/include/spirv +index 8b246ff7..ff2afc3a 160000 +--- a/include/spirv ++++ b/include/spirv +@@ -1 +1 @@ +-Subproject commit 8b246ff75c6615ba4532fe4fde20f1be090c3764 ++Subproject commit ff2afc3afc48dff4eec2a10f0212402a80708e38-dirty +diff --git a/include/vulkan b/include/vulkan +index 46dc0f6e..192d051d 160000 +--- a/include/vulkan ++++ b/include/vulkan +@@ -1 +1 @@ +-Subproject commit 46dc0f6e514f5730784bb2cac2a7c731636839e8 ++Subproject commit 192d051db3382e213f8bd9d8048fc9eaa78ed6ab-dirty +diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp +index c89e0bc0..6512656a 100644 +--- a/src/d3d11/d3d11_context_ext.cpp ++++ b/src/d3d11/d3d11_context_ext.cpp +@@ -215,7 +215,6 @@ namespace dxvk { + return true; + } + +- + template class D3D11DeviceContextExt; + template class D3D11DeviceContextExt; + +diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp +index aa379d41..fe338fa5 100644 +--- a/src/d3d11/d3d11_context_imm.cpp ++++ b/src/d3d11/d3d11_context_imm.cpp +@@ -844,6 +844,7 @@ namespace dxvk { + + EmitCs([] (DxvkContext* ctx) { + ctx->endFrame(); ++ ctx->endLfx2FrameImplicit(); + }); + } + +@@ -892,6 +893,7 @@ namespace dxvk { + + + void D3D11ImmediateContext::EmitCsChunk(DxvkCsChunkRef&& chunk) { ++ chunk->finalize(); + m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); + } + +diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h +index 3e683250..f5b105e8 100644 +--- a/src/d3d11/d3d11_context_imm.h ++++ b/src/d3d11/d3d11_context_imm.h +@@ -18,6 +18,7 @@ namespace dxvk { + friend class D3D11SwapChain; + friend class D3D11VideoContext; + friend class D3D11DXGIKeyedMutex; ++ friend class D3D11DeviceLfx2Ext; + public: + + D3D11ImmediateContext( +@@ -182,6 +183,8 @@ namespace dxvk { + HANDLE hEvent, + BOOL Synchronize); + ++ 
private: ++ Lfx2Frame m_implicitLfx2Frame {}; + }; + + } +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index 863a9a31..c3bbf223 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -2469,6 +2469,9 @@ namespace dxvk { + return deviceFeatures.nvxBinaryImport + && deviceFeatures.vk12.bufferDeviceAddress; + ++ case D3D11_VK_LATENCYFLEX2: ++ return true; // TODO ++ + default: + return false; + } +@@ -2781,10 +2784,6 @@ namespace dxvk { + return static_cast(got->second); + } + +- +- +- +- + D3D11VideoDevice::D3D11VideoDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice) +@@ -3084,6 +3083,7 @@ namespace dxvk { + m_dxvkDevice (pDxvkDevice), + m_d3d11Device (this, FeatureLevel, FeatureFlags), + m_d3d11DeviceExt(this, &m_d3d11Device), ++ m_d3d11DeviceLfx2Ext(this, m_d3d11Device.m_context.ptr()), + m_d3d11Interop (this, &m_d3d11Device), + m_d3d11Video (this, &m_d3d11Device), + m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), +@@ -3142,6 +3142,11 @@ namespace dxvk { + *ppvObject = ref(&m_d3d11DeviceExt); + return S_OK; + } ++ ++ if (riid == __uuidof(ID3DLfx2ExtDevice)) { ++ *ppvObject = ref(&m_d3d11DeviceLfx2Ext); ++ return S_OK; ++ } + + if (riid == __uuidof(IDXGIDXVKDevice)) { + *ppvObject = ref(&m_metaDevice); +@@ -3429,4 +3434,42 @@ namespace dxvk { + return m_dxvkDevice; + } + ++ D3D11DeviceLfx2Ext::D3D11DeviceLfx2Ext( ++ D3D11DXGIDevice* pContainer, ++ D3D11ImmediateContext* pImmediateContext) ++ : m_container(pContainer), m_immediateContext(pImmediateContext), m_dxvkDevice(pContainer->GetDXVKDevice()) { ++ ++ } ++ ++ HRESULT STDMETHODCALLTYPE D3D11DeviceLfx2Ext::QueryInterface(const IID &riid, void **ppvObject) { ++ return m_container->QueryInterface(riid, ppvObject); ++ } ++ ++ ULONG STDMETHODCALLTYPE D3D11DeviceLfx2Ext::AddRef() { ++ return m_container->AddRef(); ++ } ++ ++ ULONG STDMETHODCALLTYPE D3D11DeviceLfx2Ext::Release() { ++ return m_container->Release(); ++ } ++ ++ void 
STDMETHODCALLTYPE D3D11DeviceLfx2Ext::ImplicitBeginFrame(uint64_t *outTimestamp, void *outFrame) { ++ *(lfx2Frame **)outFrame = m_dxvkDevice->lfx2().FrameCreateImplicit(m_dxvkDevice->getImplicitLfx2Context()->context(), outTimestamp); ++ } ++ ++ void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::MarkRenderStart(void *frame) { ++ auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); ++ ++ m_immediateContext->EmitCs([cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { ++ ctx->beginLfx2Frame(frameWrapper); ++ }); ++ } ++ ++ void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::MarkRenderEnd(void *frame) { ++ auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); ++ ++ m_immediateContext->EmitCs([cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { ++ ctx->endLfx2Frame(); ++ }); ++ } + } +diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h +index 7a44b5ad..d0d6a285 100644 +--- a/src/d3d11/d3d11_device.h ++++ b/src/d3d11/d3d11_device.h +@@ -48,6 +48,7 @@ namespace dxvk { + * as part of a \ref D3D11DeviceContainer. 
+ */ + class D3D11Device final : public ID3D11Device5 { ++ friend class D3D11DXGIDevice; + /// Maximum number of resource init commands per command buffer + constexpr static uint64_t InitCommandThreshold = 50; + public: +@@ -561,7 +562,7 @@ namespace dxvk { + const D3D11_SAMPLER_DESC* pSamplerDesc, + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle); +- ++ + private: + + D3D11DXGIDevice* m_container; +@@ -696,6 +697,29 @@ namespace dxvk { + + }; + ++ class D3D11DeviceLfx2Ext : public ID3DLfx2ExtDevice { ++ public: ++ D3D11DeviceLfx2Ext(D3D11DXGIDevice *pContainer, ++ D3D11ImmediateContext *pImmediateContext); ++ ++ HRESULT STDMETHODCALLTYPE QueryInterface(const IID &riid, void **ppvObject); ++ ++ ULONG STDMETHODCALLTYPE AddRef(); ++ ++ ULONG STDMETHODCALLTYPE Release(); ++ ++ void STDMETHODCALLTYPE ImplicitBeginFrame(uint64_t *outTimestamp, void *outFrame); ++ ++ void STDMETHODCALLTYPE MarkRenderStart(void *frame); ++ ++ void STDMETHODCALLTYPE MarkRenderEnd(void *frame); ++ ++ private: ++ D3D11DXGIDevice *m_container; ++ D3D11ImmediateContext *m_immediateContext; ++ const Rc m_dxvkDevice; ++ }; ++ + + /** + * \brief DXVK swap chain factory +@@ -858,6 +882,7 @@ namespace dxvk { + + D3D11Device m_d3d11Device; + D3D11DeviceExt m_d3d11DeviceExt; ++ D3D11DeviceLfx2Ext m_d3d11DeviceLfx2Ext; + D3D11VkInterop m_d3d11Interop; + D3D11VideoDevice m_d3d11Video; + D3D11on12Device m_d3d11on12; +diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h +index 8a2e6fcf..35bbedf0 100644 +--- a/src/d3d11/d3d11_interfaces.h ++++ b/src/d3d11/d3d11_interfaces.h +@@ -16,6 +16,7 @@ enum D3D11_VK_EXTENSION : uint32_t { + D3D11_VK_EXT_BARRIER_CONTROL = 3, + D3D11_VK_NVX_BINARY_IMPORT = 4, + D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, ++ D3D11_VK_LATENCYFLEX2 = 6, + }; + + +@@ -114,6 +115,17 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + uint32_t* pCudaTextureHandle) = 0; + }; + ++MIDL_INTERFACE("851a9f0f-5da0-4850-b563-a7bbc414f4e6") ++ID3DLfx2ExtDevice : public 
IUnknown { ++ ++ virtual void STDMETHODCALLTYPE MarkRenderStart(void *frame) = 0; ++ ++ virtual void STDMETHODCALLTYPE MarkRenderEnd(void *frame) = 0; ++ ++ virtual void STDMETHODCALLTYPE ImplicitBeginFrame(uint64_t *outTimestamp, void *outFrame) = 0; ++ ++}; ++ + + /** + * \brief Extended D3D11 context +@@ -182,11 +194,11 @@ ID3D11VkExtContext1 : public ID3D11VkExtContext { + uint32_t numWriteResources) = 0; + }; + +- + #ifndef _MSC_VER + __CRT_UUID_DECL(ID3D11VkExtShader, 0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a); + __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17); + __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); ++__CRT_UUID_DECL(ID3DLfx2ExtDevice, 0x851a9f0f,0x5da0,0x4850,0xb5,0x63,0xa7,0xbb,0xc4,0x14,0xf4,0xe6); + __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); + __CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); + #endif +diff --git a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp +index 73939e2c..4bae8609 100644 +--- a/src/d3d11/d3d11_swapchain.cpp ++++ b/src/d3d11/d3d11_swapchain.cpp +@@ -450,6 +450,7 @@ namespace dxvk { + // Ensure that we can safely destroy the swap chain + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); ++ m_device->getImplicitLfx2Context()->reset(); + + m_presentStatus.result = VK_SUCCESS; + m_dirtyHdrMetadata = true; +diff --git a/src/d3d11/meson.build b/src/d3d11/meson.build +index 20bec082..d10ff8d4 100644 +--- a/src/d3d11/meson.build ++++ b/src/d3d11/meson.build +@@ -86,6 +86,7 @@ d3d11_dll = shared_library(dxvk_name_prefix+'d3d11', dxgi_common_src + d3d11_src + link_args : d3d11_ld_args, + link_depends : [ d3d11_link_depends ], + kwargs : dxvk_so_version, ++ cpp_pch : '../pch/d3d11_pch.h', + ) + + d3d11_dep = declare_dependency( +diff --git 
a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp +index e53c96a3..aa671a30 100644 +--- a/src/d3d9/d3d9_device.cpp ++++ b/src/d3d9/d3d9_device.cpp +@@ -5346,6 +5346,7 @@ namespace dxvk { + + + void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { ++ chunk->finalize(); + m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); + } + +diff --git a/src/d3d9/meson.build b/src/d3d9/meson.build +index bc3eac42..062d9645 100644 +--- a/src/d3d9/meson.build ++++ b/src/d3d9/meson.build +@@ -65,6 +65,7 @@ d3d9_dll = shared_library(dxvk_name_prefix+'d3d9', d3d9_src, glsl_generator.proc + link_args : d3d9_ld_args, + link_depends : [ d3d9_link_depends ], + kwargs : dxvk_so_version, ++ cpp_pch : '../pch/d3d9_pch.h', + ) + + d3d9_dep = declare_dependency( +diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp +index 3bd3aa95..a38fea40 100644 +--- a/src/dxvk/dxvk_cmdlist.cpp ++++ b/src/dxvk/dxvk_cmdlist.cpp +@@ -272,6 +272,9 @@ namespace dxvk { + 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); + } + ++ if (isFirst && m_lfx2Aux.submit_before) ++ m_commandSubmission.executeCommandBuffer(m_lfx2Aux.submit_before); ++ + // Submit graphics commands + if (cmd.usedFlags.test(DxvkCmdBuffer::InitBuffer)) + m_commandSubmission.executeCommandBuffer(cmd.initBuffer); +@@ -296,6 +299,12 @@ namespace dxvk { + m_commandSubmission.signalFence(m_fence); + } + ++ if (isLast && m_lfx2Aux.submit_after) ++ m_commandSubmission.executeCommandBuffer(m_lfx2Aux.submit_after); ++ if (isLast && m_lfx2Aux.signal_sem) ++ m_commandSubmission.signalSemaphore(m_lfx2Aux.signal_sem, m_lfx2Aux.signal_sem_value, ++ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); ++ + // Finally, submit all graphics commands of the current submission + if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) + return status; +@@ -364,6 +373,8 @@ namespace dxvk { + + + void DxvkCommandList::reset() { ++ m_lfx2Aux = {}; ++ + // Free resources and other objects + // that are no longer in use + m_resources.reset(); 
+diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h +index 1934b9a0..cc3b581e 100644 +--- a/src/dxvk/dxvk_cmdlist.h ++++ b/src/dxvk/dxvk_cmdlist.h +@@ -1,6 +1,7 @@ + #pragma once + + #include ++#include + + #include "dxvk_bind_mask.h" + #include "dxvk_buffer.h" +@@ -17,6 +18,7 @@ + #include "dxvk_sparse.h" + #include "dxvk_staging.h" + #include "dxvk_stats.h" ++#include "dxvk_lfx2.h" + + namespace dxvk { + +@@ -1026,6 +1028,10 @@ namespace dxvk { + m_descriptorPools.push_back({ pool, manager }); + } + ++ void setLfx2Aux(lfx2VulkanSubmitAux aux) { ++ m_lfx2Aux = aux; ++ } ++ + private: + + DxvkDevice* m_device; +@@ -1065,6 +1071,8 @@ namespace dxvk { + + std::vector m_pipelines; + ++ lfx2VulkanSubmitAux m_lfx2Aux = {}; ++ + VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer) const { + if (cmdBuffer == DxvkCmdBuffer::ExecBuffer) return m_cmd.execBuffer; + if (cmdBuffer == DxvkCmdBuffer::InitBuffer) return m_cmd.initBuffer; +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index c63dce53..73aac9b5 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -1,4 +1,5 @@ + #include ++#include + #include + #include + +@@ -106,8 +107,10 @@ namespace dxvk { + + + void DxvkContext::flushCommandList(DxvkSubmitStatus* status) { +- m_device->submitCommandList( +- this->endRecording(), status); ++ auto cmdList = this->endRecording(); ++ if (m_type == DxvkContextType::Primary) ++ cmdList->setLfx2Aux(m_device->lfx2().VulkanContextBeforeSubmit(m_device->getLfx2VkContext())); ++ m_device->submitCommandList(cmdList, status); + + this->beginRecording( + m_device->createCommandList()); +@@ -6513,4 +6516,42 @@ namespace dxvk { + this->beginCurrentCommands(); + } + ++ void DxvkContext::tryBeginLfx2FrameImplicit(bool critical) { ++ if (!m_lfx2Frame) { ++ auto lfx2Frame = m_device->getImplicitLfx2Context()->dequeueFrame(critical); ++ if (lfx2Frame) { ++ beginLfx2Frame(lfx2Frame); ++ } ++ } ++ } ++ ++ void 
DxvkContext::endLfx2FrameImplicit() { ++ tryBeginLfx2FrameImplicit(true); ++ if (m_lfx2Frame) { ++ endLfx2Frame(); ++ } ++ } ++ ++ void DxvkContext::beginLfx2Frame(Lfx2Frame frame) { ++ if (m_type != DxvkContextType::Primary) ++ Logger::err("beginLfx2Frame should only be called on immediate contexts"); ++ auto &cLfx2 = m_device->lfx2(); ++ m_lfx2Frame = std::move(frame); ++ cLfx2.VulkanContextBeginFrame(m_device->getLfx2VkContext(), m_lfx2Frame); ++ m_frameCsTime = 0; ++ m_minQueuingDelay = UINT64_MAX; ++ } ++ ++ void DxvkContext::endLfx2Frame() { ++ if (m_type != DxvkContextType::Primary) ++ Logger::err("endLfx2Frame should only be called on immediate contexts"); ++ // Submission feedback is never read here; pass nullptr instead of leaking a heap-allocated status every frame. ++ flushCommandList(nullptr); ++ auto &cLfx2 = m_device->lfx2(); ++ cLfx2.VulkanContextEndFrame(m_device->getLfx2VkContext(), m_lfx2Frame); ++ cLfx2.FrameOverrideInverseThroughput(m_lfx2Frame, 800, m_frameCsTime); ++ if (m_minQueuingDelay != UINT64_MAX) ++ cLfx2.FrameOverrideQueuingDelay(m_lfx2Frame, 0, m_minQueuingDelay); ++ m_lfx2Frame = {}; ++ } + } +diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h +index 3b61d474..136a12dd 100644 +--- a/src/dxvk/dxvk_context.h ++++ b/src/dxvk/dxvk_context.h +@@ -67,6 +67,11 @@ namespace dxvk { + * \param [out] status Submission feedback + */ + void flushCommandList(DxvkSubmitStatus* status); ++ ++ void tryBeginLfx2FrameImplicit(bool critical); ++ void endLfx2FrameImplicit(); ++ void beginLfx2Frame(Lfx2Frame frame); ++ void endLfx2Frame(); + + /** + * \brief Begins generating query data +@@ -1388,6 +1393,11 @@ namespace dxvk { + m_cmd->addStatCtr(counter, value); + } + ++ void recordChunkExecutionTiming(uint64_t duration, uint64_t queueingDelay) { ++ m_frameCsTime += duration; ++ m_minQueuingDelay = std::min(m_minQueuingDelay, queueingDelay); ++ } ++ + private: + + Rc m_device; +@@ -1432,6 +1442,10 @@ namespace dxvk { + std::array m_gpLookupCache = { }; + std::array m_cpLookupCache = { }; + ++ Lfx2Frame m_lfx2Frame = 
{}; ++ uint64_t m_frameCsTime = 0; ++ uint64_t m_minQueuingDelay = 0; ++ + void blitImageFb( + const Rc& dstImage, + const Rc& srcImage, +@@ -1739,7 +1753,6 @@ namespace dxvk { + void endCurrentCommands(); + + void splitCommands(); +- + }; + + } +diff --git a/src/dxvk/dxvk_cs.cpp b/src/dxvk/dxvk_cs.cpp +index 5788ba82..44d7e718 100644 +--- a/src/dxvk/dxvk_cs.cpp ++++ b/src/dxvk/dxvk_cs.cpp +@@ -55,8 +55,12 @@ namespace dxvk { + + m_commandOffset = 0; + } +- +- ++ ++ void DxvkCsChunk::finalize() { ++ m_queuedTimestamp = high_resolution_clock::now(); ++ } ++ ++ + DxvkCsChunkPool::DxvkCsChunkPool() { + + } +@@ -161,6 +165,8 @@ namespace dxvk { + // them in order to potentially reduce lock contention. + std::vector chunks; + ++ high_resolution_clock::time_point lastFinish; ++ + try { + while (!m_stopped.load()) { + { std::unique_lock lock(m_mutex); +@@ -175,8 +181,16 @@ namespace dxvk { + + for (auto& chunk : chunks) { + m_context->addStatCtr(DxvkStatCounter::CsChunkCount, 1); +- ++ m_context->tryBeginLfx2FrameImplicit(false); ++ high_resolution_clock::time_point start = high_resolution_clock::now(); + chunk->executeAll(m_context.ptr()); ++ high_resolution_clock::time_point end = high_resolution_clock::now(); ++ m_context->recordChunkExecutionTiming( ++ std::chrono::duration_cast(end - start).count(), ++ std::max(std::chrono::duration_cast( ++ lastFinish - chunk->getQueuedTimestamp()).count(), 0LL) ++ ); ++ lastFinish = end; + + // Use a separate mutex for the chunk counter, this + // will only ever be contested if synchronization is +diff --git a/src/dxvk/dxvk_cs.h b/src/dxvk/dxvk_cs.h +index ae20746a..d8ee4fdf 100644 +--- a/src/dxvk/dxvk_cs.h ++++ b/src/dxvk/dxvk_cs.h +@@ -233,10 +233,18 @@ namespace dxvk { + * that it can be reused later. 
+ */ + void reset(); ++ ++ void finalize(); ++ ++ high_resolution_clock::time_point getQueuedTimestamp() { ++ return m_queuedTimestamp; ++ } + + private: + + size_t m_commandOffset = 0; ++ ++ high_resolution_clock::time_point m_queuedTimestamp; + + DxvkCsCmd* m_head = nullptr; + DxvkCsCmd* m_tail = nullptr; +diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp +index 226596f0..943f3b27 100644 +--- a/src/dxvk/dxvk_device.cpp ++++ b/src/dxvk/dxvk_device.cpp +@@ -14,6 +14,7 @@ namespace dxvk { + m_instance (instance), + m_adapter (adapter), + m_vkd (vkd), ++ m_lfx2ImplicitContext(&m_lfx2), + m_features (features), + m_properties (adapter->devicePropertiesExt()), + m_perfHints (getPerfHints()), +@@ -21,6 +22,7 @@ namespace dxvk { + m_queues (queues), + m_submissionQueue (this, queueCallback) { + ++ m_lfx2Vk = m_lfx2.VulkanContextCreate(instance->vki()->getLoaderProc(), instance->handle(), m_adapter->handle(), m_vkd->device(), queues.graphics.queueIndex); + } + + +@@ -39,6 +41,8 @@ namespace dxvk { + // Stop workers explicitly in order to prevent + // access to structures that are being destroyed. + m_objects.pipelineManager().stopWorkerThreads(); ++ ++ m_lfx2.VulkanContextRelease(m_lfx2Vk); + } + + +@@ -339,4 +343,12 @@ namespace dxvk { + m_recycledCommandLists.returnObject(cmdList); + } + ++ lfx2VulkanContext *DxvkDevice::getLfx2VkContext() { ++ return m_lfx2Vk; ++ } ++ ++ DxvkLfx2ImplicitContext *DxvkDevice::getImplicitLfx2Context() { ++ return &m_lfx2ImplicitContext; ++ } ++ + } +diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h +index a24ee311..4e6c2ed7 100644 +--- a/src/dxvk/dxvk_device.h ++++ b/src/dxvk/dxvk_device.h +@@ -98,6 +98,10 @@ namespace dxvk { + Rc vkd() const { + return m_vkd; + } ++ ++ const Lfx2Fn &lfx2() const { ++ return m_lfx2; ++ } + + /** + * \brief Logical device handle +@@ -534,6 +538,9 @@ namespace dxvk { + * used by the GPU can be safely destroyed. 
+ */ + void waitForIdle(); ++ ++ lfx2VulkanContext* getLfx2VkContext(); ++ DxvkLfx2ImplicitContext* getImplicitLfx2Context(); + + private: + +@@ -542,6 +549,9 @@ namespace dxvk { + Rc m_instance; + Rc m_adapter; + Rc m_vkd; ++ Lfx2Fn m_lfx2; ++ lfx2VulkanContext *m_lfx2Vk; ++ DxvkLfx2ImplicitContext m_lfx2ImplicitContext; + + DxvkDeviceFeatures m_features; + DxvkDeviceInfo m_properties; +diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h +index 729f09c1..40c5bcc1 100644 +--- a/src/dxvk/dxvk_extensions.h ++++ b/src/dxvk/dxvk_extensions.h +@@ -297,6 +297,7 @@ namespace dxvk { + DxvkExt amdMemoryOverallocationBehaviour = { VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt amdShaderFragmentMask = { VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extAttachmentFeedbackLoopLayout = { VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, DxvkExtMode::Optional }; ++ DxvkExt extCalibratedTimestamps = { VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extConservativeRasterization = { VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extCustomBorderColor = { VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extDepthClipEnable = { VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME, DxvkExtMode::Optional }; +diff --git a/src/dxvk/dxvk_gpu_query.cpp b/src/dxvk/dxvk_gpu_query.cpp +index aaec6a0f..5f4885f0 100644 +--- a/src/dxvk/dxvk_gpu_query.cpp ++++ b/src/dxvk/dxvk_gpu_query.cpp +@@ -340,7 +340,7 @@ namespace dxvk { + handle.queryId); + + cmd->cmdWriteTimestamp( +- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, ++ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + handle.queryPool, + handle.queryId); + +diff --git a/src/dxvk/dxvk_gpu_query.h b/src/dxvk/dxvk_gpu_query.h +index 919d0e26..acf1654e 100644 +--- a/src/dxvk/dxvk_gpu_query.h ++++ b/src/dxvk/dxvk_gpu_query.h +@@ -11,6 +11,7 @@ + namespace dxvk { + + class DxvkCommandList; 
++ class DxvkDevice; + + class DxvkGpuQueryPool; + class DxvkGpuQueryAllocator; +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +new file mode 100644 +index 00000000..c7e50abe +--- /dev/null ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -0,0 +1,137 @@ ++#include "dxvk_lfx2.h" ++ ++#include ++ ++#include "../util/util_time.h" ++#include "dxvk_device.h" ++#include "../util/util_win32_compat.h" ++ ++namespace dxvk { ++ ++ Lfx2Fn::Lfx2Fn() { ++#ifdef _WIN32 ++ const auto lfxModuleName = "latencyflex2_rust.dll"; ++#else ++ const auto lfxModuleName = "liblatencyflex2_rust.so"; ++#endif ++ ++ m_lfxModule = ::LoadLibraryA(lfxModuleName); ++ if (m_lfxModule == nullptr) { ++ Logger::err(str::format("Failed to load ", lfxModuleName)); ++ return; ++ } ++ ++#define LOAD_PFN(x) \ ++ this->x = GetProcAddress("lfx2" #x) ++ ++ LOAD_PFN(ContextCreate); ++ LOAD_PFN(ContextAddRef); ++ LOAD_PFN(ContextRelease); ++ LOAD_PFN(FrameCreate); ++ LOAD_PFN(FrameAddRef); ++ LOAD_PFN(FrameRelease); ++ LOAD_PFN(MarkSection); ++ LOAD_PFN(FrameOverrideQueuingDelay); ++ LOAD_PFN(FrameOverrideInverseThroughput); ++ LOAD_PFN(SleepUntil); ++ LOAD_PFN(TimestampNow); ++#ifdef _WIN32 ++ LOAD_PFN(TimestampFromQpc); ++#endif ++ LOAD_PFN(ImplicitContextCreate); ++ LOAD_PFN(ImplicitContextRelease); ++ LOAD_PFN(ImplicitContextReset); ++ LOAD_PFN(FrameCreateImplicit); ++ LOAD_PFN(FrameDequeueImplicit); ++ LOAD_PFN(VulkanContextCreate); ++ LOAD_PFN(VulkanContextAddRef); ++ LOAD_PFN(VulkanContextRelease); ++ LOAD_PFN(VulkanContextBeforeSubmit); ++ LOAD_PFN(VulkanContextBeginFrame); ++ LOAD_PFN(VulkanContextEndFrame); ++ ++#undef LOAD_PFN ++ } ++ ++ Lfx2Fn::~Lfx2Fn() { ++ if (m_lfxModule == nullptr) ++ return; ++ ++ // Calling FreeLibrary deadlocks if called from DllMain. 
++ if (this_thread::isInModuleDetachment()) ++ return; ++ ++ ::FreeLibrary(m_lfxModule); ++ m_lfxModule = nullptr; ++ } ++ ++ template ++ T Lfx2Fn::GetProcAddress(const char *name) { ++ return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); ++ } ++ ++ DxvkLfx2ImplicitContext::DxvkLfx2ImplicitContext(Lfx2Fn *lfx2): m_lfx2(lfx2) { ++ m_context = m_lfx2->ImplicitContextCreate(); ++ } ++ ++ DxvkLfx2ImplicitContext::~DxvkLfx2ImplicitContext() { ++ m_lfx2->ImplicitContextRelease(m_context); ++ } ++ ++ Lfx2Frame DxvkLfx2ImplicitContext::dequeueFrame(bool critical) { ++ lfx2Frame *frame = m_lfx2->FrameDequeueImplicit(m_context, critical); ++ Lfx2Frame wrapper(*m_lfx2, frame); ++ if (frame) ++ m_lfx2->FrameRelease(frame); ++ return wrapper; ++ } ++ ++ void DxvkLfx2ImplicitContext::reset() { ++ m_lfx2->ImplicitContextReset(m_context); ++ } ++ ++ Lfx2Frame::Lfx2Frame() { ++ ++ } ++ ++ Lfx2Frame::Lfx2Frame(const Lfx2Fn &lfx2, lfx2Frame *lfx2Frame) : m_lfx2(&lfx2), m_lfx2Frame(lfx2Frame) { ++ if (m_lfx2Frame) ++ m_lfx2->FrameAddRef(m_lfx2Frame); ++ } ++ ++ Lfx2Frame::~Lfx2Frame() { ++ if (m_lfx2Frame != nullptr) ++ m_lfx2->FrameRelease(m_lfx2Frame); ++ } ++ ++ Lfx2Frame::Lfx2Frame(const Lfx2Frame &other): m_lfx2(other.m_lfx2), m_lfx2Frame(other.m_lfx2Frame) { ++ m_lfx2->FrameAddRef(m_lfx2Frame); ++ } ++ ++ Lfx2Frame::Lfx2Frame(Lfx2Frame &&other) noexcept : m_lfx2(other.m_lfx2), m_lfx2Frame(other.m_lfx2Frame) { ++ other.m_lfx2Frame = nullptr; ++ } ++ ++ Lfx2Frame &Lfx2Frame::operator=(const Lfx2Frame &other) { ++ if (this != &other) { ++ if (m_lfx2Frame != nullptr) ++ m_lfx2->FrameRelease(m_lfx2Frame); ++ ++ m_lfx2 = other.m_lfx2; ++ m_lfx2Frame = other.m_lfx2Frame; ++ m_lfx2->FrameAddRef(m_lfx2Frame); ++ } ++ ++ return *this; ++ } ++ ++ Lfx2Frame &Lfx2Frame::operator=(Lfx2Frame &&other) noexcept { ++ if (m_lfx2Frame != nullptr) ++ m_lfx2->FrameRelease(m_lfx2Frame); ++ ++ m_lfx2 = other.m_lfx2; ++ m_lfx2Frame = other.m_lfx2Frame; ++ other.m_lfx2Frame = 
nullptr; ++ return *this; ++ } ++} // dxvk +\ No newline at end of file +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +new file mode 100644 +index 00000000..dc81d0a5 +--- /dev/null ++++ b/src/dxvk/dxvk_lfx2.h +@@ -0,0 +1,83 @@ ++#pragma once ++ ++#include ++#include "dxvk_gpu_query.h" ++#include "latencyflex2.h" ++ ++namespace dxvk { ++ ++ class Lfx2Fn { ++ public: ++ Lfx2Fn(); ++ virtual ~Lfx2Fn(); ++ ++#define DECLARE_PFN(x) \ ++ decltype(&::lfx2##x) x {} ++ ++ DECLARE_PFN(ContextCreate); ++ DECLARE_PFN(ContextAddRef); ++ DECLARE_PFN(ContextRelease); ++ DECLARE_PFN(FrameCreate); ++ DECLARE_PFN(FrameAddRef); ++ DECLARE_PFN(FrameRelease); ++ DECLARE_PFN(MarkSection); ++ DECLARE_PFN(FrameOverrideQueuingDelay); ++ DECLARE_PFN(FrameOverrideInverseThroughput); ++ DECLARE_PFN(SleepUntil); ++ DECLARE_PFN(TimestampNow); ++#ifdef _WIN32 ++ DECLARE_PFN(TimestampFromQpc); ++#endif ++ DECLARE_PFN(ImplicitContextCreate); ++ DECLARE_PFN(ImplicitContextRelease); ++ DECLARE_PFN(ImplicitContextReset); ++ DECLARE_PFN(FrameCreateImplicit); ++ DECLARE_PFN(FrameDequeueImplicit); ++ DECLARE_PFN(VulkanContextCreate); ++ DECLARE_PFN(VulkanContextAddRef); ++ DECLARE_PFN(VulkanContextRelease); ++ DECLARE_PFN(VulkanContextBeforeSubmit); ++ DECLARE_PFN(VulkanContextBeginFrame); ++ DECLARE_PFN(VulkanContextEndFrame); ++ ++#undef DECLARE_PFN ++ ++ private: ++ template ++ T GetProcAddress(const char* name); ++ ++ HMODULE m_lfxModule{}; ++ }; ++ ++ class Lfx2Frame { ++ public: ++ Lfx2Frame(); ++ Lfx2Frame(const Lfx2Fn &lfx2, lfx2Frame *lfx2Frame); ++ Lfx2Frame(const Lfx2Frame &other); ++ Lfx2Frame(Lfx2Frame &&other) noexcept; ++ ~Lfx2Frame(); ++ ++ Lfx2Frame& operator=(const Lfx2Frame &other); ++ Lfx2Frame& operator=(Lfx2Frame &&other) noexcept; ++ ++ operator lfx2Frame *() const { return m_lfx2Frame; } ++ ++ private: ++ const Lfx2Fn *m_lfx2{}; ++ lfx2Frame *m_lfx2Frame{}; ++ }; ++ ++ class DxvkLfx2ImplicitContext { ++ public: ++ explicit DxvkLfx2ImplicitContext(Lfx2Fn *lfx2); ++ 
~DxvkLfx2ImplicitContext(); ++ lfx2ImplicitContext *context() const { return m_context; } ++ Lfx2Frame dequeueFrame(bool critical); ++ void reset(); ++ ++ private: ++ Lfx2Fn *m_lfx2; ++ lfx2ImplicitContext *m_context; ++ }; ++ ++} // dxvk +\ No newline at end of file +diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build +index 78ff2ecf..a26f6a55 100644 +--- a/src/dxvk/meson.build ++++ b/src/dxvk/meson.build +@@ -79,6 +79,7 @@ dxvk_src = [ + 'dxvk_image.cpp', + 'dxvk_instance.cpp', + 'dxvk_lifetime.cpp', ++ 'dxvk_lfx2.cpp', + 'dxvk_memory.cpp', + 'dxvk_meta_blit.cpp', + 'dxvk_meta_clear.cpp', +@@ -127,6 +128,7 @@ dxvk_lib = static_library('dxvk', dxvk_src, glsl_generator.process(dxvk_shaders) + link_with : [ util_lib, spirv_lib, wsi_lib ], + dependencies : [ vkcommon_dep ] + dxvk_extra_deps, + include_directories : [ dxvk_include_path ], ++ cpp_pch : '../pch/dxvk_pch.h', + ) + + dxvk_dep = declare_dependency( +diff --git a/src/pch/d3d11_pch.h b/src/pch/d3d11_pch.h +new file mode 100644 +index 00000000..0989dd0d +--- /dev/null ++++ b/src/pch/d3d11_pch.h +@@ -0,0 +1,9 @@ ++#include "../dxvk/dxvk_adapter.h" ++#include "../dxvk/dxvk_buffer.h" ++#include "../dxvk/dxvk_device.h" ++#include "d3d11_context.h" ++#include "d3d11_device.h" ++#include "../util/util_string.h" ++#include "../util/util_env.h" ++#include "../util/log/log.h" ++#include "../util/thread.h" +\ No newline at end of file +diff --git a/src/pch/d3d9_pch.h b/src/pch/d3d9_pch.h +new file mode 100644 +index 00000000..1d9e06b2 +--- /dev/null ++++ b/src/pch/d3d9_pch.h +@@ -0,0 +1,7 @@ ++#include "../dxvk/dxvk_adapter.h" ++#include "../dxvk/dxvk_buffer.h" ++#include "../dxvk/dxvk_device.h" ++#include "../util/util_string.h" ++#include "../util/util_env.h" ++#include "../util/log/log.h" ++#include "../util/thread.h" +\ No newline at end of file +diff --git a/src/pch/dxvk_pch.h b/src/pch/dxvk_pch.h +new file mode 100644 +index 00000000..081d583b +--- /dev/null ++++ b/src/pch/dxvk_pch.h +@@ -0,0 +1,7 @@ 
++#include "dxvk_adapter.h" ++#include "dxvk_buffer.h" ++#include "dxvk_device.h" ++#include "../util/util_string.h" ++#include "../util/util_env.h" ++#include "../util/log/log.h" ++#include "../util/thread.h" +\ No newline at end of file +diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h +index 1741ccb8..630dfb7b 100644 +--- a/src/vulkan/vulkan_loader.h ++++ b/src/vulkan/vulkan_loader.h +@@ -452,6 +452,11 @@ namespace dxvk::vk { + VULKAN_FN(wine_vkAcquireKeyedMutex); + VULKAN_FN(wine_vkReleaseKeyedMutex); + #endif ++ ++ #ifdef VK_EXT_calibrated_timestamps ++ VULKAN_FN(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT); ++ VULKAN_FN(vkGetCalibratedTimestampsEXT); ++ #endif + }; + + } diff --git a/patches/lfx2-dxvk.patch.old b/patches/lfx2-dxvk.patch.old new file mode 100644 index 0000000000..a328c0839f --- /dev/null +++ b/patches/lfx2-dxvk.patch.old @@ -0,0 +1,3201 @@ +From 00ef6bc4f526bb3e8771853f7518608356e362cf Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sat, 31 Dec 2022 23:07:33 +0900 +Subject: [PATCH 01/12] lfx2 wip + +--- + include/latencyflex2.h | 62 +++++++++++++++++++++ + src/d3d11/d3d11_context.cpp | 3 +- + src/d3d11/d3d11_context_ext.cpp | 38 +++++++++++++ + src/d3d11/d3d11_context_ext.h | 8 ++- + src/d3d11/d3d11_device.cpp | 3 + + src/d3d11/d3d11_interfaces.h | 13 +++++ + src/dxvk/dxvk_cmdlist.cpp | 4 +- + src/dxvk/dxvk_cmdlist.h | 8 +++ + src/dxvk/dxvk_context.cpp | 4 ++ + src/dxvk/dxvk_context.h | 2 + + src/dxvk/dxvk_device.h | 5 ++ + src/dxvk/dxvk_extensions.h | 1 + + src/dxvk/dxvk_gpu_query.h | 1 + + src/dxvk/dxvk_lfx2.cpp | 98 +++++++++++++++++++++++++++++++++ + src/dxvk/dxvk_lfx2.h | 48 ++++++++++++++++ + src/dxvk/meson.build | 1 + + src/util/util_time.h | 6 +- + src/vulkan/vulkan_loader.h | 5 ++ + 18 files changed, 306 insertions(+), 4 deletions(-) + create mode 100644 include/latencyflex2.h + create mode 100644 src/dxvk/dxvk_lfx2.cpp + create mode 100644 src/dxvk/dxvk_lfx2.h + +diff --git a/include/latencyflex2.h 
b/include/latencyflex2.h +new file mode 100644 +index 00000000..5513093c +--- /dev/null ++++ b/include/latencyflex2.h +@@ -0,0 +1,62 @@ ++#ifndef LATENCYFLEX2_H ++#define LATENCYFLEX2_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _WIN32 ++#define LFX2_API __declspec(dllimport) ++#else ++#define LFX2_API ++#endif ++ ++ ++enum class lfx2MarkType { ++ lfx2MarkTypeBegin, ++ lfx2MarkTypeEnd, ++}; ++ ++struct lfx2Context; ++ ++/// A write handle for frame markers. ++struct lfx2Frame; ++ ++using lfx2Timestamp = uint64_t; ++ ++using lfx2SectionId = uint32_t; ++ ++ ++extern "C" { ++ ++LFX2_API lfx2Timestamp lfx2TimestampNow(); ++ ++#if defined(_WIN32) ++LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); ++#endif ++ ++LFX2_API void lfx2SleepUntil(lfx2Timestamp target); ++ ++LFX2_API const lfx2Context *lfx2ContextCreate(); ++ ++LFX2_API void lfx2ContextAddRef(const lfx2Context *context); ++ ++LFX2_API void lfx2ContextRelease(const lfx2Context *context); ++ ++LFX2_API const lfx2Frame *lfx2FrameCreate(const lfx2Context *context, lfx2Timestamp *out_timestamp); ++ ++LFX2_API void lfx2FrameAddRef(const lfx2Frame *frame); ++ ++LFX2_API void lfx2FrameRelease(const lfx2Frame *frame); ++ ++LFX2_API ++void lfx2MarkSection(const lfx2Frame *frame, ++ lfx2SectionId section_id, ++ lfx2MarkType mark_type, ++ lfx2Timestamp timestamp); ++ ++} // extern "C" ++ ++#endif // LATENCYFLEX2_H +diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp +index 4b06ae38..fd72258a 100644 +--- a/src/d3d11/d3d11_context.cpp ++++ b/src/d3d11/d3d11_context.cpp +@@ -50,7 +50,8 @@ namespace dxvk { + } + + if (riid == __uuidof(ID3D11VkExtContext) +- || riid == __uuidof(ID3D11VkExtContext1)) { ++ || riid == __uuidof(ID3D11VkExtContext1) ++ || riid == __uuidof(ID3D11VkExtContext2)) { + *ppvObject = ref(&m_contextExt); + return S_OK; + } +diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp +index c89e0bc0..be412630 100644 +--- 
a/src/d3d11/d3d11_context_ext.cpp ++++ b/src/d3d11/d3d11_context_ext.cpp +@@ -215,6 +215,44 @@ namespace dxvk { + return true; + } + ++ template ++ bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderStartLFX2(void *frame) { ++ auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ ++ m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); ++ m_ctx->EmitCs([query, frame, cDevice = m_ctx->m_device] (DxvkContext* ctx) { ++ auto &cLfx2 = cDevice->lfx2(); ++ cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); ++ ctx->writeTimestamp(query); ++ ctx->trackLatencyMarker(frame, query, false); ++ }); ++ return true; ++ } ++ ++ template ++ bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderEndLFX2(void *frame) { ++ auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ ++ m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); ++ m_ctx->EmitCs([query, frame, cDevice = m_ctx->m_device] (DxvkContext* ctx) { ++ auto &cLfx2 = cDevice->lfx2(); ++ cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); ++ ctx->writeTimestamp(query); ++ ctx->trackLatencyMarker(frame, query, true); ++ }); ++ return true; ++ } ++ ++ template ++ bool STDMETHODCALLTYPE D3D11DeviceContextExt::SleepAndBeginFrameLFX2() { ++ Logger::err("SleepAndBeginFrameLFX2 should be only called on an immediate context"); ++ return false; ++ } ++ ++ template<> ++ bool STDMETHODCALLTYPE D3D11DeviceContextExt::SleepAndBeginFrameLFX2() { ++ return true; ++ } + + template class D3D11DeviceContextExt; + template class D3D11DeviceContextExt; +diff --git a/src/d3d11/d3d11_context_ext.h b/src/d3d11/d3d11_context_ext.h +index 6b95dcf9..e9951d27 100644 +--- a/src/d3d11/d3d11_context_ext.h ++++ b/src/d3d11/d3d11_context_ext.h +@@ -8,7 +8,7 @@ namespace dxvk { + class D3D11ImmediateContext; + + template +- class D3D11DeviceContextExt : public ID3D11VkExtContext1 { ++ class 
D3D11DeviceContextExt : public ID3D11VkExtContext2 { + + public: + +@@ -71,6 +71,12 @@ namespace dxvk { + void* const* pWriteResources, + uint32_t NumWriteResources); + ++ bool STDMETHODCALLTYPE MarkRenderStartLFX2(void *lfx2Frame); ++ ++ bool STDMETHODCALLTYPE MarkRenderEndLFX2(void *lfx2Frame); ++ ++ bool STDMETHODCALLTYPE SleepAndBeginFrameLFX2(); ++ + private: + + ContextType* m_ctx; +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index 9398e484..078dcb4b 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -2469,6 +2469,9 @@ namespace dxvk { + return deviceFeatures.nvxBinaryImport + && deviceFeatures.vk12.bufferDeviceAddress; + ++ case D3D11_VK_LATENCYFLEX2: ++ return true; // TODO ++ + default: + return false; + } +diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h +index 8a2e6fcf..77ad12ba 100644 +--- a/src/d3d11/d3d11_interfaces.h ++++ b/src/d3d11/d3d11_interfaces.h +@@ -16,6 +16,7 @@ enum D3D11_VK_EXTENSION : uint32_t { + D3D11_VK_EXT_BARRIER_CONTROL = 3, + D3D11_VK_NVX_BINARY_IMPORT = 4, + D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, ++ D3D11_VK_LATENCYFLEX2 = 6, + }; + + +@@ -182,6 +183,17 @@ ID3D11VkExtContext1 : public ID3D11VkExtContext { + uint32_t numWriteResources) = 0; + }; + ++MIDL_INTERFACE("6e65f21a-0ecd-4e76-8faf-9e9afa4093a4") ++ID3D11VkExtContext2 : public ID3D11VkExtContext1 { ++ ++ virtual bool STDMETHODCALLTYPE MarkRenderStartLFX2( ++ void* lfx2Frame) = 0; ++ ++ virtual bool STDMETHODCALLTYPE MarkRenderEndLFX2( ++ void* lfx2Frame) = 0; ++ ++}; ++ + + #ifndef _MSC_VER + __CRT_UUID_DECL(ID3D11VkExtShader, 0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a); +@@ -189,4 +201,5 @@ __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0x + __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); + __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); + 
__CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); ++__CRT_UUID_DECL(ID3D11VkExtContext2, 0x6e65f21a,0x0ecd,0x4e76,0x8f,0xaf,0x9e,0x9a,0xfa,0x40,0x93,0xa4); + #endif +diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp +index 3bd3aa95..ad190fc0 100644 +--- a/src/dxvk/dxvk_cmdlist.cpp ++++ b/src/dxvk/dxvk_cmdlist.cpp +@@ -170,7 +170,8 @@ namespace dxvk { + DxvkCommandList::DxvkCommandList(DxvkDevice* device) + : m_device (device), + m_vkd (device->vkd()), +- m_vki (device->instance()->vki()) { ++ m_vki (device->instance()->vki()), ++ m_lfx2Tracker (device) { + const auto& graphicsQueue = m_device->queues().graphics; + const auto& transferQueue = m_device->queues().transfer; + +@@ -374,6 +375,7 @@ namespace dxvk { + // Return query and event handles + m_gpuQueryTracker.reset(); + m_gpuEventTracker.reset(); ++ m_lfx2Tracker.reset(); + + // Less important stuff + m_signalTracker.reset(); +diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h +index 1934b9a0..110a7ccc 100644 +--- a/src/dxvk/dxvk_cmdlist.h ++++ b/src/dxvk/dxvk_cmdlist.h +@@ -1,6 +1,7 @@ + #pragma once + + #include ++#include + + #include "dxvk_bind_mask.h" + #include "dxvk_buffer.h" +@@ -17,6 +18,7 @@ + #include "dxvk_sparse.h" + #include "dxvk_staging.h" + #include "dxvk_stats.h" ++#include "dxvk_lfx2.h" + + namespace dxvk { + +@@ -319,12 +321,17 @@ namespace dxvk { + m_signalTracker.add(signal, value); + } + ++ void trackLatencyMarker(void *lfx2Frame, Rc timestampQuery, bool end) { ++ m_lfx2Tracker.add(lfx2Frame, std::move(timestampQuery), end); ++ } ++ + /** + * \brief Notifies resources and signals + */ + void notifyObjects() { + m_resources.notify(); + m_signalTracker.notify(); ++ m_lfx2Tracker.notify(); + } + + /** +@@ -1050,6 +1057,7 @@ namespace dxvk { + DxvkGpuQueryTracker m_gpuQueryTracker; + DxvkBufferTracker m_bufferTracker; + DxvkStatCounters m_statCounters; ++ DxvkLfx2Tracker m_lfx2Tracker; + + 
DxvkCommandSubmission m_commandSubmission; + +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index bbd139ed..a5f1cd9c 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -6444,4 +6444,8 @@ namespace dxvk { + this->beginCurrentCommands(); + } + ++ void DxvkContext::trackLatencyMarker(void *frame, Rc timestampQuery, bool end) { ++ m_cmd->trackLatencyMarker(frame, std::move(timestampQuery), end); ++ } ++ + } +diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h +index b1fbc7df..1379cef6 100644 +--- a/src/dxvk/dxvk_context.h ++++ b/src/dxvk/dxvk_context.h +@@ -1388,6 +1388,8 @@ namespace dxvk { + m_cmd->addStatCtr(counter, value); + } + ++ void trackLatencyMarker(void *frame, Rc timestampQuery, bool end); ++ + private: + + Rc m_device; +diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h +index a24ee311..5b3a1f04 100644 +--- a/src/dxvk/dxvk_device.h ++++ b/src/dxvk/dxvk_device.h +@@ -98,6 +98,10 @@ namespace dxvk { + Rc vkd() const { + return m_vkd; + } ++ ++ const DxvkLfx2 &lfx2() const { ++ return m_lfx2; ++ } + + /** + * \brief Logical device handle +@@ -542,6 +546,7 @@ namespace dxvk { + Rc m_instance; + Rc m_adapter; + Rc m_vkd; ++ DxvkLfx2 m_lfx2; + + DxvkDeviceFeatures m_features; + DxvkDeviceInfo m_properties; +diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h +index ae4c8a74..ac0f4853 100644 +--- a/src/dxvk/dxvk_extensions.h ++++ b/src/dxvk/dxvk_extensions.h +@@ -297,6 +297,7 @@ namespace dxvk { + DxvkExt amdMemoryOverallocationBehaviour = { VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt amdShaderFragmentMask = { VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extAttachmentFeedbackLoopLayout = { VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, DxvkExtMode::Optional }; ++ DxvkExt extCalibratedTimestamps = { VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt 
extConservativeRasterization = { VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extCustomBorderColor = { VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt extDepthClipEnable = { VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME, DxvkExtMode::Optional }; +diff --git a/src/dxvk/dxvk_gpu_query.h b/src/dxvk/dxvk_gpu_query.h +index 919d0e26..acf1654e 100644 +--- a/src/dxvk/dxvk_gpu_query.h ++++ b/src/dxvk/dxvk_gpu_query.h +@@ -11,6 +11,7 @@ + namespace dxvk { + + class DxvkCommandList; ++ class DxvkDevice; + + class DxvkGpuQueryPool; + class DxvkGpuQueryAllocator; +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +new file mode 100644 +index 00000000..ccb1ad1e +--- /dev/null ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -0,0 +1,98 @@ ++#include "dxvk_lfx2.h" ++ ++#include "../util/util_time.h" ++#include "dxvk_device.h" ++ ++namespace dxvk { ++ ++ DxvkLfx2::DxvkLfx2() { ++ const auto lfxModuleName = "latencyflex2_rust.dll"; ++ ++ m_lfxModule = ::LoadLibraryA(lfxModuleName); ++ if (m_lfxModule == nullptr) { ++ Logger::err(str::format("Failed to load ", lfxModuleName)); ++ return; ++ } ++ ++#define LOAD_PFN(x) \ ++ this->x = GetProcAddress("lfx2" #x) ++ ++ LOAD_PFN(ContextCreate); ++ LOAD_PFN(ContextAddRef); ++ LOAD_PFN(ContextRelease); ++ LOAD_PFN(FrameCreate); ++ LOAD_PFN(FrameAddRef); ++ LOAD_PFN(FrameRelease); ++ LOAD_PFN(MarkSection); ++ LOAD_PFN(SleepUntil); ++ LOAD_PFN(TimestampNow); ++ ++#undef LOAD_PFN ++ } ++ ++ DxvkLfx2::~DxvkLfx2() { ++ if (m_lfxModule == nullptr) ++ return; ++ ++ ::FreeLibrary(m_lfxModule); ++ m_lfxModule = nullptr; ++ } ++ ++ template ++ T DxvkLfx2::GetProcAddress(const char* name) { ++ return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); ++ } ++ ++ DxvkLfx2Tracker::DxvkLfx2Tracker(DxvkDevice *device) : m_device(device) { ++ } ++ ++ void DxvkLfx2Tracker::add(void *lfx2Frame, Rc query, bool end) { ++ m_query[end] = std::move(query); ++ m_frame_handle[end] = 
lfx2Frame; ++ } ++ ++ void DxvkLfx2Tracker::notify() { ++ for (uint32_t i = 0; i < 2; i++) { ++ Rc &query = m_query[i]; ++ if (query.ptr()) { ++ DxvkQueryData queryData; // NOLINT(cppcoreguidelines-pro-type-member-init) ++ DxvkGpuQueryStatus status; ++ while ((status = query->getData(queryData)) == DxvkGpuQueryStatus::Pending); ++ ++ if (status == DxvkGpuQueryStatus::Available) { ++ uint64_t gpuTimestamp = queryData.timestamp.time; ++ VkCalibratedTimestampInfoEXT calibratedTimestampInfo[2]; ++ uint64_t calibratedTimestamps[2]; ++ uint64_t maxDeviation[2]; ++ calibratedTimestampInfo[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; ++ calibratedTimestampInfo[0].pNext = nullptr; ++ calibratedTimestampInfo[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; ++ calibratedTimestampInfo[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; ++ calibratedTimestampInfo[1].pNext = nullptr; ++ calibratedTimestampInfo[1].timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; ++ m_device->vkd()->vkGetCalibratedTimestampsEXT(m_device->handle(), 2, calibratedTimestampInfo, ++ calibratedTimestamps, maxDeviation); ++ ++ uint64_t hostNsTimestamp = dxvk::high_resolution_clock::to_ns(calibratedTimestamps[1]); ++ int64_t gpuTimestampDelta = gpuTimestamp - calibratedTimestamps[0]; ++ int64_t timestamp = hostNsTimestamp + (int64_t) (gpuTimestampDelta * ++ (double) m_device->adapter()->deviceProperties().limits.timestampPeriod); ++ ++ m_device->lfx2().MarkSection(static_cast(m_frame_handle[i]), ++ 1000, i == 0 ? 
lfx2MarkType::lfx2MarkTypeBegin : lfx2MarkType::lfx2MarkTypeEnd, timestamp); ++ m_device->lfx2().FrameRelease(static_cast(m_frame_handle[i])); ++ } ++ } ++ } ++ } ++ ++ void DxvkLfx2Tracker::reset() { ++ for (auto &i: m_query) { ++ i = nullptr; ++ } ++ for (auto &i: m_frame_handle) { ++ i = nullptr; ++ } ++ } ++ ++} // dxvk +\ No newline at end of file +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +new file mode 100644 +index 00000000..f56fa608 +--- /dev/null ++++ b/src/dxvk/dxvk_lfx2.h +@@ -0,0 +1,48 @@ ++#pragma once ++ ++#include "dxvk_gpu_query.h" ++#include "latencyflex2.h" ++ ++namespace dxvk { ++ ++ class DxvkLfx2 { ++ public: ++ DxvkLfx2(); ++ virtual ~DxvkLfx2(); ++ ++#define DECLARE_PFN(x) \ ++ decltype(&::lfx2##x) x {} ++ ++ DECLARE_PFN(ContextCreate); ++ DECLARE_PFN(ContextAddRef); ++ DECLARE_PFN(ContextRelease); ++ DECLARE_PFN(FrameCreate); ++ DECLARE_PFN(FrameAddRef); ++ DECLARE_PFN(FrameRelease); ++ DECLARE_PFN(MarkSection); ++ DECLARE_PFN(SleepUntil); ++ DECLARE_PFN(TimestampNow); ++ ++#undef DECLARE_PFN ++ ++ private: ++ template ++ T GetProcAddress(const char* name); ++ ++ HMODULE m_lfxModule{}; ++ }; ++ ++ class DxvkLfx2Tracker { ++ public: ++ explicit DxvkLfx2Tracker(DxvkDevice *device); ++ void add(void *lfx2Frame, Rc query, bool end); ++ void reset(); ++ void notify(); ++ ++ private: ++ DxvkDevice *m_device; ++ Rc m_query[2]{}; ++ void *m_frame_handle[2]{}; ++ }; ++ ++} // dxvk +\ No newline at end of file +diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build +index 2bf46c5f..fa174987 100644 +--- a/src/dxvk/meson.build ++++ b/src/dxvk/meson.build +@@ -82,6 +82,7 @@ dxvk_src = [ + 'dxvk_image.cpp', + 'dxvk_instance.cpp', + 'dxvk_lifetime.cpp', ++ 'dxvk_lfx2.cpp', + 'dxvk_memory.cpp', + 'dxvk_meta_blit.cpp', + 'dxvk_meta_clear.cpp', +diff --git a/src/util/util_time.h b/src/util/util_time.h +index bcc311b6..c62f1065 100644 +--- a/src/util/util_time.h ++++ b/src/util/util_time.h +@@ -23,13 +23,17 @@ namespace dxvk { + } + + static 
inline time_point get_time_from_counter(int64_t counter) { ++ return time_point(duration(to_ns(counter))); ++ } ++ ++ static inline int64_t to_ns(int64_t counter) { + // Keep the frequency static, this doesn't change at all. + static const int64_t freq = get_frequency(); + + const int64_t whole = (counter / freq) * period::den; + const int64_t part = (counter % freq) * period::den / freq; + +- return time_point(duration(whole + part)); ++ return whole + part; + } + + static inline int64_t get_frequency() { +diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h +index 1741ccb8..630dfb7b 100644 +--- a/src/vulkan/vulkan_loader.h ++++ b/src/vulkan/vulkan_loader.h +@@ -452,6 +452,11 @@ namespace dxvk::vk { + VULKAN_FN(wine_vkAcquireKeyedMutex); + VULKAN_FN(wine_vkReleaseKeyedMutex); + #endif ++ ++ #ifdef VK_EXT_calibrated_timestamps ++ VULKAN_FN(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT); ++ VULKAN_FN(vkGetCalibratedTimestampsEXT); ++ #endif + }; + + } +-- +2.43.0 + + +From cf40b68b9e9423f4653c3a3b7aa7765686cf86e5 Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sat, 31 Dec 2022 23:30:09 +0900 +Subject: [PATCH 02/12] unix compat for ci + +--- + src/dxvk/dxvk_lfx2.cpp | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index ccb1ad1e..1bea5bbe 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -2,11 +2,16 @@ + + #include "../util/util_time.h" + #include "dxvk_device.h" ++#include "../util/util_win32_compat.h" + + namespace dxvk { + + DxvkLfx2::DxvkLfx2() { ++#ifdef _WIN32 + const auto lfxModuleName = "latencyflex2_rust.dll"; ++#else ++ const auto lfxModuleName = "liblatencyflex2_rust.so"; ++#endif + + m_lfxModule = ::LoadLibraryA(lfxModuleName); + if (m_lfxModule == nullptr) { +@@ -38,9 +43,9 @@ namespace dxvk { + m_lfxModule = nullptr; + } + +- template +- T DxvkLfx2::GetProcAddress(const char* name) { +- return 
reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); ++ template ++ T DxvkLfx2::GetProcAddress(const char *name) { ++ return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); + } + + DxvkLfx2Tracker::DxvkLfx2Tracker(DxvkDevice *device) : m_device(device) { +@@ -79,7 +84,8 @@ namespace dxvk { + (double) m_device->adapter()->deviceProperties().limits.timestampPeriod); + + m_device->lfx2().MarkSection(static_cast(m_frame_handle[i]), +- 1000, i == 0 ? lfx2MarkType::lfx2MarkTypeBegin : lfx2MarkType::lfx2MarkTypeEnd, timestamp); ++ 1000, i == 0 ? lfx2MarkType::lfx2MarkTypeBegin : lfx2MarkType::lfx2MarkTypeEnd, ++ timestamp); + m_device->lfx2().FrameRelease(static_cast(m_frame_handle[i])); + } + } +-- +2.43.0 + + +From 57357cc438286dcaf38138eccde080d8fdab299a Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sat, 31 Dec 2022 23:33:17 +0900 +Subject: [PATCH 03/12] more win32 compat for ci + +--- + src/dxvk/dxvk_lfx2.cpp | 13 ++++++++++++- + src/dxvk/dxvk_lfx2.h | 3 +++ + src/util/util_time.h | 6 +----- + 3 files changed, 16 insertions(+), 6 deletions(-) + +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index 1bea5bbe..494256d5 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -31,6 +31,9 @@ namespace dxvk { + LOAD_PFN(MarkSection); + LOAD_PFN(SleepUntil); + LOAD_PFN(TimestampNow); ++#ifdef _WIN32 ++ LOAD_PFN(TimestampFromQpc); ++#endif + + #undef LOAD_PFN + } +@@ -74,11 +77,19 @@ namespace dxvk { + calibratedTimestampInfo[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; + calibratedTimestampInfo[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + calibratedTimestampInfo[1].pNext = nullptr; ++#ifdef _WIN32 + calibratedTimestampInfo[1].timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; ++#else ++ calibratedTimestampInfo[1].timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; ++#endif + m_device->vkd()->vkGetCalibratedTimestampsEXT(m_device->handle(), 2, calibratedTimestampInfo, + 
calibratedTimestamps, maxDeviation); + +- uint64_t hostNsTimestamp = dxvk::high_resolution_clock::to_ns(calibratedTimestamps[1]); ++#ifdef _WIN32 ++ uint64_t hostNsTimestamp = m_device->lfx2().TimestampFromQpc(calibratedTimestamps[1]); ++#else ++ uint64_t hostNsTimestamp = calibratedTimestamps[1]; ++#endif + int64_t gpuTimestampDelta = gpuTimestamp - calibratedTimestamps[0]; + int64_t timestamp = hostNsTimestamp + (int64_t) (gpuTimestampDelta * + (double) m_device->adapter()->deviceProperties().limits.timestampPeriod); +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +index f56fa608..1f5c5b5f 100644 +--- a/src/dxvk/dxvk_lfx2.h ++++ b/src/dxvk/dxvk_lfx2.h +@@ -22,6 +22,9 @@ namespace dxvk { + DECLARE_PFN(MarkSection); + DECLARE_PFN(SleepUntil); + DECLARE_PFN(TimestampNow); ++#ifdef _WIN32 ++ DECLARE_PFN(TimestampFromQpc); ++#endif + + #undef DECLARE_PFN + +diff --git a/src/util/util_time.h b/src/util/util_time.h +index c62f1065..bcc311b6 100644 +--- a/src/util/util_time.h ++++ b/src/util/util_time.h +@@ -23,17 +23,13 @@ namespace dxvk { + } + + static inline time_point get_time_from_counter(int64_t counter) { +- return time_point(duration(to_ns(counter))); +- } +- +- static inline int64_t to_ns(int64_t counter) { + // Keep the frequency static, this doesn't change at all. 
+ static const int64_t freq = get_frequency(); + + const int64_t whole = (counter / freq) * period::den; + const int64_t part = (counter % freq) * period::den / freq; + +- return whole + part; ++ return time_point(duration(whole + part)); + } + + static inline int64_t get_frequency() { +-- +2.43.0 + + +From 3f02471745b7fdcf7301f1e32552b768282bb235 Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sun, 1 Jan 2023 16:04:13 +0900 +Subject: [PATCH 04/12] deconstify + +--- + include/latencyflex2.h | 14 +++++++------- + src/d3d11/d3d11_context_ext.cpp | 8 ++++---- + src/dxvk/dxvk_lfx2.cpp | 4 ++-- + 3 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +index 5513093c..6978e2af 100644 +--- a/include/latencyflex2.h ++++ b/include/latencyflex2.h +@@ -39,20 +39,20 @@ LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); + + LFX2_API void lfx2SleepUntil(lfx2Timestamp target); + +-LFX2_API const lfx2Context *lfx2ContextCreate(); ++LFX2_API lfx2Context *lfx2ContextCreate(); + +-LFX2_API void lfx2ContextAddRef(const lfx2Context *context); ++LFX2_API void lfx2ContextAddRef(lfx2Context *context); + +-LFX2_API void lfx2ContextRelease(const lfx2Context *context); ++LFX2_API void lfx2ContextRelease(lfx2Context *context); + +-LFX2_API const lfx2Frame *lfx2FrameCreate(const lfx2Context *context, lfx2Timestamp *out_timestamp); ++LFX2_API lfx2Frame *lfx2FrameCreate(lfx2Context *context, lfx2Timestamp *out_timestamp); + +-LFX2_API void lfx2FrameAddRef(const lfx2Frame *frame); ++LFX2_API void lfx2FrameAddRef(lfx2Frame *frame); + +-LFX2_API void lfx2FrameRelease(const lfx2Frame *frame); ++LFX2_API void lfx2FrameRelease(lfx2Frame *frame); + + LFX2_API +-void lfx2MarkSection(const lfx2Frame *frame, ++void lfx2MarkSection(lfx2Frame *frame, + lfx2SectionId section_id, + lfx2MarkType mark_type, + lfx2Timestamp timestamp); +diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp +index be412630..cd1a7d1d 
100644 +--- a/src/d3d11/d3d11_context_ext.cpp ++++ b/src/d3d11/d3d11_context_ext.cpp +@@ -219,10 +219,10 @@ namespace dxvk { + bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderStartLFX2(void *frame) { + auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); + +- m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); ++ m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); + m_ctx->EmitCs([query, frame, cDevice = m_ctx->m_device] (DxvkContext* ctx) { + auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); ++ cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); + ctx->writeTimestamp(query); + ctx->trackLatencyMarker(frame, query, false); + }); +@@ -233,10 +233,10 @@ namespace dxvk { + bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderEndLFX2(void *frame) { + auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); + +- m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); ++ m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); + m_ctx->EmitCs([query, frame, cDevice = m_ctx->m_device] (DxvkContext* ctx) { + auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); ++ cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); + ctx->writeTimestamp(query); + ctx->trackLatencyMarker(frame, query, true); + }); +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index 494256d5..f56bcb51 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -94,10 +94,10 @@ namespace dxvk { + int64_t timestamp = hostNsTimestamp + (int64_t) (gpuTimestampDelta * + (double) m_device->adapter()->deviceProperties().limits.timestampPeriod); + +- m_device->lfx2().MarkSection(static_cast(m_frame_handle[i]), ++ m_device->lfx2().MarkSection(static_cast(m_frame_handle[i]), + 
1000, i == 0 ? lfx2MarkType::lfx2MarkTypeBegin : lfx2MarkType::lfx2MarkTypeEnd, + timestamp); +- m_device->lfx2().FrameRelease(static_cast(m_frame_handle[i])); ++ m_device->lfx2().FrameRelease(static_cast(m_frame_handle[i])); + } + } + } +-- +2.43.0 + + +From 3e359e1ba2a7a5da872bbd42b2e3a5e5ccb2dce2 Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Mon, 2 Jan 2023 12:08:47 +0900 +Subject: [PATCH 05/12] pch + +--- + src/d3d11/meson.build | 1 + + src/d3d9/meson.build | 1 + + src/dxvk/meson.build | 1 + + src/pch/d3d11_pch.h | 9 +++++++++ + src/pch/d3d9_pch.h | 7 +++++++ + src/pch/dxvk_pch.h | 7 +++++++ + 6 files changed, 26 insertions(+) + create mode 100644 src/pch/d3d11_pch.h + create mode 100644 src/pch/d3d9_pch.h + create mode 100644 src/pch/dxvk_pch.h + +diff --git a/src/d3d11/meson.build b/src/d3d11/meson.build +index 9b51e6ea..218d7c6a 100644 +--- a/src/d3d11/meson.build ++++ b/src/d3d11/meson.build +@@ -86,6 +86,7 @@ d3d11_dll = shared_library('d3d11'+dll_ext, dxgi_common_src + d3d11_src + d3d10_ + vs_module_defs : 'd3d11'+def_spec_ext, + link_args : d3d11_ld_args, + link_depends : [ d3d11_link_depends ], ++ cpp_pch : '../pch/d3d11_pch.h', + ) + + d3d11_dep = declare_dependency( +diff --git a/src/d3d9/meson.build b/src/d3d9/meson.build +index dd6b2316..dc2aa1fb 100644 +--- a/src/d3d9/meson.build ++++ b/src/d3d9/meson.build +@@ -65,6 +65,7 @@ d3d9_dll = shared_library('d3d9'+dll_ext, d3d9_src, glsl_generator.process(d3d9_ + vs_module_defs : 'd3d9'+def_spec_ext, + link_args : d3d9_ld_args, + link_depends : [ d3d9_link_depends ], ++ cpp_pch : '../pch/d3d9_pch.h', + ) + + d3d9_dep = declare_dependency( +diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build +index fa174987..73fc4c71 100644 +--- a/src/dxvk/meson.build ++++ b/src/dxvk/meson.build +@@ -144,6 +144,7 @@ dxvk_lib = static_library('dxvk', dxvk_src, glsl_generator.process(dxvk_shaders) + link_with : [ util_lib, spirv_lib, wsi_lib ], + dependencies : [ vkcommon_dep ] + dxvk_extra_deps, + 
include_directories : [ dxvk_include_path ], ++ cpp_pch : '../pch/dxvk_pch.h', + ) + + dxvk_dep = declare_dependency( +diff --git a/src/pch/d3d11_pch.h b/src/pch/d3d11_pch.h +new file mode 100644 +index 00000000..0989dd0d +--- /dev/null ++++ b/src/pch/d3d11_pch.h +@@ -0,0 +1,9 @@ ++#include "../dxvk/dxvk_adapter.h" ++#include "../dxvk/dxvk_buffer.h" ++#include "../dxvk/dxvk_device.h" ++#include "d3d11_context.h" ++#include "d3d11_device.h" ++#include "../util/util_string.h" ++#include "../util/util_env.h" ++#include "../util/log/log.h" ++#include "../util/thread.h" +\ No newline at end of file +diff --git a/src/pch/d3d9_pch.h b/src/pch/d3d9_pch.h +new file mode 100644 +index 00000000..1d9e06b2 +--- /dev/null ++++ b/src/pch/d3d9_pch.h +@@ -0,0 +1,7 @@ ++#include "../dxvk/dxvk_adapter.h" ++#include "../dxvk/dxvk_buffer.h" ++#include "../dxvk/dxvk_device.h" ++#include "../util/util_string.h" ++#include "../util/util_env.h" ++#include "../util/log/log.h" ++#include "../util/thread.h" +\ No newline at end of file +diff --git a/src/pch/dxvk_pch.h b/src/pch/dxvk_pch.h +new file mode 100644 +index 00000000..081d583b +--- /dev/null ++++ b/src/pch/dxvk_pch.h +@@ -0,0 +1,7 @@ ++#include "dxvk_adapter.h" ++#include "dxvk_buffer.h" ++#include "dxvk_device.h" ++#include "../util/util_string.h" ++#include "../util/util_env.h" ++#include "../util/log/log.h" ++#include "../util/thread.h" +\ No newline at end of file +-- +2.43.0 + + +From 424a50594af6c38f3e7e76412f1dc2dd5b6ad07d Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Mon, 2 Jan 2023 22:34:42 +0900 +Subject: [PATCH 06/12] Frame wrapper conversion, add implicit frame support + +--- + src/d3d11/d3d11_context_ext.cpp | 27 +++----- + src/d3d11/d3d11_context_ext.h | 2 - + src/d3d11/d3d11_context_imm.cpp | 1 + + src/d3d11/d3d11_context_imm.h | 2 + + src/d3d11/d3d11_device.cpp | 9 +-- + src/d3d11/d3d11_device.h | 6 +- + src/d3d11/d3d11_interfaces.h | 8 +++ + src/d3d11/d3d11_swapchain.cpp | 1 + + src/dxvk/dxvk_cmdlist.h | 4 +- 
+ src/dxvk/dxvk_context.cpp | 35 +++++++++- + src/dxvk/dxvk_context.h | 7 +- + src/dxvk/dxvk_cs.cpp | 2 +- + src/dxvk/dxvk_device.cpp | 5 ++ + src/dxvk/dxvk_device.h | 7 +- + src/dxvk/dxvk_gpu_query.cpp | 2 +- + src/dxvk/dxvk_lfx2.cpp | 109 +++++++++++++++++++++++++++++--- + src/dxvk/dxvk_lfx2.h | 44 +++++++++++-- + 17 files changed, 222 insertions(+), 49 deletions(-) + +diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp +index cd1a7d1d..3e33f2dc 100644 +--- a/src/d3d11/d3d11_context_ext.cpp ++++ b/src/d3d11/d3d11_context_ext.cpp +@@ -218,13 +218,13 @@ namespace dxvk { + template + bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderStartLFX2(void *frame) { + auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ auto frameWrapper = Lfx2Frame(m_ctx->m_device->lfx2(), static_cast(frame)); + +- m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); +- m_ctx->EmitCs([query, frame, cDevice = m_ctx->m_device] (DxvkContext* ctx) { ++ m_ctx->EmitCs([query, cDevice = m_ctx->m_device, frameWrapper] (DxvkContext* ctx) { + auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); ++ cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); + ctx->writeTimestamp(query); +- ctx->trackLatencyMarker(frame, query, false); ++ ctx->trackLatencyMarker(frameWrapper, query, false); + }); + return true; + } +@@ -232,28 +232,17 @@ namespace dxvk { + template + bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderEndLFX2(void *frame) { + auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ auto frameWrapper = Lfx2Frame(m_ctx->m_device->lfx2(), static_cast(frame)); + +- m_ctx->m_device->lfx2().FrameAddRef(static_cast(frame)); +- m_ctx->EmitCs([query, frame, cDevice = m_ctx->m_device] (DxvkContext* ctx) { ++ m_ctx->EmitCs([query, cDevice = m_ctx->m_device, frameWrapper] (DxvkContext* ctx) { + 
auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(static_cast(frame), 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); ++ cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); + ctx->writeTimestamp(query); +- ctx->trackLatencyMarker(frame, query, true); ++ ctx->trackLatencyMarker(frameWrapper, query, true); + }); + return true; + } + +- template +- bool STDMETHODCALLTYPE D3D11DeviceContextExt::SleepAndBeginFrameLFX2() { +- Logger::err("SleepAndBeginFrameLFX2 should be only called on an immediate context"); +- return false; +- } +- +- template<> +- bool STDMETHODCALLTYPE D3D11DeviceContextExt::SleepAndBeginFrameLFX2() { +- return true; +- } +- + template class D3D11DeviceContextExt; + template class D3D11DeviceContextExt; + +diff --git a/src/d3d11/d3d11_context_ext.h b/src/d3d11/d3d11_context_ext.h +index e9951d27..3b17d1f5 100644 +--- a/src/d3d11/d3d11_context_ext.h ++++ b/src/d3d11/d3d11_context_ext.h +@@ -75,8 +75,6 @@ namespace dxvk { + + bool STDMETHODCALLTYPE MarkRenderEndLFX2(void *lfx2Frame); + +- bool STDMETHODCALLTYPE SleepAndBeginFrameLFX2(); +- + private: + + ContextType* m_ctx; +diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp +index 466e9a96..32e3eb78 100644 +--- a/src/d3d11/d3d11_context_imm.cpp ++++ b/src/d3d11/d3d11_context_imm.cpp +@@ -842,6 +842,7 @@ namespace dxvk { + D3D10DeviceLock lock = LockContext(); + + EmitCs([] (DxvkContext* ctx) { ++ ctx->endLfx2Frame(); + ctx->endFrame(); + }); + } +diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h +index 3e683250..ffd2c703 100644 +--- a/src/d3d11/d3d11_context_imm.h ++++ b/src/d3d11/d3d11_context_imm.h +@@ -182,6 +182,8 @@ namespace dxvk { + HANDLE hEvent, + BOOL Synchronize); + ++ private: ++ Lfx2Frame m_implicitLfx2Frame {}; + }; + + } +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index 078dcb4b..0353109d 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ 
b/src/d3d11/d3d11_device.cpp +@@ -2784,10 +2784,10 @@ namespace dxvk { + return static_cast(got->second); + } + ++ void D3D11DeviceExt::EnqueueImplicitFrameLFX2(void *frame) { ++ m_device->GetDXVKDevice()->getImplicitLfx2Context()->EnqueueFrame(Lfx2Frame(m_device->GetDXVKDevice()->lfx2(), reinterpret_cast(frame))); ++ } + +- +- +- + D3D11VideoDevice::D3D11VideoDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice) +@@ -3141,7 +3141,8 @@ namespace dxvk { + } + + if (riid == __uuidof(ID3D11VkExtDevice) +- || riid == __uuidof(ID3D11VkExtDevice1)) { ++ || riid == __uuidof(ID3D11VkExtDevice1) ++ || riid == __uuidof(ID3D11VkExtDevice2)) { + *ppvObject = ref(&m_d3d11DeviceExt); + return S_OK; + } +diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h +index 7a44b5ad..129353be 100644 +--- a/src/d3d11/d3d11_device.h ++++ b/src/d3d11/d3d11_device.h +@@ -507,7 +507,7 @@ namespace dxvk { + /** + * \brief Extended D3D11 device + */ +- class D3D11DeviceExt : public ID3D11VkExtDevice1 { ++ class D3D11DeviceExt : public ID3D11VkExtDevice2 { + + public: + +@@ -561,7 +561,9 @@ namespace dxvk { + const D3D11_SAMPLER_DESC* pSamplerDesc, + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle); +- ++ ++ void STDMETHODCALLTYPE EnqueueImplicitFrameLFX2(void *lfx2Frame) override; ++ + private: + + D3D11DXGIDevice* m_container; +diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h +index 77ad12ba..f7b8e6d2 100644 +--- a/src/d3d11/d3d11_interfaces.h ++++ b/src/d3d11/d3d11_interfaces.h +@@ -115,6 +115,13 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + uint32_t* pCudaTextureHandle) = 0; + }; + ++MIDL_INTERFACE("a1a5185c-0c43-4608-91a0-97a0cd098d48") ++ID3D11VkExtDevice2 : public ID3D11VkExtDevice1 { ++ ++ virtual void STDMETHODCALLTYPE EnqueueImplicitFrameLFX2(void *lfx2Frame) = 0; ++ ++}; ++ + + /** + * \brief Extended D3D11 context +@@ -199,6 +206,7 @@ ID3D11VkExtContext2 : public ID3D11VkExtContext1 { + __CRT_UUID_DECL(ID3D11VkExtShader, 
0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a); + __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17); + __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); ++__CRT_UUID_DECL(ID3D11VkExtDevice2, 0xa1a5185c,0x0c43,0x4608,0x91,0xa0,0x97,0xa0,0xcd,0x09,0x8d,0x48); + __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); + __CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); + __CRT_UUID_DECL(ID3D11VkExtContext2, 0x6e65f21a,0x0ecd,0x4e76,0x8f,0xaf,0x9e,0x9a,0xfa,0x40,0x93,0xa4); +diff --git a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp +index 73939e2c..f37486bc 100644 +--- a/src/d3d11/d3d11_swapchain.cpp ++++ b/src/d3d11/d3d11_swapchain.cpp +@@ -450,6 +450,7 @@ namespace dxvk { + // Ensure that we can safely destroy the swap chain + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); ++ m_device->getImplicitLfx2Context()->Reset(); + + m_presentStatus.result = VK_SUCCESS; + m_dirtyHdrMetadata = true; +diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h +index 110a7ccc..55507bb8 100644 +--- a/src/dxvk/dxvk_cmdlist.h ++++ b/src/dxvk/dxvk_cmdlist.h +@@ -321,8 +321,8 @@ namespace dxvk { + m_signalTracker.add(signal, value); + } + +- void trackLatencyMarker(void *lfx2Frame, Rc timestampQuery, bool end) { +- m_lfx2Tracker.add(lfx2Frame, std::move(timestampQuery), end); ++ void trackLatencyMarker(Lfx2Frame lfx2Frame, Rc timestampQuery, bool end) { ++ m_lfx2Tracker.add(std::move(lfx2Frame), std::move(timestampQuery), end); + } + + /** +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index a5f1cd9c..319b292d 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -6444,8 +6444,39 @@ namespace dxvk { + this->beginCurrentCommands(); + } + +- void 
DxvkContext::trackLatencyMarker(void *frame, Rc timestampQuery, bool end) { +- m_cmd->trackLatencyMarker(frame, std::move(timestampQuery), end); ++ void DxvkContext::trackLatencyMarker(Lfx2Frame frame, Rc timestampQuery, bool end) { ++ m_cmd->trackLatencyMarker(std::move(frame), std::move(timestampQuery), end); ++ } ++ ++ void DxvkContext::tryBeginLfx2Frame(bool critical) { ++ if (m_type != DxvkContextType::Primary) ++ Logger::err("beginLfx2Frame should only be called on immediate contexts"); ++ if (!m_lfx2Frame) { ++ m_lfx2Frame = m_device->getImplicitLfx2Context()->DequeueFrame(critical); ++ if (m_lfx2Frame) { ++ auto query = m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ m_device->lfx2().MarkSection(m_lfx2Frame, ++ 800, lfx2MarkType::lfx2MarkTypeBegin, ++ m_device->lfx2().TimestampNow()); ++ writeTimestamp(query); ++ trackLatencyMarker(m_lfx2Frame, query, false); ++ } ++ } ++ } ++ ++ void DxvkContext::endLfx2Frame() { ++ if (m_type != DxvkContextType::Primary) ++ Logger::err("endLfx2Frame should only be called on immediate contexts"); ++ tryBeginLfx2Frame(true); ++ if (m_lfx2Frame) { ++ auto query = m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ m_device->lfx2().MarkSection(m_lfx2Frame, ++ 800, lfx2MarkType::lfx2MarkTypeEnd, ++ m_device->lfx2().TimestampNow()); ++ writeTimestamp(query); ++ trackLatencyMarker(m_lfx2Frame, query, true); ++ m_lfx2Frame = {}; ++ } + } + + } +diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h +index 1379cef6..ffd505ba 100644 +--- a/src/dxvk/dxvk_context.h ++++ b/src/dxvk/dxvk_context.h +@@ -67,6 +67,9 @@ namespace dxvk { + * \param [out] status Submission feedback + */ + void flushCommandList(DxvkSubmitStatus* status); ++ ++ void tryBeginLfx2Frame(bool critical); ++ void endLfx2Frame(); + + /** + * \brief Begins generating query data +@@ -1388,7 +1391,7 @@ namespace dxvk { + m_cmd->addStatCtr(counter, value); + } + +- void trackLatencyMarker(void *frame, Rc timestampQuery, bool end); ++ void 
trackLatencyMarker(Lfx2Frame frame, Rc timestampQuery, bool end); + + private: + +@@ -1434,6 +1437,8 @@ namespace dxvk { + std::array m_gpLookupCache = { }; + std::array m_cpLookupCache = { }; + ++ Lfx2Frame m_lfx2Frame = {}; ++ + void blitImageFb( + const Rc& dstImage, + const Rc& srcImage, +diff --git a/src/dxvk/dxvk_cs.cpp b/src/dxvk/dxvk_cs.cpp +index 5788ba82..fe1f4c3a 100644 +--- a/src/dxvk/dxvk_cs.cpp ++++ b/src/dxvk/dxvk_cs.cpp +@@ -175,7 +175,7 @@ namespace dxvk { + + for (auto& chunk : chunks) { + m_context->addStatCtr(DxvkStatCounter::CsChunkCount, 1); +- ++ m_context->tryBeginLfx2Frame(false); + chunk->executeAll(m_context.ptr()); + + // Use a separate mutex for the chunk counter, this +diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp +index 9a053791..d133930f 100644 +--- a/src/dxvk/dxvk_device.cpp ++++ b/src/dxvk/dxvk_device.cpp +@@ -14,6 +14,7 @@ namespace dxvk { + m_instance (instance), + m_adapter (adapter), + m_vkd (vkd), ++ m_lfx2ImplicitContext(&m_lfx2), + m_features (features), + m_properties (adapter->devicePropertiesExt()), + m_perfHints (getPerfHints()), +@@ -339,4 +340,8 @@ namespace dxvk { + m_recycledCommandLists.returnObject(cmdList); + } + ++ DxvkLfx2ImplicitContext *DxvkDevice::getImplicitLfx2Context() { ++ return &m_lfx2ImplicitContext; ++ } ++ + } +diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h +index 5b3a1f04..7fa12dc5 100644 +--- a/src/dxvk/dxvk_device.h ++++ b/src/dxvk/dxvk_device.h +@@ -99,7 +99,7 @@ namespace dxvk { + return m_vkd; + } + +- const DxvkLfx2 &lfx2() const { ++ const Lfx2Fn &lfx2() const { + return m_lfx2; + } + +@@ -538,6 +538,8 @@ namespace dxvk { + * used by the GPU can be safely destroyed. 
+ */ + void waitForIdle(); ++ ++ DxvkLfx2ImplicitContext* getImplicitLfx2Context(); + + private: + +@@ -546,7 +548,8 @@ namespace dxvk { + Rc m_instance; + Rc m_adapter; + Rc m_vkd; +- DxvkLfx2 m_lfx2; ++ Lfx2Fn m_lfx2; ++ DxvkLfx2ImplicitContext m_lfx2ImplicitContext; + + DxvkDeviceFeatures m_features; + DxvkDeviceInfo m_properties; +diff --git a/src/dxvk/dxvk_gpu_query.cpp b/src/dxvk/dxvk_gpu_query.cpp +index aaec6a0f..5f4885f0 100644 +--- a/src/dxvk/dxvk_gpu_query.cpp ++++ b/src/dxvk/dxvk_gpu_query.cpp +@@ -340,7 +340,7 @@ namespace dxvk { + handle.queryId); + + cmd->cmdWriteTimestamp( +- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, ++ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + handle.queryPool, + handle.queryId); + +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index f56bcb51..aa844668 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -1,12 +1,14 @@ + #include "dxvk_lfx2.h" + ++#include ++ + #include "../util/util_time.h" + #include "dxvk_device.h" + #include "../util/util_win32_compat.h" + + namespace dxvk { + +- DxvkLfx2::DxvkLfx2() { ++ Lfx2Fn::Lfx2Fn() { + #ifdef _WIN32 + const auto lfxModuleName = "latencyflex2_rust.dll"; + #else +@@ -38,7 +40,7 @@ namespace dxvk { + #undef LOAD_PFN + } + +- DxvkLfx2::~DxvkLfx2() { ++ Lfx2Fn::~Lfx2Fn() { + if (m_lfxModule == nullptr) + return; + +@@ -47,16 +49,16 @@ namespace dxvk { + } + + template +- T DxvkLfx2::GetProcAddress(const char *name) { ++ T Lfx2Fn::GetProcAddress(const char *name) { + return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); + } + + DxvkLfx2Tracker::DxvkLfx2Tracker(DxvkDevice *device) : m_device(device) { + } + +- void DxvkLfx2Tracker::add(void *lfx2Frame, Rc query, bool end) { ++ void DxvkLfx2Tracker::add(Lfx2Frame lfx2Frame, Rc query, bool end) { + m_query[end] = std::move(query); +- m_frame_handle[end] = lfx2Frame; ++ m_frame_handle[end] = std::move(lfx2Frame); + } + + void DxvkLfx2Tracker::notify() { +@@ -94,10 +96,9 @@ namespace dxvk { + 
int64_t timestamp = hostNsTimestamp + (int64_t) (gpuTimestampDelta * + (double) m_device->adapter()->deviceProperties().limits.timestampPeriod); + +- m_device->lfx2().MarkSection(static_cast(m_frame_handle[i]), ++ m_device->lfx2().MarkSection(m_frame_handle[i], + 1000, i == 0 ? lfx2MarkType::lfx2MarkTypeBegin : lfx2MarkType::lfx2MarkTypeEnd, + timestamp); +- m_device->lfx2().FrameRelease(static_cast(m_frame_handle[i])); + } + } + } +@@ -108,8 +109,100 @@ namespace dxvk { + i = nullptr; + } + for (auto &i: m_frame_handle) { +- i = nullptr; ++ i = {}; ++ } ++ } ++ ++ DxvkLfx2ImplicitContext::DxvkLfx2ImplicitContext(Lfx2Fn *lfx2): m_lfx2(lfx2) { ++ } ++ ++ DxvkLfx2ImplicitContext::~DxvkLfx2ImplicitContext() { ++ std::lock_guard lock(m_mutex); ++ m_frames.clear(); ++ } ++ ++ void DxvkLfx2ImplicitContext::EnqueueFrame(Lfx2Frame frame) { ++ std::unique_lock lock(m_mutex, std::defer_lock); ++ if (m_needReset.load()) { ++ std::this_thread::sleep_for(std::chrono::milliseconds(200)); ++ lock.lock(); ++ Logger::info("Reset LFX2 context done"); ++ m_needReset.store(false); ++ m_frames.clear(); ++ } else { ++ lock.lock(); ++ } ++ m_frames.push_back(std::move(frame)); ++ if (m_frames.size() >= 16) { ++ Logger::info("Resetting LFX2 context: too many inflight frames"); ++ m_needReset.store(true); + } + } + ++ Lfx2Frame DxvkLfx2ImplicitContext::DequeueFrame(bool critical) { ++ if (m_needReset.load()) { ++ return {}; ++ } ++ std::lock_guard lock(m_mutex); ++ if (m_frames.empty()) { ++ if (critical) { ++ Logger::info("Resetting LFX2 context: no frames"); ++ m_needReset.store(true); ++ } ++ return {}; ++ } ++ Lfx2Frame frame = std::move(m_frames.front()); ++ m_frames.pop_front(); ++ return frame; ++ } ++ ++ void DxvkLfx2ImplicitContext::Reset() { ++ std::lock_guard lock(m_mutex); ++ Logger::info("Resetting LFX2 context: initiated by swapchain"); ++ m_needReset.store(true); ++ } ++ ++ Lfx2Frame::Lfx2Frame() { ++ ++ } ++ ++ Lfx2Frame::Lfx2Frame(const Lfx2Fn &lfx2, lfx2Frame *lfx2Frame) 
: m_lfx2(&lfx2), m_lfx2Frame(lfx2Frame) { ++ m_lfx2->FrameAddRef(m_lfx2Frame); ++ } ++ ++ Lfx2Frame::~Lfx2Frame() { ++ if (m_lfx2Frame != nullptr) ++ m_lfx2->FrameRelease(m_lfx2Frame); ++ } ++ ++ Lfx2Frame::Lfx2Frame(const Lfx2Frame &other): m_lfx2(other.m_lfx2), m_lfx2Frame(other.m_lfx2Frame) { ++ m_lfx2->FrameAddRef(m_lfx2Frame); ++ } ++ ++ Lfx2Frame::Lfx2Frame(Lfx2Frame &&other) noexcept : m_lfx2(other.m_lfx2), m_lfx2Frame(other.m_lfx2Frame) { ++ other.m_lfx2Frame = nullptr; ++ } ++ ++ Lfx2Frame &Lfx2Frame::operator=(const Lfx2Frame &other) { ++ if (this != &other) { ++ if (m_lfx2Frame != nullptr) ++ m_lfx2->FrameRelease(m_lfx2Frame); ++ ++ m_lfx2 = other.m_lfx2; ++ m_lfx2Frame = other.m_lfx2Frame; ++ m_lfx2->FrameAddRef(m_lfx2Frame); ++ } ++ ++ return *this; ++ } ++ ++ Lfx2Frame &Lfx2Frame::operator=(Lfx2Frame &&other) noexcept { ++ if (m_lfx2Frame != nullptr) ++ m_lfx2->FrameRelease(m_lfx2Frame); ++ ++ m_lfx2 = other.m_lfx2; ++ m_lfx2Frame = other.m_lfx2Frame; ++ other.m_lfx2Frame = nullptr; ++ return *this; ++ } + } // dxvk +\ No newline at end of file +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +index 1f5c5b5f..809234dc 100644 +--- a/src/dxvk/dxvk_lfx2.h ++++ b/src/dxvk/dxvk_lfx2.h +@@ -1,14 +1,15 @@ + #pragma once + ++#include + #include "dxvk_gpu_query.h" + #include "latencyflex2.h" + + namespace dxvk { + +- class DxvkLfx2 { ++ class Lfx2Fn { + public: +- DxvkLfx2(); +- virtual ~DxvkLfx2(); ++ Lfx2Fn(); ++ virtual ~Lfx2Fn(); + + #define DECLARE_PFN(x) \ + decltype(&::lfx2##x) x {} +@@ -35,17 +36,50 @@ namespace dxvk { + HMODULE m_lfxModule{}; + }; + ++ class Lfx2Frame { ++ public: ++ Lfx2Frame(); ++ Lfx2Frame(const Lfx2Fn &lfx2, lfx2Frame *lfx2Frame); ++ Lfx2Frame(const Lfx2Frame &other); ++ Lfx2Frame(Lfx2Frame &&other) noexcept; ++ ~Lfx2Frame(); ++ ++ Lfx2Frame& operator=(const Lfx2Frame &other); ++ Lfx2Frame& operator=(Lfx2Frame &&other) noexcept; ++ ++ operator lfx2Frame *() const { return m_lfx2Frame; } ++ ++ private: ++ const Lfx2Fn 
*m_lfx2{}; ++ lfx2Frame *m_lfx2Frame{}; ++ }; ++ ++ class DxvkLfx2ImplicitContext { ++ public: ++ explicit DxvkLfx2ImplicitContext(Lfx2Fn *lfx2); ++ ~DxvkLfx2ImplicitContext(); ++ void EnqueueFrame(Lfx2Frame frame); ++ Lfx2Frame DequeueFrame(bool critical); ++ void Reset(); ++ ++ private: ++ Lfx2Fn *m_lfx2; ++ std::mutex m_mutex; ++ std::deque m_frames; ++ std::atomic_bool m_needReset = false; ++ }; ++ + class DxvkLfx2Tracker { + public: + explicit DxvkLfx2Tracker(DxvkDevice *device); +- void add(void *lfx2Frame, Rc query, bool end); ++ void add(Lfx2Frame frame, Rc query, bool end); + void reset(); + void notify(); + + private: + DxvkDevice *m_device; + Rc m_query[2]{}; +- void *m_frame_handle[2]{}; ++ Lfx2Frame m_frame_handle[2]{}; + }; + + } // dxvk +\ No newline at end of file +-- +2.43.0 + + +From b9171a2ff124c6bee91b4f36c0b0d9c1cd08e69c Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sat, 7 Jan 2023 12:58:36 +0900 +Subject: [PATCH 07/12] Update to new LFX2 implicit context API + +--- + include/latencyflex2.h | 68 +++++++++++++++++++++++------------ + src/d3d11/d3d11_device.cpp | 4 +-- + src/d3d11/d3d11_device.h | 2 +- + src/d3d11/d3d11_interfaces.h | 2 +- + src/d3d11/d3d11_swapchain.cpp | 2 +- + src/dxvk/dxvk_context.cpp | 2 +- + src/dxvk/dxvk_lfx2.cpp | 57 +++++++++-------------------- + src/dxvk/dxvk_lfx2.h | 15 ++++---- + 8 files changed, 77 insertions(+), 75 deletions(-) + +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +index 6978e2af..9de7a1cf 100644 +--- a/include/latencyflex2.h ++++ b/include/latencyflex2.h +@@ -1,12 +1,11 @@ + #ifndef LATENCYFLEX2_H + #define LATENCYFLEX2_H + +-#include +-#include +-#include +-#include +-#include +-#include ++#include ++#include ++#include ++#include ++#include + #ifdef _WIN32 + #define LFX2_API __declspec(dllimport) + #else +@@ -14,24 +13,29 @@ + #endif + + +-enum class lfx2MarkType { ++typedef enum lfx2MarkType { + lfx2MarkTypeBegin, + lfx2MarkTypeEnd, +-}; ++} lfx2MarkType; + +-struct 
lfx2Context; ++typedef struct lfx2Context lfx2Context; + +-/// A write handle for frame markers. +-struct lfx2Frame; ++/** ++ * A write handle for frame markers. ++ */ ++typedef struct lfx2Frame lfx2Frame; + +-using lfx2Timestamp = uint64_t; ++typedef struct lfx2ImplicitContext lfx2ImplicitContext; + +-using lfx2SectionId = uint32_t; ++typedef uint64_t lfx2Timestamp; + ++typedef uint32_t lfx2SectionId; + ++#ifdef __cplusplus + extern "C" { ++#endif // __cplusplus + +-LFX2_API lfx2Timestamp lfx2TimestampNow(); ++LFX2_API lfx2Timestamp lfx2TimestampNow(void); + + #if defined(_WIN32) + LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); +@@ -39,24 +43,42 @@ LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); + + LFX2_API void lfx2SleepUntil(lfx2Timestamp target); + +-LFX2_API lfx2Context *lfx2ContextCreate(); ++LFX2_API struct lfx2Context *lfx2ContextCreate(void); + +-LFX2_API void lfx2ContextAddRef(lfx2Context *context); ++LFX2_API void lfx2ContextAddRef(struct lfx2Context *context); + +-LFX2_API void lfx2ContextRelease(lfx2Context *context); ++LFX2_API void lfx2ContextRelease(struct lfx2Context *context); + +-LFX2_API lfx2Frame *lfx2FrameCreate(lfx2Context *context, lfx2Timestamp *out_timestamp); ++LFX2_API ++struct lfx2Frame *lfx2FrameCreate(struct lfx2Context *context, ++ lfx2Timestamp *out_timestamp); + +-LFX2_API void lfx2FrameAddRef(lfx2Frame *frame); ++LFX2_API void lfx2FrameAddRef(struct lfx2Frame *frame); + +-LFX2_API void lfx2FrameRelease(lfx2Frame *frame); ++LFX2_API void lfx2FrameRelease(struct lfx2Frame *frame); + + LFX2_API +-void lfx2MarkSection(lfx2Frame *frame, ++void lfx2MarkSection(struct lfx2Frame *frame, + lfx2SectionId section_id, +- lfx2MarkType mark_type, ++ enum lfx2MarkType mark_type, + lfx2Timestamp timestamp); + ++LFX2_API struct lfx2ImplicitContext *lfx2ImplicitContextCreate(void); ++ ++LFX2_API void lfx2ImplicitContextRelease(struct lfx2ImplicitContext *context); ++ ++LFX2_API void lfx2ImplicitContextReset(struct 
lfx2ImplicitContext *context); ++ ++LFX2_API ++void lfx2FrameCreateImplicit(struct lfx2ImplicitContext *context, ++ lfx2Timestamp *out_timestamp); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameDequeueImplicit(struct lfx2ImplicitContext *context, ++ bool critical); ++ ++#ifdef __cplusplus + } // extern "C" ++#endif // __cplusplus + +-#endif // LATENCYFLEX2_H ++#endif /* LATENCYFLEX2_H */ +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index 0353109d..0bee3d3d 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -2784,8 +2784,8 @@ namespace dxvk { + return static_cast(got->second); + } + +- void D3D11DeviceExt::EnqueueImplicitFrameLFX2(void *frame) { +- m_device->GetDXVKDevice()->getImplicitLfx2Context()->EnqueueFrame(Lfx2Frame(m_device->GetDXVKDevice()->lfx2(), reinterpret_cast(frame))); ++ void *D3D11DeviceExt::GetImplicitContextLFX2() { ++ return m_device->GetDXVKDevice()->getImplicitLfx2Context()->context(); + } + + D3D11VideoDevice::D3D11VideoDevice( +diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h +index 129353be..ab4763ed 100644 +--- a/src/d3d11/d3d11_device.h ++++ b/src/d3d11/d3d11_device.h +@@ -562,7 +562,7 @@ namespace dxvk { + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle); + +- void STDMETHODCALLTYPE EnqueueImplicitFrameLFX2(void *lfx2Frame) override; ++ void* STDMETHODCALLTYPE GetImplicitContextLFX2(); + + private: + +diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h +index f7b8e6d2..b6d83e54 100644 +--- a/src/d3d11/d3d11_interfaces.h ++++ b/src/d3d11/d3d11_interfaces.h +@@ -118,7 +118,7 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + MIDL_INTERFACE("a1a5185c-0c43-4608-91a0-97a0cd098d48") + ID3D11VkExtDevice2 : public ID3D11VkExtDevice1 { + +- virtual void STDMETHODCALLTYPE EnqueueImplicitFrameLFX2(void *lfx2Frame) = 0; ++ virtual void* STDMETHODCALLTYPE GetImplicitContextLFX2() = 0; + + }; + +diff --git a/src/d3d11/d3d11_swapchain.cpp 
b/src/d3d11/d3d11_swapchain.cpp +index f37486bc..4bae8609 100644 +--- a/src/d3d11/d3d11_swapchain.cpp ++++ b/src/d3d11/d3d11_swapchain.cpp +@@ -450,7 +450,7 @@ namespace dxvk { + // Ensure that we can safely destroy the swap chain + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); +- m_device->getImplicitLfx2Context()->Reset(); ++ m_device->getImplicitLfx2Context()->reset(); + + m_presentStatus.result = VK_SUCCESS; + m_dirtyHdrMetadata = true; +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index 319b292d..b6fdbe93 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -6452,7 +6452,7 @@ namespace dxvk { + if (m_type != DxvkContextType::Primary) + Logger::err("beginLfx2Frame should only be called on immediate contexts"); + if (!m_lfx2Frame) { +- m_lfx2Frame = m_device->getImplicitLfx2Context()->DequeueFrame(critical); ++ m_lfx2Frame = m_device->getImplicitLfx2Context()->dequeueFrame(critical); + if (m_lfx2Frame) { + auto query = m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); + m_device->lfx2().MarkSection(m_lfx2Frame, +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index aa844668..2caa8983 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -36,6 +36,11 @@ namespace dxvk { + #ifdef _WIN32 + LOAD_PFN(TimestampFromQpc); + #endif ++ LOAD_PFN(ImplicitContextCreate); ++ LOAD_PFN(ImplicitContextRelease); ++ LOAD_PFN(ImplicitContextReset); ++ LOAD_PFN(FrameCreateImplicit); ++ LOAD_PFN(FrameDequeueImplicit); + + #undef LOAD_PFN + } +@@ -114,52 +119,23 @@ namespace dxvk { + } + + DxvkLfx2ImplicitContext::DxvkLfx2ImplicitContext(Lfx2Fn *lfx2): m_lfx2(lfx2) { ++ m_context = m_lfx2->ImplicitContextCreate(); + } + + DxvkLfx2ImplicitContext::~DxvkLfx2ImplicitContext() { +- std::lock_guard lock(m_mutex); +- m_frames.clear(); +- } +- +- void DxvkLfx2ImplicitContext::EnqueueFrame(Lfx2Frame frame) { +- std::unique_lock lock(m_mutex, std::defer_lock); +- if 
(m_needReset.load()) { +- std::this_thread::sleep_for(std::chrono::milliseconds(200)); +- lock.lock(); +- Logger::info("Reset LFX2 context done"); +- m_needReset.store(false); +- m_frames.clear(); +- } else { +- lock.lock(); +- } +- m_frames.push_back(std::move(frame)); +- if (m_frames.size() >= 16) { +- Logger::info("Resetting LFX2 context: too many inflight frames"); +- m_needReset.store(true); +- } ++ m_lfx2->ImplicitContextRelease(m_context); + } + +- Lfx2Frame DxvkLfx2ImplicitContext::DequeueFrame(bool critical) { +- if (m_needReset.load()) { +- return {}; +- } +- std::lock_guard lock(m_mutex); +- if (m_frames.empty()) { +- if (critical) { +- Logger::info("Resetting LFX2 context: no frames"); +- m_needReset.store(true); +- } +- return {}; +- } +- Lfx2Frame frame = std::move(m_frames.front()); +- m_frames.pop_front(); +- return frame; ++ Lfx2Frame DxvkLfx2ImplicitContext::dequeueFrame(bool critical) { ++ lfx2Frame *frame = m_lfx2->FrameDequeueImplicit(m_context, critical); ++ Lfx2Frame wrapper(*m_lfx2, frame); ++ if (frame) ++ m_lfx2->FrameRelease(frame); ++ return wrapper; + } + +- void DxvkLfx2ImplicitContext::Reset() { +- std::lock_guard lock(m_mutex); +- Logger::info("Resetting LFX2 context: initiated by swapchain"); +- m_needReset.store(true); ++ void DxvkLfx2ImplicitContext::reset() { ++ m_lfx2->ImplicitContextReset(m_context); + } + + Lfx2Frame::Lfx2Frame() { +@@ -167,7 +143,8 @@ namespace dxvk { + } + + Lfx2Frame::Lfx2Frame(const Lfx2Fn &lfx2, lfx2Frame *lfx2Frame) : m_lfx2(&lfx2), m_lfx2Frame(lfx2Frame) { +- m_lfx2->FrameAddRef(m_lfx2Frame); ++ if (m_lfx2Frame) ++ m_lfx2->FrameAddRef(m_lfx2Frame); + } + + Lfx2Frame::~Lfx2Frame() { +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +index 809234dc..96c7931b 100644 +--- a/src/dxvk/dxvk_lfx2.h ++++ b/src/dxvk/dxvk_lfx2.h +@@ -26,6 +26,11 @@ namespace dxvk { + #ifdef _WIN32 + DECLARE_PFN(TimestampFromQpc); + #endif ++ DECLARE_PFN(ImplicitContextCreate); ++ DECLARE_PFN(ImplicitContextRelease); ++ 
DECLARE_PFN(ImplicitContextReset); ++ DECLARE_PFN(FrameCreateImplicit); ++ DECLARE_PFN(FrameDequeueImplicit); + + #undef DECLARE_PFN + +@@ -58,15 +63,13 @@ namespace dxvk { + public: + explicit DxvkLfx2ImplicitContext(Lfx2Fn *lfx2); + ~DxvkLfx2ImplicitContext(); +- void EnqueueFrame(Lfx2Frame frame); +- Lfx2Frame DequeueFrame(bool critical); +- void Reset(); ++ lfx2ImplicitContext *context() const { return m_context; } ++ Lfx2Frame dequeueFrame(bool critical); ++ void reset(); + + private: + Lfx2Fn *m_lfx2; +- std::mutex m_mutex; +- std::deque m_frames; +- std::atomic_bool m_needReset = false; ++ lfx2ImplicitContext *m_context; + }; + + class DxvkLfx2Tracker { +-- +2.43.0 + + +From 52ccfa0364dc2e61debeccf6df3d4f1dcb0e7d24 Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sun, 8 Jan 2023 19:09:59 +0900 +Subject: [PATCH 08/12] Convert to new unified ID3DLfx2ExtDevice API + +--- + include/latencyflex2.h | 46 +++++++++++++++++++++++-- + src/d3d11/d3d11_context.cpp | 3 +- + src/d3d11/d3d11_context_ext.cpp | 28 ---------------- + src/d3d11/d3d11_context_ext.h | 6 +--- + src/d3d11/d3d11_context_imm.h | 1 + + src/d3d11/d3d11_device.cpp | 59 +++++++++++++++++++++++++++++---- + src/d3d11/d3d11_device.h | 29 ++++++++++++++-- + src/d3d11/d3d11_interfaces.h | 25 +++++--------- + 8 files changed, 134 insertions(+), 63 deletions(-) + +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +index 9de7a1cf..91ab1397 100644 +--- a/include/latencyflex2.h ++++ b/include/latencyflex2.h +@@ -6,12 +6,24 @@ + #include + #include + #include ++#ifdef LFX2_DX12 ++#include ++#endif ++ + #ifdef _WIN32 + #define LFX2_API __declspec(dllimport) + #else + #define LFX2_API + #endif + ++#ifdef LFX2_DX12 ++typedef struct lfx2Dx12SubmitAux { ++ ID3D12GraphicsCommandList* executeBefore; ++ ID3D12GraphicsCommandList* executeAfter; ++ ID3D12Fence* fence; ++ uint64_t fenceValue; ++} lfx2Dx12SubmitAux; ++#endif + + typedef enum lfx2MarkType { + lfx2MarkTypeBegin, +@@ -20,6 +32,10 @@ typedef enum 
lfx2MarkType { + + typedef struct lfx2Context lfx2Context; + ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++typedef struct lfx2Dx12Context lfx2Dx12Context; ++#endif ++ + /** + * A write handle for frame markers. + */ +@@ -35,6 +51,32 @@ typedef uint32_t lfx2SectionId; + extern "C" { + #endif // __cplusplus + ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API struct lfx2Dx12Context *lfx2Dx12ContextCreate(ID3D12Device* device); ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API void lfx2Dx12ContextAddRef(struct lfx2Dx12Context *context); ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API void lfx2Dx12ContextRelease(struct lfx2Dx12Context *context); ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API ++lfx2Dx12SubmitAux lfx2Dx12ContextBeforeSubmit(struct lfx2Dx12Context *context, ++ ID3D12CommandQueue* queue); ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API void lfx2Dx12ContextBeginFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++LFX2_API void lfx2Dx12ContextEndFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ + LFX2_API lfx2Timestamp lfx2TimestampNow(void); + + #if defined(_WIN32) +@@ -70,8 +112,8 @@ LFX2_API void lfx2ImplicitContextRelease(struct lfx2ImplicitContext *context); + LFX2_API void lfx2ImplicitContextReset(struct lfx2ImplicitContext *context); + + LFX2_API +-void lfx2FrameCreateImplicit(struct lfx2ImplicitContext *context, +- lfx2Timestamp *out_timestamp); ++struct lfx2Frame *lfx2FrameCreateImplicit(struct lfx2ImplicitContext *context, ++ lfx2Timestamp *out_timestamp); + + LFX2_API + struct lfx2Frame *lfx2FrameDequeueImplicit(struct lfx2ImplicitContext *context, +diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp +index fd72258a..4b06ae38 100644 +--- a/src/d3d11/d3d11_context.cpp ++++ b/src/d3d11/d3d11_context.cpp +@@ -50,8 +50,7 @@ namespace dxvk { + } + 
+ if (riid == __uuidof(ID3D11VkExtContext) +- || riid == __uuidof(ID3D11VkExtContext1) +- || riid == __uuidof(ID3D11VkExtContext2)) { ++ || riid == __uuidof(ID3D11VkExtContext1)) { + *ppvObject = ref(&m_contextExt); + return S_OK; + } +diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp +index 3e33f2dc..6512656a 100644 +--- a/src/d3d11/d3d11_context_ext.cpp ++++ b/src/d3d11/d3d11_context_ext.cpp +@@ -215,34 +215,6 @@ namespace dxvk { + return true; + } + +- template +- bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderStartLFX2(void *frame) { +- auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); +- auto frameWrapper = Lfx2Frame(m_ctx->m_device->lfx2(), static_cast(frame)); +- +- m_ctx->EmitCs([query, cDevice = m_ctx->m_device, frameWrapper] (DxvkContext* ctx) { +- auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); +- ctx->writeTimestamp(query); +- ctx->trackLatencyMarker(frameWrapper, query, false); +- }); +- return true; +- } +- +- template +- bool STDMETHODCALLTYPE D3D11DeviceContextExt::MarkRenderEndLFX2(void *frame) { +- auto query = m_ctx->m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); +- auto frameWrapper = Lfx2Frame(m_ctx->m_device->lfx2(), static_cast(frame)); +- +- m_ctx->EmitCs([query, cDevice = m_ctx->m_device, frameWrapper] (DxvkContext* ctx) { +- auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); +- ctx->writeTimestamp(query); +- ctx->trackLatencyMarker(frameWrapper, query, true); +- }); +- return true; +- } +- + template class D3D11DeviceContextExt; + template class D3D11DeviceContextExt; + +diff --git a/src/d3d11/d3d11_context_ext.h b/src/d3d11/d3d11_context_ext.h +index 3b17d1f5..6b95dcf9 100644 +--- a/src/d3d11/d3d11_context_ext.h ++++ b/src/d3d11/d3d11_context_ext.h +@@ -8,7 +8,7 @@ namespace dxvk { + class D3D11ImmediateContext; + 
+ template +- class D3D11DeviceContextExt : public ID3D11VkExtContext2 { ++ class D3D11DeviceContextExt : public ID3D11VkExtContext1 { + + public: + +@@ -71,10 +71,6 @@ namespace dxvk { + void* const* pWriteResources, + uint32_t NumWriteResources); + +- bool STDMETHODCALLTYPE MarkRenderStartLFX2(void *lfx2Frame); +- +- bool STDMETHODCALLTYPE MarkRenderEndLFX2(void *lfx2Frame); +- + private: + + ContextType* m_ctx; +diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h +index ffd2c703..f5b105e8 100644 +--- a/src/d3d11/d3d11_context_imm.h ++++ b/src/d3d11/d3d11_context_imm.h +@@ -18,6 +18,7 @@ namespace dxvk { + friend class D3D11SwapChain; + friend class D3D11VideoContext; + friend class D3D11DXGIKeyedMutex; ++ friend class D3D11DeviceLfx2Ext; + public: + + D3D11ImmediateContext( +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index 0bee3d3d..a11b8835 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -2784,10 +2784,6 @@ namespace dxvk { + return static_cast(got->second); + } + +- void *D3D11DeviceExt::GetImplicitContextLFX2() { +- return m_device->GetDXVKDevice()->getImplicitLfx2Context()->context(); +- } +- + D3D11VideoDevice::D3D11VideoDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice) +@@ -3087,6 +3083,7 @@ namespace dxvk { + m_dxvkDevice (pDxvkDevice), + m_d3d11Device (this, FeatureLevel, FeatureFlags), + m_d3d11DeviceExt(this, &m_d3d11Device), ++ m_d3d11DeviceLfx2Ext(this, m_d3d11Device.m_context.ptr()), + m_d3d11Interop (this, &m_d3d11Device), + m_d3d11Video (this, &m_d3d11Device), + m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), +@@ -3141,11 +3138,15 @@ namespace dxvk { + } + + if (riid == __uuidof(ID3D11VkExtDevice) +- || riid == __uuidof(ID3D11VkExtDevice1) +- || riid == __uuidof(ID3D11VkExtDevice2)) { ++ || riid == __uuidof(ID3D11VkExtDevice1)) { + *ppvObject = ref(&m_d3d11DeviceExt); + return S_OK; + } ++ ++ if (riid == __uuidof(ID3DLfx2ExtDevice)) { ++ 
*ppvObject = ref(&m_d3d11DeviceLfx2Ext); ++ return S_OK; ++ } + + if (riid == __uuidof(IDXGIDXVKDevice)) { + *ppvObject = ref(&m_metaDevice); +@@ -3432,4 +3433,50 @@ namespace dxvk { + return m_dxvkDevice; + } + ++ D3D11DeviceLfx2Ext::D3D11DeviceLfx2Ext( ++ D3D11DXGIDevice* pContainer, ++ D3D11ImmediateContext* pImmediateContext) ++ : m_container(pContainer), m_immediateContext(pImmediateContext), m_dxvkDevice(pContainer->GetDXVKDevice()) { ++ ++ } ++ ++ HRESULT STDMETHODCALLTYPE D3D11DeviceLfx2Ext::QueryInterface(const IID &riid, void **ppvObject) { ++ return m_container->QueryInterface(riid, ppvObject); ++ } ++ ++ ULONG STDMETHODCALLTYPE D3D11DeviceLfx2Ext::AddRef() { ++ return m_container->AddRef(); ++ } ++ ++ ULONG STDMETHODCALLTYPE D3D11DeviceLfx2Ext::Release() { ++ return m_container->Release(); ++ } ++ ++ void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::ImplicitBeginFrame(uint64_t *outTimestamp, void *outFrame) { ++ *(lfx2Frame **)outFrame = m_dxvkDevice->lfx2().FrameCreateImplicit(m_dxvkDevice->getImplicitLfx2Context()->context(), outTimestamp); ++ } ++ ++ void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::MarkRenderStart(void *frame) { ++ auto query = m_dxvkDevice->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); ++ ++ m_immediateContext->EmitCs([query, cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { ++ auto &cLfx2 = cDevice->lfx2(); ++ cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); ++ ctx->writeTimestamp(query); ++ ctx->trackLatencyMarker(frameWrapper, query, false); ++ }); ++ } ++ ++ void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::MarkRenderEnd(void *frame) { ++ auto query = m_dxvkDevice->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); ++ auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); ++ ++ m_immediateContext->EmitCs([query, cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { ++ auto &cLfx2 = cDevice->lfx2(); ++ 
cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); ++ ctx->writeTimestamp(query); ++ ctx->trackLatencyMarker(frameWrapper, query, true); ++ }); ++ } + } +diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h +index ab4763ed..d0d6a285 100644 +--- a/src/d3d11/d3d11_device.h ++++ b/src/d3d11/d3d11_device.h +@@ -48,6 +48,7 @@ namespace dxvk { + * as part of a \ref D3D11DeviceContainer. + */ + class D3D11Device final : public ID3D11Device5 { ++ friend class D3D11DXGIDevice; + /// Maximum number of resource init commands per command buffer + constexpr static uint64_t InitCommandThreshold = 50; + public: +@@ -507,7 +508,7 @@ namespace dxvk { + /** + * \brief Extended D3D11 device + */ +- class D3D11DeviceExt : public ID3D11VkExtDevice2 { ++ class D3D11DeviceExt : public ID3D11VkExtDevice1 { + + public: + +@@ -562,8 +563,6 @@ namespace dxvk { + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle); + +- void* STDMETHODCALLTYPE GetImplicitContextLFX2(); +- + private: + + D3D11DXGIDevice* m_container; +@@ -698,6 +697,29 @@ namespace dxvk { + + }; + ++ class D3D11DeviceLfx2Ext : public ID3DLfx2ExtDevice { ++ public: ++ D3D11DeviceLfx2Ext(D3D11DXGIDevice *pContainer, ++ D3D11ImmediateContext *pImmediateContext); ++ ++ HRESULT STDMETHODCALLTYPE QueryInterface(const IID &riid, void **ppvObject); ++ ++ ULONG STDMETHODCALLTYPE AddRef(); ++ ++ ULONG STDMETHODCALLTYPE Release(); ++ ++ void STDMETHODCALLTYPE ImplicitBeginFrame(uint64_t *outTimestamp, void *outFrame); ++ ++ void STDMETHODCALLTYPE MarkRenderStart(void *frame); ++ ++ void STDMETHODCALLTYPE MarkRenderEnd(void *frame); ++ ++ private: ++ D3D11DXGIDevice *m_container; ++ D3D11ImmediateContext *m_immediateContext; ++ const Rc m_dxvkDevice; ++ }; ++ + + /** + * \brief DXVK swap chain factory +@@ -860,6 +882,7 @@ namespace dxvk { + + D3D11Device m_d3d11Device; + D3D11DeviceExt m_d3d11DeviceExt; ++ D3D11DeviceLfx2Ext m_d3d11DeviceLfx2Ext; + D3D11VkInterop 
m_d3d11Interop; + D3D11VideoDevice m_d3d11Video; + D3D11on12Device m_d3d11on12; +diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h +index b6d83e54..35bbedf0 100644 +--- a/src/d3d11/d3d11_interfaces.h ++++ b/src/d3d11/d3d11_interfaces.h +@@ -115,10 +115,14 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + uint32_t* pCudaTextureHandle) = 0; + }; + +-MIDL_INTERFACE("a1a5185c-0c43-4608-91a0-97a0cd098d48") +-ID3D11VkExtDevice2 : public ID3D11VkExtDevice1 { ++MIDL_INTERFACE("851a9f0f-5da0-4850-b563-a7bbc414f4e6") ++ID3DLfx2ExtDevice : public IUnknown { + +- virtual void* STDMETHODCALLTYPE GetImplicitContextLFX2() = 0; ++ virtual void STDMETHODCALLTYPE MarkRenderStart(void *frame) = 0; ++ ++ virtual void STDMETHODCALLTYPE MarkRenderEnd(void *frame) = 0; ++ ++ virtual void STDMETHODCALLTYPE ImplicitBeginFrame(uint64_t *outTimestamp, void *outFrame) = 0; + + }; + +@@ -190,24 +194,11 @@ ID3D11VkExtContext1 : public ID3D11VkExtContext { + uint32_t numWriteResources) = 0; + }; + +-MIDL_INTERFACE("6e65f21a-0ecd-4e76-8faf-9e9afa4093a4") +-ID3D11VkExtContext2 : public ID3D11VkExtContext1 { +- +- virtual bool STDMETHODCALLTYPE MarkRenderStartLFX2( +- void* lfx2Frame) = 0; +- +- virtual bool STDMETHODCALLTYPE MarkRenderEndLFX2( +- void* lfx2Frame) = 0; +- +-}; +- +- + #ifndef _MSC_VER + __CRT_UUID_DECL(ID3D11VkExtShader, 0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a); + __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17); + __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); +-__CRT_UUID_DECL(ID3D11VkExtDevice2, 0xa1a5185c,0x0c43,0x4608,0x91,0xa0,0x97,0xa0,0xcd,0x09,0x8d,0x48); ++__CRT_UUID_DECL(ID3DLfx2ExtDevice, 0x851a9f0f,0x5da0,0x4850,0xb5,0x63,0xa7,0xbb,0xc4,0x14,0xf4,0xe6); + __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); + __CRT_UUID_DECL(ID3D11VkExtContext1, 
0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); +-__CRT_UUID_DECL(ID3D11VkExtContext2, 0x6e65f21a,0x0ecd,0x4e76,0x8f,0xaf,0x9e,0x9a,0xfa,0x40,0x93,0xa4); + #endif +-- +2.43.0 + + +From 8a374ead0982bdf5e3d466979e5ba436fc7e1cff Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Fri, 13 Jan 2023 18:56:54 +0900 +Subject: [PATCH 09/12] Don't call FreeLibrary within unload callback + +In attempt to fix shutdown hang. +--- + src/dxvk/dxvk_lfx2.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index 2caa8983..4d5298db 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -49,6 +49,10 @@ namespace dxvk { + if (m_lfxModule == nullptr) + return; + ++ // Calling FreeLibrary deadlocks if called from DllMain. ++ if (this_thread::isInModuleDetachment()) ++ return; ++ + ::FreeLibrary(m_lfxModule); + m_lfxModule = nullptr; + } +-- +2.43.0 + + +From 1437bedd1b98814040ad3356d3272654f8c1516d Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Sun, 15 Jan 2023 12:51:34 +0900 +Subject: [PATCH 10/12] Port to new LFX2 Vulkan Context API + +--- + include/latencyflex2.h | 73 +++++++++++++++++++++++---------- + src/d3d11/d3d11_context_imm.cpp | 2 +- + src/d3d11/d3d11_device.cpp | 15 +++---- + src/dxvk/dxvk_cmdlist.cpp | 15 +++++-- + src/dxvk/dxvk_cmdlist.h | 12 +++--- + src/dxvk/dxvk_context.cpp | 27 ++++-------- + src/dxvk/dxvk_context.h | 2 - + src/dxvk/dxvk_device.cpp | 7 ++++ + src/dxvk/dxvk_device.h | 2 + + src/dxvk/dxvk_lfx2.cpp | 66 +++-------------------------- + src/dxvk/dxvk_lfx2.h | 19 +++------ + 11 files changed, 106 insertions(+), 134 deletions(-) + +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +index 91ab1397..9e8d1536 100644 +--- a/include/latencyflex2.h ++++ b/include/latencyflex2.h +@@ -1,11 +1,17 @@ + #ifndef LATENCYFLEX2_H + #define LATENCYFLEX2_H + ++#define LFX2_VK ++ + #include + #include + #include + #include + #include ++#ifdef LFX2_VK ++#include 
++#endif ++ + #ifdef LFX2_DX12 + #include + #endif +@@ -16,15 +22,6 @@ + #define LFX2_API + #endif + +-#ifdef LFX2_DX12 +-typedef struct lfx2Dx12SubmitAux { +- ID3D12GraphicsCommandList* executeBefore; +- ID3D12GraphicsCommandList* executeAfter; +- ID3D12Fence* fence; +- uint64_t fenceValue; +-} lfx2Dx12SubmitAux; +-#endif +- + typedef enum lfx2MarkType { + lfx2MarkTypeBegin, + lfx2MarkTypeEnd, +@@ -43,37 +40,49 @@ typedef struct lfx2Frame lfx2Frame; + + typedef struct lfx2ImplicitContext lfx2ImplicitContext; + ++#if defined(LFX2_VK) ++typedef struct lfx2VulkanContext lfx2VulkanContext; ++#endif ++ ++#if (defined(LFX2_DX12) && defined(_WIN32)) ++typedef struct lfx2Dx12SubmitAux { ++ ID3D12GraphicsCommandList* execute_before; ++ ID3D12GraphicsCommandList* execute_after; ++ ID3D12Fence* signal_fence; ++ uint64_t signal_fence_value; ++} lfx2Dx12SubmitAux; ++#endif ++ + typedef uint64_t lfx2Timestamp; + + typedef uint32_t lfx2SectionId; + ++#if defined(LFX2_VK) ++typedef struct lfx2VulkanSubmitAux { ++ VkCommandBuffer submit_before; ++ VkCommandBuffer submit_after; ++ VkSemaphore signal_sem; ++ uint64_t signal_sem_value; ++} lfx2VulkanSubmitAux; ++#endif ++ + #ifdef __cplusplus + extern "C" { + #endif // __cplusplus + + #if (defined(LFX2_DX12) && defined(_WIN32)) + LFX2_API struct lfx2Dx12Context *lfx2Dx12ContextCreate(ID3D12Device* device); +-#endif + +-#if (defined(LFX2_DX12) && defined(_WIN32)) + LFX2_API void lfx2Dx12ContextAddRef(struct lfx2Dx12Context *context); +-#endif + +-#if (defined(LFX2_DX12) && defined(_WIN32)) + LFX2_API void lfx2Dx12ContextRelease(struct lfx2Dx12Context *context); +-#endif + +-#if (defined(LFX2_DX12) && defined(_WIN32)) + LFX2_API +-lfx2Dx12SubmitAux lfx2Dx12ContextBeforeSubmit(struct lfx2Dx12Context *context, +- ID3D12CommandQueue* queue); +-#endif ++struct lfx2Dx12SubmitAux lfx2Dx12ContextBeforeSubmit(struct lfx2Dx12Context *context, ++ ID3D12CommandQueue* queue); + +-#if (defined(LFX2_DX12) && defined(_WIN32)) + LFX2_API void 
lfx2Dx12ContextBeginFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); +-#endif + +-#if (defined(LFX2_DX12) && defined(_WIN32)) + LFX2_API void lfx2Dx12ContextEndFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); + #endif + +@@ -119,6 +128,28 @@ LFX2_API + struct lfx2Frame *lfx2FrameDequeueImplicit(struct lfx2ImplicitContext *context, + bool critical); + ++#if defined(LFX2_VK) ++LFX2_API ++struct lfx2VulkanContext *lfx2VulkanContextCreate(PFN_vkGetInstanceProcAddr gipa, ++ VkInstance instance, ++ VkPhysicalDevice physical_device, ++ VkDevice device, ++ uint32_t queue_family_index); ++ ++LFX2_API void lfx2VulkanContextAddRef(struct lfx2VulkanContext *context); ++ ++LFX2_API void lfx2VulkanContextRelease(struct lfx2VulkanContext *context); ++ ++LFX2_API ++struct lfx2VulkanSubmitAux lfx2VulkanContextBeforeSubmit(struct lfx2VulkanContext *context); ++ ++LFX2_API ++void lfx2VulkanContextBeginFrame(struct lfx2VulkanContext *context, ++ struct lfx2Frame *frame); ++ ++LFX2_API void lfx2VulkanContextEndFrame(struct lfx2VulkanContext *context, struct lfx2Frame *frame); ++#endif ++ + #ifdef __cplusplus + } // extern "C" + #endif // __cplusplus +diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp +index 32e3eb78..767956c2 100644 +--- a/src/d3d11/d3d11_context_imm.cpp ++++ b/src/d3d11/d3d11_context_imm.cpp +@@ -842,8 +842,8 @@ namespace dxvk { + D3D10DeviceLock lock = LockContext(); + + EmitCs([] (DxvkContext* ctx) { +- ctx->endLfx2Frame(); + ctx->endFrame(); ++ ctx->endLfx2Frame(); + }); + } + +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index a11b8835..ea39e6fe 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -3457,26 +3457,21 @@ namespace dxvk { + } + + void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::MarkRenderStart(void *frame) { +- auto query = m_dxvkDevice->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); + auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), 
static_cast(frame)); + +- m_immediateContext->EmitCs([query, cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { ++ m_immediateContext->EmitCs([cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { + auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeBegin, cLfx2.TimestampNow()); +- ctx->writeTimestamp(query); +- ctx->trackLatencyMarker(frameWrapper, query, false); ++ cLfx2.VulkanContextBeginFrame(cDevice->getLfx2VkContext(), frameWrapper); + }); + } + + void STDMETHODCALLTYPE D3D11DeviceLfx2Ext::MarkRenderEnd(void *frame) { +- auto query = m_dxvkDevice->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); + auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); + +- m_immediateContext->EmitCs([query, cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { ++ m_immediateContext->EmitCs([cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { + auto &cLfx2 = cDevice->lfx2(); +- cLfx2.MarkSection(frameWrapper, 800, lfx2MarkType::lfx2MarkTypeEnd, cLfx2.TimestampNow()); +- ctx->writeTimestamp(query); +- ctx->trackLatencyMarker(frameWrapper, query, true); ++ ctx->flushCommandList(); ++ cLfx2.VulkanContextEndFrame(cDevice->getLfx2VkContext(), frameWrapper); + }); + } + } +diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp +index ad190fc0..a38fea40 100644 +--- a/src/dxvk/dxvk_cmdlist.cpp ++++ b/src/dxvk/dxvk_cmdlist.cpp +@@ -170,8 +170,7 @@ namespace dxvk { + DxvkCommandList::DxvkCommandList(DxvkDevice* device) + : m_device (device), + m_vkd (device->vkd()), +- m_vki (device->instance()->vki()), +- m_lfx2Tracker (device) { ++ m_vki (device->instance()->vki()) { + const auto& graphicsQueue = m_device->queues().graphics; + const auto& transferQueue = m_device->queues().transfer; + +@@ -273,6 +272,9 @@ namespace dxvk { + 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); + } + ++ if (isFirst && m_lfx2Aux.submit_before) ++ m_commandSubmission.executeCommandBuffer(m_lfx2Aux.submit_before); ++ + 
// Submit graphics commands + if (cmd.usedFlags.test(DxvkCmdBuffer::InitBuffer)) + m_commandSubmission.executeCommandBuffer(cmd.initBuffer); +@@ -297,6 +299,12 @@ namespace dxvk { + m_commandSubmission.signalFence(m_fence); + } + ++ if (isLast && m_lfx2Aux.submit_after) ++ m_commandSubmission.executeCommandBuffer(m_lfx2Aux.submit_after); ++ if (isLast && m_lfx2Aux.signal_sem) ++ m_commandSubmission.signalSemaphore(m_lfx2Aux.signal_sem, m_lfx2Aux.signal_sem_value, ++ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); ++ + // Finally, submit all graphics commands of the current submission + if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) + return status; +@@ -365,6 +373,8 @@ namespace dxvk { + + + void DxvkCommandList::reset() { ++ m_lfx2Aux = {}; ++ + // Free resources and other objects + // that are no longer in use + m_resources.reset(); +@@ -375,7 +385,6 @@ namespace dxvk { + // Return query and event handles + m_gpuQueryTracker.reset(); + m_gpuEventTracker.reset(); +- m_lfx2Tracker.reset(); + + // Less important stuff + m_signalTracker.reset(); +diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h +index 55507bb8..cc3b581e 100644 +--- a/src/dxvk/dxvk_cmdlist.h ++++ b/src/dxvk/dxvk_cmdlist.h +@@ -321,17 +321,12 @@ namespace dxvk { + m_signalTracker.add(signal, value); + } + +- void trackLatencyMarker(Lfx2Frame lfx2Frame, Rc timestampQuery, bool end) { +- m_lfx2Tracker.add(std::move(lfx2Frame), std::move(timestampQuery), end); +- } +- + /** + * \brief Notifies resources and signals + */ + void notifyObjects() { + m_resources.notify(); + m_signalTracker.notify(); +- m_lfx2Tracker.notify(); + } + + /** +@@ -1033,6 +1028,10 @@ namespace dxvk { + m_descriptorPools.push_back({ pool, manager }); + } + ++ void setLfx2Aux(lfx2VulkanSubmitAux aux) { ++ m_lfx2Aux = aux; ++ } ++ + private: + + DxvkDevice* m_device; +@@ -1057,7 +1056,6 @@ namespace dxvk { + DxvkGpuQueryTracker m_gpuQueryTracker; + DxvkBufferTracker m_bufferTracker; + DxvkStatCounters 
m_statCounters; +- DxvkLfx2Tracker m_lfx2Tracker; + + DxvkCommandSubmission m_commandSubmission; + +@@ -1073,6 +1071,8 @@ namespace dxvk { + + std::vector m_pipelines; + ++ lfx2VulkanSubmitAux m_lfx2Aux = {}; ++ + VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer) const { + if (cmdBuffer == DxvkCmdBuffer::ExecBuffer) return m_cmd.execBuffer; + if (cmdBuffer == DxvkCmdBuffer::InitBuffer) return m_cmd.initBuffer; +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index b6fdbe93..726356a6 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -106,8 +106,10 @@ namespace dxvk { + + + void DxvkContext::flushCommandList(DxvkSubmitStatus* status) { +- m_device->submitCommandList( +- this->endRecording(), status); ++ auto cmdList = this->endRecording(); ++ if (m_type == DxvkContextType::Primary) ++ cmdList->setLfx2Aux(m_device->lfx2().VulkanContextBeforeSubmit(m_device->getLfx2VkContext())); ++ m_device->submitCommandList(cmdList, status); + + this->beginRecording( + m_device->createCommandList()); +@@ -6444,22 +6446,14 @@ namespace dxvk { + this->beginCurrentCommands(); + } + +- void DxvkContext::trackLatencyMarker(Lfx2Frame frame, Rc timestampQuery, bool end) { +- m_cmd->trackLatencyMarker(std::move(frame), std::move(timestampQuery), end); +- } +- + void DxvkContext::tryBeginLfx2Frame(bool critical) { + if (m_type != DxvkContextType::Primary) + Logger::err("beginLfx2Frame should only be called on immediate contexts"); + if (!m_lfx2Frame) { + m_lfx2Frame = m_device->getImplicitLfx2Context()->dequeueFrame(critical); + if (m_lfx2Frame) { +- auto query = m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); +- m_device->lfx2().MarkSection(m_lfx2Frame, +- 800, lfx2MarkType::lfx2MarkTypeBegin, +- m_device->lfx2().TimestampNow()); +- writeTimestamp(query); +- trackLatencyMarker(m_lfx2Frame, query, false); ++ auto &cLfx2 = m_device->lfx2(); ++ cLfx2.VulkanContextBeginFrame(m_device->getLfx2VkContext(), m_lfx2Frame); + } + } + } +@@ 
-6469,12 +6463,9 @@ namespace dxvk { + Logger::err("endLfx2Frame should only be called on immediate contexts"); + tryBeginLfx2Frame(true); + if (m_lfx2Frame) { +- auto query = m_device->createGpuQuery(VK_QUERY_TYPE_TIMESTAMP, 0, 0); +- m_device->lfx2().MarkSection(m_lfx2Frame, +- 800, lfx2MarkType::lfx2MarkTypeEnd, +- m_device->lfx2().TimestampNow()); +- writeTimestamp(query); +- trackLatencyMarker(m_lfx2Frame, query, true); ++ flushCommandList(); ++ auto &cLfx2 = m_device->lfx2(); ++ cLfx2.VulkanContextEndFrame(m_device->getLfx2VkContext(), m_lfx2Frame); + m_lfx2Frame = {}; + } + } +diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h +index ffd505ba..b5af4d52 100644 +--- a/src/dxvk/dxvk_context.h ++++ b/src/dxvk/dxvk_context.h +@@ -1391,8 +1391,6 @@ namespace dxvk { + m_cmd->addStatCtr(counter, value); + } + +- void trackLatencyMarker(Lfx2Frame frame, Rc timestampQuery, bool end); +- + private: + + Rc m_device; +diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp +index d133930f..0c240eb1 100644 +--- a/src/dxvk/dxvk_device.cpp ++++ b/src/dxvk/dxvk_device.cpp +@@ -22,6 +22,7 @@ namespace dxvk { + m_queues (queues), + m_submissionQueue (this, queueCallback) { + ++ m_lfx2Vk = m_lfx2.VulkanContextCreate(instance->vki()->getLoaderProc(), instance->handle(), m_adapter->handle(), m_vkd->device(), queues.graphics.queueIndex); + } + + +@@ -40,6 +41,8 @@ namespace dxvk { + // Stop workers explicitly in order to prevent + // access to structures that are being destroyed. 
+ m_objects.pipelineManager().stopWorkerThreads(); ++ ++ m_lfx2.VulkanContextRelease(m_lfx2Vk); + } + + +@@ -340,6 +343,10 @@ namespace dxvk { + m_recycledCommandLists.returnObject(cmdList); + } + ++ lfx2VulkanContext *DxvkDevice::getLfx2VkContext() { ++ return m_lfx2Vk; ++ } ++ + DxvkLfx2ImplicitContext *DxvkDevice::getImplicitLfx2Context() { + return &m_lfx2ImplicitContext; + } +diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h +index 7fa12dc5..4e6c2ed7 100644 +--- a/src/dxvk/dxvk_device.h ++++ b/src/dxvk/dxvk_device.h +@@ -539,6 +539,7 @@ namespace dxvk { + */ + void waitForIdle(); + ++ lfx2VulkanContext* getLfx2VkContext(); + DxvkLfx2ImplicitContext* getImplicitLfx2Context(); + + private: +@@ -549,6 +550,7 @@ namespace dxvk { + Rc m_adapter; + Rc m_vkd; + Lfx2Fn m_lfx2; ++ lfx2VulkanContext *m_lfx2Vk; + DxvkLfx2ImplicitContext m_lfx2ImplicitContext; + + DxvkDeviceFeatures m_features; +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index 4d5298db..23e20bb9 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -41,6 +41,12 @@ namespace dxvk { + LOAD_PFN(ImplicitContextReset); + LOAD_PFN(FrameCreateImplicit); + LOAD_PFN(FrameDequeueImplicit); ++ LOAD_PFN(VulkanContextCreate); ++ LOAD_PFN(VulkanContextAddRef); ++ LOAD_PFN(VulkanContextRelease); ++ LOAD_PFN(VulkanContextBeforeSubmit); ++ LOAD_PFN(VulkanContextBeginFrame); ++ LOAD_PFN(VulkanContextEndFrame); + + #undef LOAD_PFN + } +@@ -62,66 +68,6 @@ namespace dxvk { + return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); + } + +- DxvkLfx2Tracker::DxvkLfx2Tracker(DxvkDevice *device) : m_device(device) { +- } +- +- void DxvkLfx2Tracker::add(Lfx2Frame lfx2Frame, Rc query, bool end) { +- m_query[end] = std::move(query); +- m_frame_handle[end] = std::move(lfx2Frame); +- } +- +- void DxvkLfx2Tracker::notify() { +- for (uint32_t i = 0; i < 2; i++) { +- Rc &query = m_query[i]; +- if (query.ptr()) { +- DxvkQueryData queryData; // 
NOLINT(cppcoreguidelines-pro-type-member-init) +- DxvkGpuQueryStatus status; +- while ((status = query->getData(queryData)) == DxvkGpuQueryStatus::Pending); +- +- if (status == DxvkGpuQueryStatus::Available) { +- uint64_t gpuTimestamp = queryData.timestamp.time; +- VkCalibratedTimestampInfoEXT calibratedTimestampInfo[2]; +- uint64_t calibratedTimestamps[2]; +- uint64_t maxDeviation[2]; +- calibratedTimestampInfo[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; +- calibratedTimestampInfo[0].pNext = nullptr; +- calibratedTimestampInfo[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; +- calibratedTimestampInfo[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; +- calibratedTimestampInfo[1].pNext = nullptr; +-#ifdef _WIN32 +- calibratedTimestampInfo[1].timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; +-#else +- calibratedTimestampInfo[1].timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; +-#endif +- m_device->vkd()->vkGetCalibratedTimestampsEXT(m_device->handle(), 2, calibratedTimestampInfo, +- calibratedTimestamps, maxDeviation); +- +-#ifdef _WIN32 +- uint64_t hostNsTimestamp = m_device->lfx2().TimestampFromQpc(calibratedTimestamps[1]); +-#else +- uint64_t hostNsTimestamp = calibratedTimestamps[1]; +-#endif +- int64_t gpuTimestampDelta = gpuTimestamp - calibratedTimestamps[0]; +- int64_t timestamp = hostNsTimestamp + (int64_t) (gpuTimestampDelta * +- (double) m_device->adapter()->deviceProperties().limits.timestampPeriod); +- +- m_device->lfx2().MarkSection(m_frame_handle[i], +- 1000, i == 0 ? 
lfx2MarkType::lfx2MarkTypeBegin : lfx2MarkType::lfx2MarkTypeEnd, +- timestamp); +- } +- } +- } +- } +- +- void DxvkLfx2Tracker::reset() { +- for (auto &i: m_query) { +- i = nullptr; +- } +- for (auto &i: m_frame_handle) { +- i = {}; +- } +- } +- + DxvkLfx2ImplicitContext::DxvkLfx2ImplicitContext(Lfx2Fn *lfx2): m_lfx2(lfx2) { + m_context = m_lfx2->ImplicitContextCreate(); + } +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +index 96c7931b..e6ae73df 100644 +--- a/src/dxvk/dxvk_lfx2.h ++++ b/src/dxvk/dxvk_lfx2.h +@@ -31,6 +31,12 @@ namespace dxvk { + DECLARE_PFN(ImplicitContextReset); + DECLARE_PFN(FrameCreateImplicit); + DECLARE_PFN(FrameDequeueImplicit); ++ DECLARE_PFN(VulkanContextCreate); ++ DECLARE_PFN(VulkanContextAddRef); ++ DECLARE_PFN(VulkanContextRelease); ++ DECLARE_PFN(VulkanContextBeforeSubmit); ++ DECLARE_PFN(VulkanContextBeginFrame); ++ DECLARE_PFN(VulkanContextEndFrame); + + #undef DECLARE_PFN + +@@ -72,17 +78,4 @@ namespace dxvk { + lfx2ImplicitContext *m_context; + }; + +- class DxvkLfx2Tracker { +- public: +- explicit DxvkLfx2Tracker(DxvkDevice *device); +- void add(Lfx2Frame frame, Rc query, bool end); +- void reset(); +- void notify(); +- +- private: +- DxvkDevice *m_device; +- Rc m_query[2]{}; +- Lfx2Frame m_frame_handle[2]{}; +- }; +- + } // dxvk +\ No newline at end of file +-- +2.43.0 + + +From 62e4b89d0e95979f85737df752207d5da4d51e41 Mon Sep 17 00:00:00 2001 +From: Tatsuyuki Ishi +Date: Tue, 7 Feb 2023 14:46:17 +0900 +Subject: [PATCH 11/12] Add support for explicit latency and throughput + annotation + +--- + include/latencyflex2.h | 11 +++++++++ + src/d3d11/d3d11_context_imm.cpp | 3 ++- + src/d3d11/d3d11_device.cpp | 7 ++---- + src/d3d9/d3d9_device.cpp | 1 + + src/dxvk/dxvk_context.cpp | 44 ++++++++++++++++++++++----------- + src/dxvk/dxvk_context.h | 12 +++++++-- + src/dxvk/dxvk_cs.cpp | 20 ++++++++++++--- + src/dxvk/dxvk_cs.h | 8 ++++++ + src/dxvk/dxvk_lfx2.cpp | 2 ++ + src/dxvk/dxvk_lfx2.h | 2 ++ + 10 files changed, 84 
insertions(+), 26 deletions(-) + +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +index 9e8d1536..bda252c5 100644 +--- a/include/latencyflex2.h ++++ b/include/latencyflex2.h +@@ -54,6 +54,7 @@ typedef struct lfx2Dx12SubmitAux { + #endif + + typedef uint64_t lfx2Timestamp; ++typedef uint64_t lfx2Interval; + + typedef uint32_t lfx2SectionId; + +@@ -114,6 +115,16 @@ void lfx2MarkSection(struct lfx2Frame *frame, + enum lfx2MarkType mark_type, + lfx2Timestamp timestamp); + ++LFX2_API ++void lfx2FrameOverrideQueuingDelay(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ lfx2Interval queueing_delay); ++ ++LFX2_API ++void lfx2FrameOverrideInverseThroughput(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ lfx2Interval inverse_throughput); ++ + LFX2_API struct lfx2ImplicitContext *lfx2ImplicitContextCreate(void); + + LFX2_API void lfx2ImplicitContextRelease(struct lfx2ImplicitContext *context); +diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp +index 767956c2..bdab28c9 100644 +--- a/src/d3d11/d3d11_context_imm.cpp ++++ b/src/d3d11/d3d11_context_imm.cpp +@@ -843,7 +843,7 @@ namespace dxvk { + + EmitCs([] (DxvkContext* ctx) { + ctx->endFrame(); +- ctx->endLfx2Frame(); ++ ctx->endLfx2FrameImplicit(); + }); + } + +@@ -892,6 +892,7 @@ namespace dxvk { + + + void D3D11ImmediateContext::EmitCsChunk(DxvkCsChunkRef&& chunk) { ++ chunk->finalize(); + m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); + } + +diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp +index ea39e6fe..f0ba640b 100644 +--- a/src/d3d11/d3d11_device.cpp ++++ b/src/d3d11/d3d11_device.cpp +@@ -3460,8 +3460,7 @@ namespace dxvk { + auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); + + m_immediateContext->EmitCs([cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { +- auto &cLfx2 = cDevice->lfx2(); +- cLfx2.VulkanContextBeginFrame(cDevice->getLfx2VkContext(), frameWrapper); ++ 
ctx->beginLfx2Frame(frameWrapper); + }); + } + +@@ -3469,9 +3468,7 @@ namespace dxvk { + auto frameWrapper = Lfx2Frame(m_dxvkDevice->lfx2(), static_cast(frame)); + + m_immediateContext->EmitCs([cDevice = m_dxvkDevice, frameWrapper] (DxvkContext* ctx) { +- auto &cLfx2 = cDevice->lfx2(); +- ctx->flushCommandList(); +- cLfx2.VulkanContextEndFrame(cDevice->getLfx2VkContext(), frameWrapper); ++ ctx->endLfx2Frame(); + }); + } + } +diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp +index a5fc7f5d..e0030cf5 100644 +--- a/src/d3d9/d3d9_device.cpp ++++ b/src/d3d9/d3d9_device.cpp +@@ -5272,6 +5272,7 @@ namespace dxvk { + + + void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { ++ chunk->finalize(); + m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); + } + +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index 726356a6..384be705 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -1,4 +1,5 @@ + #include ++#include + #include + #include + +@@ -6446,28 +6447,41 @@ namespace dxvk { + this->beginCurrentCommands(); + } + +- void DxvkContext::tryBeginLfx2Frame(bool critical) { +- if (m_type != DxvkContextType::Primary) +- Logger::err("beginLfx2Frame should only be called on immediate contexts"); ++ void DxvkContext::tryBeginLfx2FrameImplicit(bool critical) { + if (!m_lfx2Frame) { +- m_lfx2Frame = m_device->getImplicitLfx2Context()->dequeueFrame(critical); +- if (m_lfx2Frame) { +- auto &cLfx2 = m_device->lfx2(); +- cLfx2.VulkanContextBeginFrame(m_device->getLfx2VkContext(), m_lfx2Frame); ++ auto lfx2Frame = m_device->getImplicitLfx2Context()->dequeueFrame(critical); ++ if (lfx2Frame) { ++ beginLfx2Frame(lfx2Frame); + } + } + } + +- void DxvkContext::endLfx2Frame() { +- if (m_type != DxvkContextType::Primary) +- Logger::err("endLfx2Frame should only be called on immediate contexts"); +- tryBeginLfx2Frame(true); ++ void DxvkContext::endLfx2FrameImplicit() { ++ tryBeginLfx2FrameImplicit(true); + if (m_lfx2Frame) { +- 
flushCommandList(); +- auto &cLfx2 = m_device->lfx2(); +- cLfx2.VulkanContextEndFrame(m_device->getLfx2VkContext(), m_lfx2Frame); +- m_lfx2Frame = {}; ++ endLfx2Frame(); + } + } + ++ void DxvkContext::beginLfx2Frame(Lfx2Frame frame) { ++ if (m_type != DxvkContextType::Primary) ++ Logger::err("beginLfx2Frame should only be called on immediate contexts"); ++ auto &cLfx2 = m_device->lfx2(); ++ m_lfx2Frame = std::move(frame); ++ cLfx2.VulkanContextBeginFrame(m_device->getLfx2VkContext(), m_lfx2Frame); ++ m_frameCsTime = 0; ++ m_minQueuingDelay = UINT64_MAX; ++ } ++ ++ void DxvkContext::endLfx2Frame() { ++ if (m_type != DxvkContextType::Primary) ++ Logger::err("endLfx2Frame should only be called on immediate contexts"); ++ flushCommandList(); ++ auto &cLfx2 = m_device->lfx2(); ++ cLfx2.VulkanContextEndFrame(m_device->getLfx2VkContext(), m_lfx2Frame); ++ cLfx2.FrameOverrideInverseThroughput(m_lfx2Frame, 800, m_frameCsTime); ++ if (m_minQueuingDelay != UINT64_MAX) ++ cLfx2.FrameOverrideQueuingDelay(m_lfx2Frame, 0, m_minQueuingDelay); ++ m_lfx2Frame = {}; ++ } + } +diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h +index b5af4d52..8dcdb3a7 100644 +--- a/src/dxvk/dxvk_context.h ++++ b/src/dxvk/dxvk_context.h +@@ -68,7 +68,9 @@ namespace dxvk { + */ + void flushCommandList(DxvkSubmitStatus* status); + +- void tryBeginLfx2Frame(bool critical); ++ void tryBeginLfx2FrameImplicit(bool critical); ++ void endLfx2FrameImplicit(); ++ void beginLfx2Frame(Lfx2Frame frame); + void endLfx2Frame(); + + /** +@@ -1391,6 +1393,11 @@ namespace dxvk { + m_cmd->addStatCtr(counter, value); + } + ++ void recordChunkExecutionTiming(uint64_t duration, uint64_t queueingDelay) { ++ m_frameCsTime += duration; ++ m_minQueuingDelay = std::min(m_minQueuingDelay, queueingDelay); ++ } ++ + private: + + Rc m_device; +@@ -1436,6 +1443,8 @@ namespace dxvk { + std::array m_cpLookupCache = { }; + + Lfx2Frame m_lfx2Frame = {}; ++ uint64_t m_frameCsTime = 0; ++ uint64_t m_minQueuingDelay = 0; + + 
void blitImageFb( + const Rc& dstImage, +@@ -1743,7 +1752,6 @@ namespace dxvk { + void endCurrentCommands(); + + void splitCommands(); +- + }; + + } +diff --git a/src/dxvk/dxvk_cs.cpp b/src/dxvk/dxvk_cs.cpp +index fe1f4c3a..44d7e718 100644 +--- a/src/dxvk/dxvk_cs.cpp ++++ b/src/dxvk/dxvk_cs.cpp +@@ -55,8 +55,12 @@ namespace dxvk { + + m_commandOffset = 0; + } +- +- ++ ++ void DxvkCsChunk::finalize() { ++ m_queuedTimestamp = high_resolution_clock::now(); ++ } ++ ++ + DxvkCsChunkPool::DxvkCsChunkPool() { + + } +@@ -161,6 +165,8 @@ namespace dxvk { + // them in order to potentially reduce lock contention. + std::vector chunks; + ++ high_resolution_clock::time_point lastFinish; ++ + try { + while (!m_stopped.load()) { + { std::unique_lock lock(m_mutex); +@@ -175,8 +181,16 @@ namespace dxvk { + + for (auto& chunk : chunks) { + m_context->addStatCtr(DxvkStatCounter::CsChunkCount, 1); +- m_context->tryBeginLfx2Frame(false); ++ m_context->tryBeginLfx2FrameImplicit(false); ++ high_resolution_clock::time_point start = high_resolution_clock::now(); + chunk->executeAll(m_context.ptr()); ++ high_resolution_clock::time_point end = high_resolution_clock::now(); ++ m_context->recordChunkExecutionTiming( ++ std::chrono::duration_cast(end - start).count(), ++ std::max(std::chrono::duration_cast( ++ lastFinish - chunk->getQueuedTimestamp()).count(), 0LL) ++ ); ++ lastFinish = end; + + // Use a separate mutex for the chunk counter, this + // will only ever be contested if synchronization is +diff --git a/src/dxvk/dxvk_cs.h b/src/dxvk/dxvk_cs.h +index ae20746a..d8ee4fdf 100644 +--- a/src/dxvk/dxvk_cs.h ++++ b/src/dxvk/dxvk_cs.h +@@ -233,10 +233,18 @@ namespace dxvk { + * that it can be reused later. 
+ */ + void reset(); ++ ++ void finalize(); ++ ++ high_resolution_clock::time_point getQueuedTimestamp() { ++ return m_queuedTimestamp; ++ } + + private: + + size_t m_commandOffset = 0; ++ ++ high_resolution_clock::time_point m_queuedTimestamp; + + DxvkCsCmd* m_head = nullptr; + DxvkCsCmd* m_tail = nullptr; +diff --git a/src/dxvk/dxvk_lfx2.cpp b/src/dxvk/dxvk_lfx2.cpp +index 23e20bb9..c7e50abe 100644 +--- a/src/dxvk/dxvk_lfx2.cpp ++++ b/src/dxvk/dxvk_lfx2.cpp +@@ -31,6 +31,8 @@ namespace dxvk { + LOAD_PFN(FrameAddRef); + LOAD_PFN(FrameRelease); + LOAD_PFN(MarkSection); ++ LOAD_PFN(FrameOverrideQueuingDelay); ++ LOAD_PFN(FrameOverrideInverseThroughput); + LOAD_PFN(SleepUntil); + LOAD_PFN(TimestampNow); + #ifdef _WIN32 +diff --git a/src/dxvk/dxvk_lfx2.h b/src/dxvk/dxvk_lfx2.h +index e6ae73df..dc81d0a5 100644 +--- a/src/dxvk/dxvk_lfx2.h ++++ b/src/dxvk/dxvk_lfx2.h +@@ -21,6 +21,8 @@ namespace dxvk { + DECLARE_PFN(FrameAddRef); + DECLARE_PFN(FrameRelease); + DECLARE_PFN(MarkSection); ++ DECLARE_PFN(FrameOverrideQueuingDelay); ++ DECLARE_PFN(FrameOverrideInverseThroughput); + DECLARE_PFN(SleepUntil); + DECLARE_PFN(TimestampNow); + #ifdef _WIN32 +-- +2.43.0 + + +From 30b72b3ad2c4a9aae9eb1a7681128e464bb9b1d9 Mon Sep 17 00:00:00 2001 +From: FakeMichau <49685661+FakeMichau@users.noreply.github.com> +Date: Tue, 16 Jan 2024 18:15:48 +0100 +Subject: [PATCH 12/12] Update for upstream changes + +--- + src/dxvk/dxvk_context.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp +index 384be705..0b319287 100644 +--- a/src/dxvk/dxvk_context.cpp ++++ b/src/dxvk/dxvk_context.cpp +@@ -6476,7 +6476,8 @@ namespace dxvk { + void DxvkContext::endLfx2Frame() { + if (m_type != DxvkContextType::Primary) + Logger::err("endLfx2Frame should only be called on immediate contexts"); +- flushCommandList(); ++ auto status = new DxvkSubmitStatus; ++ flushCommandList(status); + auto &cLfx2 = m_device->lfx2(); + 
cLfx2.VulkanContextEndFrame(m_device->getLfx2VkContext(), m_lfx2Frame); + cLfx2.FrameOverrideInverseThroughput(m_lfx2Frame, 800, m_frameCsTime); +-- +2.43.0 + diff --git a/patches/lfx2-nvapi.patch b/patches/lfx2-nvapi.patch new file mode 100644 index 0000000000..9de0420a7a --- /dev/null +++ b/patches/lfx2-nvapi.patch @@ -0,0 +1,947 @@ +From 68de9614166e3df3174c1f2b41f3eb94fb66792e Mon Sep 17 00:00:00 2001 +From: FakeMichau <49685661+FakeMichau@users.noreply.github.com> +Date: Mon, 2 Oct 2023 14:50:19 +0200 +Subject: [PATCH 1/3] LFX2 support and gpu spoofing + +--- + inc/latencyflex2.h | 83 ++++++++++++++++ + src/d3d/lfx2.cpp | 176 +++++++++++++++++++++++++++++++++ + src/d3d/lfx2.h | 60 +++++++++++ + src/d3d/nvapi_d3d_instance.cpp | 33 +++++-- + src/d3d/nvapi_d3d_instance.h | 9 +- + src/dxvk/dxvk_interfaces.cpp | 1 + + src/dxvk/dxvk_interfaces.h | 13 +++ + src/meson.build | 1 + + src/nvapi_d3d.cpp | 28 ++++-- + src/nvapi_gpu.cpp | 73 +------------- + src/resource_factory.cpp | 4 +- + src/resource_factory.h | 4 +- + 12 files changed, 395 insertions(+), 90 deletions(-) + create mode 100644 inc/latencyflex2.h + create mode 100644 src/d3d/lfx2.cpp + create mode 100644 src/d3d/lfx2.h + +diff --git a/inc/latencyflex2.h b/inc/latencyflex2.h +new file mode 100644 +index 0000000..02767c5 +--- /dev/null ++++ b/inc/latencyflex2.h +@@ -0,0 +1,83 @@ ++#ifndef LATENCYFLEX2_H ++#define LATENCYFLEX2_H ++ ++#include ++#include ++#include ++#include ++#include ++#ifdef _WIN32 ++#define LFX2_API __declspec(dllimport) ++#else ++#define LFX2_API ++#endif ++ ++typedef enum lfx2MarkType { ++ lfx2MarkTypeBegin, ++ lfx2MarkTypeEnd, ++} lfx2MarkType; ++ ++typedef struct lfx2Context lfx2Context; ++ ++/** ++ * A write handle for frame markers. 
++ */ ++typedef struct lfx2Frame lfx2Frame; ++ ++typedef struct lfx2ImplicitContext lfx2ImplicitContext; ++ ++typedef uint64_t lfx2Timestamp; ++ ++typedef uint32_t lfx2SectionId; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif // __cplusplus ++ ++ LFX2_API lfx2Timestamp lfx2TimestampNow(void); ++ ++#if defined(_WIN32) ++ LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); ++#endif ++ ++ LFX2_API void lfx2SleepUntil(lfx2Timestamp target); ++ ++ LFX2_API struct lfx2Context* lfx2ContextCreate(void); ++ ++ LFX2_API void lfx2ContextAddRef(struct lfx2Context* context); ++ ++ LFX2_API void lfx2ContextRelease(struct lfx2Context* context); ++ ++ LFX2_API ++ struct lfx2Frame* lfx2FrameCreate(struct lfx2Context* context, ++ lfx2Timestamp* out_timestamp); ++ ++ LFX2_API void lfx2FrameAddRef(struct lfx2Frame* frame); ++ ++ LFX2_API void lfx2FrameRelease(struct lfx2Frame* frame); ++ ++ LFX2_API ++ void lfx2MarkSection(struct lfx2Frame* frame, ++ lfx2SectionId section_id, ++ enum lfx2MarkType mark_type, ++ lfx2Timestamp timestamp); ++ ++ LFX2_API struct lfx2ImplicitContext* lfx2ImplicitContextCreate(void); ++ ++ LFX2_API void lfx2ImplicitContextRelease(struct lfx2ImplicitContext* context); ++ ++ LFX2_API void lfx2ImplicitContextReset(struct lfx2ImplicitContext* context); ++ ++ LFX2_API ++ struct lfx2Frame* lfx2FrameCreateImplicit(struct lfx2ImplicitContext* context, ++ lfx2Timestamp* out_timestamp); ++ ++ LFX2_API ++ struct lfx2Frame* lfx2FrameDequeueImplicit(struct lfx2ImplicitContext* context, ++ bool critical); ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif // __cplusplus ++ ++#endif /* LATENCYFLEX2_H */ +diff --git a/src/d3d/lfx2.cpp b/src/d3d/lfx2.cpp +new file mode 100644 +index 0000000..b75cf0d +--- /dev/null ++++ b/src/d3d/lfx2.cpp +@@ -0,0 +1,176 @@ ++#include "lfx2.h" ++#include "util/util_log.h" ++#include "util/util_string.h" ++ ++namespace dxvk { ++ ++ Lfx2::Lfx2() { ++ const auto lfxModuleName = "latencyflex2_rust.dll"; ++ ++ m_lfxModule = 
::LoadLibraryA(lfxModuleName); ++ if (m_lfxModule == nullptr) { ++ auto lastError = ::GetLastError(); ++ if (lastError != ERROR_MOD_NOT_FOUND) // Ignore library not found ++ log::write(str::format("Loading ", lfxModuleName, ++ " failed with error code: ", lastError)); ++ return; ++ } ++ ++#define LOAD_PFN(x) \ ++ this->x = GetProcAddress("lfx2" #x) ++ ++ LOAD_PFN(ContextCreate); ++ LOAD_PFN(ContextAddRef); ++ LOAD_PFN(ContextRelease); ++ LOAD_PFN(FrameCreate); ++ LOAD_PFN(FrameAddRef); ++ LOAD_PFN(FrameRelease); ++ LOAD_PFN(MarkSection); ++ LOAD_PFN(SleepUntil); ++ LOAD_PFN(TimestampNow); ++#ifdef _WIN32 ++ LOAD_PFN(TimestampFromQpc); ++#endif ++ LOAD_PFN(ImplicitContextCreate); ++ LOAD_PFN(ImplicitContextRelease); ++ LOAD_PFN(ImplicitContextReset); ++ LOAD_PFN(FrameCreateImplicit); ++ LOAD_PFN(FrameDequeueImplicit); ++ ++#undef LOAD_PFN ++ } ++ ++ Lfx2::~Lfx2() { ++ if (m_lfxModule == nullptr) ++ return; ++ ++ if (m_nextFrame) ++ FrameRelease(m_nextFrame); ++ ++ if (m_lfxContext) ++ ContextRelease(m_lfxContext); ++ ++ ::FreeLibrary(m_lfxModule); ++ } ++ ++ bool Lfx2::IsAvailable() const { ++ return m_lfxModule != nullptr; ++ } ++ ++ void Lfx2::Sleep() { ++ std::unique_lock lock(m_frameMapMutex); ++ if (m_lfxContext && !m_nextFrame) { ++ lfx2Timestamp sleepTarget; ++ m_nextFrame = FrameCreate(m_lfxContext, &sleepTarget); ++ lock.unlock(); ++ ++ SleepUntil(sleepTarget); ++ } ++ // Else: Sleep was called without recording frame timing, skip ++ } ++ ++ void Lfx2::Mark(uint64_t frame_id, NV_LATENCY_MARKER_TYPE type, Com& extDevice) { ++ if (!m_lfxContext) ++ return; ++ ++ uint32_t section; ++ lfx2MarkType markType; ++ switch (type) { ++ case SIMULATION_START: ++ section = 0; ++ markType = lfx2MarkType::lfx2MarkTypeBegin; ++ break; ++ case SIMULATION_END: ++ section = 0; ++ markType = lfx2MarkType::lfx2MarkTypeEnd; ++ break; ++ case RENDERSUBMIT_START: ++ section = 500; ++ markType = lfx2MarkType::lfx2MarkTypeBegin; ++ break; ++ case RENDERSUBMIT_END: ++ section = 500; 
++ markType = lfx2MarkType::lfx2MarkTypeEnd; ++ break; ++ default: ++ return; ++ } ++ ++ std::unique_lock lock(m_frameMapMutex); ++ lfx2Frame* frame; ++ if (type == SIMULATION_START) { ++ EnsureFrame(); ++ m_frameMap[frame_id] = m_nextFrame; ++ m_callsExpectedByFrame[frame_id] = 4; ++ frame = m_nextFrame; ++ m_nextFrame = nullptr; ++ ++ decltype(m_frameMap)::iterator it; ++ if (frame_id >= kMaxInflightFrames && (it = m_frameMap.upper_bound(frame_id - kMaxInflightFrames)) != m_frameMap.end()) { ++ for (auto i = m_frameMap.begin(); i != it;) { ++ FrameRelease(i->second); ++ m_callsExpectedByFrame.erase(i->first); ++ i = m_frameMap.erase(i); ++ } ++ } ++ } else { ++ if (m_frameMap.find(frame_id) == m_frameMap.end()) ++ return; ++ frame = m_frameMap[frame_id]; ++ } ++ lock.unlock(); ++ ++ MarkSection(frame, section, markType, TimestampNow()); ++ ++ if (extDevice.ptr()) { ++ if (type == RENDERSUBMIT_START) { ++ extDevice->MarkRenderStart((void*)frame); ++ } else if (type == RENDERSUBMIT_END) { ++ extDevice->MarkRenderEnd((void*)frame); ++ } ++ } ++ ++ lock.lock(); ++ if (--m_callsExpectedByFrame[frame_id] == 0) { ++ FrameRelease(frame); ++ m_frameMap.erase(frame_id); ++ m_callsExpectedByFrame.erase(frame_id); ++ } ++ } ++ ++ void Lfx2::EnsureFrame() { ++ if (!m_nextFrame) { ++ lfx2Timestamp sleepTarget; ++ m_nextFrame = FrameCreate(m_lfxContext, &sleepTarget); ++ } ++ } ++ ++ void Lfx2::SleepImplicit(Com& extDevice) { ++ lfx2Timestamp sleepTarget; ++ lfx2Frame* implicitFrame; ++ extDevice->ImplicitBeginFrame(&sleepTarget, reinterpret_cast(&implicitFrame)); ++ ++ SleepUntil(sleepTarget); ++ MarkSection(implicitFrame, 0, lfx2MarkType::lfx2MarkTypeBegin, TimestampNow()); ++ MarkSection(implicitFrame, 0, lfx2MarkType::lfx2MarkTypeEnd, TimestampNow()); ++ FrameRelease(implicitFrame); ++ } ++ ++ void Lfx2::SetEnabled(bool enabled) { ++ if (enabled && !m_lfxContext) { ++ m_lfxContext = ContextCreate(); ++ } else if (!enabled && m_lfxContext) { ++ if (m_nextFrame) { ++ 
FrameRelease(m_nextFrame); ++ m_nextFrame = nullptr; ++ } ++ ContextRelease(m_lfxContext); ++ m_lfxContext = nullptr; ++ } ++ } ++ ++ template ++ T Lfx2::GetProcAddress(const char* name) { ++ return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); ++ } ++} +\ No newline at end of file +diff --git a/src/d3d/lfx2.h b/src/d3d/lfx2.h +new file mode 100644 +index 0000000..2e20ad7 +--- /dev/null ++++ b/src/d3d/lfx2.h +@@ -0,0 +1,60 @@ ++#pragma once ++ ++#include "../inc/latencyflex2.h" ++#include "../nvapi_private.h" ++#include "dxvk/dxvk_interfaces.h" ++#include "util/com_pointer.h" ++#include "vkd3d-proton/vkd3d-proton_interfaces.h" ++ ++namespace dxvk { ++ class Lfx2 { ++ public: ++ Lfx2(); ++ virtual ~Lfx2(); ++ ++ [[nodiscard]] virtual bool IsAvailable() const; ++ virtual void Sleep(); ++ virtual void SleepImplicit(Com& extDevice); ++ virtual void Mark(uint64_t frame_id, NV_LATENCY_MARKER_TYPE type, Com& extDevice); ++ virtual void SetEnabled(bool enabled); ++ ++ private: ++#define DECLARE_PFN(x) \ ++ decltype(&::lfx2##x) x {} ++ ++ DECLARE_PFN(ContextCreate); ++ DECLARE_PFN(ContextAddRef); ++ DECLARE_PFN(ContextRelease); ++ DECLARE_PFN(FrameCreate); ++ DECLARE_PFN(FrameAddRef); ++ DECLARE_PFN(FrameRelease); ++ DECLARE_PFN(MarkSection); ++ DECLARE_PFN(SleepUntil); ++ DECLARE_PFN(TimestampNow); ++#ifdef _WIN32 ++ DECLARE_PFN(TimestampFromQpc); ++#endif ++ DECLARE_PFN(ImplicitContextCreate); ++ DECLARE_PFN(ImplicitContextRelease); ++ DECLARE_PFN(ImplicitContextReset); ++ DECLARE_PFN(FrameCreateImplicit); ++ DECLARE_PFN(FrameDequeueImplicit); ++ ++#undef DECLARE_PFN ++ ++ template ++ T GetProcAddress(const char* name); ++ ++ void EnsureFrame(); ++ ++ std::mutex m_frameMapMutex; ++ std::map m_frameMap; ++ std::unordered_map m_callsExpectedByFrame; ++ ++ HMODULE m_lfxModule{}; ++ lfx2Context* m_lfxContext{}; ++ lfx2Frame* m_nextFrame{}; ++ ++ static constexpr uint64_t kMaxInflightFrames = 64; ++ }; ++} +\ No newline at end of file +diff --git 
a/src/d3d/nvapi_d3d_instance.cpp b/src/d3d/nvapi_d3d_instance.cpp +index 6cdf260..1685953 100644 +--- a/src/d3d/nvapi_d3d_instance.cpp ++++ b/src/d3d/nvapi_d3d_instance.cpp +@@ -1,9 +1,13 @@ + #include "../util/util_log.h" + #include "nvapi_d3d_instance.h" ++#include "util/util_env.h" + + namespace dxvk { + NvapiD3dInstance::NvapiD3dInstance(ResourceFactory& resourceFactory) +- : m_resourceFactory(resourceFactory) {} ++ : m_resourceFactory(resourceFactory) { ++ auto latencyMarkersEnv = env::getEnvVariable("DXVK_NVAPI_USE_LATENCY_MARKERS"); ++ m_useLatencyMarkers = latencyMarkersEnv.empty() || latencyMarkersEnv != "0"; ++ } + + NvapiD3dInstance::~NvapiD3dInstance() = default; + +@@ -17,21 +21,36 @@ namespace dxvk { + return m_lfx->IsAvailable(); + } + ++ Lfx2* NvapiD3dInstance::GetLfx2Instance() const { ++ return &*m_lfx; ++ } ++ + bool NvapiD3dInstance::IsReflexEnabled() const { + return m_isLfxEnabled; + } + + void NvapiD3dInstance::SetReflexEnabled(bool value) { + m_isLfxEnabled = value; ++ m_lfx->SetEnabled(value); + } + +- void NvapiD3dInstance::Sleep() { +- if (m_isLfxEnabled) +- m_lfx->WaitAndBeginFrame(); ++ bool NvapiD3dInstance::UseLatencyMarkers() const { ++ return m_useLatencyMarkers; + } + +- void NvapiD3dInstance::SetTargetFrameTime(uint64_t frameTimeUs) { +- constexpr uint64_t kNanoInMicro = 1000; +- m_lfx->SetTargetFrameTime(frameTimeUs * kNanoInMicro); ++ Com NvapiD3dInstance::GetLfx2DeviceExt(IUnknown* pDevice) { ++ static std::mutex map_mutex; ++ static std::unordered_map cacheMap; ++ ++ std::scoped_lock lock(map_mutex); ++ auto it = cacheMap.find(pDevice); ++ if (it != cacheMap.end()) ++ return it->second; ++ Com lfx2Device; ++ if (FAILED(pDevice->QueryInterface(IID_PPV_ARGS(&lfx2Device)))) ++ lfx2Device = nullptr; ++ ++ cacheMap.emplace(pDevice, lfx2Device.ptr()); ++ return lfx2Device; + } + } +\ No newline at end of file +diff --git a/src/d3d/nvapi_d3d_instance.h b/src/d3d/nvapi_d3d_instance.h +index 5a22371..2d5de3a 100644 +--- 
a/src/d3d/nvapi_d3d_instance.h ++++ b/src/d3d/nvapi_d3d_instance.h +@@ -13,12 +13,15 @@ namespace dxvk { + [[nodiscard]] bool IsReflexAvailable(); + [[nodiscard]] bool IsReflexEnabled() const; + void SetReflexEnabled(bool value); +- void Sleep(); +- void SetTargetFrameTime(uint64_t frameTimeUs); ++ [[nodiscard]] bool UseLatencyMarkers() const; ++ [[nodiscard]] Lfx2* GetLfx2Instance() const; ++ ++ Com GetLfx2DeviceExt(IUnknown* pDevice); + + private: + ResourceFactory& m_resourceFactory; +- std::unique_ptr m_lfx; ++ std::unique_ptr m_lfx; + bool m_isLfxEnabled = false; ++ bool m_useLatencyMarkers; + }; + } +\ No newline at end of file +diff --git a/src/dxvk/dxvk_interfaces.cpp b/src/dxvk/dxvk_interfaces.cpp +index 734fb75..aefd64a 100644 +--- a/src/dxvk/dxvk_interfaces.cpp ++++ b/src/dxvk/dxvk_interfaces.cpp +@@ -5,5 +5,6 @@ const GUID IDXGIVkInteropFactory1::guid = {0x2a289dbd, 0x2d0a, 0x4a51, {0x89, 0x + const GUID IDXGIVkInteropAdapter::guid = {0x3a6d8f2c, 0xb0e8, 0x4ab4, {0xb4, 0xdc, 0x4f, 0xd2, 0x48, 0x91, 0xbf, 0xa5}}; + const GUID ID3D11VkExtDevice::guid = {0x8a6e3c42, 0xf74c, 0x45b7, {0x82, 0x65, 0xa2, 0x31, 0xb6, 0x77, 0xca, 0x17}}; + const GUID ID3D11VkExtDevice1::guid = {0xcfcf64ef, 0x9586, 0x46d0, {0xbc, 0xa4, 0x97, 0xcf, 0x2c, 0xa6, 0x1b, 0x06}}; ++const GUID ID3DLfx2ExtDevice::guid = {0x851a9f0f, 0x5da0, 0x4850, {0xb5, 0x63, 0xa7, 0xbb, 0xc4, 0x14, 0xf4, 0xe6}}; + const GUID ID3D11VkExtContext::guid = {0xfd0bca13, 0x5cb6, 0x4c3a, {0x98, 0x7e, 0x47, 0x50, 0xde, 0x2c, 0xa7, 0x91}}; + const GUID ID3D11VkExtContext1::guid = {0x874b09b2, 0xae0b, 0x41d8, {0x84, 0x76, 0x5f, 0x3b, 0x7a, 0x0e, 0x87, 0x9d}}; +diff --git a/src/dxvk/dxvk_interfaces.h b/src/dxvk/dxvk_interfaces.h +index 98976b6..2063e54 100644 +--- a/src/dxvk/dxvk_interfaces.h ++++ b/src/dxvk/dxvk_interfaces.h +@@ -17,6 +17,7 @@ enum D3D11_VK_EXTENSION : uint32_t { + D3D11_VK_EXT_BARRIER_CONTROL = 3, + D3D11_VK_NVX_BINARY_IMPORT = 4, + D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, ++ D3D11_VK_LATENCYFLEX2 = 
6, + }; + + enum D3D11_VK_BARRIER_CONTROL : uint32_t { +@@ -117,6 +118,17 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + uint32_t * pCudaTextureHandle) = 0; + }; + ++MIDL_INTERFACE("851a9f0f-5da0-4850-b563-a7bbc414f4e6") ++ID3DLfx2ExtDevice : public IUnknown { ++ static const GUID guid; ++ ++ virtual void STDMETHODCALLTYPE MarkRenderStart(void* frame) = 0; ++ ++ virtual void STDMETHODCALLTYPE MarkRenderEnd(void* frame) = 0; ++ ++ virtual void STDMETHODCALLTYPE ImplicitBeginFrame(uint64_t * outTimestamp, void* outFrame) = 0; ++}; ++ + MIDL_INTERFACE("fd0bca13-5cb6-4c3a-987e-4750de2ca791") + ID3D11VkExtContext : public IUnknown { + static const GUID guid; +@@ -193,5 +205,6 @@ DXVK_DEFINE_GUID(IDXGIVkInteropFactory1) + DXVK_DEFINE_GUID(IDXGIVkInteropAdapter) + DXVK_DEFINE_GUID(ID3D11VkExtDevice) + DXVK_DEFINE_GUID(ID3D11VkExtDevice1) ++DXVK_DEFINE_GUID(ID3DLfx2ExtDevice) + DXVK_DEFINE_GUID(ID3D11VkExtContext) + DXVK_DEFINE_GUID(ID3D11VkExtContext1) +diff --git a/src/meson.build b/src/meson.build +index 98410c0..1b4f83f 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -11,6 +11,7 @@ nvapi_src = files([ + 'sysinfo/nvapi_adapter_registry.cpp', + 'resource_factory.cpp', + 'd3d/lfx.cpp', ++ 'd3d/lfx2.cpp', + 'd3d/nvapi_d3d_instance.cpp', + 'd3d11/nvapi_d3d11_device.cpp', + 'd3d12/nvapi_d3d12_device.cpp', +diff --git a/src/nvapi_d3d.cpp b/src/nvapi_d3d.cpp +index 7f1168b..dfed404 100644 +--- a/src/nvapi_d3d.cpp ++++ b/src/nvapi_d3d.cpp +@@ -1,6 +1,8 @@ + #include "nvapi_private.h" + #include "nvapi_globals.h" + #include "util/util_statuscode.h" ++#include "dxvk/dxvk_interfaces.h" ++#include "d3d12/nvapi_d3d12_device.h" + + extern "C" { + using namespace dxvk; +@@ -115,7 +117,13 @@ extern "C" { + if (!nvapiD3dInstance->IsReflexAvailable()) + return NoImplementation(n, alreadyLoggedNoLfx); + +- nvapiD3dInstance->Sleep(); ++ nvapiD3dInstance->GetLfx2Instance()->Sleep(); ++ if (nvapiD3dInstance->IsReflexEnabled() && !nvapiD3dInstance->UseLatencyMarkers()) { ++ 
auto lfx2Ext = nvapiD3dInstance->GetLfx2DeviceExt(pDevice); ++ if (lfx2Ext.ptr()) { ++ nvapiD3dInstance->GetLfx2Instance()->SleepImplicit(lfx2Ext); ++ } ++ } + + return Ok(n, alreadyLoggedOk); + } +@@ -124,6 +132,10 @@ extern "C" { + constexpr auto n = __func__; + static bool alreadyLoggedNoLfx = false; + ++ Com context = nvapiD3dInstance->GetLfx2DeviceExt(pDevice); ++ if (!context.ptr()) ++ return NoImplementation(n, alreadyLoggedNoLfx); ++ + if (nvapiAdapterRegistry == nullptr) + return ApiNotInitialized(n); + +@@ -134,10 +146,7 @@ extern "C" { + return NoImplementation(n, alreadyLoggedNoLfx); + + nvapiD3dInstance->SetReflexEnabled(pSetSleepModeParams->bLowLatencyMode); +- if (pSetSleepModeParams->bLowLatencyMode) +- nvapiD3dInstance->SetTargetFrameTime(pSetSleepModeParams->minimumIntervalUs); +- +- return Ok(str::format(n, " (", pSetSleepModeParams->bLowLatencyMode ? (str::format("Enabled/", pSetSleepModeParams->minimumIntervalUs, "us")) : "Disabled", ")")); ++ return Ok(str::format(n, " (", pSetSleepModeParams->bLowLatencyMode ? 
str::format("Enabled/", pSetSleepModeParams->minimumIntervalUs, "us") : "Disabled", ")")); + } + + NvAPI_Status __cdecl NvAPI_D3D_GetSleepStatus(IUnknown* pDevice, NV_GET_SLEEP_STATUS_PARAMS* pGetSleepStatusParams) { +@@ -164,6 +173,13 @@ extern "C" { + + NvAPI_Status __cdecl NvAPI_D3D_SetLatencyMarker(IUnknown* pDev, NV_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams) { + static bool alreadyLogged = false; +- return NoImplementation(__func__, alreadyLogged); ++ if (pSetLatencyMarkerParams->version != NV_LATENCY_MARKER_PARAMS_VER1) ++ return IncompatibleStructVersion(__func__); ++ ++ Com context = nvapiD3dInstance->GetLfx2DeviceExt(pDev); ++ Lfx2* lfx2 = nvapiD3dInstance->GetLfx2Instance(); ++ lfx2->Mark(pSetLatencyMarkerParams->frameID, pSetLatencyMarkerParams->markerType, context); ++ ++ return Ok(__func__, alreadyLogged); + } + } +diff --git a/src/nvapi_gpu.cpp b/src/nvapi_gpu.cpp +index 97d3a10..772985f 100644 +--- a/src/nvapi_gpu.cpp ++++ b/src/nvapi_gpu.cpp +@@ -367,77 +367,10 @@ extern "C" { + + NvAPI_Status __cdecl NvAPI_GPU_GetArchInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_ARCH_INFO* pGpuArchInfo) { + constexpr auto n = __func__; +- auto returnAddress = _ReturnAddress(); + +- if (nvapiAdapterRegistry == nullptr) +- return ApiNotInitialized(n); +- +- if (pGpuArchInfo == nullptr) +- return InvalidArgument(n); +- +- auto adapter = reinterpret_cast(hPhysicalGpu); +- if (!nvapiAdapterRegistry->IsAdapter(adapter)) +- return ExpectedPhysicalGpuHandle(n); +- +- if (pGpuArchInfo->version != NV_GPU_ARCH_INFO_VER_1 && pGpuArchInfo->version != NV_GPU_ARCH_INFO_VER_2) +- return IncompatibleStructVersion(n); +- +- auto architectureId = adapter->GetArchitectureId(); +- +- if (env::needsAmpereSpoofing(architectureId, returnAddress)) +- architectureId = NV_GPU_ARCHITECTURE_GA100; +- +- if (env::needsPascalSpoofing(architectureId)) +- architectureId = NV_GPU_ARCHITECTURE_GP100; +- +- // Assume the implementation ID from the architecture ID. 
No simple way +- // to do a more fine-grained query at this time. Would need wine-nvml +- // usage. +- NV_GPU_ARCH_IMPLEMENTATION_ID implementationId; +- switch (architectureId) { +- case NV_GPU_ARCHITECTURE_AD100: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_AD102; +- break; +- case NV_GPU_ARCHITECTURE_GA100: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_GA102; +- break; +- case NV_GPU_ARCHITECTURE_TU100: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_TU102; +- break; +- case NV_GPU_ARCHITECTURE_GV100: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_GV100; +- break; +- case NV_GPU_ARCHITECTURE_GP100: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_GP102; +- break; +- case NV_GPU_ARCHITECTURE_GM200: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_GM204; +- break; +- case NV_GPU_ARCHITECTURE_GK100: +- implementationId = NV_GPU_ARCH_IMPLEMENTATION_GK104; +- break; +- default: +- return Error(n); +- } +- +- auto revisionId = NV_GPU_CHIP_REV_UNKNOWN; +- +- switch (pGpuArchInfo->version) { +- case NV_GPU_ARCH_INFO_VER_1: { +- auto pGpuArchInfoV1 = reinterpret_cast(pGpuArchInfo); +- pGpuArchInfoV1->architecture = architectureId; +- pGpuArchInfoV1->implementation = implementationId; +- pGpuArchInfoV1->revision = revisionId; +- break; +- } +- case NV_GPU_ARCH_INFO_VER_2: +- pGpuArchInfo->architecture_id = architectureId; +- pGpuArchInfo->implementation_id = implementationId; +- pGpuArchInfo->revision_id = revisionId; +- break; +- default: +- return Error(n); // Unreachable, but just to be sure +- } ++ pGpuArchInfo->architecture_id = NV_GPU_ARCHITECTURE_AD100; ++ pGpuArchInfo->implementation_id = NV_GPU_ARCH_IMPLEMENTATION_AD102; ++ pGpuArchInfo->revision_id = NV_GPU_CHIP_REV_UNKNOWN; + + return Ok(n); + } +diff --git a/src/resource_factory.cpp b/src/resource_factory.cpp +index ce9a7d9..40c8295 100644 +--- a/src/resource_factory.cpp ++++ b/src/resource_factory.cpp +@@ -37,7 +37,7 @@ namespace dxvk { + return std::make_unique(); + } + +- std::unique_ptr 
ResourceFactory::CreateLfx() { +- return std::make_unique(); ++ std::unique_ptr ResourceFactory::CreateLfx() { ++ return std::make_unique(); + } + } +diff --git a/src/resource_factory.h b/src/resource_factory.h +index eebfbcd..69c2384 100644 +--- a/src/resource_factory.h ++++ b/src/resource_factory.h +@@ -4,7 +4,7 @@ + #include "util/com_pointer.h" + #include "sysinfo/vulkan.h" + #include "sysinfo/nvml.h" +-#include "d3d/lfx.h" ++#include "d3d/lfx2.h" + + namespace dxvk { + class ResourceFactory { +@@ -16,6 +16,6 @@ namespace dxvk { + virtual Com CreateDXGIFactory1(); + virtual std::unique_ptr CreateVulkan(Com& dxgiFactory); + virtual std::unique_ptr CreateNvml(); +- virtual std::unique_ptr CreateLfx(); ++ virtual std::unique_ptr CreateLfx(); + }; + } +-- +2.43.0 + + +From 7004a81746a35b8dc13301431772dbbc36e10fcd Mon Sep 17 00:00:00 2001 +From: FakeMichau <49685661+FakeMichau@users.noreply.github.com> +Date: Sat, 7 Oct 2023 15:12:55 +0200 +Subject: [PATCH 2/3] Remove incomplete support for the first latencyflex + +--- + src/d3d/lfx.cpp | 67 ----------------------------------------- + src/d3d/lfx.h | 26 ---------------- + src/meson.build | 1 - + tests/meson.build | 1 - + tests/mock_factory.h | 5 --- + tests/nvapi_d3d_mocks.h | 7 ----- + 6 files changed, 107 deletions(-) + delete mode 100644 src/d3d/lfx.cpp + delete mode 100644 src/d3d/lfx.h + +diff --git a/src/d3d/lfx.cpp b/src/d3d/lfx.cpp +deleted file mode 100644 +index c5b8ea4..0000000 +--- a/src/d3d/lfx.cpp ++++ /dev/null +@@ -1,67 +0,0 @@ +-#include "lfx.h" +- +-#include "../util/util_string.h" +-#include "../util/util_log.h" +- +-namespace dxvk { +- Lfx::Lfx() { +- const auto lfxModuleName = "latencyflex_layer.dll"; +- const auto lfxModuleNameFallback = "latencyflex_wine.dll"; +- auto useFallbackEntrypoints = false; +- +- m_lfxModule = ::LoadLibraryA(lfxModuleName); +- if (m_lfxModule != nullptr) +- log::write(str::format("Successfully loaded ", lfxModuleName)); +- +- if (m_lfxModule == nullptr && 
::GetLastError() == ERROR_MOD_NOT_FOUND) { +- // Try fallback entrypoints. These were used by versions prior to [9c2836f]. +- // The fallback logic can be removed once enough time has passed since the release. +- // [9c2836f]: https://github.com/ishitatsuyuki/LatencyFleX/commit/9c2836faf14196190a915064b53c27e675e47960 +- m_lfxModule = ::LoadLibraryA(lfxModuleNameFallback); +- if (m_lfxModule != nullptr) +- log::write(str::format("Successfully loaded ", lfxModuleNameFallback)); +- +- useFallbackEntrypoints = true; +- } +- +- if (m_lfxModule == nullptr) { +- auto lastError = ::GetLastError(); +- if (lastError != ERROR_MOD_NOT_FOUND) // Ignore library not found +- log::write(str::format("Loading ", !useFallbackEntrypoints ? lfxModuleName : lfxModuleNameFallback, +- " failed with error code: ", lastError)); +- return; +- } +- +- m_lfx_WaitAndBeginFrame = GetProcAddress( +- !useFallbackEntrypoints ? "lfx_WaitAndBeginFrame" : "winelfx_WaitAndBeginFrame"); +- m_lfx_SetTargetFrameTime = GetProcAddress( +- !useFallbackEntrypoints ? 
"lfx_SetTargetFrameTime" : "winelfx_SetTargetFrameTime"); +- } +- +- Lfx::~Lfx() { +- if (m_lfxModule == nullptr) +- return; +- +- ::FreeLibrary(m_lfxModule); +- m_lfxModule = nullptr; +- } +- +- bool Lfx::IsAvailable() const { +- return m_lfxModule != nullptr; +- } +- +- void Lfx::WaitAndBeginFrame() { +- if (m_lfx_WaitAndBeginFrame) +- m_lfx_WaitAndBeginFrame(); +- } +- +- void Lfx::SetTargetFrameTime(uint64_t frame_time_ns) { +- if (m_lfx_SetTargetFrameTime) +- m_lfx_SetTargetFrameTime(static_cast<__int64>(frame_time_ns)); +- } +- +- template +- T Lfx::GetProcAddress(const char* name) { +- return reinterpret_cast(reinterpret_cast(::GetProcAddress(m_lfxModule, name))); +- } +-} +\ No newline at end of file +diff --git a/src/d3d/lfx.h b/src/d3d/lfx.h +deleted file mode 100644 +index daff36e..0000000 +--- a/src/d3d/lfx.h ++++ /dev/null +@@ -1,26 +0,0 @@ +-#pragma once +- +-#include "../nvapi_private.h" +- +-namespace dxvk { +- class Lfx { +- public: +- Lfx(); +- virtual ~Lfx(); +- +- [[nodiscard]] virtual bool IsAvailable() const; +- virtual void WaitAndBeginFrame(); +- virtual void SetTargetFrameTime(uint64_t frame_time_ns); +- +- private: +- using PFN_lfx_WaitAndBeginFrame = void (*)(); +- using PFN_lfx_SetTargetFrameTime = void (*)(__int64); +- +- HMODULE m_lfxModule{}; +- PFN_lfx_WaitAndBeginFrame m_lfx_WaitAndBeginFrame{}; +- PFN_lfx_SetTargetFrameTime m_lfx_SetTargetFrameTime{}; +- +- template +- T GetProcAddress(const char* name); +- }; +-} +\ No newline at end of file +diff --git a/src/meson.build b/src/meson.build +index 1b4f83f..5b4055b 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -10,7 +10,6 @@ nvapi_src = files([ + 'sysinfo/nvapi_adapter.cpp', + 'sysinfo/nvapi_adapter_registry.cpp', + 'resource_factory.cpp', +- 'd3d/lfx.cpp', + 'd3d/lfx2.cpp', + 'd3d/nvapi_d3d_instance.cpp', + 'd3d11/nvapi_d3d11_device.cpp', +diff --git a/tests/meson.build b/tests/meson.build +index 5aca1d5..95b4d0e 100644 +--- a/tests/meson.build ++++ b/tests/meson.build 
+@@ -6,7 +6,6 @@ nvapi_src = files([ + '../src/util/util_log.cpp', + '../src/sysinfo/vulkan.cpp', + '../src/sysinfo/nvml.cpp', +- '../src/d3d/lfx.cpp', + '../src/d3d/nvapi_d3d_instance.cpp', + '../src/sysinfo/nvapi_output.cpp', + '../src/sysinfo/nvapi_adapter.cpp', +diff --git a/tests/mock_factory.h b/tests/mock_factory.h +index 0f41f74..67852f9 100644 +--- a/tests/mock_factory.h ++++ b/tests/mock_factory.h +@@ -30,10 +30,6 @@ class MockFactory : public dxvk::ResourceFactory { + return std::move(m_nvmlMock); + } + +- std::unique_ptr CreateLfx() override { +- return std::move(m_lfxMock); +- } +- + [[nodiscard]] std::array, 1> ConfigureAllowRelease() { + return { + NAMED_ALLOW_CALL(*m_dxgiFactoryMock, Release()) +@@ -44,5 +40,4 @@ class MockFactory : public dxvk::ResourceFactory { + std::unique_ptr m_dxgiFactoryMock; + std::unique_ptr m_vulkanMock; + std::unique_ptr m_nvmlMock; +- std::unique_ptr m_lfxMock; + }; +diff --git a/tests/nvapi_d3d_mocks.h b/tests/nvapi_d3d_mocks.h +index d1e7ac7..b389882 100644 +--- a/tests/nvapi_d3d_mocks.h ++++ b/tests/nvapi_d3d_mocks.h +@@ -1,16 +1,9 @@ + #pragma once + + #include "nvapi_tests_private.h" +-#include "../src/d3d/lfx.h" + + class UnknownMock : public trompeloeil::mock_interface { + MAKE_MOCK2(QueryInterface, HRESULT(REFIID, void**), override); + MAKE_MOCK0(AddRef, ULONG(), override); + MAKE_MOCK0(Release, ULONG(), override); + }; +- +-class LfxMock : public trompeloeil::mock_interface { +- IMPLEMENT_CONST_MOCK0(IsAvailable); +- IMPLEMENT_MOCK0(WaitAndBeginFrame); +- IMPLEMENT_MOCK1(SetTargetFrameTime); +-}; +-- +2.43.0 + + +From b4d01b2ce7367d0be6cfe9cfcf6f0d1884e80ccc Mon Sep 17 00:00:00 2001 +From: FakeMichau <49685661+FakeMichau@users.noreply.github.com> +Date: Wed, 7 Feb 2024 16:39:27 +0100 +Subject: [PATCH 3/3] Disable LFX2 when WINEHAGS is enabled + +--- + src/d3d/lfx2.cpp | 6 +++++- + src/d3d/lfx2.h | 1 + + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/src/d3d/lfx2.cpp b/src/d3d/lfx2.cpp +index 
b75cf0d..a726afa 100644 +--- a/src/d3d/lfx2.cpp ++++ b/src/d3d/lfx2.cpp +@@ -1,12 +1,16 @@ + #include "lfx2.h" + #include "util/util_log.h" + #include "util/util_string.h" ++#include "util/util_env.h" + + namespace dxvk { + + Lfx2::Lfx2() { + const auto lfxModuleName = "latencyflex2_rust.dll"; + ++ auto hagsEnv = env::getEnvVariable("WINEHAGS"); ++ m_FG = !hagsEnv.empty() && hagsEnv != "0"; ++ + m_lfxModule = ::LoadLibraryA(lfxModuleName); + if (m_lfxModule == nullptr) { + auto lastError = ::GetLastError(); +@@ -70,7 +74,7 @@ namespace dxvk { + } + + void Lfx2::Mark(uint64_t frame_id, NV_LATENCY_MARKER_TYPE type, Com& extDevice) { +- if (!m_lfxContext) ++ if (!m_lfxContext || m_FG) + return; + + uint32_t section; +diff --git a/src/d3d/lfx2.h b/src/d3d/lfx2.h +index 2e20ad7..6221d5b 100644 +--- a/src/d3d/lfx2.h ++++ b/src/d3d/lfx2.h +@@ -47,6 +47,7 @@ namespace dxvk { + + void EnsureFrame(); + ++ bool m_FG; + std::mutex m_frameMapMutex; + std::map m_frameMap; + std::unordered_map m_callsExpectedByFrame; +-- +2.43.0 + diff --git a/patches/lfx2-vkd3d.patch b/patches/lfx2-vkd3d.patch new file mode 100644 index 0000000000..56d332a813 --- /dev/null +++ b/patches/lfx2-vkd3d.patch @@ -0,0 +1,616 @@ +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +new file mode 100644 +index 00000000..6bdc1f84 +--- /dev/null ++++ b/include/latencyflex2.h +@@ -0,0 +1,129 @@ ++#ifndef LATENCYFLEX2_H ++#define LATENCYFLEX2_H ++ ++#define LFX2_DX12 ++ ++#include ++#include ++#include ++#include ++#include ++#ifdef LFX2_DX12 ++#include ++#endif ++ ++#ifdef _WIN32 ++#define LFX2_API __declspec(dllimport) ++#else ++#define LFX2_API ++#endif ++ ++typedef struct lfx2Dx12SubmitAux { ++ ID3D12GraphicsCommandList* executeBefore; ++ ID3D12GraphicsCommandList* executeAfter; ++ ID3D12Fence* fence; ++ uint64_t fenceValue; ++} lfx2Dx12SubmitAux; ++ ++ ++typedef enum lfx2MarkType { ++ lfx2MarkTypeBegin, ++ lfx2MarkTypeEnd, ++} lfx2MarkType; ++ ++typedef struct lfx2Context lfx2Context; ++ ++#if 
defined(LFX2_DX12) ++typedef struct lfx2Dx12Context lfx2Dx12Context; ++#endif ++ ++/** ++ * A write handle for frame markers. ++ */ ++typedef struct lfx2Frame lfx2Frame; ++ ++typedef struct lfx2ImplicitContext lfx2ImplicitContext; ++ ++typedef uint64_t lfx2Timestamp; ++ ++typedef uint32_t lfx2SectionId; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif // __cplusplus ++ ++#if defined(LFX2_DX12) ++LFX2_API struct lfx2Dx12Context *lfx2Dx12ContextCreate(ID3D12Device* device); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextAddRef(struct lfx2Dx12Context *context); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextRelease(struct lfx2Dx12Context *context); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API ++lfx2Dx12SubmitAux lfx2Dx12ContextBeforeSubmit(struct lfx2Dx12Context *context, ++ ID3D12CommandQueue* queue); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextBeginFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextEndFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ ++LFX2_API lfx2Timestamp lfx2TimestampNow(void); ++ ++#if defined(_WIN32) ++LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); ++#endif ++ ++LFX2_API void lfx2SleepUntil(lfx2Timestamp target); ++ ++LFX2_API struct lfx2Context *lfx2ContextCreate(void); ++ ++LFX2_API void lfx2ContextAddRef(struct lfx2Context *context); ++ ++LFX2_API void lfx2ContextRelease(struct lfx2Context *context); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameCreate(struct lfx2Context *context, ++ lfx2Timestamp *out_timestamp); ++ ++LFX2_API void lfx2FrameAddRef(struct lfx2Frame *frame); ++ ++LFX2_API void lfx2FrameRelease(struct lfx2Frame *frame); ++ ++LFX2_API ++void lfx2MarkSection(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ enum lfx2MarkType mark_type, ++ lfx2Timestamp timestamp); ++ ++LFX2_API struct lfx2ImplicitContext *lfx2ImplicitContextCreate(void); 
++ ++LFX2_API void lfx2ImplicitContextAddRef(struct lfx2ImplicitContext *context); ++ ++LFX2_API void lfx2ImplicitContextRelease(struct lfx2ImplicitContext *context); ++ ++LFX2_API void lfx2ImplicitContextReset(struct lfx2ImplicitContext *context); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameCreateImplicit(struct lfx2ImplicitContext *context, ++ lfx2Timestamp *out_timestamp); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameDequeueImplicit(struct lfx2ImplicitContext *context, ++ bool critical); ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif // __cplusplus ++ ++#endif /* LATENCYFLEX2_H */ +diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl +index 093c96c5..9e3e666b 100644 +--- a/include/vkd3d_device_vkd3d_ext.idl ++++ b/include/vkd3d_device_vkd3d_ext.idl +@@ -81,3 +81,16 @@ interface ID3D12DeviceExt1 : ID3D12DeviceExt + HRESULT CreateResourceFromBorrowedHandle(const D3D12_RESOURCE_DESC1 *desc, UINT64 vk_handle, ID3D12Resource **resource); + HRESULT GetVulkanQueueInfoEx(ID3D12CommandQueue *queue, VkQueue *vk_queue, UINT32 *vk_queue_index, UINT32 *vk_queue_flags, UINT32 *vk_queue_family); + } ++ ++[ ++ uuid(851a9f0f-5da0-4850-b563-a7bbc414f4e6), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3DLfx2ExtDevice : IUnknown ++{ ++ void MarkRenderStart(void *frame); ++ void MarkRenderEnd(void *frame); ++ void ImplicitBeginFrame(UINT64 *out_timestamp, void *out_frame); ++} +\ No newline at end of file +diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c +index 288e7e24..cdf7239d 100644 +--- a/libs/vkd3d/command.c ++++ b/libs/vkd3d/command.c +@@ -17321,6 +17321,75 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + d3d12_command_queue_add_submission(command_queue, &sub); + } + ++static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ++ ID3D12Fence *fence_iface, UINT64 value); ++ ++static void STDMETHODCALLTYPE 
d3d12_command_queue_ExecuteCommandListsLFX2(ID3D12CommandQueue *iface, ++ UINT command_list_count, ID3D12CommandList *const *command_lists) ++{ ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ struct vkd3d_lfx2_context *lfx2_context = &command_queue->device->lfx2_context; ++ UINT new_command_list_count = command_list_count, i = 0; ++ ID3D12CommandList **new_command_lists; ++ struct lfx2Dx12SubmitAux lfx2_aux; ++ ++ if (!lfx2 || command_queue->desc.Type != D3D12_COMMAND_LIST_TYPE_DIRECT) ++ { ++ d3d12_command_queue_ExecuteCommandLists(iface, command_list_count, command_lists); ++ return; ++ } ++ ++ pthread_mutex_lock(&lfx2_context->current_implicit_frame_lock); ++ if (!lfx2_context->current_implicit_frame) ++ { ++ lfx2_context->current_implicit_frame = lfx2->FrameDequeueImplicit(lfx2_context->implicit_context, false); ++ if (lfx2_context->current_implicit_frame) { ++ lfx2->Dx12ContextBeginFrame(lfx2_context->dx12_context, lfx2_context->current_implicit_frame); ++ } ++ } ++ pthread_mutex_unlock(&lfx2_context->current_implicit_frame_lock); ++ ++ lfx2_aux = lfx2->Dx12ContextBeforeSubmit(lfx2_context->dx12_context, iface); ++ ++ if (lfx2_aux.executeBefore) ++ new_command_list_count++; ++ ++ if (lfx2_aux.executeAfter) ++ new_command_list_count++; ++ ++ if (!(new_command_lists = vkd3d_calloc(new_command_list_count, sizeof(*new_command_lists)))) ++ { ++ ERR("Failed to allocate command list array."); ++ return; ++ } ++ ++ if (lfx2_aux.executeBefore) ++ new_command_lists[i++] = (ID3D12CommandList *)lfx2_aux.executeBefore; ++ ++ memcpy(&new_command_lists[i], command_lists, command_list_count * sizeof(*command_lists)); ++ i += command_list_count; ++ ++ if (lfx2_aux.executeAfter) ++ new_command_lists[i++] = (ID3D12CommandList *)lfx2_aux.executeAfter; ++ ++ d3d12_command_queue_ExecuteCommandLists(iface, new_command_list_count, new_command_lists); ++ ++ if (lfx2_aux.executeBefore) 
++ d3d12_command_list_Release((d3d12_command_list_iface *)lfx2_aux.executeBefore); ++ ++ if (lfx2_aux.executeAfter) ++ d3d12_command_list_Release((d3d12_command_list_iface *)lfx2_aux.executeAfter); ++ ++ vkd3d_free(new_command_lists); ++ ++ if (lfx2_aux.fence) ++ { ++ d3d12_command_queue_Signal(iface, lfx2_aux.fence, lfx2_aux.fenceValue); ++ d3d12_fence_Release((d3d12_fence_iface *)lfx2_aux.fence); ++ } ++} ++ + static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface, + UINT metadata, const void *data, UINT size) + { +@@ -17494,7 +17563,7 @@ static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = + /* ID3D12CommandQueue methods */ + d3d12_command_queue_UpdateTileMappings, + d3d12_command_queue_CopyTileMappings, +- d3d12_command_queue_ExecuteCommandLists, ++ d3d12_command_queue_ExecuteCommandListsLFX2, + d3d12_command_queue_SetMarker, + d3d12_command_queue_BeginEvent, + d3d12_command_queue_EndEvent, +diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c +index 9163cb8f..55da65ee 100644 +--- a/libs/vkd3d/device.c ++++ b/libs/vkd3d/device.c +@@ -3408,6 +3408,9 @@ extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ + extern ULONG STDMETHODCALLTYPE d3d12_dxvk_interop_device_AddRef(ID3D12DXVKInteropDevice *iface); + extern ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(ID3DLowLatencyDevice *iface); + ++/* ID3DLfx2ExtDevice */ ++extern ULONG STDMETHODCALLTYPE d3d12_device_lfx2_ext_AddRef(d3d12_device_lfx2_ext_iface *iface); ++ + HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + REFIID riid, void **object) + { +@@ -3462,6 +3465,14 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + return S_OK; + } + ++ if (IsEqualGUID(riid, &IID_ID3DLfx2ExtDevice)) ++ { ++ struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ d3d12_device_lfx2_ext_AddRef(&device->ID3D12DeviceLfx2_iface); ++ *object = 
&device->ID3D12DeviceLfx2_iface; ++ return S_OK; ++ } ++ + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; +@@ -3508,6 +3519,7 @@ static void d3d12_device_destroy(struct d3d12_device *device) + vkd3d_private_store_destroy(&device->private_store); + + vkd3d_cleanup_format_info(device); ++ vkd3d_lfx2_context_free(&device->lfx2_context); + vkd3d_memory_info_cleanup(&device->memory_info, device); + vkd3d_address_binding_tracker_cleanup(&device->address_binding_tracker, device); + vkd3d_queue_timeline_trace_cleanup(&device->queue_timeline_trace); +@@ -7257,7 +7269,7 @@ static D3D12_RESOURCE_STATES vkd3d_barrier_layout_to_resource_state(D3D12_BARRIE + } + + static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource3(d3d12_device_iface *iface, +- const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, ++ const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC1 *desc, D3D12_BARRIER_LAYOUT initial_layout, + const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, + UINT32 num_castable_formats, const DXGI_FORMAT *castable_formats, REFIID iid, void **resource) +@@ -8642,6 +8654,7 @@ static void vkd3d_scratch_pool_init(struct d3d12_device *device) + device->scratch_pools[VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS].scratch_buffer_size = + VKD3D_SCRATCH_BUFFER_COUNT_INDIRECT_PREPROCESS; + } ++extern CONST_VTBL struct ID3DLfx2ExtDeviceVtbl d3d12_device_lfx2_ext_vtbl; + + static HRESULT d3d12_device_init(struct d3d12_device *device, + struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info) +@@ -8689,6 +8702,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + + device->ID3D12DeviceExt_iface.lpVtbl = &d3d12_device_vkd3d_ext_vtbl; + device->ID3D12DXVKInteropDevice_iface.lpVtbl = &d3d12_dxvk_interop_device_vtbl; ++ device->ID3D12DeviceLfx2_iface.lpVtbl = 
&d3d12_device_lfx2_ext_vtbl; + device->ID3DLowLatencyDevice_iface.lpVtbl = &d3d_low_latency_device_vtbl; + + if ((rc = rwlock_init(&device->vertex_input_lock))) +@@ -8759,6 +8773,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + } + #endif + ++ vkd3d_lfx2_context_init(&device->lfx2_context, (d3d12_device_iface *)device); ++ + if (vkd3d_descriptor_debug_active_qa_checks()) + { + if (FAILED(hr = vkd3d_descriptor_debug_alloc_global_info(&device->descriptor_qa_global_info, +@@ -8800,6 +8816,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + out_cleanup_descriptor_qa_global_info: + vkd3d_descriptor_debug_free_global_info(device->descriptor_qa_global_info, device); + out_cleanup_breadcrumb_tracer: ++ vkd3d_lfx2_context_free(&device->lfx2_context); + #ifdef VKD3D_ENABLE_BREADCRUMBS + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) + vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device); +diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c +index 9b84c296..78f41303 100644 +--- a/libs/vkd3d/device_vkd3d_ext.c ++++ b/libs/vkd3d/device_vkd3d_ext.c +@@ -268,6 +268,65 @@ CONST_VTBL struct ID3D12DeviceExt1Vtbl d3d12_device_vkd3d_ext_vtbl = + d3d12_device_vkd3d_ext_GetVulkanQueueInfoEx, + }; + ++static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceLfx2(d3d12_device_lfx2_ext_iface *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceLfx2_iface); ++} ++ ++ULONG STDMETHODCALLTYPE d3d12_device_lfx2_ext_AddRef(d3d12_device_lfx2_ext_iface *iface) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ return d3d12_device_add_ref(device); ++} ++ ++static ULONG STDMETHODCALLTYPE d3d12_device_lfx2_ext_Release(d3d12_device_lfx2_ext_iface *iface) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ return d3d12_device_release(device); ++} ++ ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_lfx2_ext_QueryInterface(d3d12_device_lfx2_ext_iface *iface, REFIID iid, void **out) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); ++ return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_lfx2_ext_ImplicitBeginFrame(d3d12_device_lfx2_ext_iface *iface, UINT64 *out_timestamp, ++ void *out_frame) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ ++ *(lfx2Frame**)out_frame = lfx2->FrameCreateImplicit(device->lfx2_context.implicit_context, out_timestamp); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_lfx2_ext_MarkRenderStart(d3d12_device_lfx2_ext_iface *iface, void *frame) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ lfx2->Dx12ContextBeginFrame(device->lfx2_context.dx12_context, (lfx2Frame*)frame); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_lfx2_ext_MarkRenderEnd(d3d12_device_lfx2_ext_iface *iface, void *frame) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ lfx2->Dx12ContextEndFrame(device->lfx2_context.dx12_context, (lfx2Frame*)frame); ++} ++ ++CONST_VTBL struct ID3DLfx2ExtDeviceVtbl d3d12_device_lfx2_ext_vtbl = ++{ ++ /* IUnknown methods */ ++ d3d12_device_lfx2_ext_QueryInterface, ++ d3d12_device_lfx2_ext_AddRef, ++ d3d12_device_lfx2_ext_Release, ++ ++ /* ID3D12DeviceLfx2 methods */ ++ d3d12_device_lfx2_ext_MarkRenderStart, ++ d3d12_device_lfx2_ext_MarkRenderEnd, ++ d3d12_device_lfx2_ext_ImplicitBeginFrame, ++}; + + static inline struct d3d12_device *d3d12_device_from_ID3D12DXVKInteropDevice(ID3D12DXVKInteropDevice *iface) + { +diff --git a/libs/vkd3d/lfx2.c b/libs/vkd3d/lfx2.c 
+new file mode 100644 +index 00000000..5fd50b91 +--- /dev/null ++++ b/libs/vkd3d/lfx2.c +@@ -0,0 +1,70 @@ ++#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API ++ ++#include "vkd3d_private.h" ++ ++static pthread_once_t library_once = PTHREAD_ONCE_INIT; ++static struct vkd3d_lfx2_vtable lfx2_vtable; ++static BOOL lfx2_available; ++ ++static void vkd3d_lfx2_load(void) ++{ ++ HMODULE module = LoadLibraryA("latencyflex2_rust.dll"); ++ if (!module) ++ { ++ lfx2_available = false; ++ return; ++ } ++ ++#define LOAD_FUNCTION(name) lfx2_vtable.name = (void *)GetProcAddress(module, "lfx2" #name) ++ ++ LOAD_FUNCTION(Dx12ContextCreate); ++ LOAD_FUNCTION(Dx12ContextRelease); ++ LOAD_FUNCTION(Dx12ContextBeforeSubmit); ++ LOAD_FUNCTION(Dx12ContextBeginFrame); ++ LOAD_FUNCTION(Dx12ContextEndFrame); ++ LOAD_FUNCTION(TimestampNow); ++ LOAD_FUNCTION(TimestampFromQpc); ++ LOAD_FUNCTION(ImplicitContextCreate); ++ LOAD_FUNCTION(ImplicitContextRelease); ++ LOAD_FUNCTION(ImplicitContextReset); ++ LOAD_FUNCTION(FrameCreateImplicit); ++ LOAD_FUNCTION(FrameDequeueImplicit); ++ LOAD_FUNCTION(FrameRelease); ++ ++#undef LOAD_FUNCTION ++ ++ lfx2_available = true; ++} ++ ++struct vkd3d_lfx2_vtable *vkd3d_lfx2_get_vtable(void) ++{ ++ pthread_once(&library_once, vkd3d_lfx2_load); ++ return lfx2_available ? 
&lfx2_vtable : NULL; ++} ++ ++void vkd3d_lfx2_context_init(struct vkd3d_lfx2_context *context, d3d12_device_iface *device) ++{ ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ if (!lfx2) ++ return; ++ ++ pthread_mutex_init(&context->current_implicit_frame_lock, NULL); ++ context->current_implicit_frame = NULL; ++ context->dx12_context = lfx2->Dx12ContextCreate((ID3D12Device *)device); ++ context->implicit_context = lfx2->ImplicitContextCreate(); ++} ++ ++void vkd3d_lfx2_context_free(struct vkd3d_lfx2_context *context) ++{ ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ if (!lfx2) ++ return; ++ ++ if (context->current_implicit_frame) ++ lfx2->FrameRelease(context->current_implicit_frame); ++ if (context->implicit_context) ++ lfx2->ImplicitContextRelease(context->implicit_context); ++ if (context->dx12_context) ++ lfx2->Dx12ContextRelease(context->dx12_context); ++ pthread_mutex_destroy(&context->current_implicit_frame_lock); ++} +\ No newline at end of file +diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build +index a5c965f3..3b28e100 100644 +--- a/libs/vkd3d/meson.build ++++ b/libs/vkd3d/meson.build +@@ -77,7 +77,8 @@ vkd3d_src = [ + 'acceleration_structure.c', + 'swapchain.c', + 'queue_timeline.c', +- 'address_binding_tracker.c' ++ 'address_binding_tracker.c', ++ 'lfx2.c' + ] + + if enable_renderdoc +diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c +index c4f0c2cc..6f41ca1e 100644 +--- a/libs/vkd3d/swapchain.c ++++ b/libs/vkd3d/swapchain.c +@@ -955,6 +955,8 @@ static void dxgi_vk_swap_chain_wait_internal_handle(struct dxgi_vk_swap_chain *c + static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *iface, UINT SyncInterval, UINT PresentFlags, const DXGI_PRESENT_PARAMETERS *pPresentParameters) + { + struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ struct vkd3d_lfx2_context *lfx2_context = 
&chain->queue->device->lfx2_context; + struct dxgi_vk_swap_chain_present_request *request; + struct vkd3d_queue_timeline_trace_cookie cookie; + bool low_latency_enable; +@@ -968,6 +970,22 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if + if (PresentFlags & DXGI_PRESENT_TEST) + return S_OK; + ++ pthread_mutex_lock(&lfx2_context->current_implicit_frame_lock); ++ if (!lfx2_context->current_implicit_frame) ++ { ++ lfx2_context->current_implicit_frame = lfx2->FrameDequeueImplicit(lfx2_context->implicit_context, true); ++ if (lfx2_context->current_implicit_frame) { ++ lfx2->Dx12ContextBeginFrame(lfx2_context->dx12_context, lfx2_context->current_implicit_frame); ++ } ++ } ++ if (lfx2_context->current_implicit_frame) ++ { ++ lfx2->Dx12ContextEndFrame(lfx2_context->dx12_context, lfx2_context->current_implicit_frame); ++ lfx2->FrameRelease(lfx2_context->current_implicit_frame); ++ lfx2_context->current_implicit_frame = NULL; ++ } ++ pthread_mutex_unlock(&lfx2_context->current_implicit_frame_lock); ++ + /* If we missed the event signal last frame, we have to wait for it now. + * Otherwise, we end up in a floating state where our waits and thread signals might not stay in sync anymore. 
*/ + if (chain->outstanding_present_request) +@@ -1654,6 +1672,7 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + VkSwapchainLatencyCreateInfoNV swapchain_latency_create_info; + VkSwapchainPresentModesCreateInfoEXT present_modes_info; + VkDevice vk_device = chain->queue->device->vk_device; ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); + VkCommandPoolCreateInfo command_pool_create_info; + VkSwapchainCreateInfoKHR swapchain_create_info; + VkPresentModeKHR present_mode_group[2]; +@@ -1673,6 +1692,9 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + if (chain->present.is_surface_lost) + return; + ++ if (lfx2) ++ lfx2->ImplicitContextReset(chain->queue->device->lfx2_context.implicit_context); ++ + /* If we fail to query formats we are hosed, treat it as a SURFACE_LOST scenario. */ + if (!dxgi_vk_swap_chain_update_formats(chain)) + { +diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h +index 64858c37..8fd577e4 100644 +--- a/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/vkd3d_private.h +@@ -48,6 +48,8 @@ + #include + #include + ++#include "latencyflex2.h" ++ + #define VK_CALL(f) (vk_procs->f) + + #define MAKE_MAGIC(a,b,c,d) (((uint32_t)a) | (((uint32_t)b) << 8) | (((uint32_t)c) << 16) | (((uint32_t)d) << 24)) +@@ -4533,6 +4535,13 @@ struct vkd3d_device_frame_markers + uint64_t consumed_present_id; + }; + ++struct vkd3d_lfx2_context { ++ pthread_mutex_t current_implicit_frame_lock; ++ lfx2ImplicitContext *implicit_context; ++ lfx2Dx12Context *dx12_context; ++ lfx2Frame *current_implicit_frame; ++}; ++ + /* ID3D12Device */ + typedef ID3D12Device12 d3d12_device_iface; + +@@ -4541,6 +4550,7 @@ struct vkd3d_descriptor_qa_heap_buffer_data; + + /* ID3D12DeviceExt */ + typedef ID3D12DeviceExt1 d3d12_device_vkd3d_ext_iface; ++typedef ID3DLfx2ExtDevice d3d12_device_lfx2_ext_iface; + + /* ID3D12DXVKInteropDevice */ + typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface; 
+@@ -4766,6 +4776,7 @@ struct d3d12_device + d3d12_device_vkd3d_ext_iface ID3D12DeviceExt_iface; + d3d12_dxvk_interop_device_iface ID3D12DXVKInteropDevice_iface; + d3d_low_latency_device_iface ID3DLowLatencyDevice_iface; ++ d3d12_device_lfx2_ext_iface ID3D12DeviceLfx2_iface; + LONG refcount; + + VkDevice vk_device; +@@ -4831,6 +4842,7 @@ struct d3d12_device + struct hash_map vertex_input_pipelines; + rwlock_t fragment_output_lock; + struct hash_map fragment_output_pipelines; ++ struct vkd3d_lfx2_context lfx2_context; + #ifdef VKD3D_ENABLE_BREADCRUMBS + struct vkd3d_breadcrumb_tracer breadcrumb_tracer; + #endif +@@ -5736,4 +5748,24 @@ static inline const void *vk_find_pnext(const void *pnext, VkStructureType sType + return base_in; + } + ++struct vkd3d_lfx2_vtable { ++ struct lfx2Dx12Context *(*Dx12ContextCreate)(ID3D12Device *device); ++ void (*Dx12ContextRelease)(struct lfx2Dx12Context *context); ++ lfx2Dx12SubmitAux (*Dx12ContextBeforeSubmit)(struct lfx2Dx12Context *context, ID3D12CommandQueue *queue); ++ void (*Dx12ContextBeginFrame)(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++ void (*Dx12ContextEndFrame)(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++ lfx2Timestamp (*TimestampNow)(void); ++ lfx2Timestamp (*TimestampFromQpc)(uint64_t qpc); ++ struct lfx2ImplicitContext *(*ImplicitContextCreate)(void); ++ void (*ImplicitContextRelease)(struct lfx2ImplicitContext *context); ++ void (*ImplicitContextReset)(struct lfx2ImplicitContext *context); ++ struct lfx2Frame *(*FrameCreateImplicit)(struct lfx2ImplicitContext *context, lfx2Timestamp *out_timestamp); ++ struct lfx2Frame *(*FrameDequeueImplicit)(struct lfx2ImplicitContext *context, bool critical); ++ void (*FrameRelease)(struct lfx2Frame *frame); ++}; ++ ++struct vkd3d_lfx2_vtable *vkd3d_lfx2_get_vtable(void); ++void vkd3d_lfx2_context_init(struct vkd3d_lfx2_context *context, d3d12_device_iface *device); ++void vkd3d_lfx2_context_free(struct vkd3d_lfx2_context *context); ++ + #endif 
/* __VKD3D_PRIVATE_H */ diff --git a/patches/lfx2-vkd3d.patch.old b/patches/lfx2-vkd3d.patch.old new file mode 100644 index 0000000000..bbf76e6900 --- /dev/null +++ b/patches/lfx2-vkd3d.patch.old @@ -0,0 +1,616 @@ +diff --git a/include/latencyflex2.h b/include/latencyflex2.h +new file mode 100644 +index 00000000..6bdc1f84 +--- /dev/null ++++ b/include/latencyflex2.h +@@ -0,0 +1,129 @@ ++#ifndef LATENCYFLEX2_H ++#define LATENCYFLEX2_H ++ ++#define LFX2_DX12 ++ ++#include ++#include ++#include ++#include ++#include ++#ifdef LFX2_DX12 ++#include ++#endif ++ ++#ifdef _WIN32 ++#define LFX2_API __declspec(dllimport) ++#else ++#define LFX2_API ++#endif ++ ++typedef struct lfx2Dx12SubmitAux { ++ ID3D12GraphicsCommandList* executeBefore; ++ ID3D12GraphicsCommandList* executeAfter; ++ ID3D12Fence* fence; ++ uint64_t fenceValue; ++} lfx2Dx12SubmitAux; ++ ++ ++typedef enum lfx2MarkType { ++ lfx2MarkTypeBegin, ++ lfx2MarkTypeEnd, ++} lfx2MarkType; ++ ++typedef struct lfx2Context lfx2Context; ++ ++#if defined(LFX2_DX12) ++typedef struct lfx2Dx12Context lfx2Dx12Context; ++#endif ++ ++/** ++ * A write handle for frame markers. 
++ */ ++typedef struct lfx2Frame lfx2Frame; ++ ++typedef struct lfx2ImplicitContext lfx2ImplicitContext; ++ ++typedef uint64_t lfx2Timestamp; ++ ++typedef uint32_t lfx2SectionId; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif // __cplusplus ++ ++#if defined(LFX2_DX12) ++LFX2_API struct lfx2Dx12Context *lfx2Dx12ContextCreate(ID3D12Device* device); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextAddRef(struct lfx2Dx12Context *context); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextRelease(struct lfx2Dx12Context *context); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API ++lfx2Dx12SubmitAux lfx2Dx12ContextBeforeSubmit(struct lfx2Dx12Context *context, ++ ID3D12CommandQueue* queue); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextBeginFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ ++#if defined(LFX2_DX12) ++LFX2_API void lfx2Dx12ContextEndFrame(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++#endif ++ ++LFX2_API lfx2Timestamp lfx2TimestampNow(void); ++ ++#if defined(_WIN32) ++LFX2_API lfx2Timestamp lfx2TimestampFromQpc(uint64_t qpc); ++#endif ++ ++LFX2_API void lfx2SleepUntil(lfx2Timestamp target); ++ ++LFX2_API struct lfx2Context *lfx2ContextCreate(void); ++ ++LFX2_API void lfx2ContextAddRef(struct lfx2Context *context); ++ ++LFX2_API void lfx2ContextRelease(struct lfx2Context *context); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameCreate(struct lfx2Context *context, ++ lfx2Timestamp *out_timestamp); ++ ++LFX2_API void lfx2FrameAddRef(struct lfx2Frame *frame); ++ ++LFX2_API void lfx2FrameRelease(struct lfx2Frame *frame); ++ ++LFX2_API ++void lfx2MarkSection(struct lfx2Frame *frame, ++ lfx2SectionId section_id, ++ enum lfx2MarkType mark_type, ++ lfx2Timestamp timestamp); ++ ++LFX2_API struct lfx2ImplicitContext *lfx2ImplicitContextCreate(void); ++ ++LFX2_API void lfx2ImplicitContextAddRef(struct lfx2ImplicitContext *context); ++ ++LFX2_API void 
lfx2ImplicitContextRelease(struct lfx2ImplicitContext *context); ++ ++LFX2_API void lfx2ImplicitContextReset(struct lfx2ImplicitContext *context); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameCreateImplicit(struct lfx2ImplicitContext *context, ++ lfx2Timestamp *out_timestamp); ++ ++LFX2_API ++struct lfx2Frame *lfx2FrameDequeueImplicit(struct lfx2ImplicitContext *context, ++ bool critical); ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif // __cplusplus ++ ++#endif /* LATENCYFLEX2_H */ +diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl +index 3e615d76..92c5d447 100644 +--- a/include/vkd3d_device_vkd3d_ext.idl ++++ b/include/vkd3d_device_vkd3d_ext.idl +@@ -54,3 +54,16 @@ interface ID3D12DXVKInteropDevice : IUnknown + HRESULT LockCommandQueue(ID3D12CommandQueue *queue); + HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue); + } ++ ++[ ++ uuid(851a9f0f-5da0-4850-b563-a7bbc414f4e6), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3DLfx2ExtDevice : IUnknown ++{ ++ void MarkRenderStart(void *frame); ++ void MarkRenderEnd(void *frame); ++ void ImplicitBeginFrame(UINT64 *out_timestamp, void *out_frame); ++} +\ No newline at end of file +diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c +index a3f7a3ad..67e67b28 100644 +--- a/libs/vkd3d/command.c ++++ b/libs/vkd3d/command.c +@@ -17140,6 +17140,75 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + d3d12_command_queue_add_submission(command_queue, &sub); + } + ++static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ++ ID3D12Fence *fence_iface, UINT64 value); ++ ++static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandListsLFX2(ID3D12CommandQueue *iface, ++ UINT command_list_count, ID3D12CommandList *const *command_lists) ++{ ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ struct 
vkd3d_lfx2_context *lfx2_context = &command_queue->device->lfx2_context; ++ UINT new_command_list_count = command_list_count, i = 0; ++ ID3D12CommandList **new_command_lists; ++ struct lfx2Dx12SubmitAux lfx2_aux; ++ ++ if (!lfx2 || command_queue->desc.Type != D3D12_COMMAND_LIST_TYPE_DIRECT) ++ { ++ d3d12_command_queue_ExecuteCommandLists(iface, command_list_count, command_lists); ++ return; ++ } ++ ++ pthread_mutex_lock(&lfx2_context->current_implicit_frame_lock); ++ if (!lfx2_context->current_implicit_frame) ++ { ++ lfx2_context->current_implicit_frame = lfx2->FrameDequeueImplicit(lfx2_context->implicit_context, false); ++ if (lfx2_context->current_implicit_frame) { ++ lfx2->Dx12ContextBeginFrame(lfx2_context->dx12_context, lfx2_context->current_implicit_frame); ++ } ++ } ++ pthread_mutex_unlock(&lfx2_context->current_implicit_frame_lock); ++ ++ lfx2_aux = lfx2->Dx12ContextBeforeSubmit(lfx2_context->dx12_context, iface); ++ ++ if (lfx2_aux.executeBefore) ++ new_command_list_count++; ++ ++ if (lfx2_aux.executeAfter) ++ new_command_list_count++; ++ ++ if (!(new_command_lists = vkd3d_calloc(new_command_list_count, sizeof(*new_command_lists)))) ++ { ++ ERR("Failed to allocate command list array."); ++ return; ++ } ++ ++ if (lfx2_aux.executeBefore) ++ new_command_lists[i++] = (ID3D12CommandList *)lfx2_aux.executeBefore; ++ ++ memcpy(&new_command_lists[i], command_lists, command_list_count * sizeof(*command_lists)); ++ i += command_list_count; ++ ++ if (lfx2_aux.executeAfter) ++ new_command_lists[i++] = (ID3D12CommandList *)lfx2_aux.executeAfter; ++ ++ d3d12_command_queue_ExecuteCommandLists(iface, new_command_list_count, new_command_lists); ++ ++ if (lfx2_aux.executeBefore) ++ d3d12_command_list_Release((d3d12_command_list_iface *)lfx2_aux.executeBefore); ++ ++ if (lfx2_aux.executeAfter) ++ d3d12_command_list_Release((d3d12_command_list_iface *)lfx2_aux.executeAfter); ++ ++ vkd3d_free(new_command_lists); ++ ++ if (lfx2_aux.fence) ++ { ++ 
d3d12_command_queue_Signal(iface, lfx2_aux.fence, lfx2_aux.fenceValue); ++ d3d12_fence_Release((d3d12_fence_iface *)lfx2_aux.fence); ++ } ++} ++ + static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue *iface, + UINT metadata, const void *data, UINT size) + { +@@ -17311,7 +17380,7 @@ static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = + /* ID3D12CommandQueue methods */ + d3d12_command_queue_UpdateTileMappings, + d3d12_command_queue_CopyTileMappings, +- d3d12_command_queue_ExecuteCommandLists, ++ d3d12_command_queue_ExecuteCommandListsLFX2, + d3d12_command_queue_SetMarker, + d3d12_command_queue_BeginEvent, + d3d12_command_queue_EndEvent, +diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c +index 0b1bd876..0689029f 100644 +--- a/libs/vkd3d/device.c ++++ b/libs/vkd3d/device.c +@@ -3283,6 +3283,9 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk + extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface); + extern ULONG STDMETHODCALLTYPE d3d12_dxvk_interop_device_AddRef(ID3D12DXVKInteropDevice *iface); + ++/* ID3DLfx2ExtDevice */ ++extern ULONG STDMETHODCALLTYPE d3d12_device_lfx2_ext_AddRef(d3d12_device_lfx2_ext_iface *iface); ++ + HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + REFIID riid, void **object) + { +@@ -3328,6 +3331,14 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + return S_OK; + } + ++ if (IsEqualGUID(riid, &IID_ID3DLfx2ExtDevice)) ++ { ++ struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ d3d12_device_lfx2_ext_AddRef(&device->ID3D12DeviceLfx2_iface); ++ *object = &device->ID3D12DeviceLfx2_iface; ++ return S_OK; ++ } ++ + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; +@@ -3374,6 +3385,7 @@ static void d3d12_device_destroy(struct d3d12_device *device) + vkd3d_private_store_destroy(&device->private_store); + + 
vkd3d_cleanup_format_info(device); ++ vkd3d_lfx2_context_free(&device->lfx2_context); + vkd3d_memory_info_cleanup(&device->memory_info, device); + vkd3d_queue_timeline_trace_cleanup(&device->queue_timeline_trace); + vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); +@@ -7007,7 +7019,7 @@ static D3D12_RESOURCE_STATES vkd3d_barrier_layout_to_resource_state(D3D12_BARRIE + } + + static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource3(d3d12_device_iface *iface, +- const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, ++ const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC1 *desc, D3D12_BARRIER_LAYOUT initial_layout, + const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, + UINT32 num_castable_formats, const DXGI_FORMAT *castable_formats, REFIID iid, void **resource) +@@ -8369,6 +8381,7 @@ static void vkd3d_scratch_pool_init(struct d3d12_device *device) + device->scratch_pools[VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS].scratch_buffer_size = + VKD3D_SCRATCH_BUFFER_COUNT_INDIRECT_PREPROCESS; + } ++extern CONST_VTBL struct ID3DLfx2ExtDeviceVtbl d3d12_device_lfx2_ext_vtbl; + + static HRESULT d3d12_device_init(struct d3d12_device *device, + struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info) +@@ -8414,6 +8427,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + + device->ID3D12DeviceExt_iface.lpVtbl = &d3d12_device_vkd3d_ext_vtbl; + device->ID3D12DXVKInteropDevice_iface.lpVtbl = &d3d12_dxvk_interop_device_vtbl; ++ device->ID3D12DeviceLfx2_iface.lpVtbl = &d3d12_device_lfx2_ext_vtbl; + + if ((rc = rwlock_init(&device->vertex_input_lock))) + { +@@ -8480,6 +8494,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + } + #endif + ++ vkd3d_lfx2_context_init(&device->lfx2_context, (d3d12_device_iface *)device); ++ + if (vkd3d_descriptor_debug_active_qa_checks()) + { + if 
(FAILED(hr = vkd3d_descriptor_debug_alloc_global_info(&device->descriptor_qa_global_info, +@@ -8521,6 +8537,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + out_cleanup_descriptor_qa_global_info: + vkd3d_descriptor_debug_free_global_info(device->descriptor_qa_global_info, device); + out_cleanup_breadcrumb_tracer: ++ vkd3d_lfx2_context_free(&device->lfx2_context); + #ifdef VKD3D_ENABLE_BREADCRUMBS + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) + vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device); +diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c +index 5bb7eca8..e778f479 100644 +--- a/libs/vkd3d/device_vkd3d_ext.c ++++ b/libs/vkd3d/device_vkd3d_ext.c +@@ -232,6 +232,65 @@ CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl = + d3d12_device_vkd3d_ext_CaptureUAVInfo + }; + ++static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceLfx2(d3d12_device_lfx2_ext_iface *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceLfx2_iface); ++} ++ ++ULONG STDMETHODCALLTYPE d3d12_device_lfx2_ext_AddRef(d3d12_device_lfx2_ext_iface *iface) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ return d3d12_device_add_ref(device); ++} ++ ++static ULONG STDMETHODCALLTYPE d3d12_device_lfx2_ext_Release(d3d12_device_lfx2_ext_iface *iface) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ return d3d12_device_release(device); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_lfx2_ext_QueryInterface(d3d12_device_lfx2_ext_iface *iface, REFIID iid, void **out) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); ++ return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_lfx2_ext_ImplicitBeginFrame(d3d12_device_lfx2_ext_iface *iface, UINT64 
*out_timestamp, ++ void *out_frame) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ ++ *(lfx2Frame**)out_frame = lfx2->FrameCreateImplicit(device->lfx2_context.implicit_context, out_timestamp); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_lfx2_ext_MarkRenderStart(d3d12_device_lfx2_ext_iface *iface, void *frame) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ lfx2->Dx12ContextBeginFrame(device->lfx2_context.dx12_context, (lfx2Frame*)frame); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_lfx2_ext_MarkRenderEnd(d3d12_device_lfx2_ext_iface *iface, void *frame) ++{ ++ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceLfx2(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ lfx2->Dx12ContextEndFrame(device->lfx2_context.dx12_context, (lfx2Frame*)frame); ++} ++ ++CONST_VTBL struct ID3DLfx2ExtDeviceVtbl d3d12_device_lfx2_ext_vtbl = ++{ ++ /* IUnknown methods */ ++ d3d12_device_lfx2_ext_QueryInterface, ++ d3d12_device_lfx2_ext_AddRef, ++ d3d12_device_lfx2_ext_Release, ++ ++ /* ID3D12DeviceLfx2 methods */ ++ d3d12_device_lfx2_ext_MarkRenderStart, ++ d3d12_device_lfx2_ext_MarkRenderEnd, ++ d3d12_device_lfx2_ext_ImplicitBeginFrame, ++}; + + static inline struct d3d12_device *d3d12_device_from_ID3D12DXVKInteropDevice(ID3D12DXVKInteropDevice *iface) + { +diff --git a/libs/vkd3d/lfx2.c b/libs/vkd3d/lfx2.c +new file mode 100644 +index 00000000..5fd50b91 +--- /dev/null ++++ b/libs/vkd3d/lfx2.c +@@ -0,0 +1,70 @@ ++#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API ++ ++#include "vkd3d_private.h" ++ ++static pthread_once_t library_once = PTHREAD_ONCE_INIT; ++static struct vkd3d_lfx2_vtable lfx2_vtable; ++static BOOL lfx2_available; ++ ++static void vkd3d_lfx2_load(void) ++{ ++ HMODULE module = LoadLibraryA("latencyflex2_rust.dll"); ++ if (!module) ++ { ++ 
lfx2_available = false; ++ return; ++ } ++ ++#define LOAD_FUNCTION(name) lfx2_vtable.name = (void *)GetProcAddress(module, "lfx2" #name) ++ ++ LOAD_FUNCTION(Dx12ContextCreate); ++ LOAD_FUNCTION(Dx12ContextRelease); ++ LOAD_FUNCTION(Dx12ContextBeforeSubmit); ++ LOAD_FUNCTION(Dx12ContextBeginFrame); ++ LOAD_FUNCTION(Dx12ContextEndFrame); ++ LOAD_FUNCTION(TimestampNow); ++ LOAD_FUNCTION(TimestampFromQpc); ++ LOAD_FUNCTION(ImplicitContextCreate); ++ LOAD_FUNCTION(ImplicitContextRelease); ++ LOAD_FUNCTION(ImplicitContextReset); ++ LOAD_FUNCTION(FrameCreateImplicit); ++ LOAD_FUNCTION(FrameDequeueImplicit); ++ LOAD_FUNCTION(FrameRelease); ++ ++#undef LOAD_FUNCTION ++ ++ lfx2_available = true; ++} ++ ++struct vkd3d_lfx2_vtable *vkd3d_lfx2_get_vtable(void) ++{ ++ pthread_once(&library_once, vkd3d_lfx2_load); ++ return lfx2_available ? &lfx2_vtable : NULL; ++} ++ ++void vkd3d_lfx2_context_init(struct vkd3d_lfx2_context *context, d3d12_device_iface *device) ++{ ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ if (!lfx2) ++ return; ++ ++ pthread_mutex_init(&context->current_implicit_frame_lock, NULL); ++ context->current_implicit_frame = NULL; ++ context->dx12_context = lfx2->Dx12ContextCreate((ID3D12Device *)device); ++ context->implicit_context = lfx2->ImplicitContextCreate(); ++} ++ ++void vkd3d_lfx2_context_free(struct vkd3d_lfx2_context *context) ++{ ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ if (!lfx2) ++ return; ++ ++ if (context->current_implicit_frame) ++ lfx2->FrameRelease(context->current_implicit_frame); ++ if (context->implicit_context) ++ lfx2->ImplicitContextRelease(context->implicit_context); ++ if (context->dx12_context) ++ lfx2->Dx12ContextRelease(context->dx12_context); ++ pthread_mutex_destroy(&context->current_implicit_frame_lock); ++} +\ No newline at end of file +diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build +index 04394fd9..ad82667f 100644 +--- a/libs/vkd3d/meson.build ++++ b/libs/vkd3d/meson.build +@@ 
-75,7 +75,8 @@ vkd3d_src = [ + 'raytracing_pipeline.c', + 'acceleration_structure.c', + 'swapchain.c', +- 'queue_timeline.c' ++ 'queue_timeline.c', ++ 'lfx2.c' + ] + + if enable_renderdoc +diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c +index d0bc526d..d5366c14 100644 +--- a/libs/vkd3d/swapchain.c ++++ b/libs/vkd3d/swapchain.c +@@ -837,6 +837,8 @@ static void dxgi_vk_swap_chain_present_callback(void *chain); + static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *iface, UINT SyncInterval, UINT PresentFlags, const DXGI_PRESENT_PARAMETERS *pPresentParameters) + { + struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); ++ struct vkd3d_lfx2_context *lfx2_context = &chain->queue->device->lfx2_context; + struct dxgi_vk_swap_chain_present_request *request; + struct vkd3d_queue_timeline_trace_cookie cookie; + TRACE("iface %p, SyncInterval %u, PresentFlags #%x, pPresentParameters %p.\n", +@@ -848,6 +850,22 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if + if (PresentFlags & DXGI_PRESENT_TEST) + return S_OK; + ++ pthread_mutex_lock(&lfx2_context->current_implicit_frame_lock); ++ if (!lfx2_context->current_implicit_frame) ++ { ++ lfx2_context->current_implicit_frame = lfx2->FrameDequeueImplicit(lfx2_context->implicit_context, true); ++ if (lfx2_context->current_implicit_frame) { ++ lfx2->Dx12ContextBeginFrame(lfx2_context->dx12_context, lfx2_context->current_implicit_frame); ++ } ++ } ++ if (lfx2_context->current_implicit_frame) ++ { ++ lfx2->Dx12ContextEndFrame(lfx2_context->dx12_context, lfx2_context->current_implicit_frame); ++ lfx2->FrameRelease(lfx2_context->current_implicit_frame); ++ lfx2_context->current_implicit_frame = NULL; ++ } ++ pthread_mutex_unlock(&lfx2_context->current_implicit_frame_lock); ++ + /* If we missed the event signal last frame, we have to wait for it now. 
+ * Otherwise, we end up in a floating state where our waits and thread signals might not stay in sync anymore. */ + if (chain->outstanding_present_request) +@@ -1454,6 +1472,7 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + VkPhysicalDevice vk_physical_device = chain->queue->device->vk_physical_device; + VkSwapchainPresentModesCreateInfoEXT present_modes_info; + VkDevice vk_device = chain->queue->device->vk_device; ++ struct vkd3d_lfx2_vtable *lfx2 = vkd3d_lfx2_get_vtable(); + VkCommandPoolCreateInfo command_pool_create_info; + VkSwapchainCreateInfoKHR swapchain_create_info; + VkPresentModeKHR present_mode_group[2]; +@@ -1473,6 +1492,9 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk + if (chain->present.is_surface_lost) + return; + ++ if (lfx2) ++ lfx2->ImplicitContextReset(chain->queue->device->lfx2_context.implicit_context); ++ + /* If we fail to query formats we are hosed, treat it as a SURFACE_LOST scenario. */ + if (!dxgi_vk_swap_chain_update_formats(chain)) + { +diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h +index 935f3dc8..1eea15da 100644 +--- a/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/vkd3d_private.h +@@ -47,6 +47,8 @@ + #include + #include + ++#include "latencyflex2.h" ++ + #define VK_CALL(f) (vk_procs->f) + + #define MAKE_MAGIC(a,b,c,d) (((uint32_t)a) | (((uint32_t)b) << 8) | (((uint32_t)c) << 16) | (((uint32_t)d) << 24)) +@@ -4456,6 +4458,13 @@ struct vkd3d_cached_command_allocator + uint32_t vk_family_index; + }; + ++struct vkd3d_lfx2_context { ++ pthread_mutex_t current_implicit_frame_lock; ++ lfx2ImplicitContext *implicit_context; ++ lfx2Dx12Context *dx12_context; ++ lfx2Frame *current_implicit_frame; ++}; ++ + /* ID3D12Device */ + typedef ID3D12Device12 d3d12_device_iface; + +@@ -4464,6 +4473,7 @@ struct vkd3d_descriptor_qa_heap_buffer_data; + + /* ID3D12DeviceExt */ + typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; ++typedef ID3DLfx2ExtDevice 
d3d12_device_lfx2_ext_iface; + + /* ID3D12DXVKInteropDevice */ + typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface; +@@ -4591,6 +4601,7 @@ struct d3d12_device + d3d12_device_iface ID3D12Device_iface; + d3d12_device_vkd3d_ext_iface ID3D12DeviceExt_iface; + d3d12_dxvk_interop_device_iface ID3D12DXVKInteropDevice_iface; ++ d3d12_device_lfx2_ext_iface ID3D12DeviceLfx2_iface; + LONG refcount; + + VkDevice vk_device; +@@ -4654,6 +4665,7 @@ struct d3d12_device + struct hash_map vertex_input_pipelines; + rwlock_t fragment_output_lock; + struct hash_map fragment_output_pipelines; ++ struct vkd3d_lfx2_context lfx2_context; + #ifdef VKD3D_ENABLE_BREADCRUMBS + struct vkd3d_breadcrumb_tracer breadcrumb_tracer; + #endif +@@ -5508,4 +5520,24 @@ HANDLE vkd3d_open_kmt_handle(HANDLE kmt_handle); + #define VKD3D_DRIVER_VERSION_PATCH_NV(v) (((v) >> 6) & 0xff) + #define VKD3D_DRIVER_VERSION_MAKE_NV(major, minor, patch) (((uint32_t)(major) << 22) | ((uint32_t)(minor) << 14) | ((uint32_t)(patch) << 6)) + ++struct vkd3d_lfx2_vtable { ++ struct lfx2Dx12Context *(*Dx12ContextCreate)(ID3D12Device *device); ++ void (*Dx12ContextRelease)(struct lfx2Dx12Context *context); ++ lfx2Dx12SubmitAux (*Dx12ContextBeforeSubmit)(struct lfx2Dx12Context *context, ID3D12CommandQueue *queue); ++ void (*Dx12ContextBeginFrame)(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++ void (*Dx12ContextEndFrame)(struct lfx2Dx12Context *context, struct lfx2Frame *frame); ++ lfx2Timestamp (*TimestampNow)(void); ++ lfx2Timestamp (*TimestampFromQpc)(uint64_t qpc); ++ struct lfx2ImplicitContext *(*ImplicitContextCreate)(void); ++ void (*ImplicitContextRelease)(struct lfx2ImplicitContext *context); ++ void (*ImplicitContextReset)(struct lfx2ImplicitContext *context); ++ struct lfx2Frame *(*FrameCreateImplicit)(struct lfx2ImplicitContext *context, lfx2Timestamp *out_timestamp); ++ struct lfx2Frame *(*FrameDequeueImplicit)(struct lfx2ImplicitContext *context, bool critical); ++ void 
(*FrameRelease)(struct lfx2Frame *frame); ++}; ++ ++struct vkd3d_lfx2_vtable *vkd3d_lfx2_get_vtable(void); ++void vkd3d_lfx2_context_init(struct vkd3d_lfx2_context *context, d3d12_device_iface *device); ++void vkd3d_lfx2_context_free(struct vkd3d_lfx2_context *context); ++ + #endif /* __VKD3D_PRIVATE_H */ diff --git a/patches/makefile.patch b/patches/makefile.patch new file mode 100644 index 0000000000..db9736712e --- /dev/null +++ b/patches/makefile.patch @@ -0,0 +1,21 @@ +--- a/Makefile.in ++++ b/Makefile.in +@@ -650,6 +650,18 @@ + touch $@ + + ++## ++## LatencyFleX2 and Upscalers ++## ++$(OBJ)/.vkd3d-proton-post-build32: ++ mkdir -p "$(DST_DIR)"/lib/wine/lfx2 ++ cp $(SRCDIR)/lfx2-bin/latencyflex2_rust_32.dll "$(DST_DIR)"/lib/wine/lfx2/latencyflex2_rust.dll ++ ++$(OBJ)/.vkd3d-proton-post-build64: ++ mkdir -p "$(DST_DIR)"/lib64/wine/lfx2 ++ cp $(SRCDIR)/lfx2-bin/latencyflex2_rust.dll "$(DST_DIR)"/lib64/wine/lfx2 ++ mkdir -p "$(DST_DIR)"/upscalers ++ cp -r $(SRCDIR)/upscalers/* "$(DST_DIR)"/upscalers + + ## + ## mediaconv diff --git a/patches/proton.patch b/patches/proton.patch new file mode 100644 index 0000000000..7a1181a60f --- /dev/null +++ b/patches/proton.patch @@ -0,0 +1,130 @@ +--- a/proton ++++ b/proton +@@ -414,6 +414,7 @@ + self.bin_dir = self.path("files/bin/") + self.lib_dir = self.path("files/lib/") + self.lib64_dir = self.path("files/lib64/") ++ self.upscalers_dir = self.path("files/upscalers/") + self.fonts_dir = self.path("files/share/fonts/") + self.wine_fonts_dir = self.path("files/share/wine/fonts/") + self.wine_inf = self.path("files/share/wine/wine.inf") +@@ -800,6 +801,8 @@ + # collect configuration info + steamdir = os.environ["STEAM_COMPAT_CLIENT_INSTALL_PATH"] + ++ use_upscaler = "PROTON_UPSCALER" in os.environ and nonzero(os.environ["PROTON_UPSCALER"]) ++ + use_wined3d = "wined3d" in g_session.compat_config + use_dxvk_dxgi = not use_wined3d and \ + not ("WINEDLLOVERRIDES" in g_session.env and "dxgi=b" in g_session.env["WINEDLLOVERRIDES"]) 
+@@ -843,6 +846,7 @@ + g_proton.fonts_dir, + g_proton.lib_dir, + g_proton.lib64_dir, ++ g_proton.upscalers_dir, + steamdir, + getmtimestr(steamdir, 'legacycompat', 'steamclient.dll'), + getmtimestr(steamdir, 'legacycompat', 'steamclient64.dll'), +@@ -853,6 +857,7 @@ + str(use_dxvk_dxgi), + builtin_dll_copy, + str(use_nvapi), ++ str(use_upscaler), + )) + + # check whether any prefix config has changed +@@ -957,6 +962,11 @@ + prefix=self.prefix_dir, track_file=tracked_files, link_debug=True) + g_session.dlloverrides[f] = "n" + ++ try_copy(g_proton.lib64_dir + "wine/lfx2/latencyflex2_rust.dll", "drive_c/windows/system32", ++ prefix=self.prefix_dir, track_file=tracked_files, link_debug=True) ++ try_copy(g_proton.lib_dir + "wine/lfx2/latencyflex2_rust.dll", "drive_c/windows/syswow64", ++ prefix=self.prefix_dir, track_file=tracked_files, link_debug=True) ++ + # If the user requested the NVAPI be available, copy it into place. + # If they didn't, clean up any stray nvapi DLLs. + if use_nvapi: +@@ -982,11 +992,59 @@ + # Try to detect known DLLs that ship with the NVIDIA Linux Driver + # and add them into the prefix + nvidia_wine_dll_dir = find_nvidia_wine_dll_dir() +- if nvidia_wine_dll_dir: ++ if nvidia_wine_dll_dir and not use_upscaler: + for dll in ["_nvngx.dll", "nvngx.dll"]: + try_copy(nvidia_wine_dll_dir + "/" + dll, "drive_c/windows/system32", optional=True, + prefix=self.prefix_dir, track_file=tracked_files, link_debug=True) + ++ if use_upscaler: ++ # very hacky way to do that ++ # but that way there's no need to call wine early ++ # it's needed so that nvngx.dll is loaded ++ with open(os.path.join(self.prefix_dir, "system.reg"), 'rt+') as system_reg: ++ count = 0 ++ for line in system_reg: ++ count += "41FCC608-8496-4DEF-B43E-7D9BD675A6FF" in line ++ if count < 2: ++ system_reg.writelines([ ++ '\n[System\\\\ControlSet001\\\\Services\\\\nvlddmkm] 1699294680\n', ++ '#time=1da10dd9364206e\n', ++ '"{41FCC608-8496-4DEF-B43E-7D9BD675A6FF}"=hex:01\n', ++ '\n', ++ 
'[Software\\\\NVIDIA Corporation\\\\Global] 1699294680\n', ++ '#time=1da10dd93641b32\n', ++ '"{41FCC608-8496-4DEF-B43E-7D9BD675A6FF}"=hex:01\n' ++ ]) ++ ++ upscaler_to_use = os.environ["PROTON_UPSCALER"] ++ upscaler_dir = os.path.join(g_proton.upscalers_dir, upscaler_to_use) ++ if os.path.exists(upscaler_dir): ++ files = os.listdir(upscaler_dir) ++ if [file for file in files if file.endswith('.dll')]: ++ for f in files: ++ try_copy(os.path.join(upscaler_dir, f), "drive_c/windows/system32", ++ prefix=self.prefix_dir, track_file=tracked_files, link_debug=True) ++ sys.stderr.write("Upscaler: Using PROTON_UPSCALER=" + upscaler_to_use + os.linesep) ++ else: ++ sys.stderr.write("Upscaler: No DLL file found in the given folder" + os.linesep) ++ else: ++ sys.stderr.write("Upscaler: Folder " + upscaler_to_use + " doesn't exist" + os.linesep) ++ sys.stderr.flush() ++ else: ++ files_to_remove = [] ++ if os.path.exists(g_proton.upscalers_dir): ++ upscalers = os.listdir(g_proton.upscalers_dir) ++ for upscaler in upscalers: ++ upscaler = os.path.join(g_proton.upscalers_dir, upscaler) ++ for file in os.listdir(upscaler): ++ files_to_remove.append(file) ++ files_to_remove = list(set(files_to_remove)) ++ for file in files_to_remove: ++ path_to_remove = self.prefix_dir + "drive_c/windows/system32/" + file ++ if file_exists(path_to_remove, follow_symlinks=False): ++ os.unlink(path_to_remove) ++ ++ + setup_game_dir_drive() + setup_steam_dir_drive() + +@@ -1453,6 +1511,8 @@ + self.check_environment("PROTON_ENABLE_NVAPI", "enablenvapi") + self.check_environment("PROTON_FORCE_NVAPI", "forcenvapi") + self.check_environment("PROTON_ENABLE_AMD_AGS", "enableamdags") ++ self.check_environment("LFX2", "forcelfx2") ++ self.check_environment("LFX2", "enablenvapi") + + if "noesync" in self.compat_config: + self.env.pop("WINEESYNC", "") +@@ -1595,6 +1655,12 @@ + s = dll + "=" + setting + append_to_env_str(self.env, "WINEDLLOVERRIDES", s, ";") + ++ #enable lfx2 ++ if "forcelfx2" in 
self.compat_config: ++ self.env["DXVK_NVAPI_DRIVER_VERSION"] = "53713" ++ self.env["DXVK_NVAPI_ALLOW_OTHER_DRIVERS"] = "1" ++ self.env["DXVK_CONFIG"] = "dxgi.customVendorId = 10de;dxgi.hideAmdGpu = True;dxgi.hideNvidiaGpu = False;dxgi.customDeviceId = 2684;dxgi.customDeviceDesc = \"NVIDIA GeForce RTX 4090\"" ++ + def dump_dbg_env(self, f): + f.write("PATH=\"" + self.env["PATH"] + "\" \\\n") + f.write("\tTERM=\"xterm\" \\\n") #XXX diff --git a/patches/revert-patches.sh b/patches/revert-patches.sh new file mode 100755 index 0000000000..486bcbe2ef --- /dev/null +++ b/patches/revert-patches.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +git checkout -- proton +git checkout -- Makefile.in + +pushd dxvk || exit +git reset --hard HEAD +git clean -xdf +popd || exit + +pushd vkd3d-proton || exit +git reset --hard HEAD +git clean -xdf +popd || exit + +pushd wine || exit +git reset --hard HEAD +git clean -xdf +popd || exit + +pushd dxvk-nvapi || exit +git reset --hard HEAD +git clean -xdf +popd || exit diff --git a/patches/wine-hags-spoof.patch b/patches/wine-hags-spoof.patch new file mode 100644 index 0000000000..fe09c001e8 --- /dev/null +++ b/patches/wine-hags-spoof.patch @@ -0,0 +1,77 @@ +diff --git a/dlls/win32u/driver.c b/dlls/win32u/driver.c +index 7fcdbc4c383..ef73974bfb8 100644 +--- a/dlls/win32u/driver.c ++++ b/dlls/win32u/driver.c +@@ -1619,16 +1619,43 @@ NTSTATUS WINAPI NtGdiDdDDIDestroyDevice( const D3DKMT_DESTROYDEVICE *desc ) + return status; + } + ++static BOOL check_hags_enabled( void ) ++{ ++ const char *winehags = getenv( "WINEHAGS" ); ++ return winehags && *winehags && *winehags != '0'; ++} ++ + /****************************************************************************** + * NtGdiDdDDIQueryAdapterInfo (win32u.@) + */ + NTSTATUS WINAPI NtGdiDdDDIQueryAdapterInfo( D3DKMT_QUERYADAPTERINFO *desc ) + { ++ D3DKMT_WDDM_2_7_CAPS *d3dkmt_wddm_2_7_caps; ++ + if (!desc) + return STATUS_INVALID_PARAMETER; + +- FIXME("desc %p, type %d stub\n", desc, desc->Type); +- return 
STATUS_NOT_IMPLEMENTED; ++ TRACE("desc %p, type %d\n", desc, desc->Type); ++ ++ switch (desc->Type) ++ { ++ case KMTQAITYPE_WDDM_2_7_CAPS: ++ if (!desc->pPrivateDriverData || desc->PrivateDriverDataSize != sizeof(D3DKMT_WDDM_2_7_CAPS)) ++ return STATUS_INVALID_PARAMETER; ++ ++ d3dkmt_wddm_2_7_caps = desc->pPrivateDriverData; ++ d3dkmt_wddm_2_7_caps->HwSchSupported = 1; ++ d3dkmt_wddm_2_7_caps->HwSchEnabled = check_hags_enabled() ? 1 : 0; ++ d3dkmt_wddm_2_7_caps->HwSchEnabledByDefault = 0; ++ d3dkmt_wddm_2_7_caps->IndependentVidPnVSyncControl = 0; ++ break; ++ ++ default: ++ FIXME("type %d not supported\n", desc->Type); ++ return STATUS_NOT_IMPLEMENTED; ++ } ++ ++ return STATUS_SUCCESS; + } + + /****************************************************************************** +diff --git a/include/ddk/d3dkmthk.h b/include/ddk/d3dkmthk.h +index b7b99e2c146..a6ba2117cce 100644 +--- a/include/ddk/d3dkmthk.h ++++ b/include/ddk/d3dkmthk.h +@@ -773,6 +773,22 @@ typedef struct _D3DKMT_ENUMADAPTERS2 + D3DKMT_ADAPTERINFO *pAdapters; + } D3DKMT_ENUMADAPTERS2; + ++typedef struct _D3DKMT_WDDM_2_7_CAPS ++{ ++ union ++ { ++ struct ++ { ++ UINT HwSchSupported : 1; ++ UINT HwSchEnabled : 1; ++ UINT HwSchEnabledByDefault : 1; ++ UINT IndependentVidPnVSyncControl : 1; ++ UINT Reserved : 28; ++ }; ++ UINT Value; ++ }; ++} D3DKMT_WDDM_2_7_CAPS; ++ + #ifdef __cplusplus + extern "C" + {