Compare commits
570 Commits
valid-fl-1
...
master
Author | SHA1 | Date |
---|---|---|
Joshua Ashton | d00d035321 | |
Joshua Ashton | 253dc9027a | |
Derek Lesho | 146f5b8a74 | |
Hans-Kristian Arntzen | db4a8544a1 | |
Hans-Kristian Arntzen | 1d25b29413 | |
Hans-Kristian Arntzen | 34a04a1a7f | |
Hans-Kristian Arntzen | b839fe14bb | |
Hans-Kristian Arntzen | d3a76eee90 | |
Hans-Kristian Arntzen | 481680ecd8 | |
Hans-Kristian Arntzen | 11c82c84d1 | |
Hans-Kristian Arntzen | c0b9682c69 | |
Hans-Kristian Arntzen | 9d8abd2db5 | |
Derek Lesho | df1829e407 | |
Hans-Kristian Arntzen | be2aafff1a | |
Derek Lesho | 849537614a | |
Derek Lesho | f487db4756 | |
Hans-Kristian Arntzen | 6265a7b5ce | |
Hans-Kristian Arntzen | 4f4c96bb11 | |
Derek Lesho | a2439e766f | |
Hans-Kristian Arntzen | 21799b202b | |
Hans-Kristian Arntzen | 4ff504b52d | |
Hans-Kristian Arntzen | 6335e411bb | |
Hans-Kristian Arntzen | 11c943dd7e | |
Hans-Kristian Arntzen | 5b73139f18 | |
Hans-Kristian Arntzen | 73700f4c3a | |
Hans-Kristian Arntzen | a917d60ca5 | |
Hans-Kristian Arntzen | 8d780458f1 | |
Hans-Kristian Arntzen | 8da6ca6772 | |
Hans-Kristian Arntzen | 766da69afb | |
Hans-Kristian Arntzen | b7a960f94f | |
Hans-Kristian Arntzen | ee39209798 | |
Hans-Kristian Arntzen | afb87e013f | |
Hans-Kristian Arntzen | 433262c254 | |
Hans-Kristian Arntzen | 277bbe35e8 | |
Hans-Kristian Arntzen | 9451fdcab9 | |
Hans-Kristian Arntzen | 0640f44560 | |
Hans-Kristian Arntzen | b287864cd1 | |
Hans-Kristian Arntzen | 0a7b13fe7f | |
Hans-Kristian Arntzen | f704cb9776 | |
Hans-Kristian Arntzen | e17a7cb40c | |
Hans-Kristian Arntzen | 9e45c72256 | |
Hans-Kristian Arntzen | 2a8c762025 | |
Hans-Kristian Arntzen | 3b8a13e63d | |
Hans-Kristian Arntzen | 65804bbde5 | |
Hans-Kristian Arntzen | 233ff38175 | |
Hans-Kristian Arntzen | 4a07d9c038 | |
Hans-Kristian Arntzen | bcdac3180a | |
Hans-Kristian Arntzen | df11b5ba5a | |
Hans-Kristian Arntzen | e138a5117a | |
Hans-Kristian Arntzen | 96fdb71ae4 | |
Hans-Kristian Arntzen | fe707989fe | |
Hans-Kristian Arntzen | 6d3c5d53b0 | |
Hans-Kristian Arntzen | f93a581dae | |
Hans-Kristian Arntzen | b7bbdcabd4 | |
Hans-Kristian Arntzen | a28e4b6e11 | |
Hans-Kristian Arntzen | eda0b2fab2 | |
Hans-Kristian Arntzen | 7f5dbcfc40 | |
Hans-Kristian Arntzen | d333159c86 | |
Hans-Kristian Arntzen | 74eb676cfb | |
Hans-Kristian Arntzen | 5033904e10 | |
Hans-Kristian Arntzen | b34931eb17 | |
Hans-Kristian Arntzen | 7410f53912 | |
Hans-Kristian Arntzen | 089d2c6cb7 | |
Hans-Kristian Arntzen | 03fdbac59e | |
Hans-Kristian Arntzen | 7832eeb60d | |
Hans-Kristian Arntzen | 8a94c3ce0e | |
Hans-Kristian Arntzen | ddb425c5cb | |
Hans-Kristian Arntzen | ad7459551d | |
Hans-Kristian Arntzen | e3c36a47dd | |
Hans-Kristian Arntzen | ee8b8374b4 | |
Hans-Kristian Arntzen | ce00c9322d | |
Hans-Kristian Arntzen | b88b04e4f1 | |
Hans-Kristian Arntzen | 4a121b9aaa | |
Hans-Kristian Arntzen | 0ef6a8b798 | |
Hans-Kristian Arntzen | 49b6e67e7d | |
Hans-Kristian Arntzen | 2ef3fd469c | |
Hans-Kristian Arntzen | 22778b99be | |
Hans-Kristian Arntzen | b8b2a93aa6 | |
Hans-Kristian Arntzen | 14470d5456 | |
Hans-Kristian Arntzen | 3aad4edf6e | |
Hans-Kristian Arntzen | 3c92b3a1bc | |
Hans-Kristian Arntzen | 8473355a98 | |
Hans-Kristian Arntzen | 1438ff5637 | |
Hans-Kristian Arntzen | c3ee963d2f | |
Hans-Kristian Arntzen | 684e41fabe | |
Philip Rebohle | 1d869e3e21 | |
Tatsuyuki Ishi | 02c7ec404c | |
Hans-Kristian Arntzen | 9b5f3bfc26 | |
Hans-Kristian Arntzen | b4ab6c3f08 | |
Hans-Kristian Arntzen | 707af8152e | |
Hans-Kristian Arntzen | bc759be2af | |
Hans-Kristian Arntzen | 18f1d1c72e | |
Hans-Kristian Arntzen | 1b704287e5 | |
Hans-Kristian Arntzen | f975f09bb1 | |
Hans-Kristian Arntzen | 619a54810d | |
Hans-Kristian Arntzen | cecb8d6ebc | |
Hans-Kristian Arntzen | 8ae391e675 | |
Hans-Kristian Arntzen | a30205589f | |
Hans-Kristian Arntzen | abdef77695 | |
Hans-Kristian Arntzen | c132073df8 | |
Hans-Kristian Arntzen | 128852200a | |
Hans-Kristian Arntzen | 717026f903 | |
Hans-Kristian Arntzen | b849bd4256 | |
Georg Lehmann | d8905afd5d | |
Hans-Kristian Arntzen | de5b751468 | |
Hans-Kristian Arntzen | 219d9698b3 | |
Hans-Kristian Arntzen | acef5429c5 | |
Hans-Kristian Arntzen | 135aff4685 | |
Hans-Kristian Arntzen | 2f6a9e0d55 | |
Hans-Kristian Arntzen | 3a19dea7c7 | |
Tatsuyuki Ishi | 39d07dea2c | |
Tatsuyuki Ishi | 3577ca3144 | |
Tatsuyuki Ishi | 829ac72e3d | |
Hans-Kristian Arntzen | c64916686d | |
Hans-Kristian Arntzen | c4b00bbe1e | |
Hans-Kristian Arntzen | fd05839eb9 | |
Hans-Kristian Arntzen | 46470017a3 | |
Georg Lehmann | cbca29dd90 | |
Hans-Kristian Arntzen | c3fb6a6c5e | |
Hans-Kristian Arntzen | e8f1936ee2 | |
Hans-Kristian Arntzen | 4166eb042b | |
Hans-Kristian Arntzen | 7a002698f3 | |
Hans-Kristian Arntzen | 896e6fb868 | |
Hans-Kristian Arntzen | 8989360087 | |
Hans-Kristian Arntzen | f804ddc4c7 | |
Hans-Kristian Arntzen | 3b0d7e043d | |
Hans-Kristian Arntzen | 75e0506404 | |
Hans-Kristian Arntzen | 0f9d7dd10d | |
Hans-Kristian Arntzen | 7acc33ae39 | |
Hans-Kristian Arntzen | 7916d2a6d8 | |
Hans-Kristian Arntzen | 48157c29e8 | |
Hans-Kristian Arntzen | 467db76f90 | |
Hans-Kristian Arntzen | 2953ef8688 | |
Hans-Kristian Arntzen | f964532619 | |
Hans-Kristian Arntzen | 5a0c8289d8 | |
Hans-Kristian Arntzen | cca7613bca | |
Philip Rebohle | 910f15dff8 | |
Hans-Kristian Arntzen | a94e9b8b6a | |
Hans-Kristian Arntzen | 4ac0a3b455 | |
Hans-Kristian Arntzen | 300058d9a7 | |
Hans-Kristian Arntzen | 2e16a777ca | |
Hans-Kristian Arntzen | ac211d5f6a | |
Hans-Kristian Arntzen | 1dc4bbe5f2 | |
Tatsuyuki Ishi | 2965b7e379 | |
Tatsuyuki Ishi | 0d9c0a3903 | |
Robin Kertels | 1a773cfb71 | |
Robin Kertels | cdabda7805 | |
Robin Kertels | 8ac7aaca99 | |
Robin Kertels | 7e7c472005 | |
Hans-Kristian Arntzen | 71940797d1 | |
Hans-Kristian Arntzen | 4603c25d69 | |
Hans-Kristian Arntzen | 97201b8e93 | |
Hans-Kristian Arntzen | 51199752dd | |
Hans-Kristian Arntzen | ebe589d622 | |
Hans-Kristian Arntzen | 55a6847c61 | |
Hans-Kristian Arntzen | 04c020525c | |
Dean Beeler | 063ce7e4bd | |
Hans-Kristian Arntzen | 2c54e18245 | |
Philip Rebohle | bb2e35c539 | |
Philip Rebohle | d5ad5bb1de | |
Philip Rebohle | beb58f8472 | |
Hans-Kristian Arntzen | 358f95aff2 | |
Philip Rebohle | 119e00ed45 | |
Philip Rebohle | beaedbd857 | |
Philip Rebohle | 81927c5895 | |
Philip Rebohle | e7a6af4971 | |
Philip Rebohle | a1d5e6f39a | |
Hans-Kristian Arntzen | 4a05360a0a | |
Hans-Kristian Arntzen | 0c4df9b32c | |
Hans-Kristian Arntzen | 25c4bc18e7 | |
Hans-Kristian Arntzen | 30ec6b7f1f | |
Hans-Kristian Arntzen | c47a6a904b | |
Hans-Kristian Arntzen | 5044975152 | |
Hans-Kristian Arntzen | 8dc8b72807 | |
Hans-Kristian Arntzen | ae0dafa3a1 | |
Hans-Kristian Arntzen | 6c8542f7d6 | |
Hans-Kristian Arntzen | 2dcb1e2efc | |
Hans-Kristian Arntzen | 3095ed84d3 | |
Hans-Kristian Arntzen | db9b9a13de | |
Hans-Kristian Arntzen | 637834dc75 | |
Hans-Kristian Arntzen | 93928424a9 | |
Hans-Kristian Arntzen | c8b143c0bd | |
Hans-Kristian Arntzen | ca0a186a4b | |
Philip Rebohle | c9101b8ec3 | |
Philip Rebohle | 829c02bf90 | |
Philip Rebohle | e4184830c5 | |
Philip Rebohle | d1425ee4d1 | |
Denis Barkar | 8dda6df729 | |
Joshua Ashton | 2ed513b99a | |
Hans-Kristian Arntzen | 19e088cdfc | |
Hans-Kristian Arntzen | 241078d7e8 | |
Hans-Kristian Arntzen | e01589a33b | |
Hans-Kristian Arntzen | 2e704c5a5e | |
Hans-Kristian Arntzen | 6f43f450c8 | |
Hans-Kristian Arntzen | cfeaa18b09 | |
Hans-Kristian Arntzen | da63f0beac | |
Hans-Kristian Arntzen | 35e777f8a0 | |
Hans-Kristian Arntzen | 095a36cbaf | |
Philip Rebohle | 6378f1b880 | |
Philip Rebohle | 307190e96b | |
Hans-Kristian Arntzen | 2e8fb27182 | |
Hans-Kristian Arntzen | 1b5f7e8fc3 | |
Hans-Kristian Arntzen | cf65a78570 | |
Philip Rebohle | 1d3957fe6d | |
Philip Rebohle | c9abcfa656 | |
Hans-Kristian Arntzen | 03427c6ee6 | |
Hans-Kristian Arntzen | 09682f8417 | |
Hans-Kristian Arntzen | 6273780e50 | |
Hans-Kristian Arntzen | 6e915dd2c0 | |
Philip Rebohle | 34f5fc6a31 | |
Hans-Kristian Arntzen | 63530501a5 | |
Hans-Kristian Arntzen | dd6534f3f8 | |
Hans-Kristian Arntzen | 09997b4dd8 | |
Hans-Kristian Arntzen | 6d35f98e59 | |
Hans-Kristian Arntzen | e61cc0234a | |
Hans-Kristian Arntzen | c54895b4b7 | |
Hans-Kristian Arntzen | a6700d3d85 | |
Hans-Kristian Arntzen | f0cac9d97c | |
Hans-Kristian Arntzen | 08c0ea209f | |
Hans-Kristian Arntzen | 64d42c08ee | |
Hans-Kristian Arntzen | 3d8ef2b349 | |
Hans-Kristian Arntzen | 33b9166fec | |
Hans-Kristian Arntzen | 972ce74ac6 | |
Robin Kertels | 5f97d1eb70 | |
Robin Kertels | a6ea442819 | |
Hans-Kristian Arntzen | 365dd05557 | |
Hans-Kristian Arntzen | 5017b3723c | |
Hans-Kristian Arntzen | 6a4f2842cb | |
Hans-Kristian Arntzen | 18a5315db4 | |
Hans-Kristian Arntzen | 7c228139c3 | |
Hans-Kristian Arntzen | 30b4abcea1 | |
Hans-Kristian Arntzen | 17b1ffb41a | |
Hans-Kristian Arntzen | f9da3bf564 | |
Hans-Kristian Arntzen | 5c70a24de1 | |
Hans-Kristian Arntzen | c6149b47cd | |
Hans-Kristian Arntzen | cc08339624 | |
Hans-Kristian Arntzen | 422f6804fb | |
Georg Lehmann | 7d4ed66881 | |
Georg Lehmann | 14a06680d9 | |
Hans-Kristian Arntzen | c9bac85dd1 | |
Hans-Kristian Arntzen | 409dc57645 | |
Hans-Kristian Arntzen | b330900659 | |
Hans-Kristian Arntzen | 92a8c0ad78 | |
Hans-Kristian Arntzen | c864f1322f | |
Philip Rebohle | 9a408367dc | |
Philip Rebohle | 51e6b2bbbe | |
Philip Rebohle | 94f82d1085 | |
Philip Rebohle | 1a68267962 | |
Philip Rebohle | c4f88951fc | |
Philip Rebohle | 9673ac173d | |
Philip Rebohle | 3783eaf4f7 | |
Philip Rebohle | 024ef02f9b | |
Philip Rebohle | 549d4ee63f | |
Philip Rebohle | 6186cc1f0e | |
Philip Rebohle | 2c92ab7d1e | |
Philip Rebohle | ba04b02bf6 | |
Hans-Kristian Arntzen | 9fbae668fe | |
Hans-Kristian Arntzen | ce45297695 | |
LemiSt24 | c411d0d0c2 | |
Hans-Kristian Arntzen | 3e5aab6fb3 | |
Hans-Kristian Arntzen | bc40528b6f | |
Hans-Kristian Arntzen | 7cd3b9c917 | |
Hans-Kristian Arntzen | 9a63df07b8 | |
Hans-Kristian Arntzen | 277f485321 | |
Mike Blumenkrantz | 1d76803aff | |
Hans-Kristian Arntzen | dc622fc715 | |
Hans-Kristian Arntzen | 9817c52d24 | |
Hans-Kristian Arntzen | a8229390f9 | |
Hans-Kristian Arntzen | 12c73ee18a | |
Hans-Kristian Arntzen | f39ece9a7c | |
Hans-Kristian Arntzen | c19eaac376 | |
Hans-Kristian Arntzen | 54fbadcc94 | |
Hans-Kristian Arntzen | 4b07535909 | |
Hans-Kristian Arntzen | 84d632f194 | |
Hans-Kristian Arntzen | b309913b6d | |
Hans-Kristian Arntzen | dc752991ef | |
Hans-Kristian Arntzen | c29d005ef4 | |
Hans-Kristian Arntzen | 8a46c21254 | |
Hans-Kristian Arntzen | 76ca492a39 | |
Hans-Kristian Arntzen | 83c4e62660 | |
Hans-Kristian Arntzen | 4bea653504 | |
Hans-Kristian Arntzen | edbf49aad4 | |
Hans-Kristian Arntzen | e0af8f2810 | |
Hans-Kristian Arntzen | b066e72243 | |
Hans-Kristian Arntzen | 15704b2419 | |
Hans-Kristian Arntzen | c725c29bb6 | |
Hans-Kristian Arntzen | 2f6a91e772 | |
Hans-Kristian Arntzen | 719a38a5fe | |
Joshua Ashton | 2278da339a | |
Hans-Kristian Arntzen | 1cc8afcc8e | |
Hans-Kristian Arntzen | 1112106db0 | |
Hans-Kristian Arntzen | 624bf53f8b | |
Hans-Kristian Arntzen | b363d8d2e4 | |
Hans-Kristian Arntzen | 5d345f47cc | |
Georg Lehmann | a078197e16 | |
Krzysztof Bogacki | 9029d1ae23 | |
Krzysztof Bogacki | ae7081eb62 | |
Hans-Kristian Arntzen | 33f17cc74d | |
Hans-Kristian Arntzen | 3b8265dccc | |
Hans-Kristian Arntzen | a2eddc181b | |
Hans-Kristian Arntzen | 47337d5e0b | |
Hans-Kristian Arntzen | f03940ef4b | |
Hans-Kristian Arntzen | e5e662ce22 | |
Hans-Kristian Arntzen | bc3b25fb0e | |
Hans-Kristian Arntzen | 05a5d366d5 | |
Hans-Kristian Arntzen | 1d39c25a59 | |
Hans-Kristian Arntzen | 5e526d506b | |
Hans-Kristian Arntzen | 91ca2ed8ba | |
Hans-Kristian Arntzen | 2ca7ce62da | |
Hans-Kristian Arntzen | 907acce30c | |
Hans-Kristian Arntzen | 8b92d8e0bc | |
Hans-Kristian Arntzen | 81a215d0bf | |
Hans-Kristian Arntzen | 29d956c6c4 | |
Hans-Kristian Arntzen | 49d0eb37e3 | |
Hans-Kristian Arntzen | 1da9ad900c | |
Philip Rebohle | 8f81aaa710 | |
Philip Rebohle | 91976b2edd | |
Philip Rebohle | 6aa73b3d53 | |
Krzysztof Bogacki | ab47aaf36d | |
Hans-Kristian Arntzen | 833f56154c | |
Hans-Kristian Arntzen | 86f8f41490 | |
Hans-Kristian Arntzen | a3f1a0e3cd | |
Hans-Kristian Arntzen | e90b573896 | |
Hans-Kristian Arntzen | 8196b85408 | |
Hans-Kristian Arntzen | a2c1527acd | |
Hans-Kristian Arntzen | 3839144848 | |
Hans-Kristian Arntzen | 6e697a54b6 | |
Hans-Kristian Arntzen | 41c977d616 | |
Hans-Kristian Arntzen | 7da708ea69 | |
Georg Lehmann | 2c76840ff8 | |
Georg Lehmann | 182ebd7e00 | |
Georg Lehmann | c69b73ffcf | |
Hans-Kristian Arntzen | 1409ebab1f | |
Hans-Kristian Arntzen | 7d0743345a | |
Hans-Kristian Arntzen | 2b0a161a0d | |
Philip Rebohle | 1af62abfe7 | |
Hans-Kristian Arntzen | 338157eb04 | |
Hans-Kristian Arntzen | 5c492e9e6c | |
Hans-Kristian Arntzen | 68ce4b4116 | |
Hans-Kristian Arntzen | 0f46a8a7d5 | |
Hans-Kristian Arntzen | 6cba8b9945 | |
Hans-Kristian Arntzen | e5efa8594e | |
Hans-Kristian Arntzen | 39c1f9d07a | |
Robin Kertels | 35be1329ed | |
Samuel Pitoiset | f6a4e0fb71 | |
Samuel Pitoiset | f6fe3e0183 | |
Samuel Pitoiset | 870dda927d | |
Samuel Pitoiset | b42a7193fc | |
Hans-Kristian Arntzen | db943f2341 | |
Hans-Kristian Arntzen | 9162e82fb3 | |
Hans-Kristian Arntzen | d13424bf22 | |
Philip Rebohle | 5923c53111 | |
Joshua Ashton | bd2be76132 | |
Joshua Ashton | d94fdd1ca9 | |
Philip Rebohle | 1354ecabb4 | |
Hans-Kristian Arntzen | c0a3fa8adc | |
Hans-Kristian Arntzen | 459cae5673 | |
Hans-Kristian Arntzen | 7502b4c4c8 | |
Hans-Kristian Arntzen | 18b31a73ec | |
Hans-Kristian Arntzen | fffd6e935c | |
Hans-Kristian Arntzen | 72f26c5699 | |
Arkadiusz Hiler | 93d105adae | |
Hans-Kristian Arntzen | 9c3549360d | |
Hans-Kristian Arntzen | d2fd3de7c1 | |
Hans-Kristian Arntzen | d9636d5c67 | |
Hans-Kristian Arntzen | 2c80431003 | |
Hans-Kristian Arntzen | 9a59ded1c4 | |
Philip Rebohle | f5a6d49e87 | |
Philip Rebohle | a99914b6ea | |
Philip Rebohle | 4000397570 | |
Philip Rebohle | 0de25ac3cd | |
Philip Rebohle | ab111dcdbe | |
Philip Rebohle | 99d949f5fb | |
Philip Rebohle | 9624102dcb | |
Philip Rebohle | 42b8fc3338 | |
Hans-Kristian Arntzen | 6a7eee33b5 | |
Hans-Kristian Arntzen | 8305ddec92 | |
Hans-Kristian Arntzen | 3c9b8cb040 | |
Georg Lehmann | 4240ab7559 | |
Hans-Kristian Arntzen | 7391e38602 | |
Philip Rebohle | 9185edb42a | |
Philip Rebohle | b03c1fcb5f | |
Philip Rebohle | 3b6a4ab988 | |
Philip Rebohle | d61f562a3e | |
Philip Rebohle | 930e7cb251 | |
Hans-Kristian Arntzen | 6ad67bdecd | |
Joshua Ashton | 046524f2a1 | |
Joshua Ashton | 7241164e2d | |
Hans-Kristian Arntzen | 99e067d681 | |
Georg Lehmann | 344f8d1ed4 | |
Robin Kertels | 19a1dce393 | |
Hans-Kristian Arntzen | 3fefc540c8 | |
Hans-Kristian Arntzen | 16d8bae263 | |
Hans-Kristian Arntzen | 0251b4045c | |
Hans-Kristian Arntzen | 54da1dc9b2 | |
Hans-Kristian Arntzen | a0eb938c7f | |
Hans-Kristian Arntzen | 2da535fbbf | |
Hans-Kristian Arntzen | 3937e1a298 | |
Hans-Kristian Arntzen | 45ae742526 | |
Hans-Kristian Arntzen | b53a4a98a6 | |
Hans-Kristian Arntzen | 3210832ad9 | |
Hans-Kristian Arntzen | 58aab78a5b | |
Hans-Kristian Arntzen | e605d19ef7 | |
Hans-Kristian Arntzen | 7986e241f3 | |
Hans-Kristian Arntzen | 164273521f | |
Hans-Kristian Arntzen | db89d403d6 | |
Hans-Kristian Arntzen | 35d2f1e87f | |
Hans-Kristian Arntzen | 2b11c70129 | |
Hans-Kristian Arntzen | 6966cd2f33 | |
Hans-Kristian Arntzen | 7cc435c0bc | |
Hans-Kristian Arntzen | de64ebd1d1 | |
Hans-Kristian Arntzen | 23ad0247e3 | |
Hans-Kristian Arntzen | a392e82d1c | |
Hans-Kristian Arntzen | 570ecd5f79 | |
Hans-Kristian Arntzen | 1d99a80f22 | |
Hans-Kristian Arntzen | b47282e78a | |
Hans-Kristian Arntzen | cd2218e9c3 | |
Hans-Kristian Arntzen | 6255eaec32 | |
Hans-Kristian Arntzen | daa96ba879 | |
David Gow | 2a8b5471ca | |
Robin Kertels | 430c77d3b3 | |
Hans-Kristian Arntzen | c20852435d | |
Hans-Kristian Arntzen | cd04aa63e6 | |
Hans-Kristian Arntzen | 85c75a042f | |
Hans-Kristian Arntzen | 30436436cd | |
Georg Lehmann | eb48213bfa | |
Georg Lehmann | fd690e3831 | |
Georg Lehmann | 07d53a82cc | |
Georg Lehmann | 4c37b4c341 | |
Georg Lehmann | c8d633cb51 | |
Hans-Kristian Arntzen | 8ff91b23d6 | |
Hans-Kristian Arntzen | aadccb66cf | |
Hans-Kristian Arntzen | 8977eaef88 | |
Hans-Kristian Arntzen | cbef48f90a | |
Hans-Kristian Arntzen | 6548e4fd00 | |
Hans-Kristian Arntzen | ae185271ff | |
Hans-Kristian Arntzen | 1a57aa841a | |
Hans-Kristian Arntzen | e74213c576 | |
Danylo Piliaiev | f6c61a3eae | |
Hans-Kristian Arntzen | 5657f79974 | |
Hans-Kristian Arntzen | a0a29bae43 | |
ifedorov | 0abe8a21dd | |
Hans-Kristian Arntzen | 9a1b7ab002 | |
Hans-Kristian Arntzen | 55e16539db | |
Philip Rebohle | 9477d4af3d | |
Hans-Kristian Arntzen | d4dfccece9 | |
Philip Rebohle | 890ba87a7c | |
Hans-Kristian Arntzen | 740e23ea8a | |
Hans-Kristian Arntzen | be8d6ec7ad | |
Hans-Kristian Arntzen | 26bd08bbde | |
Hans-Kristian Arntzen | 32c5abf496 | |
Hans-Kristian Arntzen | 2152500014 | |
Hans-Kristian Arntzen | 4a774f872c | |
Hans-Kristian Arntzen | 3b415dbc89 | |
Hans-Kristian Arntzen | dda02faf89 | |
Hans-Kristian Arntzen | c3a92a0dad | |
Hans-Kristian Arntzen | 8beb7dde89 | |
Philip Rebohle | dd23492348 | |
Hans-Kristian Arntzen | 0c60791bb1 | |
Hans-Kristian Arntzen | f98702603d | |
Hans-Kristian Arntzen | 1417eb6244 | |
Hans-Kristian Arntzen | ae204143d5 | |
Hans-Kristian Arntzen | 3b0a430975 | |
Hans-Kristian Arntzen | e522053954 | |
Hans-Kristian Arntzen | 1c0b760b7d | |
Hans-Kristian Arntzen | 6866b45637 | |
Hans-Kristian Arntzen | e6836c6255 | |
Hans-Kristian Arntzen | 105882466b | |
Hans-Kristian Arntzen | a3202444c8 | |
Hans-Kristian Arntzen | a36b987bf1 | |
Hans-Kristian Arntzen | 3182882e21 | |
Hans-Kristian Arntzen | 99365bcaec | |
Hans-Kristian Arntzen | 158deeff22 | |
Hans-Kristian Arntzen | 1ca9ec7284 | |
Hans-Kristian Arntzen | 08a7d7a165 | |
Hans-Kristian Arntzen | d83ce4392b | |
Hans-Kristian Arntzen | c672429c70 | |
Hans-Kristian Arntzen | 235541ace5 | |
Hans-Kristian Arntzen | f605b88e90 | |
Hans-Kristian Arntzen | 90d52abe94 | |
Hans-Kristian Arntzen | 74f62784e4 | |
Hans-Kristian Arntzen | 393ef6261b | |
Hans-Kristian Arntzen | 6802d9e5a3 | |
Hans-Kristian Arntzen | 67be905421 | |
Hans-Kristian Arntzen | b661c9b8ba | |
Hans-Kristian Arntzen | ac9d98b2b4 | |
Hans-Kristian Arntzen | 1e42acf492 | |
Hans-Kristian Arntzen | 4244441aca | |
Hans-Kristian Arntzen | 0f2e448659 | |
Hans-Kristian Arntzen | c58edfabe1 | |
Hans-Kristian Arntzen | ab4e847e74 | |
Hans-Kristian Arntzen | 385c3dc012 | |
Hans-Kristian Arntzen | d74cfe1883 | |
Hans-Kristian Arntzen | a2f350117f | |
Hans-Kristian Arntzen | 26dc9e7da5 | |
Hans-Kristian Arntzen | 7ee8eac818 | |
Hans-Kristian Arntzen | cddb98acc6 | |
Hans-Kristian Arntzen | 4075809a91 | |
Hans-Kristian Arntzen | 9065f312d5 | |
Hans-Kristian Arntzen | 9415191111 | |
Joshua Ashton | c9ff20d4ac | |
Joshua Ashton | 7a66669e92 | |
Joshua Ashton | d91d47d827 | |
Joshua Ashton | 70ee02bce0 | |
Joshua Ashton | 4c959c8a77 | |
Joshua Ashton | 6dbb4f6dfe | |
Hans-Kristian Arntzen | 0f802b151e | |
Hans-Kristian Arntzen | cd3d759b95 | |
Hans-Kristian Arntzen | 50d41d8f02 | |
Hans-Kristian Arntzen | f58b23e8e7 | |
Hans-Kristian Arntzen | 807232ceff | |
Danylo Piliaiev | 77c67e2bf5 | |
Hans-Kristian Arntzen | 4ff1166230 | |
Hans-Kristian Arntzen | d9cd18b1ca | |
Hans-Kristian Arntzen | 7b4423eee5 | |
Hans-Kristian Arntzen | 4edd76d8bb | |
Hans-Kristian Arntzen | 4f7e4ee753 | |
Hans-Kristian Arntzen | af822939fb | |
Hans-Kristian Arntzen | b0f3512b8b | |
Hans-Kristian Arntzen | 173b565ccf | |
Hans-Kristian Arntzen | 0b11fad67c | |
Hans-Kristian Arntzen | 6f0677eb2e | |
Hans-Kristian Arntzen | 0c2ddb89cd | |
Hans-Kristian Arntzen | 6863f1c6a8 | |
Joshua Ashton | bde3ad8e01 | |
Joshua Ashton | cabc31fc4c | |
Joshua Ashton | bfaf72386f | |
Joshua Ashton | b84c3ff163 | |
Joshua Ashton | 7c993ae1a6 | |
Joshua Ashton | 875fbe5f50 | |
Joshua Ashton | 2334c136e3 | |
Joshua Ashton | 8d5308c9a1 | |
Joshua Ashton | 27e66b5c4a | |
Joshua Ashton | 26d8011b06 | |
Joshua Ashton | e597adb83a | |
Joshua Ashton | 3b3bd37f93 | |
Conor McCarthy | da8daa860b | |
Conor McCarthy | 446c7423ce | |
Conor McCarthy | d366ba47ac | |
Georg Lehmann | cf4fb44629 | |
Georg Lehmann | edeb0658b7 | |
Georg Lehmann | 0afa6732ad | |
Georg Lehmann | 1946e42367 | |
David McCloskey | a19619ccbf | |
Hans-Kristian Arntzen | 173b8ecef0 | |
Hans-Kristian Arntzen | fa4d2182b1 | |
Hans-Kristian Arntzen | 2b13d06f82 | |
Hans-Kristian Arntzen | e687d489ab | |
Hans-Kristian Arntzen | a4b082a828 | |
Hans-Kristian Arntzen | 1d51818d8f | |
Hans-Kristian Arntzen | a8f623e60d | |
Hans-Kristian Arntzen | 12066a2b67 | |
Hans-Kristian Arntzen | 710fa98918 | |
Hans-Kristian Arntzen | cec741706d | |
Hans-Kristian Arntzen | abdaeb136d | |
Hans-Kristian Arntzen | e0451bb541 | |
Hans-Kristian Arntzen | cb94cfd10c | |
Hans-Kristian Arntzen | 426cdc9218 | |
Hans-Kristian Arntzen | 69d4f55219 | |
Hans-Kristian Arntzen | a590db2508 | |
Hans-Kristian Arntzen | d402255349 | |
Hans-Kristian Arntzen | b4521ebbd8 | |
Hans-Kristian Arntzen | 8d49d3e9ae | |
Hans-Kristian Arntzen | 9fd422a0fd | |
Hans-Kristian Arntzen | 41295eff6c | |
Hans-Kristian Arntzen | 132638be67 | |
Hans-Kristian Arntzen | 50f2c35b44 | |
Hans-Kristian Arntzen | 961fef84de | |
Joshua Ashton | 9c0fa91ca5 | |
Hans-Kristian Arntzen | 3081887757 |
|
@ -15,7 +15,7 @@ jobs:
|
|||
|
||||
- name: Build release
|
||||
id: build-release
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export VERSION_NAME="${GITHUB_REF##*/}-${GITHUB_SHA##*/}"
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
|
||||
- name: Build MinGW x86
|
||||
id: build-mingw-x86
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win32.txt --buildtype release build-mingw-x86
|
||||
|
@ -26,7 +26,7 @@ jobs:
|
|||
|
||||
- name: Build MinGW x64
|
||||
id: build-mingw-x64
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win64.txt --buildtype release build-mingw-x64
|
||||
|
@ -34,7 +34,7 @@ jobs:
|
|||
|
||||
- name: Build Native GCC x86
|
||||
id: build-native-gcc-x86
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="gcc -m32"
|
||||
|
@ -45,7 +45,7 @@ jobs:
|
|||
|
||||
- name: Build Native GCC x64
|
||||
id: build-native-gcc-x64
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="gcc"
|
||||
|
@ -55,7 +55,7 @@ jobs:
|
|||
|
||||
- name: Build Native Clang x86
|
||||
id: build-native-clang-x86
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="clang -m32"
|
||||
|
@ -66,7 +66,7 @@ jobs:
|
|||
|
||||
- name: Build Native Clang x64
|
||||
id: build-native-clang-x64
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v5
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="clang"
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
name: Test Builds on Windows
|
||||
|
||||
on: [push, pull_request, workflow_dispatch]
|
||||
|
||||
jobs:
|
||||
build-set-windows:
|
||||
runs-on: windows-2022
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
id: checkout-code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Setup widl and glslangValidator
|
||||
shell: pwsh
|
||||
run: |
|
||||
choco install strawberryperl vulkan-sdk -y
|
||||
Write-Output "C:\Strawberry\c\bin" | Out-File -FilePath "${Env:GITHUB_PATH}" -Append
|
||||
Write-Output "$([System.Environment]::GetEnvironmentVariable('VULKAN_SDK', 'Machine'))\Bin" `
|
||||
| Out-File -FilePath "${Env:GITHUB_PATH}" -Append
|
||||
|
||||
- name: Setup Meson
|
||||
shell: pwsh
|
||||
run: pip install meson
|
||||
|
||||
- name: Find Visual Studio
|
||||
shell: pwsh
|
||||
run: |
|
||||
$installationPath = Get-VSSetupInstance `
|
||||
| Select-VSSetupInstance -Require Microsoft.VisualStudio.Workload.NativeDesktop -Latest `
|
||||
| Select-Object -ExpandProperty InstallationPath
|
||||
Write-Output "VSDEVCMD=${installationPath}\Common7\Tools\VsDevCmd.bat" `
|
||||
| Out-File -FilePath "${Env:GITHUB_ENV}" -Append
|
||||
|
||||
- name: Build MSVC x86
|
||||
shell: pwsh
|
||||
run: |
|
||||
& "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x86 -host_arch=x64 -no_logo && set" `
|
||||
| % { , ($_ -Split '=', 2) } `
|
||||
| % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) }
|
||||
meson -Denable_tests=True -Denable_extras=True --buildtype release --backend vs2022 build-msvc-x86
|
||||
msbuild -m build-msvc-x86/vkd3d-proton.sln
|
||||
|
||||
- name: Build MSVC x64
|
||||
shell: pwsh
|
||||
run: |
|
||||
& "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x64 -host_arch=x64 -no_logo && set" `
|
||||
| % { , ($_ -Split '=', 2) } `
|
||||
| % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) }
|
||||
meson -Denable_tests=True -Denable_extras=True --buildtype release --backend vs2022 build-msvc-x64
|
||||
msbuild -m build-msvc-x64/vkd3d-proton.sln
|
|
@ -0,0 +1,4 @@
|
|||
Conor McCarthy <cmccarthy@codeweavers.com>
|
||||
Ivan Fedorov <ifedorov@nvidia.com>
|
||||
James Beddek <telans@protonmail.com>
|
||||
Roshan Chaudhari <rochaudhari@nvidia.com>
|
35
AUTHORS
35
AUTHORS
|
@ -1,7 +1,34 @@
|
|||
Alexander Gabello
|
||||
Alexandre Julliard
|
||||
Andrew Eikum
|
||||
Arkadiusz Hiler
|
||||
Biswapriyo Nath
|
||||
Chip Davis
|
||||
Henri Verbeet
|
||||
Józef Kucia
|
||||
Sven Hesse
|
||||
Conor McCarthy
|
||||
Danylo Piliaiev
|
||||
David Gow
|
||||
David McCloskey
|
||||
Derek Lesho
|
||||
Fabian Bornschein
|
||||
Georg Lehmann
|
||||
Hans-Kristian Arntzen
|
||||
Philip Rebohle
|
||||
Henri Verbeet
|
||||
Ivan Fedorov
|
||||
Jactry Zeng
|
||||
James Beddek
|
||||
Jens Peters
|
||||
Joshua Ashton
|
||||
Józef Kucia
|
||||
Juuso Alasuutari
|
||||
Krzysztof Bogacki
|
||||
Paul Gofman
|
||||
Philip Rebohle
|
||||
Rémi Bernon
|
||||
Robin Kertels
|
||||
Rodrigo Locatti
|
||||
Roshan Chaudhari
|
||||
Samuel Pitoiset
|
||||
Sveinar Søpler
|
||||
Sven Hesse
|
||||
Thomas Crider
|
||||
Zhiyi Zhang
|
||||
|
|
158
CHANGELOG.md
158
CHANGELOG.md
|
@ -1,5 +1,163 @@
|
|||
# Change Log
|
||||
|
||||
## 2.6
|
||||
|
||||
It has been a long while since 2.5, and this release rolls up a lot of fixes, features and optimizations.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Fix black screen rendering bug in Horizon Zero Dawn after latest game updates.
|
||||
- Fix crashes on startup in Final Fantasy VII: Remake and Warframe.
|
||||
- Fix crashes in Guardians of the Galaxy when interacting with certain game objects.
|
||||
- Fix hang on game shutdown in Elden Ring.
|
||||
- Fix broken geometry rendering in Age of Empires: IV.
|
||||
|
||||
### Optimization
|
||||
|
||||
- Improve generated shader code for vectorized load-store operations in DXIL.
|
||||
- Greatly reduce CPU overhead for descriptor copy operations,
|
||||
which is a key contributor to CPU overhead in D3D12.
|
||||
|
||||
### Features
|
||||
|
||||
#### Pipeline library rewrite
|
||||
|
||||
Support D3D12 pipeline libraries better where we can now also cache
|
||||
generated SPIR-V from DXBC/DXIL.
|
||||
Massively reduces subsequent load times in Monster Hunter: Rise,
|
||||
and helps other titles like Guardians of the Galaxy and Elden Ring.
|
||||
Also lays the groundwork for internal driver caches down the line for games which do not use this API.
|
||||
Also, deduplicates binary blobs for reduced disk size requirements.
|
||||
|
||||
#### Shader models
|
||||
|
||||
Shader model 6.6 is now fully implemented. This includes support for:
|
||||
- ResourceDescriptorHeap[] direct access
|
||||
- 64-bit atomics
|
||||
- IsHelperLane()
|
||||
- Compute shader derivatives
|
||||
- WaveSize attribute
|
||||
- Packed math intrinsics
|
||||
|
||||
#### Minor features
|
||||
|
||||
- Handle API feature MinResourceLODClamp correctly if `VK_EXT_image_view_min_lod` is supported.
|
||||
- Expose CastFullyTypedFormat feature.
|
||||
- Expose some advanced shader features on Intel related to UAV formats (`VK_KHR_format_feature_flags2`).
|
||||
- Support COLOR -> STENCIL copies.
|
||||
|
||||
### Workarounds
|
||||
|
||||
- Workaround DEATHLOOP not emitting synchronization commands correctly. Fixes menu flicker on RADV.
|
||||
- Workaround quirky API usage in Elden Ring. Removes many kinds of stutter and chug when traversing the scenery.
|
||||
- Workaround certain environments failing to create Vulkan device if some `VK_NVX_*` extensions are enabled.
|
||||
- Workaround glitched foliage rendering in Horizon Zero Dawn after latest game updates.
|
||||
- Workaround some questionable UE4 shaders causing glitched rendering on RADV.
|
||||
|
||||
### Note on future Vulkan driver requirements
|
||||
|
||||
2.6 is expected to be the last vkd3d-proton release before we require some newer Vulkan extensions.
|
||||
`VK_KHR_dynamic_rendering` and `VK_EXT_extended_dynamic_state`
|
||||
(and likely `dynamic_state_2` as well) will be required.
|
||||
|
||||
`VK_KHR_dynamic_rendering` in particular requires up-to-date drivers and the legacy render pass path
|
||||
will be abandoned in favor of it. Supporting both paths at the same time is not practical.
|
||||
Moving to `VK_KHR_dynamic_rendering` allows us to fix some critical flaws with the legacy API
|
||||
which caused potential shader compilation stutters and extra CPU overhead.
|
||||
|
||||
## 2.5
|
||||
|
||||
This is a release with a little bit of everything!
|
||||
|
||||
### Features
|
||||
|
||||
#### DXR progress
|
||||
|
||||
DXR has seen significant work in the background.
|
||||
|
||||
- DXR 1.1 is now experimentally exposed. It can be enabled with `VKD3D_CONFIG=dxr11`.
|
||||
Note that DXR 1.1 cannot be fully implemented in `VK_KHR_ray_tracing`'s current form, in particular
|
||||
DispatchRays() indirect is not compatible yet,
|
||||
although we have not observed a game which requires this API feature.
|
||||
- DXR 1.1 inline raytracing support is fully implemented.
|
||||
- DXR 1.0 support is more or less feature complete.
|
||||
Some weird edge cases remain, but will likely not be implemented unless required by a game.
|
||||
`VKD3D_CONFIG=dxr` will eventually be dropped when it matures.
|
||||
|
||||
Some new DXR games are starting to come alive, especially with DXR 1.1 enabled,
|
||||
but there are significant bugs as well that we currently cannot easily debug.
|
||||
Some experimental results on NVIDIA:
|
||||
|
||||
- **Control** - already worked
|
||||
- **DEATHLOOP** - appears to work correctly
|
||||
- **Cyberpunk 2077** - DXR can be enabled, but GPU timeouts
|
||||
- **World of Warcraft** - according to a user, it works, but we have not confirmed ourselves
|
||||
- **Metro Exodus: Enhanced Edition** -
|
||||
gets in-game and appears to work? Not sure if it looks correct.
|
||||
Heavy CPU stutter for some reason ...
|
||||
- **Metro Exodus** (original release) - GPU timeouts when enabling DXR
|
||||
- **Resident Evil: Village** - Appears to work, but the visual difference is subtle.
|
||||
|
||||
It's worth experimenting with these and others.
|
||||
DXR is incredibly complicated, so expect bugs.
|
||||
From here, DXR support is mostly a case of stamping out issues one by one.
|
||||
|
||||
#### NVIDIA DLSS
|
||||
|
||||
NVIDIA contributed integration APIs in vkd3d-proton which enable DLSS support in D3D12 titles in Proton.
|
||||
See Proton documentation for how to enable NvAPI support.
|
||||
|
||||
#### Shader models
|
||||
|
||||
A fair bit of work went into DXIL translation support to catch up with native drivers.
|
||||
|
||||
- Shader model 6.5 is exposed.
|
||||
Shader model 6.6 should be straightforward once that becomes relevant.
|
||||
- Shader model 6.4 implementation takes advantage of `VK_KHR_shader_integer_dot_product` when supported.
|
||||
- Proper fallback for FP16 math on GPUs which do not expose native FP16 support (Polaris, Pascal).
|
||||
Notably fixes AMD FSR shaders in Resident Evil: Village (and others).
|
||||
- Shader model 6.1 SV_Barycentric support implemented (NVIDIA only for now).
|
||||
- Support shader model 6.2 FP32 denorm control.
|
||||
|
||||
### Performance
|
||||
|
||||
Resizable BAR can improve GPU performance by about 10-15% in the best case, though this depends a lot on the game.
|
||||
Horizon Zero Dawn and Death Stranding in particular improve massively with this change.
|
||||
|
||||
By default, vkd3d-proton will now take advantage of PCI-e BAR memory types through heuristics
|
||||
as D3D12 does not expose direct support for resizable BAR, and native D3D12 drivers are known to use heuristics as well.
|
||||
Without resizable BAR enabled in BIOS/vBIOS, we only get 256 MiB which can help performance,
|
||||
but many games will improve performance even more
|
||||
when we are allowed to use more than that.
|
||||
There is an upper limit for how much VRAM is dedicated to this purpose.
|
||||
We also added `VKD3D_CONFIG=no_upload_hvv` to disable all uses of PCI-e BAR memory.
|
||||
|
||||
Other performance improvements:
|
||||
|
||||
- Avoid redundant descriptor update work in certain scenarios (NVIDIA contribution).
|
||||
- Minor tweaks here and there to reduce CPU overhead.
|
||||
|
||||
### Fixes and workarounds
|
||||
|
||||
- Fix behavior for swap chain presentation latency HANDLE. Fixes spurious deadlocks in some cases.
|
||||
- Fix many issues related to depth-stencil handling, which fixed various issues in DEATHLOOP, F1 2021, WRC 10.
|
||||
- Fix DIRT 5 rendering issues and crashes. Should be fully playable now.
|
||||
- Fix some Diablo II Resurrected rendering issues.
|
||||
- Workaround shader bugs in Psychonauts 2.
|
||||
- Workaround some Unreal Engine 4 shader bugs which multiple titles trigger.
|
||||
- Fix some stability issues when VRAM is exhausted on NVIDIA.
|
||||
- Fix CPU crash in boot-up sequence of Far Cry 6 (game is still kinda buggy though, but gets in-game).
|
||||
- Fix various bugs with host visible images. Fixes DEATHLOOP.
|
||||
- Fix various DXIL conversion bugs.
|
||||
- Add Invariant geometry workarounds for specific games which require it.
|
||||
- Fix how d3d12.dll exports symbols to be more in line with MSVC.
|
||||
- Fix some edge cases in bitfield instructions.
|
||||
- Work around extreme CPU memory bloat on the specific NVIDIA driver versions which had this bug.
|
||||
- Fix regression in Evil Genius 2: World Domination.
|
||||
- Fix crashes in Hitman 3.
|
||||
- Fix terrain rendering in Anno 1800.
|
||||
- Various correctness and crash fixes.
|
||||
|
||||
## 2.4
|
||||
|
||||
This is a release which focuses on performance and bug-fixes.
|
||||
|
|
2
COPYING
2
COPYING
|
@ -1,4 +1,4 @@
|
|||
Copyright 2016-2020 the vkd3d-proton project authors (see the file AUTHORS for a
|
||||
Copyright 2016-2022 the vkd3d-proton project authors (see the file AUTHORS for a
|
||||
complete list)
|
||||
|
||||
vkd3d-proton is free software; you can redistribute it and/or modify it under
|
||||
|
|
84
README.md
84
README.md
|
@ -22,36 +22,36 @@ There are some hard requirements on drivers to be able to implement D3D12 in a r
|
|||
- `VK_EXT_descriptor_indexing` with at least 1000000 UpdateAfterBind descriptors for all types except UniformBuffer.
|
||||
Essentially all features in `VkPhysicalDeviceDescriptorIndexingFeatures` must be supported.
|
||||
- `VK_KHR_timeline_semaphore`
|
||||
- `VK_KHR_create_renderpass2`
|
||||
- `VK_KHR_sampler_mirror_clamp_to_edge`
|
||||
- `VK_EXT_robustness2`
|
||||
- `VK_KHR_separate_depth_stencil_layouts`
|
||||
- `VK_KHR_bind_memory2`
|
||||
- `VK_KHR_copy_commands2`
|
||||
- `VK_KHR_dynamic_rendering`
|
||||
- `VK_EXT_extended_dynamic_state`
|
||||
- `VK_EXT_extended_dynamic_state2`
|
||||
|
||||
Some notable extensions that **should** be supported for optimal or correct behavior.
|
||||
These extensions will likely become mandatory later.
|
||||
|
||||
- `VK_KHR_buffer_device_address`
|
||||
- `VK_EXT_extended_dynamic_state`
|
||||
- `VK_EXT_image_view_min_lod`
|
||||
|
||||
`VK_VALVE_mutable_descriptor_type` is also highly recommended, but not mandatory.
|
||||
|
||||
### AMD (RADV)
|
||||
|
||||
For AMD, RADV is the recommended driver and the one that sees most testing on AMD GPUs.
|
||||
The recommendation here is to use a driver built from Git.
|
||||
The minimum requirement at the moment is Mesa 22.0 since it supports `VK_KHR_dynamic_rendering`.
|
||||
|
||||
NOTE: For older Mesa versions, use the v2.6 release.
|
||||
|
||||
### NVIDIA
|
||||
|
||||
The [Vulkan beta drivers](https://developer.nvidia.com/vulkan-driver) generally contain the latest
|
||||
driver fixes that we identify while getting games to work.
|
||||
At least Linux 455.26.01 (2020-10-20) is recommended as it contains fixes for:
|
||||
|
||||
> Reduce host memory consumption for descriptor memory when VkDescriptorSetVariableDescriptorCountAllocateInfo is used.
|
||||
|
||||
> Fixed a bug in a barrier optimization that allowed some back-to-back copies to run unordered
|
||||
|
||||
These fixes should find their way into stable drivers eventually, but if you're having issues, test the latest development drivers,
|
||||
as that is what we test against.
|
||||
The latest drivers (stable, beta or Vulkan beta tracks) are always preferred.
|
||||
If you're having problems, always try the latest drivers.
|
||||
|
||||
### Intel
|
||||
|
||||
|
@ -152,9 +152,15 @@ commas or semicolons.
|
|||
- `skip_application_workarounds` - Skips all application workarounds.
|
||||
For debugging purposes.
|
||||
- `dxr` - Enables DXR support if supported by device.
|
||||
- `dxr11` - Enables DXR tier 1.1 support if supported by device.
|
||||
- `force_static_cbv` - Unsafe speed hack on NVIDIA. May or may not give a significant performance uplift.
|
||||
- `single_queue` - Do not use asynchronous compute or transfer queues.
|
||||
- `upload_hvv` - Attempt to use host-visible VRAM (large/resizable BAR) for the UPLOAD heap. May improve performance at the cost of using additional video memory over system memory.
|
||||
- `no_upload_hvv` - Blocks any attempt to use host-visible VRAM (large/resizable BAR) for the UPLOAD heap.
|
||||
May free up vital VRAM in certain critical situations, at cost of lower GPU performance.
|
||||
A fraction of VRAM is reserved for resizable BAR allocations either way,
|
||||
so it should not be a real issue even on lower VRAM cards.
|
||||
- `force_host_cached` - Forces all host visible allocations to be CACHED, which greatly accelerates captures.
|
||||
- `no_invariant_position` - Avoids workarounds for invariant position. The workaround is enabled by default.
|
||||
- `VKD3D_DEBUG` - controls the debug level for log messages produced by
|
||||
vkd3d-proton. Accepts the following values: none, err, info, fixme, warn, trace.
|
||||
- `VKD3D_SHADER_DEBUG` - controls the debug level for log messages produced by
|
||||
|
@ -179,6 +185,39 @@ commas or semicolons.
|
|||
- `VKD3D_PROFILE_PATH` - If profiling is enabled in the build, a profiling block is
|
||||
emitted to `${VKD3D_PROFILE_PATH}.${pid}`.
|
||||
|
||||
## Shader cache
|
||||
|
||||
By default, vkd3d-proton manages its own driver cache.
|
||||
This cache is intended to cache DXBC/DXIL -> SPIR-V conversion.
|
||||
This reduces stutter (when pipelines are created last minute and app relies on hot driver cache)
|
||||
and load times (when applications do the right thing of loading PSOs up front).
|
||||
|
||||
Behavior is designed to be close to DXVK state cache.
|
||||
|
||||
#### Default behavior
|
||||
|
||||
`vkd3d-proton.cache` (and `vkd3d-proton.cache.write`) are placed in the current working directory.
|
||||
Generally, this is the game install folder when running in Steam.
|
||||
|
||||
#### Custom directory
|
||||
|
||||
`VKD3D_SHADER_CACHE_PATH=/path/to/directory` overrides the directory where `vkd3d-proton.cache` is placed.
|
||||
|
||||
#### Disable cache
|
||||
|
||||
`VKD3D_SHADER_CACHE_PATH=0` disables the internal cache, and any caching would have to be explicitly managed
|
||||
by the application.
|
||||
|
||||
### Behavior of ID3D12PipelineLibrary
|
||||
|
||||
When explicit shader cache is used, the need for application managed pipeline libraries is greatly diminished,
|
||||
and the cache applications interact with is a dummy cache.
|
||||
If the vkd3d-proton shader cache is disabled, ID3D12PipelineLibrary stores everything relevant for a full cache,
|
||||
i.e. SPIR-V and PSO driver cache blob.
|
||||
`VKD3D_CONFIG=pipeline_library_app_cache` is an alternative to `VKD3D_SHADER_CACHE_PATH=0` and can be
|
||||
automatically enabled based on app-profiles if relevant in the future if applications manage the caches better
|
||||
than vkd3d-proton can do automagically.
|
||||
|
||||
## CPU profiling (development)
|
||||
|
||||
Pass `-Denable_profiling=true` to Meson to enable a profiled build. With a profiled build, use `VKD3D_PROFILE_PATH` environment variable.
|
||||
|
@ -200,12 +239,26 @@ pass `-Denable_renderdoc=true` to Meson.
|
|||
vkd3d-proton will automatically make a capture when a specific shader is encountered.
|
||||
- `VKD3D_AUTO_CAPTURE_COUNTS` - A comma-separated list of indices. This can be used to control which queue submissions to capture.
|
||||
E.g., use `VKD3D_AUTO_CAPTURE_COUNTS=0,4,10` to capture the 0th (first submission), 4th and 10th submissions which are candidates for capturing.
|
||||
If `VKD3D_AUTO_CAPTURE_COUNTS` is `-1`, the entire app runtime can be turned into one big capture.
|
||||
This is only intended to be used when capturing something like the test suite,
|
||||
or tiny applications with a finite runtime to make it easier to debug cross submission work.
|
||||
|
||||
If only `VKD3D_AUTO_CAPTURE_COUNTS` is set, any queue submission is considered for capturing.
|
||||
If only `VKD3D_AUTO_CAPTURE_SHADER` is set, `VKD3D_AUTO_CAPTURE_COUNTS` is considered to be equal to `"0"`, i.e. a capture is only
|
||||
made on first encounter with the target shader.
|
||||
If both are set, the capture counter is only incremented and considered when a submission contains the use of the target shader.
|
||||
|
||||
### Breadcrumbs debugging
|
||||
|
||||
For debugging GPU hangs, it's useful to know where crashes happen.
|
||||
If the build has trace enabled (non-release builds), breadcrumbs support is also enabled.
|
||||
|
||||
`VKD3D_CONFIG=breadcrumbs` will instrument command lists with `VK_AMD_buffer_marker` or `VK_NV_device_checkpoints`.
|
||||
On GPU device lost or timeout, crash dumps are written to the log.
|
||||
For best results on RADV, use `RADV_DEBUG=syncshaders`. The logs will print a digested form of the command lists
|
||||
which were executing at the time, and attempt to narrow down the possible range of commands which could
|
||||
have caused a crash.
|
||||
|
||||
### Shader logging
|
||||
|
||||
It is possible to log the output of replaced shaders, essentially a custom shader printf. To enable this feature, `VK_KHR_buffer_device_address` must be supported.
|
||||
|
@ -217,8 +270,11 @@ and avoids any possible accidental hiding of bugs by introducing validation laye
|
|||
Using `debugPrintEXT` is also possible if that fits better with your debugging scenario.
|
||||
With this shader replacement scheme, we're able to add shader logging as unintrusively as possible.
|
||||
|
||||
Replaced shaders will need to include `debug_channel.h` from `include/shader-debug`.
|
||||
Use `glslc -I/path/to/vkd3d-proton/include/shader-debug --target-env=vulkan1.1` when compiling replaced shaders.
|
||||
```
|
||||
# Inside folder full of override shaders, build everything with:
|
||||
make -C /path/to/include/shader-debug M=$PWD
|
||||
```
|
||||
The shader can then include `#include "debug_channel.h"` and use various functions below.
|
||||
|
||||
```
|
||||
void DEBUG_CHANNEL_INIT(uvec3 ID);
|
||||
|
|
|
@ -456,13 +456,8 @@ static void cxg_mesh_create(ID3D12Device *device, float inner_radius, float oute
|
|||
float r0, r1, r2;
|
||||
float angle, da;
|
||||
|
||||
if (!(vertices = calloc(tooth_count, 12 * sizeof(*vertices))))
|
||||
return;
|
||||
if (!(faces = calloc(tooth_count, 20 * sizeof(*faces))))
|
||||
{
|
||||
free(vertices);
|
||||
return;
|
||||
}
|
||||
vertices = calloc(tooth_count, 12 * sizeof(*vertices));
|
||||
faces = calloc(tooth_count, 20 * sizeof(*faces));
|
||||
|
||||
r0 = inner_radius;
|
||||
r1 = outer_radius - tooth_depth / 2.0f;
|
||||
|
|
|
@ -174,9 +174,9 @@ static inline struct hash_map_entry *hash_map_insert(struct hash_map *hash_map,
|
|||
if (!(target->flags & HASH_MAP_ENTRY_OCCUPIED))
|
||||
{
|
||||
hash_map->used_count += 1;
|
||||
memcpy(target, entry, hash_map->entry_size);
|
||||
target->flags = HASH_MAP_ENTRY_OCCUPIED;
|
||||
target->hash_value = hash_value;
|
||||
memcpy(target + 1, entry + 1, hash_map->entry_size - sizeof(*entry));
|
||||
}
|
||||
|
||||
/* If target is occupied, we already have an entry in the hashmap.
|
||||
|
@ -193,6 +193,7 @@ static inline void hash_map_init(struct hash_map *hash_map, pfn_hash_func hash_f
|
|||
hash_map->entry_size = entry_size;
|
||||
hash_map->entry_count = 0;
|
||||
hash_map->used_count = 0;
|
||||
assert(entry_size > sizeof(struct hash_map_entry));
|
||||
}
|
||||
|
||||
static inline void hash_map_clear(struct hash_map *hash_map)
|
||||
|
@ -212,4 +213,43 @@ static inline uint32_t hash_uint64(uint64_t n)
|
|||
return hash_combine((uint32_t)n, (uint32_t)(n >> 32));
|
||||
}
|
||||
|
||||
/* A somewhat stronger hash when we're meant to store the hash (pipeline caches, etc). Based on FNV-1 (multiply by the prime, then XOR in the input). */
|
||||
/* Returns the seed value for the hash_fnv1_iterate_*() helpers
 * (the 64-bit FNV offset basis).
 * Declared with (void) so the declaration is a real prototype in C. */
static inline uint64_t hash_fnv1_init(void)
{
    return 0xcbf29ce484222325ull;
}
|
||||
|
||||
/* Folds a single byte into the running hash h:
 * h is multiplied by the 64-bit FNV prime, then the byte is XORed in. */
static inline uint64_t hash_fnv1_iterate_u8(uint64_t h, uint8_t value)
{
    const uint64_t fnv_prime = 0x100000001b3ull;

    h *= fnv_prime;
    h ^= value;
    return h;
}
|
||||
|
||||
/* Folds a whole 32-bit word into the running hash h in one step:
 * h is multiplied by the 64-bit FNV prime, then the word is XORed in. */
static inline uint64_t hash_fnv1_iterate_u32(uint64_t h, uint32_t value)
{
    const uint64_t fnv_prime = 0x100000001b3ull;

    h *= fnv_prime;
    h ^= value;
    return h;
}
|
||||
|
||||
/* Folds a float into the running hash h by hashing its 32-bit object
 * representation (reinterpreted through a union, not converted). */
static inline uint64_t hash_fnv1_iterate_f32(uint64_t h, float value)
{
    union { float as_float; uint32_t as_bits; } pun;

    pun.as_float = value;
    return hash_fnv1_iterate_u32(h, pun.as_bits);
}
|
||||
|
||||
/* Folds a 64-bit value into the running hash h as two 32-bit steps:
 * the low word first, then the high word. */
static inline uint64_t hash_fnv1_iterate_u64(uint64_t h, uint64_t value)
{
    const uint32_t low_word = (uint32_t)(value & UINT32_MAX);
    const uint32_t high_word = (uint32_t)(value >> 32);

    return hash_fnv1_iterate_u32(hash_fnv1_iterate_u32(h, low_word), high_word);
}
|
||||
|
||||
/* Folds a NUL-terminated string into the running hash h, byte by byte.
 * A zero byte is always folded in at the end, so a NULL str hashes the
 * same as an empty string. */
static inline uint64_t hash_fnv1_iterate_string(uint64_t h, const char *str)
{
    const char *cursor;

    for (cursor = str; cursor && *cursor; cursor++)
        h = hash_fnv1_iterate_u8(h, *cursor);

    /* The terminator is hashed unconditionally. */
    return hash_fnv1_iterate_u8(h, 0);
}
|
||||
|
||||
#endif /* __VKD3D_HASHMAP_H */
|
||||
|
|
|
@ -262,7 +262,7 @@ static inline uint64_t vkd3d_atomic_uint64_compare_exchange(UINT64* target, uint
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_WIN64)
|
||||
#if INTPTR_MAX == INT64_MAX
|
||||
# define vkd3d_atomic_ptr_load_explicit(target, order) ((void *)vkd3d_atomic_uint64_load_explicit((uint64_t *)target, order))
|
||||
# define vkd3d_atomic_ptr_store_explicit(target, value, order) (vkd3d_atomic_uint64_store_explicit((uint64_t *)target, (uint64_t)value, order))
|
||||
# define vkd3d_atomic_ptr_exchange_explicit(target, value, order) ((void *)vkd3d_atomic_uint64_exchange_explicit((uint64_t *)target, (uint64_t)value, order))
|
||||
|
|
|
@ -27,9 +27,12 @@
|
|||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
|
@ -42,8 +45,15 @@
|
|||
|
||||
#define MEMBER_SIZE(t, m) sizeof(((t *)0)->m)
|
||||
|
||||
/* Rounds addr up to the next multiple of alignment.
 * alignment must be a non-zero power of two (asserted). */
static inline uint64_t align64(uint64_t addr, uint64_t alignment)
{
    const uint64_t mask = alignment - 1;

    assert(alignment > 0 && (alignment & mask) == 0);
    return (addr + mask) & ~mask;
}
|
||||
|
||||
/* size_t variant of align64(): rounds addr up to the next multiple of
 * alignment, which must be a non-zero power of two (asserted). */
static inline size_t align(size_t addr, size_t alignment)
{
    const size_t mask = alignment - 1;

    assert(alignment > 0 && (alignment & mask) == 0);
    return (addr + mask) & ~mask;
}
|
||||
|
||||
|
@ -113,8 +123,7 @@ static inline unsigned int vkd3d_bitmask_tzcnt32(uint32_t mask)
|
|||
{
|
||||
#ifdef _MSC_VER
|
||||
unsigned long result;
|
||||
_BitScanForward(&result, mask) ? result : 32;
|
||||
return result;
|
||||
return _BitScanForward(&result, mask) ? result : 32;
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
return mask ? __builtin_ctz(mask) : 32;
|
||||
#else
|
||||
|
@ -203,6 +212,14 @@ static inline unsigned int vkd3d_log2i(unsigned int x)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Rounded-up counterpart of vkd3d_log2i(): returns 0 for x == 1,
 * otherwise vkd3d_log2i(x - 1) + 1.
 * NOTE(review): x == 0 is not special-cased; x - 1 wraps to UINT_MAX - confirm callers never pass 0. */
static inline unsigned int vkd3d_log2i_ceil(unsigned int x)
{
    return x == 1 ? 0 : vkd3d_log2i(x - 1) + 1;
}
|
||||
|
||||
static inline int ascii_isupper(int c)
|
||||
{
|
||||
return 'A' <= c && c <= 'Z';
|
||||
|
@ -277,4 +294,36 @@ static inline void *void_ptr_offset(void *ptr, size_t offset)
|
|||
#define VKD3D_THREAD_LOCAL __thread
|
||||
#endif
|
||||
|
||||
static inline uint64_t vkd3d_get_current_time_ns(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER li, lf;
|
||||
uint64_t whole, part;
|
||||
QueryPerformanceCounter(&li);
|
||||
QueryPerformanceFrequency(&lf);
|
||||
whole = (li.QuadPart / lf.QuadPart) * 1000000000;
|
||||
part = ((li.QuadPart % lf.QuadPart) * 1000000000) / lf.QuadPart;
|
||||
return whole + part;
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return ts.tv_sec * 1000000000ll + ts.tv_nsec;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
|
||||
/* Returns a tick count: the TSC via __rdtsc() on MSVC or
 * __builtin_ia32_rdtsc() on x86 with other compilers. On every other
 * target it falls back to vkd3d_get_current_time_ns(), so the unit of
 * the result depends on the platform. */
static inline uint64_t vkd3d_get_current_time_ticks(void)
{
    uint64_t ticks;

#ifdef _MSC_VER
    ticks = __rdtsc();
#elif defined(__i386__) || defined(__x86_64__)
    ticks = __builtin_ia32_rdtsc();
#else
    ticks = vkd3d_get_current_time_ns();
#endif

    return ticks;
}
|
||||
|
||||
#endif /* __VKD3D_COMMON_H */
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef __VKD3D_FILE_UTILS_H
|
||||
#define __VKD3D_FILE_UTILS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* State of a file mapping, filled in by vkd3d_file_map_read_only()
 * and cleared by vkd3d_file_unmap(). */
struct vkd3d_memory_mapped_file
{
    /* Presumably the base address of the mapping - confirm against the implementation. */
    void *mapped;
    /* Presumably the length of the mapping in bytes - confirm against the implementation. */
    size_t mapped_size;
};
|
||||
|
||||
/* On failure, ensures the struct is cleared to zero.
|
||||
* A reference to the file is kept through the memory mapping. */
|
||||
bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file);
|
||||
/* Clears out file on unmap. */
|
||||
void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file);
|
||||
bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path);
|
||||
bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path);
|
||||
bool vkd3d_file_delete(const char *path);
|
||||
FILE *vkd3d_file_open_exclusive_write(const char *path);
|
||||
|
||||
#endif
|
|
@ -23,6 +23,7 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "vkd3d_common.h"
|
||||
#include "vkd3d_debug.h"
|
||||
|
||||
static inline void *vkd3d_malloc(size_t size)
|
||||
|
@ -57,12 +58,12 @@ static inline void vkd3d_free(void *ptr)
|
|||
bool vkd3d_array_reserve(void **elements, size_t *capacity,
|
||||
size_t element_count, size_t element_size);
|
||||
|
||||
static inline void *vkd3d_malloc_aligned(size_t size, size_t align)
|
||||
static inline void *vkd3d_malloc_aligned(size_t size, size_t alignment)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return _aligned_malloc(size, align);
|
||||
return _aligned_malloc(size, alignment);
|
||||
#else
|
||||
return aligned_alloc(align, size);
|
||||
return aligned_alloc(alignment, align(size, alignment));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,8 @@ int vkd3d_dlclose(vkd3d_module_t handle);
|
|||
|
||||
const char *vkd3d_dlerror(void);
|
||||
|
||||
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size);
|
||||
|
||||
bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -21,39 +21,15 @@
|
|||
|
||||
#include "vkd3d_windows.h"
|
||||
#include "vkd3d_spinlock.h"
|
||||
#include <stdint.h>
|
||||
#include "vkd3d_common.h"
|
||||
|
||||
#ifdef VKD3D_ENABLE_PROFILING
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
void vkd3d_init_profiling(void);
|
||||
bool vkd3d_uses_profiling(void);
|
||||
unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch);
|
||||
void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint64_t end_ticks, unsigned int iteration_count);
|
||||
|
||||
static inline uint64_t vkd3d_profiling_get_tick_count(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER li, lf;
|
||||
uint64_t whole, part;
|
||||
QueryPerformanceCounter(&li);
|
||||
QueryPerformanceFrequency(&lf);
|
||||
whole = (li.QuadPart / lf.QuadPart) * 1000000000;
|
||||
part = ((li.QuadPart % lf.QuadPart) * 1000000000) / lf.QuadPart;
|
||||
return whole + part;
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return ts.tv_sec * 1000000000ll + ts.tv_nsec;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define VKD3D_REGION_DECL(name) \
|
||||
static uint32_t _vkd3d_region_latch_##name; \
|
||||
static spinlock_t _vkd3d_region_lock_##name; \
|
||||
|
@ -65,12 +41,12 @@ static inline uint64_t vkd3d_profiling_get_tick_count(void)
|
|||
do { \
|
||||
if (!(_vkd3d_region_index_##name = vkd3d_atomic_uint32_load_explicit(&_vkd3d_region_latch_##name, vkd3d_memory_order_acquire))) \
|
||||
_vkd3d_region_index_##name = vkd3d_profiling_register_region(#name, &_vkd3d_region_lock_##name, &_vkd3d_region_latch_##name); \
|
||||
_vkd3d_region_begin_tick_##name = vkd3d_profiling_get_tick_count(); \
|
||||
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ticks(); \
|
||||
} while(0)
|
||||
|
||||
#define VKD3D_REGION_END_ITERATIONS(name, iter) \
|
||||
do { \
|
||||
_vkd3d_region_end_tick_##name = vkd3d_profiling_get_tick_count(); \
|
||||
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ticks(); \
|
||||
vkd3d_profiling_notify_work(_vkd3d_region_index_##name, _vkd3d_region_begin_tick_##name, _vkd3d_region_end_tick_##name, iter); \
|
||||
} while(0)
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ WCHAR *vkd3d_dup_demangled_entry_point(const char *str);
|
|||
char *vkd3d_dup_demangled_entry_point_ascii(const char *str);
|
||||
|
||||
bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b);
|
||||
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b);
|
||||
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t n, const WCHAR *b);
|
||||
|
||||
char *vkd3d_strdup(const char *str);
|
||||
|
@ -37,4 +38,45 @@ char *vkd3d_strdup_n(const char *str, size_t n);
|
|||
WCHAR *vkd3d_wstrdup(const WCHAR *str);
|
||||
WCHAR *vkd3d_wstrdup_n(const WCHAR *str, size_t n);
|
||||
|
||||
/* Returns true when the last ending_len characters of str (whose length
 * is str_len) match ending. An ending longer than str never matches. */
static inline bool vkd3d_string_ends_with_n(const char *str, size_t str_len, const char *ending, size_t ending_len)
{
    const char *tail;

    if (str_len < ending_len)
        return false;

    tail = str + (str_len - ending_len);
    return strncmp(tail, ending, ending_len) == 0;
}
|
||||
|
||||
/* Returns true when the NUL-terminated string str ends with ending.
 * Both lengths are measured with strlen(), so neither may be NULL. */
static inline bool vkd3d_string_ends_with(const char *str, const char *ending)
{
    const size_t str_len = strlen(str);
    const size_t ending_len = strlen(ending);

    return vkd3d_string_ends_with_n(str, str_len, ending, ending_len);
}
|
||||
|
||||
/* Matching strategies understood by vkd3d_string_compare(). */
enum vkd3d_string_compare_mode
{
    /* Never matches. */
    VKD3D_STRING_COMPARE_NEVER,
    /* Always matches. */
    VKD3D_STRING_COMPARE_ALWAYS,
    /* The whole string must equal the comparator. */
    VKD3D_STRING_COMPARE_EXACT,
    /* The string must begin with the comparator. */
    VKD3D_STRING_COMPARE_STARTS_WITH,
    /* The string must end with the comparator. */
    VKD3D_STRING_COMPARE_ENDS_WITH,
    /* The comparator must occur somewhere in the string. */
    VKD3D_STRING_COMPARE_CONTAINS,
};
|
||||
|
||||
static inline bool vkd3d_string_compare(enum vkd3d_string_compare_mode mode, const char *string, const char *comparator)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
default:
|
||||
case VKD3D_STRING_COMPARE_NEVER:
|
||||
return false;
|
||||
case VKD3D_STRING_COMPARE_ALWAYS:
|
||||
return true;
|
||||
case VKD3D_STRING_COMPARE_EXACT:
|
||||
return !strcmp(string, comparator);
|
||||
case VKD3D_STRING_COMPARE_STARTS_WITH:
|
||||
return !strncmp(string, comparator, strlen(comparator));
|
||||
case VKD3D_STRING_COMPARE_ENDS_WITH:
|
||||
return vkd3d_string_ends_with(string, comparator);
|
||||
case VKD3D_STRING_COMPARE_CONTAINS:
|
||||
return strstr(string, comparator) != NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif /* __VKD3D_STRING_H */
|
|
@ -51,6 +51,8 @@ typedef struct pthread_cond
|
|||
CONDITION_VARIABLE cond;
|
||||
} pthread_cond_t;
|
||||
|
||||
typedef pthread_cond_t condvar_reltime_t;
|
||||
|
||||
static DWORD WINAPI win32_thread_wrapper_routine(void *arg)
|
||||
{
|
||||
pthread_t thread = arg;
|
||||
|
@ -114,6 +116,48 @@ static inline int pthread_mutex_destroy(pthread_mutex_t *lock)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* SRWLocks distinguish between write and read unlocks, but pthread interface does not,
|
||||
* so make a trivial wrapper type instead to avoid any possible API conflicts. */
|
||||
typedef struct rwlock
|
||||
{
|
||||
SRWLOCK rwlock;
|
||||
} rwlock_t;
|
||||
|
||||
/* Initializes the wrapped SRWLOCK. Always returns 0. */
static inline int rwlock_init(rwlock_t *lock)
{
    SRWLOCK *srw = &lock->rwlock;

    InitializeSRWLock(srw);
    return 0;
}
|
||||
|
||||
/* Acquires the lock in exclusive (writer) mode. Always returns 0. */
static inline int rwlock_lock_write(rwlock_t *lock)
{
    SRWLOCK *srw = &lock->rwlock;

    AcquireSRWLockExclusive(srw);
    return 0;
}
|
||||
|
||||
/* Acquires the lock in shared (reader) mode. Always returns 0. */
static inline int rwlock_lock_read(rwlock_t *lock)
{
    SRWLOCK *srw = &lock->rwlock;

    AcquireSRWLockShared(srw);
    return 0;
}
|
||||
|
||||
/* Releases a lock held in exclusive (writer) mode. Always returns 0. */
static inline int rwlock_unlock_write(rwlock_t *lock)
{
    SRWLOCK *srw = &lock->rwlock;

    ReleaseSRWLockExclusive(srw);
    return 0;
}
|
||||
|
||||
/* Releases a lock held in shared (reader) mode. Always returns 0. */
static inline int rwlock_unlock_read(rwlock_t *lock)
{
    SRWLOCK *srw = &lock->rwlock;

    ReleaseSRWLockShared(srw);
    return 0;
}
|
||||
|
||||
/* Nothing is released here; the function only exists to mirror the
 * non-Windows interface. Always returns 0. */
static inline int rwlock_destroy(rwlock_t *lock)
{
    /* Mark the parameter as intentionally unused, like the other no-op shims in this file. */
    (void)lock;
    return 0;
}
|
||||
|
||||
static inline int pthread_cond_init(pthread_cond_t *cond, void *attr)
|
||||
{
|
||||
(void)attr;
|
||||
|
@ -145,6 +189,32 @@ static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *lock)
|
|||
return ret ? 0 : -1;
|
||||
}
|
||||
|
||||
/* On Windows condvar_reltime_t is the same type as pthread_cond_t, so
 * initialization forwards to pthread_cond_init() with no attributes. */
static inline int condvar_reltime_init(condvar_reltime_t *cond)
{
    const int rc = pthread_cond_init(cond, NULL);

    return rc;
}
|
||||
|
||||
/* Destroys the condition variable by forwarding to pthread_cond_destroy()
 * and returns its result. */
static inline int condvar_reltime_destroy(condvar_reltime_t *cond)
{
    const int rc = pthread_cond_destroy(cond);

    return rc;
}
|
||||
|
||||
/* Signals the condition variable by forwarding to pthread_cond_signal()
 * and returns its result. */
static inline int condvar_reltime_signal(condvar_reltime_t *cond)
{
    const int rc = pthread_cond_signal(cond);

    return rc;
}
|
||||
|
||||
/* Waits on cond for at most `seconds` seconds, using lock as the
 * associated SRW lock.
 * Returns 0 when the wait succeeded, 1 when it timed out
 * (ERROR_TIMEOUT) and -1 on any other failure. */
static inline int condvar_reltime_wait_timeout_seconds(condvar_reltime_t *cond, pthread_mutex_t *lock, unsigned int seconds)
{
    /* seconds * 1000 wraps in 32 bits for large inputs, which would silently
     * shorten the wait. Clamp instead, staying below INFINITE because that
     * value means "no timeout". */
    const DWORD max_timeout_ms = INFINITE - 1;
    DWORD timeout_ms;

    if (seconds > max_timeout_ms / 1000)
        timeout_ms = max_timeout_ms;
    else
        timeout_ms = seconds * 1000;

    if (SleepConditionVariableSRW(&cond->cond, &lock->lock, timeout_ms, 0))
        return 0;

    return GetLastError() == ERROR_TIMEOUT ? 1 : -1;
}
|
||||
|
||||
static inline void vkd3d_set_thread_name(const char *name)
|
||||
{
|
||||
(void)name;
|
||||
|
@ -168,10 +238,96 @@ static inline void pthread_once(pthread_once_t *once, void (*func)(void))
|
|||
}
|
||||
#else
|
||||
#include <pthread.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
|
||||
/* Names the calling thread via pthread_setname_np(); the result of that
 * call is not checked. */
static inline void vkd3d_set_thread_name(const char *name)
{
    pthread_t self = pthread_self();

    pthread_setname_np(self, name);
}
|
||||
|
||||
typedef struct rwlock
|
||||
{
|
||||
pthread_rwlock_t rwlock;
|
||||
} rwlock_t;
|
||||
|
||||
/* Initializes the lock with default attributes.
 * Returns the pthread_rwlock_init() result. */
static inline int rwlock_init(rwlock_t *lock)
{
    const int rc = pthread_rwlock_init(&lock->rwlock, NULL);

    return rc;
}
|
||||
|
||||
/* Acquires the lock for writing.
 * Returns the pthread_rwlock_wrlock() result. */
static inline int rwlock_lock_write(rwlock_t *lock)
{
    const int rc = pthread_rwlock_wrlock(&lock->rwlock);

    return rc;
}
|
||||
|
||||
/* Acquires the lock for reading.
 * Returns the pthread_rwlock_rdlock() result. */
static inline int rwlock_lock_read(rwlock_t *lock)
{
    const int rc = pthread_rwlock_rdlock(&lock->rwlock);

    return rc;
}
|
||||
|
||||
/* Releases a write lock. pthreads has a single unlock call for both
 * modes; the separate name mirrors the Win32 interface.
 * Returns the pthread_rwlock_unlock() result. */
static inline int rwlock_unlock_write(rwlock_t *lock)
{
    const int rc = pthread_rwlock_unlock(&lock->rwlock);

    return rc;
}
|
||||
|
||||
/* Releases a read lock. pthreads has a single unlock call for both
 * modes; the separate name mirrors the Win32 interface.
 * Returns the pthread_rwlock_unlock() result. */
static inline int rwlock_unlock_read(rwlock_t *lock)
{
    const int rc = pthread_rwlock_unlock(&lock->rwlock);

    return rc;
}
|
||||
|
||||
/* Destroys the lock.
 * Returns the pthread_rwlock_destroy() result. */
static inline int rwlock_destroy(rwlock_t *lock)
{
    const int rc = pthread_rwlock_destroy(&lock->rwlock);

    return rc;
}
|
||||
|
||||
/* Condition variable used with relative timeouts; wraps a pthread_cond_t. */
typedef struct condvar_reltime
{
    pthread_cond_t cond;
} condvar_reltime_t;
|
||||
|
||||
/* Initializes the condition variable with its clock attribute set to
 * CLOCK_MONOTONIC. Returns the pthread_cond_init() result; the results
 * of the attribute calls are not checked. */
static inline int condvar_reltime_init(condvar_reltime_t *cond)
{
    pthread_condattr_t monotonic_attr;
    int init_result;

    pthread_condattr_init(&monotonic_attr);
    pthread_condattr_setclock(&monotonic_attr, CLOCK_MONOTONIC);

    init_result = pthread_cond_init(&cond->cond, &monotonic_attr);

    /* The attribute object is only needed during initialization. */
    pthread_condattr_destroy(&monotonic_attr);
    return init_result;
}
|
||||
|
||||
/* Destroys the condition variable and returns the pthread_cond_destroy()
 * result. The return type is int, like the Win32 variant of this function,
 * so callers can check for errors the same way on every platform.
 * Callers that ignore the result are unaffected. */
static inline int condvar_reltime_destroy(condvar_reltime_t *cond)
{
    return pthread_cond_destroy(&cond->cond);
}
|
||||
|
||||
/* Signals the condition variable.
 * Returns the pthread_cond_signal() result. */
static inline int condvar_reltime_signal(condvar_reltime_t *cond)
{
    const int rc = pthread_cond_signal(&cond->cond);

    return rc;
}
|
||||
|
||||
/* Waits on cond for at most `seconds` seconds with lock held.
 * Returns 0 when the wait succeeded, 1 on ETIMEDOUT and -1 on any
 * other error. */
static inline int condvar_reltime_wait_timeout_seconds(condvar_reltime_t *cond, pthread_mutex_t *lock, unsigned int seconds)
{
    struct timespec deadline;

    /* pthread_cond_timedwait() takes an absolute time, so build the deadline
     * from the current CLOCK_MONOTONIC reading. */
    clock_gettime(CLOCK_MONOTONIC, &deadline);
    deadline.tv_sec += seconds;

    switch (pthread_cond_timedwait(&cond->cond, lock, &deadline))
    {
        case ETIMEDOUT:
            return 1;
        case 0:
            return 0;
        default:
            return -1;
    }
}
|
||||
|
||||
#define PTHREAD_ONCE_CALLBACK
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
#ifndef __VULKAN_PRIVATE_EXTENSIONS_H__
|
||||
#define __VULKAN_PRIVATE_EXTENSIONS_H__
|
||||
|
||||
/* Nothing here at the moment. Add hacks here! */
|
||||
|
||||
#endif
|
|
@ -0,0 +1,71 @@
|
|||
# Compiles every GLSL shader found in directory $(M) to a .spv file next to
# its source. Intended to be run from the override-shader folder as:
#   make -C /path/to/include/shader-debug M=$PWD
# Extra compiler options can be passed through GLSLC_FLAGS.

# Directory of this Makefile; added to the include path for the shaders.
INCLUDE_DIR := $(CURDIR)

VERT_SOURCES := $(wildcard $(M)/*.vert)
FRAG_SOURCES := $(wildcard $(M)/*.frag)
COMP_SOURCES := $(wildcard $(M)/*.comp)
TESC_SOURCES := $(wildcard $(M)/*.tesc)
TESE_SOURCES := $(wildcard $(M)/*.tese)
GEOM_SOURCES := $(wildcard $(M)/*.geom)
RGEN_SOURCES := $(wildcard $(M)/*.rgen)
RINT_SOURCES := $(wildcard $(M)/*.rint)
RAHIT_SOURCES := $(wildcard $(M)/*.rahit)
RCHIT_SOURCES := $(wildcard $(M)/*.rchit)
RMISS_SOURCES := $(wildcard $(M)/*.rmiss)
RCALL_SOURCES := $(wildcard $(M)/*.rcall)

SPV_OBJECTS := \
	$(VERT_SOURCES:.vert=.spv) \
	$(FRAG_SOURCES:.frag=.spv) \
	$(COMP_SOURCES:.comp=.spv) \
	$(TESC_SOURCES:.tesc=.spv) \
	$(TESE_SOURCES:.tese=.spv) \
	$(GEOM_SOURCES:.geom=.spv) \
	$(RGEN_SOURCES:.rgen=.spv) \
	$(RINT_SOURCES:.rint=.spv) \
	$(RAHIT_SOURCES:.rahit=.spv) \
	$(RCHIT_SOURCES:.rchit=.spv) \
	$(RMISS_SOURCES:.rmiss=.spv) \
	$(RCALL_SOURCES:.rcall=.spv)

%.spv: %.vert
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

# Fragment shaders are the only stage built with DEBUG_CHANNEL_HELPER_LANES.
%.spv: %.frag
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 -DDEBUG_CHANNEL_HELPER_LANES $(GLSLC_FLAGS)

%.spv: %.comp
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

%.spv: %.geom
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

%.spv: %.tesc
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

%.spv: %.tese
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

# Ray tracing stages are additionally built with --target-spv=spv1.4.
%.spv: %.rgen
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rint
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rahit
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rchit
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rmiss
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rcall
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

# First explicit (non-pattern) target, hence the default goal.
all: $(SPV_OBJECTS)

clean:
	rm -f $(SPV_OBJECTS)

# Neither target produces a file of its own name; declaring both phony keeps
# a stray file called "all" or "clean" from suppressing them.
.PHONY: all clean
|
|
@ -23,14 +23,17 @@
|
|||
#extension GL_ARB_gpu_shader_int64 : require
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#ifdef DEBUG_CHANNEL_HELPER_LANES
|
||||
#extension GL_EXT_demote_to_helper_invocation : require
|
||||
#endif
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) buffer ControlBlock
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer ControlBlock
|
||||
{
|
||||
uint message_counter;
|
||||
uint instance_counter;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) buffer RingBuffer
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer RingBuffer
|
||||
{
|
||||
uint data[];
|
||||
};
|
||||
|
@ -48,24 +51,73 @@ const uint DEBUG_CHANNEL_FMT_F32 = 2;
|
|||
const uint DEBUG_CHANNEL_FMT_HEX_ALL = DEBUG_CHANNEL_FMT_HEX * 0x55555555u;
|
||||
const uint DEBUG_CHANNEL_FMT_I32_ALL = DEBUG_CHANNEL_FMT_I32 * 0x55555555u;
|
||||
const uint DEBUG_CHANNEL_FMT_F32_ALL = DEBUG_CHANNEL_FMT_F32 * 0x55555555u;
|
||||
const uint DEBUG_CHANNEL_WORD_COOKIE = 0xdeadca70u; /* Let host fish for this cookie in device lost scenarios. */
|
||||
|
||||
uint DEBUG_CHANNEL_INSTANCE_COUNTER;
|
||||
uvec3 DEBUG_CHANNEL_ID;
|
||||
|
||||
/* Need to make sure the elected subgroup can have side effects. */
|
||||
#ifdef DEBUG_CHANNEL_HELPER_LANES
|
||||
bool DEBUG_CHANNEL_ELECT()
|
||||
{
|
||||
bool elected = false;
|
||||
if (!helperInvocationEXT())
|
||||
elected = subgroupElect();
|
||||
return elected;
|
||||
}
|
||||
#else
|
||||
bool DEBUG_CHANNEL_ELECT()
|
||||
{
|
||||
return subgroupElect();
|
||||
}
|
||||
#endif
|
||||
|
||||
void DEBUG_CHANNEL_INIT(uvec3 id)
|
||||
{
|
||||
if (!DEBUG_SHADER_RING_ACTIVE)
|
||||
return;
|
||||
DEBUG_CHANNEL_ID = id;
|
||||
uint inst;
|
||||
if (subgroupElect())
|
||||
#ifdef DEBUG_CHANNEL_HELPER_LANES
|
||||
if (!helperInvocationEXT())
|
||||
{
|
||||
/* Elect and broadcast must happen without helper lanes here.
|
||||
* We must perform the instance increment with side effects,
|
||||
* and broadcast first must pick the elected lane. */
|
||||
if (subgroupElect())
|
||||
inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
|
||||
DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
|
||||
}
|
||||
/* Helper lanes cannot write debug messages, since they cannot have side effects.
|
||||
* Leave it undefined, and we should ensure SGPR propagation either way ... */
|
||||
#else
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
|
||||
DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
|
||||
#endif
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint num_words, uint fmt)
|
||||
void DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3 id, uint inst)
|
||||
{
|
||||
if (!DEBUG_SHADER_RING_ACTIVE)
|
||||
return;
|
||||
DEBUG_CHANNEL_ID = id;
|
||||
DEBUG_CHANNEL_INSTANCE_COUNTER = inst;
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_UNLOCK_MESSAGE(RingBuffer buf, uint offset, uint num_words)
|
||||
{
|
||||
memoryBarrierBuffer();
|
||||
/* Make sure this word is made visible last. This way the ring thread can avoid reading bogus messages.
|
||||
* If the host thread observed a num_word of 0, we know a message was allocated, but we don't necessarily
|
||||
* have a complete write yet.
|
||||
* In a device lost scenario, we can try to fish for valid messages. */
|
||||
buf.data[(offset + 0) & DEBUG_SHADER_RING_MASK] = num_words | DEBUG_CHANNEL_WORD_COOKIE;
|
||||
memoryBarrierBuffer();
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint fmt)
|
||||
{
|
||||
buf.data[(offset + 0) & DEBUG_SHADER_RING_MASK] = num_words;
|
||||
buf.data[(offset + 1) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH);
|
||||
buf.data[(offset + 2) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH >> 32);
|
||||
buf.data[(offset + 3) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_INSTANCE_COUNTER;
|
||||
|
@ -87,7 +139,9 @@ void DEBUG_CHANNEL_MSG_()
|
|||
return;
|
||||
uint words = 8;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(RingBuffer(DEBUG_SHADER_RING_BDA), offset, words, 0);
|
||||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, 0);
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
|
||||
|
@ -97,8 +151,9 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 9;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
|
||||
|
@ -108,9 +163,10 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 10;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
|
||||
|
@ -120,10 +176,11 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 11;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
|
||||
buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
|
||||
|
@ -133,11 +190,12 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 12;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
|
||||
buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
|
||||
buf.data[(offset + 11) & DEBUG_SHADER_RING_MASK] = v3;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG()
|
||||
|
@ -205,4 +263,76 @@ void DEBUG_CHANNEL_MSG(float v0, float v1, float v2, float v3)
|
|||
DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0), floatBitsToUint(v1), floatBitsToUint(v2), floatBitsToUint(v3));
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1, v2);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2, uint v3)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(int v0)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1, v2);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2, int v3)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(float v0)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1, v2);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2, float v3)
|
||||
{
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
# define VK_USE_PLATFORM_WIN32_KHR
|
||||
# endif
|
||||
# include <vulkan/vulkan.h>
|
||||
# include "private/vulkan_private_extensions.h"
|
||||
#endif /* VKD3D_NO_VULKAN_H */
|
||||
|
||||
#define VKD3D_MIN_API_VERSION VK_API_VERSION_1_1
|
||||
|
@ -58,20 +59,39 @@
|
|||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
enum vkd3d_config_flags
|
||||
{
|
||||
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
|
||||
VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002,
|
||||
VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004,
|
||||
VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008,
|
||||
VKD3D_CONFIG_FLAG_DXR = 0x00000010,
|
||||
VKD3D_CONFIG_FLAG_SINGLE_QUEUE = 0x00000020,
|
||||
VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS = 0x00000040,
|
||||
VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE = 0x00000080,
|
||||
VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE = 0x00000100,
|
||||
VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE = 0x00000200,
|
||||
VKD3D_CONFIG_FLAG_UPLOAD_HVV = 0x00000400,
|
||||
};
|
||||
#define VKD3D_CONFIG_FLAG_VULKAN_DEBUG (1ull << 0)
|
||||
#define VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS (1ull << 1)
|
||||
#define VKD3D_CONFIG_FLAG_DEBUG_UTILS (1ull << 2)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV (1ull << 3)
|
||||
#define VKD3D_CONFIG_FLAG_DXR (1ull << 4)
|
||||
#define VKD3D_CONFIG_FLAG_SINGLE_QUEUE (1ull << 5)
|
||||
#define VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS (1ull << 6)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE (1ull << 7)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE (1ull << 8)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE (1ull << 9)
|
||||
#define VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV (1ull << 10)
|
||||
#define VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET (1ull << 11)
|
||||
#define VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE (1ull << 12)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED (1ull << 13)
|
||||
#define VKD3D_CONFIG_FLAG_DXR11 (1ull << 14)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION (1ull << 15)
|
||||
#define VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE (1ull << 16)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV (1ull << 17)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV (1ull << 18)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG (1ull << 19)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV (1ull << 20)
|
||||
#define VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET (1ull << 21)
|
||||
#define VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR (1ull << 22)
|
||||
#define VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS (1ull << 23)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER (1ull << 24)
|
||||
#define VKD3D_CONFIG_FLAG_BREADCRUMBS (1ull << 25)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY (1ull << 26)
|
||||
#define VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC (1ull << 27)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
|
||||
#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
|
||||
#define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16 (1ull << 31)
|
||||
#define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32)
|
||||
|
||||
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);
|
||||
|
||||
|
|
|
@ -26,11 +26,11 @@ cpp_quote("#ifndef _D3D12_CONSTANTS")
|
|||
cpp_quote("#define _D3D12_CONSTANTS")
|
||||
|
||||
cpp_quote("#ifndef D3D12_ERROR_ADAPTER_NOT_FOUND")
|
||||
cpp_quote("#define D3D12_ERROR_ADAPTER_NOT_FOUND 0x887e0001")
|
||||
cpp_quote("#define D3D12_ERROR_ADAPTER_NOT_FOUND ((HRESULT)0x887e0001)")
|
||||
cpp_quote("#endif")
|
||||
|
||||
cpp_quote("#ifndef D3D12_ERROR_DRIVER_VERSION_MISMATCH")
|
||||
cpp_quote("#define D3D12_ERROR_DRIVER_VERSION_MISMATCH 0x887e0002")
|
||||
cpp_quote("#define D3D12_ERROR_DRIVER_VERSION_MISMATCH ((HRESULT)0x887e0002)")
|
||||
cpp_quote("#endif")
|
||||
|
||||
const UINT D3D12_CS_TGSM_REGISTER_COUNT = 8192;
|
||||
|
@ -285,6 +285,12 @@ typedef enum D3D12_WRITEBUFFERIMMEDIATE_MODE
|
|||
D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT = 0x2,
|
||||
} D3D12_WRITEBUFFERIMMEDIATE_MODE;
|
||||
|
||||
typedef enum D3D12_WAVE_MMA_TIER
|
||||
{
|
||||
D3D12_WAVE_MMA_TIER_NOT_SUPPORTED = 0,
|
||||
D3D12_WAVE_MMA_TIER_1_0 = 10,
|
||||
} D3D12_WAVE_MMA_TIER;
|
||||
|
||||
interface ID3D12Fence;
|
||||
interface ID3D12RootSignature;
|
||||
interface ID3D12Heap;
|
||||
|
@ -453,6 +459,32 @@ typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS7
|
|||
D3D12_SAMPLER_FEEDBACK_TIER SamplerFeedbackTier;
|
||||
} D3D12_FEATURE_DATA_D3D12_OPTIONS7;
|
||||
|
||||
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS8
|
||||
{
|
||||
BOOL UnalignedBlockTexturesSupported;
|
||||
} D3D12_FEATURE_DATA_D3D12_OPTIONS8;
|
||||
|
||||
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS9
|
||||
{
|
||||
BOOL MeshShaderPipelineStatsSupported;
|
||||
BOOL MeshShaderSupportsFullRangeRenderTargetArrayIndex;
|
||||
BOOL AtomicInt64OnTypedResourceSupported;
|
||||
BOOL AtomicInt64OnGroupSharedSupported;
|
||||
BOOL DerivativesInMeshAndAmplificationShadersSupported;
|
||||
D3D12_WAVE_MMA_TIER WaveMMATier;
|
||||
} D3D12_FEATURE_DATA_D3D12_OPTIONS9;
|
||||
|
||||
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS10
|
||||
{
|
||||
BOOL VariableRateShadingSumCombinerSupported;
|
||||
BOOL MeshShaderPerPrimitiveShadingRateSupported;
|
||||
} D3D12_FEATURE_DATA_D3D12_OPTIONS10;
|
||||
|
||||
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS11
|
||||
{
|
||||
BOOL AtomicInt64OnDescriptorHeapResourceSupported;
|
||||
} D3D12_FEATURE_DATA_D3D12_OPTIONS11;
|
||||
|
||||
typedef struct D3D12_FEATURE_DATA_FORMAT_SUPPORT
|
||||
{
|
||||
DXGI_FORMAT Format;
|
||||
|
@ -1104,6 +1136,8 @@ typedef enum D3D12_ROOT_SIGNATURE_FLAGS
|
|||
D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE = 0x80,
|
||||
D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS = 0x100,
|
||||
D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS = 0x200,
|
||||
D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED = 0x400,
|
||||
D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED = 0x800,
|
||||
} D3D12_ROOT_SIGNATURE_FLAGS;
|
||||
cpp_quote("DEFINE_ENUM_FLAG_OPERATORS(D3D12_ROOT_SIGNATURE_FLAGS);")
|
||||
|
||||
|
@ -1991,6 +2025,10 @@ typedef enum D3D12_FEATURE
|
|||
D3D12_FEATURE_D3D12_OPTIONS7 = 32,
|
||||
D3D12_FEATURE_PROTECTED_RESOURCE_SESSION_TYPE_COUNT = 33,
|
||||
D3D12_FEATURE_PROTECTED_RESOURCE_SESSION_TYPES = 34,
|
||||
D3D12_FEATURE_D3D12_OPTIONS8 = 36,
|
||||
D3D12_FEATURE_D3D12_OPTIONS9 = 37,
|
||||
D3D12_FEATURE_D3D12_OPTIONS10 = 39,
|
||||
D3D12_FEATURE_D3D12_OPTIONS11 = 40,
|
||||
} D3D12_FEATURE;
|
||||
|
||||
typedef struct D3D12_MEMCPY_DEST
|
||||
|
@ -2120,6 +2158,15 @@ interface ID3D12Resource1 : ID3D12Resource
|
|||
{
|
||||
HRESULT GetProtectedResourceSession(REFIID riid, void **protected_session);
|
||||
}
|
||||
[
|
||||
uuid(be36ec3b-ea85-4aeb-a45a-e9d76404a495),
|
||||
object,
|
||||
local,
|
||||
pointer_default(unique)
|
||||
]
|
||||
interface ID3D12Resource2 : ID3D12Resource1 {
|
||||
D3D12_RESOURCE_DESC1 GetDesc1();
|
||||
}
|
||||
|
||||
[
|
||||
uuid(7116d91c-e7e4-47ce-b8c6-ec8168f437e5),
|
||||
|
@ -3275,6 +3322,45 @@ typedef struct D3D12_DISPATCH_MESH_ARGUMENTS
|
|||
UINT ThreadGroupCountZ;
|
||||
} D3D12_DISPATCH_MESH_ARGUMENTS;
|
||||
|
||||
typedef enum D3D12_SHADER_CACHE_MODE
|
||||
{
|
||||
D3D12_SHADER_CACHE_MODE_MEMORY = 0,
|
||||
D3D12_SHADER_CACHE_MODE_DISK = 1,
|
||||
} D3D12_SHADER_CACHE_MODE;
|
||||
|
||||
typedef enum D3D12_SHADER_CACHE_FLAGS
|
||||
{
|
||||
D3D12_SHADER_CACHE_FLAG_NONE = 0,
|
||||
D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED = 0x1,
|
||||
D3D12_SHADER_CACHE_FLAG_USE_WORKING_DIR = 0x2,
|
||||
} D3D12_SHADER_CACHE_FLAGS;
|
||||
|
||||
typedef struct D3D12_SHADER_CACHE_SESSION_DESC
|
||||
{
|
||||
GUID Identifier;
|
||||
D3D12_SHADER_CACHE_MODE Mode;
|
||||
D3D12_SHADER_CACHE_FLAGS Flags;
|
||||
UINT MaximumInMemoryCacheSizeBytes;
|
||||
UINT MaximumInMemoryCacheEntries;
|
||||
UINT MaximumValueFileSizeBytes;
|
||||
UINT64 Version;
|
||||
} D3D12_SHADER_CACHE_SESSION_DESC;
|
||||
|
||||
typedef enum D3D12_SHADER_CACHE_KIND_FLAGS
|
||||
{
|
||||
D3D12_SHADER_CACHE_KIND_FLAG_IMPLICIT_D3D_CACHE_FOR_DRIVER = 0x1,
|
||||
D3D12_SHADER_CACHE_KIND_FLAG_IMPLICIT_D3D_CONVERSIONS = 0x2,
|
||||
D3D12_SHADER_CACHE_KIND_FLAG_IMPLICIT_DRIVER_MANAGED = 0x4,
|
||||
D3D12_SHADER_CACHE_KIND_FLAG_APPLICATION_MANAGED = 0x8,
|
||||
} D3D12_SHADER_CACHE_KIND_FLAGS;
|
||||
|
||||
typedef enum D3D12_SHADER_CACHE_CONTROL_FLAGS
|
||||
{
|
||||
D3D12_SHADER_CACHE_CONTROL_FLAG_DISABLE = 0x1,
|
||||
D3D12_SHADER_CACHE_CONTROL_FLAG_ENABLE = 0x2,
|
||||
D3D12_SHADER_CACHE_CONTROL_FLAG_CLEAR = 0x4,
|
||||
} D3D12_SHADER_CACHE_CONTROL_FLAGS;
|
||||
|
||||
[
|
||||
uuid(dbb84c27-36ce-4fc9-b801-f048c46ac570),
|
||||
object,
|
||||
|
@ -3518,6 +3604,17 @@ interface ID3D12GraphicsCommandList5 : ID3D12GraphicsCommandList4
|
|||
void RSSetShadingRateImage(ID3D12Resource *image);
|
||||
}
|
||||
|
||||
[
|
||||
uuid(c3827890-e548-4cfa-96cf-5689a9370f80),
|
||||
object,
|
||||
local,
|
||||
pointer_default(unique)
|
||||
]
|
||||
interface ID3D12GraphicsCommandList6 : ID3D12GraphicsCommandList5
|
||||
{
|
||||
void DispatchMesh(UINT x, UINT y, UINT z);
|
||||
}
|
||||
|
||||
typedef enum D3D12_TILE_RANGE_FLAGS
|
||||
{
|
||||
D3D12_TILE_RANGE_FLAG_NONE = 0x0,
|
||||
|
@ -3547,8 +3644,8 @@ interface ID3D12CommandQueue : ID3D12Pageable
|
|||
ID3D12Heap *heap,
|
||||
UINT range_count,
|
||||
const D3D12_TILE_RANGE_FLAGS *range_flags,
|
||||
UINT *heap_range_offsets,
|
||||
UINT *range_tile_counts,
|
||||
const UINT *heap_range_offsets,
|
||||
const UINT *range_tile_counts,
|
||||
D3D12_TILE_MAPPING_FLAGS flags);
|
||||
|
||||
void CopyTileMappings(ID3D12Resource *dst_resource,
|
||||
|
@ -4005,6 +4102,67 @@ interface ID3D12Device6 : ID3D12Device5
|
|||
D3D12_MEASUREMENTS_ACTION action, HANDLE event, BOOL further_measurements);
|
||||
}
|
||||
|
||||
[
|
||||
uuid(5c014b53-68a1-4b9b-8bd1-dd6046b9358b),
|
||||
object,
|
||||
local,
|
||||
pointer_default(unique)
|
||||
]
|
||||
interface ID3D12Device7 : ID3D12Device6
|
||||
{
|
||||
HRESULT AddToStateObject(const D3D12_STATE_OBJECT_DESC *addition,
|
||||
ID3D12StateObject *state_object, REFIID riid, void **new_state_object);
|
||||
|
||||
HRESULT CreateProtectedResourceSession1(
|
||||
const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc,
|
||||
REFIID riid, void **session);
|
||||
}
|
||||
[
|
||||
uuid(9218e6bb-f944-4f7e-a75c-b1b2c7b701f3),
|
||||
object,
|
||||
local,
|
||||
pointer_default(unique)
|
||||
]
|
||||
interface ID3D12Device8 : ID3D12Device7
|
||||
{
|
||||
D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo2(UINT visible_mask,
|
||||
UINT resource_desc_count, const D3D12_RESOURCE_DESC1 *resource_descs,
|
||||
D3D12_RESOURCE_ALLOCATION_INFO1 *resource_allocation_infos);
|
||||
|
||||
HRESULT CreateCommittedResource2(const D3D12_HEAP_PROPERTIES *heap_properties,
|
||||
D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *resource_desc,
|
||||
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
|
||||
ID3D12ProtectedResourceSession *protected_session, REFIID riid, void **resource);
|
||||
|
||||
HRESULT CreatePlacedResource1(ID3D12Heap *heap, UINT64 heap_offset,
|
||||
const D3D12_RESOURCE_DESC1 *resource_desc, D3D12_RESOURCE_STATES initial_state,
|
||||
const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID riid, void **resource);
|
||||
|
||||
void CreateSamplerFeedbackUnorderedAccessView(ID3D12Resource *target_resource,
|
||||
ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor);
|
||||
|
||||
void GetCopyableFootprints1(const D3D12_RESOURCE_DESC1 *resource_desc,
|
||||
UINT first_sub_resource, UINT sub_resource_count, UINT64 base_offset,
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_count,
|
||||
UINT64 *row_size, UINT64 *total_bytes);
|
||||
}
|
||||
[
|
||||
uuid(4c80e962-f032-4f60-bc9e-ebc2cfa1d83c),
|
||||
object,
|
||||
local,
|
||||
pointer_default(unique)
|
||||
]
|
||||
interface ID3D12Device9 : ID3D12Device8
|
||||
{
|
||||
HRESULT CreateShaderCacheSession(const D3D12_SHADER_CACHE_SESSION_DESC *desc,
|
||||
REFIID riid, void **session);
|
||||
|
||||
HRESULT ShaderCacheControl(D3D12_SHADER_CACHE_KIND_FLAGS kinds,
|
||||
D3D12_SHADER_CACHE_CONTROL_FLAGS control);
|
||||
|
||||
HRESULT CreateCommandQueue1(const D3D12_COMMAND_QUEUE_DESC *desc,
|
||||
REFIID creator_id, REFIID riid, void **command_queue);
|
||||
}
|
||||
[
|
||||
uuid(34ab647b-3cc8-46ac-841b-c0965645c046),
|
||||
object,
|
||||
|
|
|
@ -54,15 +54,22 @@ enum vkd3d_shader_visibility
|
|||
|
||||
typedef uint64_t vkd3d_shader_hash_t;
|
||||
|
||||
enum vkd3d_shader_meta_flags
|
||||
{
|
||||
VKD3D_SHADER_META_FLAG_REPLACED = 1 << 0,
|
||||
VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE = 1 << 1,
|
||||
VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS = 1 << 2,
|
||||
};
|
||||
|
||||
struct vkd3d_shader_meta
|
||||
{
|
||||
vkd3d_shader_hash_t hash;
|
||||
unsigned int cs_workgroup_size[3]; /* Only contains valid data if uses_subgroup_size is true. */
|
||||
unsigned int patch_vertex_count; /* Relevant for HS. May be 0, in which case the patch vertex count is not known. */
|
||||
bool replaced;
|
||||
bool uses_subgroup_size;
|
||||
bool uses_native_16bit_operations;
|
||||
unsigned int cs_required_wave_size; /* If non-zero, force a specific CS subgroup size. */
|
||||
uint32_t flags; /* vkd3d_shader_meta_flags */
|
||||
};
|
||||
STATIC_ASSERT(sizeof(struct vkd3d_shader_meta) == 32);
|
||||
|
||||
struct vkd3d_shader_code
|
||||
{
|
||||
|
@ -71,6 +78,8 @@ struct vkd3d_shader_code
|
|||
struct vkd3d_shader_meta meta;
|
||||
};
|
||||
|
||||
vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader);
|
||||
|
||||
enum vkd3d_shader_descriptor_type
|
||||
{
|
||||
VKD3D_SHADER_DESCRIPTOR_TYPE_UNKNOWN,
|
||||
|
@ -232,6 +241,7 @@ struct vkd3d_shader_root_constant
|
|||
struct vkd3d_shader_root_descriptor
|
||||
{
|
||||
struct vkd3d_shader_resource_binding *binding;
|
||||
uint32_t raw_va_root_descriptor_index;
|
||||
};
|
||||
|
||||
struct vkd3d_shader_root_parameter
|
||||
|
@ -287,7 +297,21 @@ enum vkd3d_shader_target_extension
|
|||
VKD3D_SHADER_TARGET_EXTENSION_NONE,
|
||||
|
||||
VKD3D_SHADER_TARGET_EXTENSION_SPV_EXT_DEMOTE_TO_HELPER_INVOCATION,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_READ_STORAGE_IMAGE_WITHOUT_FORMAT
|
||||
VKD3D_SHADER_TARGET_EXTENSION_READ_STORAGE_IMAGE_WITHOUT_FORMAT,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_RAY_TRACING_PRIMITIVE_CULLING,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT,
|
||||
|
||||
/* When using scalar block layout with a vec3 array on a byte address buffer,
|
||||
* there is diverging behavior across hardware.
|
||||
* On AMD, robustness is checked per component, which means we can implement ByteAddressBuffer
|
||||
* without further hackery. On NVIDIA, robustness does not seem to work this way, so it's either
|
||||
* all in range, or all out of range. We can implement structured buffer vectorization of vec3,
|
||||
* but not byte address buffer. */
|
||||
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_COUNT,
|
||||
};
|
||||
|
||||
enum vkd3d_shader_quirk
|
||||
|
@ -305,6 +329,10 @@ enum vkd3d_shader_quirk
|
|||
/* After every write to group shared memory, force a memory barrier.
|
||||
* This works around buggy games which forget to use barrier(). */
|
||||
VKD3D_SHADER_QUIRK_FORCE_TGSM_BARRIERS = (1 << 1),
|
||||
|
||||
/* For Position builtins in Output storage class, emit Invariant decoration.
|
||||
* Normally, games have to emit Precise math for position, but if they forget ... */
|
||||
VKD3D_SHADER_QUIRK_INVARIANT_POSITION = (1 << 2),
|
||||
};
|
||||
|
||||
struct vkd3d_shader_quirk_hash
|
||||
|
@ -318,6 +346,10 @@ struct vkd3d_shader_quirk_info
|
|||
const struct vkd3d_shader_quirk_hash *hashes;
|
||||
unsigned int num_hashes;
|
||||
uint32_t default_quirks;
|
||||
|
||||
/* Quirks which are ORed in with the other masks (including default_quirks).
|
||||
* Used mostly for additional overrides from VKD3D_CONFIG. */
|
||||
uint32_t global_quirks;
|
||||
};
|
||||
|
||||
struct vkd3d_shader_compile_arguments
|
||||
|
@ -628,6 +660,7 @@ struct vkd3d_shader_scan_info
|
|||
bool has_side_effects;
|
||||
bool needs_late_zs;
|
||||
bool discards;
|
||||
bool has_uav_counter;
|
||||
unsigned int patch_vertex_count;
|
||||
};
|
||||
|
||||
|
@ -721,7 +754,11 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
|
|||
void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *code);
|
||||
|
||||
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature,
|
||||
vkd3d_shader_hash_t *compatibility_hash);
|
||||
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
|
||||
struct vkd3d_versioned_root_signature_desc *desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash);
|
||||
void vkd3d_shader_free_root_signature(struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
|
||||
/* FIXME: Add support for returning error messages (ID3DBlob). */
|
||||
|
@ -736,6 +773,8 @@ int vkd3d_shader_scan_dxbc(const struct vkd3d_shader_code *dxbc,
|
|||
|
||||
int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_shader_signature *signature);
|
||||
int vkd3d_shader_parse_output_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_shader_signature *signature);
|
||||
struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element(
|
||||
const struct vkd3d_shader_signature *signature, const char *semantic_name,
|
||||
unsigned int semantic_index, unsigned int stream_index);
|
||||
|
@ -745,19 +784,65 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
|
|||
struct vkd3d_shader_library_entry_point
|
||||
{
|
||||
unsigned int identifier;
|
||||
VkShaderStageFlagBits stage;
|
||||
WCHAR *mangled_entry_point;
|
||||
WCHAR *plain_entry_point;
|
||||
char *real_entry_point;
|
||||
VkShaderStageFlagBits stage;
|
||||
};
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points(
|
||||
enum vkd3d_shader_subobject_kind
|
||||
{
|
||||
/* Matches DXIL for simplicity. */
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_STATE_OBJECT_CONFIG = 0,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE = 1,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE = 2,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION = 8,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG = 9,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG = 10,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP = 11,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1 = 12,
|
||||
};
|
||||
|
||||
struct vkd3d_shader_library_subobject
|
||||
{
|
||||
enum vkd3d_shader_subobject_kind kind;
|
||||
unsigned int dxil_identifier;
|
||||
|
||||
/* All const pointers here point directly to the DXBC blob,
|
||||
* so they do not need to be freed.
|
||||
* Fortunately for us, the C strings are zero-terminated in the blob itself. */
|
||||
|
||||
/* In the blob, ASCII is used as identifier, where API uses wide strings, sigh ... */
|
||||
const char *name;
|
||||
|
||||
union
|
||||
{
|
||||
D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config;
|
||||
D3D12_RAYTRACING_SHADER_CONFIG shader_config;
|
||||
D3D12_STATE_OBJECT_CONFIG object_config;
|
||||
|
||||
/* Duped strings because API wants wide strings for no good reason. */
|
||||
D3D12_HIT_GROUP_DESC hit_group;
|
||||
D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION association;
|
||||
|
||||
struct
|
||||
{
|
||||
const void *data;
|
||||
size_t size;
|
||||
} payload;
|
||||
} data;
|
||||
};
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
|
||||
const D3D12_DXIL_LIBRARY_DESC *library_desc,
|
||||
unsigned int identifier,
|
||||
struct vkd3d_shader_library_entry_point **entry_points,
|
||||
size_t *entry_point_size, size_t *entry_point_count);
|
||||
size_t *entry_point_size, size_t *entry_point_count,
|
||||
struct vkd3d_shader_library_subobject **subobjects,
|
||||
size_t *subobjects_size, size_t *subobjects_count);
|
||||
|
||||
void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_entry_point *entry_points, size_t count);
|
||||
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count);
|
||||
|
||||
int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
||||
const char *export,
|
||||
|
@ -766,6 +851,11 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
const struct vkd3d_shader_interface_local_info *shader_interface_local_info,
|
||||
const struct vkd3d_shader_compile_arguments *compiler_args);
|
||||
|
||||
uint32_t vkd3d_shader_compile_arguments_select_quirks(
|
||||
const struct vkd3d_shader_compile_arguments *args, vkd3d_shader_hash_t hash);
|
||||
|
||||
uint64_t vkd3d_shader_get_revision(void);
|
||||
|
||||
#endif /* VKD3D_SHADER_NO_PROTOTYPES */
|
||||
|
||||
/*
|
||||
|
@ -778,7 +868,8 @@ typedef int (*PFN_vkd3d_shader_compile_dxbc)(const struct vkd3d_shader_code *dxb
|
|||
typedef void (*PFN_vkd3d_shader_free_shader_code)(struct vkd3d_shader_code *code);
|
||||
|
||||
typedef int (*PFN_vkd3d_shader_parse_root_signature)(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature,
|
||||
vkd3d_shader_hash_t *compatibility_hash);
|
||||
typedef void (*PFN_vkd3d_shader_free_root_signature)(struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
|
||||
typedef int (*PFN_vkd3d_shader_serialize_root_signature)(
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
|
||||
#define VK_USE_PLATFORM_WIN32_KHR
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "private/vulkan_private_extensions.h"
|
||||
|
||||
#ifdef __MINGW32__
|
||||
# define static
|
||||
|
|
|
@ -88,6 +88,9 @@ typedef void *HANDLE;
|
|||
|
||||
typedef const WCHAR* LPCWSTR;
|
||||
|
||||
#define _fseeki64(a, b, c) fseeko64(a, b, c)
|
||||
#define _ftelli64(a) ftello64(a)
|
||||
|
||||
/* GUID */
|
||||
# ifdef __WIDL__
|
||||
typedef struct
|
||||
|
|
|
@ -3,9 +3,9 @@ LIBRARY d3d12.dll
|
|||
EXPORTS
|
||||
D3D12CreateDevice @101
|
||||
D3D12GetDebugInterface @102
|
||||
D3D12CreateRootSignatureDeserializer @107
|
||||
D3D12CreateVersionedRootSignatureDeserializer @108
|
||||
D3D12CreateRootSignatureDeserializer
|
||||
D3D12CreateVersionedRootSignatureDeserializer
|
||||
|
||||
D3D12EnableExperimentalFeatures @110
|
||||
D3D12SerializeRootSignature @115
|
||||
D3D12SerializeVersionedRootSignature @116
|
||||
D3D12EnableExperimentalFeatures
|
||||
D3D12SerializeRootSignature
|
||||
D3D12SerializeVersionedRootSignature
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "vkd3d_debug.h"
|
||||
#include "vkd3d_threads.h"
|
||||
|
||||
#include "vkd3d_platform.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
|
@ -58,13 +60,13 @@ static FILE *vkd3d_log_file;
|
|||
|
||||
static void vkd3d_dbg_init_once(void)
|
||||
{
|
||||
const char *vkd3d_debug;
|
||||
char vkd3d_debug[VKD3D_PATH_MAX];
|
||||
unsigned int channel, i;
|
||||
|
||||
for (channel = 0; channel < VKD3D_DBG_CHANNEL_COUNT; channel++)
|
||||
{
|
||||
if (!(vkd3d_debug = getenv(env_for_channel[channel])))
|
||||
vkd3d_debug = "";
|
||||
if (!vkd3d_get_env_var(env_for_channel[channel], vkd3d_debug, sizeof(vkd3d_debug)))
|
||||
strncpy(vkd3d_debug, "", VKD3D_PATH_MAX);
|
||||
|
||||
for (i = 1; i < ARRAY_SIZE(debug_level_names); ++i)
|
||||
if (!strcmp(debug_level_names[i], vkd3d_debug))
|
||||
|
@ -75,7 +77,7 @@ static void vkd3d_dbg_init_once(void)
|
|||
vkd3d_dbg_level[channel] = VKD3D_DBG_LEVEL_FIXME;
|
||||
}
|
||||
|
||||
if ((vkd3d_debug = getenv("VKD3D_LOG_FILE")))
|
||||
if (vkd3d_get_env_var("VKD3D_LOG_FILE", vkd3d_debug, sizeof(vkd3d_debug)))
|
||||
{
|
||||
vkd3d_log_file = fopen(vkd3d_debug, "w");
|
||||
if (!vkd3d_log_file)
|
||||
|
@ -281,11 +283,11 @@ const char *debugstr_w(const WCHAR *wstr)
|
|||
|
||||
unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value)
|
||||
{
|
||||
const char *value = getenv(name);
|
||||
char value[VKD3D_PATH_MAX];
|
||||
unsigned long r;
|
||||
char *end_ptr;
|
||||
|
||||
if (value)
|
||||
if (vkd3d_get_env_var(name, value, sizeof(value)) && strlen(value) > 0)
|
||||
{
|
||||
errno = 0;
|
||||
r = strtoul(value, &end_ptr, 0);
|
||||
|
|
|
@ -0,0 +1,188 @@
|
|||
/*
|
||||
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
|
||||
#include "vkd3d_file_utils.h"
|
||||
#include "vkd3d_debug.h"
|
||||
|
||||
/* For disk cache. */
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Moves from_path to to_path, replacing to_path when it already exists.
 * Returns true when the move (or the replacement) succeeded. */
bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path)
{
#ifdef _WIN32
    DWORD status;

    if (MoveFileA(from_path, to_path))
        return true;

    /* When the plain move failed only because the destination exists,
     * fall back to ReplaceFileA and report its outcome instead. */
    status = GetLastError();
    if (status == ERROR_ALREADY_EXISTS)
        status = ReplaceFileA(to_path, from_path, NULL, 0, NULL, NULL) ? ERROR_SUCCESS : GetLastError();

    return status == ERROR_SUCCESS;
#else
    int result = rename(from_path, to_path);

    return result == 0;
#endif
}
|
||||
|
||||
bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
DWORD code = ERROR_SUCCESS;
|
||||
if (!MoveFileA(from_path, to_path))
|
||||
code = GetLastError();
|
||||
return code == ERROR_SUCCESS;
|
||||
#else
|
||||
return renameat2(AT_FDCWD, from_path, AT_FDCWD, to_path, RENAME_NOREPLACE) == 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Removes the file at path. Returns true on success. */
bool vkd3d_file_delete(const char *path)
{
#ifdef _WIN32
    if (DeleteFileA(path))
        return true;

    return GetLastError() == ERROR_SUCCESS;
#else
    int result = unlink(path);

    return result == 0;
#endif
}
|
||||
|
||||
/* Creates path for binary writing, requesting exclusive creation
 * (_O_EXCL on Windows, the "x" fopen mode elsewhere).
 * Returns the opened stream, or NULL on failure. */
FILE *vkd3d_file_open_exclusive_write(const char *path)
{
#ifdef _WIN32
    /* Borrowed from Fossilize. There does not seem to be a direct way to get
     * exclusive creation through the FILE interface, so round-trip through the
     * POSIX-style descriptor layer. The "wbx" mode mostly works, but Wine warns
     * about it even though it works, and older MSVC runtimes lack it entirely. */
    FILE *stream;
    int descriptor;

    descriptor = _open(path, _O_BINARY | _O_WRONLY | _O_CREAT | _O_EXCL | _O_TRUNC | _O_SEQUENTIAL,
            _S_IWRITE | _S_IREAD);
    if (descriptor < 0)
        return NULL;

    /* On success the stream owns the descriptor; only close it ourselves
     * when wrapping it failed. */
    stream = _fdopen(descriptor, "wb");
    if (!stream)
        _close(descriptor);

    return stream;
#else
    return fopen(path, "wbx");
#endif
}
|
||||
|
||||
void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file)
|
||||
{
|
||||
if (file->mapped)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
UnmapViewOfFile(file->mapped);
|
||||
#else
|
||||
munmap(file->mapped, file->mapped_size);
|
||||
#endif
|
||||
}
|
||||
memset(file, 0, sizeof(*file));
|
||||
}
|
||||
|
||||
bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
DWORD size_hi, size_lo;
|
||||
HANDLE file_mapping;
|
||||
HANDLE handle;
|
||||
#else
|
||||
struct stat stat_buf;
|
||||
int fd;
|
||||
#endif
|
||||
|
||||
file->mapped = NULL;
|
||||
file->mapped_size = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
|
||||
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
|
||||
INVALID_HANDLE_VALUE);
|
||||
if (handle == INVALID_HANDLE_VALUE)
|
||||
goto out;
|
||||
|
||||
size_lo = GetFileSize(handle, &size_hi);
|
||||
file->mapped_size = size_lo | (((uint64_t)size_hi) << 32);
|
||||
|
||||
file_mapping = CreateFileMappingA(handle, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (file_mapping == INVALID_HANDLE_VALUE)
|
||||
goto out;
|
||||
|
||||
file->mapped = MapViewOfFile(file_mapping, FILE_MAP_READ, 0, 0, file->mapped_size);
|
||||
CloseHandle(file_mapping);
|
||||
file_mapping = INVALID_HANDLE_VALUE;
|
||||
if (!file->mapped)
|
||||
{
|
||||
ERR("Failed to MapViewOfFile for %s.\n", path);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (handle != INVALID_HANDLE_VALUE)
|
||||
CloseHandle(handle);
|
||||
#else
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
goto out;
|
||||
|
||||
if (fstat(fd, &stat_buf) < 0)
|
||||
{
|
||||
ERR("Failed to fstat pipeline cache.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Map private to make sure we get CoW behavior in case someone clobbers
|
||||
* the cache while in flight. We need to read data directly out of the cache. */
|
||||
file->mapped = mmap(NULL, stat_buf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (file->mapped != MAP_FAILED)
|
||||
file->mapped_size = stat_buf.st_size;
|
||||
else
|
||||
goto out;
|
||||
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
#endif
|
||||
|
||||
if (!file->mapped)
|
||||
file->mapped_size = 0;
|
||||
return file->mapped != NULL;
|
||||
}
|
|
@ -4,6 +4,8 @@ vkd3d_common_src = [
|
|||
'utf8.c',
|
||||
'profiling.c',
|
||||
'string.c',
|
||||
'file_utils.c',
|
||||
'platform.c',
|
||||
]
|
||||
|
||||
vkd3d_common_lib = static_library('vkd3d_common', vkd3d_common_src, vkd3d_header_files,
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
|
||||
#include "vkd3d_platform.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
# include <dlfcn.h>
|
||||
|
@ -153,3 +156,43 @@ bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX])
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
|
||||
{
|
||||
DWORD len;
|
||||
|
||||
assert(value);
|
||||
assert(value_size > 0);
|
||||
|
||||
len = GetEnvironmentVariableA(name, value, value_size);
|
||||
if (len > 0 && len <= value_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
value[0] = '\0';
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Copies environment variable name into value (a buffer of value_size bytes),
 * truncating to fit and always NUL-terminating.
 * Returns true when the variable exists; otherwise value is set to the empty
 * string and false is returned. */
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
{
    const char *found;

    assert(value);
    assert(value_size > 0);

    found = getenv(name);
    if (found == NULL)
    {
        value[0] = '\0';
        return false;
    }

    snprintf(value, value_size, "%s", found);
    return true;
}
|
||||
|
||||
#endif
|
|
@ -21,6 +21,7 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
|
||||
#include "vkd3d_profiling.h"
|
||||
#include "vkd3d_platform.h"
|
||||
#include "vkd3d_threads.h"
|
||||
#include "vkd3d_debug.h"
|
||||
#include <stdlib.h>
|
||||
|
@ -124,8 +125,10 @@ static void vkd3d_init_profiling_path(const char *path)
|
|||
|
||||
static void vkd3d_init_profiling_once(void)
|
||||
{
|
||||
const char *path = getenv("VKD3D_PROFILE_PATH");
|
||||
if (path)
|
||||
char path[VKD3D_PATH_MAX];
|
||||
|
||||
vkd3d_get_env_var("VKD3D_PROFILE_PATH", path, sizeof(path));
|
||||
if (strlen(path) > 0)
|
||||
vkd3d_init_profiling_path(path);
|
||||
}
|
||||
|
||||
|
|
|
@ -82,6 +82,21 @@ bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b)
|
|||
return *a == *b;
|
||||
}
|
||||
|
||||
/* Compares a WCHAR string against a char string element by element.
 * Returns false if either pointer is NULL; otherwise returns true only when
 * both strings hold the same values and end at the same position. */
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b)
{
    if (!a || !b)
        return false;

    for (; *a != '\0' && *b != '\0'; a++, b++)
    {
        if (*a != *b)
            return false;
    }

    /* At least one string ended; they match only if both did. */
    return *a == *b;
}
|
||||
|
||||
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t expected_n, const WCHAR *b)
|
||||
{
|
||||
size_t n = 0;
|
||||
|
@ -123,7 +138,7 @@ WCHAR *vkd3d_dup_entry_point_n(const char *str, size_t len)
|
|||
|
||||
static bool is_valid_identifier_character(char v)
|
||||
{
|
||||
return (v >= 'a' && v <= 'z') || (v >= 'A' && v <= 'Z') || v == '_';
|
||||
return (v >= 'a' && v <= 'z') || (v >= 'A' && v <= 'Z') || v == '_' || (v >= '0' && v <= '9');
|
||||
}
|
||||
|
||||
static const char *vkd3d_manged_entry_point_scan(const char *entry, const char **out_end_entry)
|
||||
|
|
|
@ -2249,6 +2249,21 @@ static int isgn_handler(const char *data, DWORD data_size, DWORD tag, void *ctx)
|
|||
return shader_parse_signature(tag, data, data_size, is);
|
||||
}
|
||||
|
||||
/* DXBC chunk callback that parses the output signature.
 *
 * Chunks other than OSGN / OSG1 are ignored and reported as VKD3D_OK.
 * ctx is the struct vkd3d_shader_signature to fill; if it already holds
 * elements from an earlier chunk, they are freed before parsing again.
 * Returns the result of shader_parse_signature(). */
static int osgn_handler(const char *data, DWORD data_size, DWORD tag, void *ctx)
{
    struct vkd3d_shader_signature *signature = ctx;

    if (tag != TAG_OSGN && tag != TAG_OSG1)
        return VKD3D_OK;

    if (signature->elements)
    {
        FIXME("Multiple output signatures.\n");
        vkd3d_shader_free_shader_signature(signature);
    }
    return shader_parse_signature(tag, data, data_size, signature);
}
|
||||
|
||||
int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
|
||||
struct vkd3d_shader_signature *signature)
|
||||
{
|
||||
|
@ -2260,6 +2275,17 @@ int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int shader_parse_output_signature(const void *dxbc, size_t dxbc_length,
|
||||
struct vkd3d_shader_signature *signature)
|
||||
{
|
||||
int ret;
|
||||
|
||||
memset(signature, 0, sizeof(*signature));
|
||||
if ((ret = parse_dxbc(dxbc, dxbc_length, osgn_handler, signature)) < 0)
|
||||
ERR("Failed to parse output signature.\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dxil_handler(const char *data, DWORD data_size, DWORD tag, void *context)
|
||||
{
|
||||
switch (tag)
|
||||
|
@ -2729,8 +2755,9 @@ static int shader_parse_static_samplers(struct root_signature_parser_context *co
|
|||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
static int shader_parse_root_signature(const char *data, unsigned int data_size,
|
||||
struct vkd3d_versioned_root_signature_desc *desc)
|
||||
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
|
||||
struct vkd3d_versioned_root_signature_desc *desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
struct vkd3d_root_signature_desc *v_1_0 = &desc->v_1_0;
|
||||
struct root_signature_parser_context context;
|
||||
|
@ -2738,6 +2765,8 @@ static int shader_parse_root_signature(const char *data, unsigned int data_size,
|
|||
const char *ptr = data;
|
||||
int ret;
|
||||
|
||||
memset(desc, 0, sizeof(*desc));
|
||||
|
||||
context.data = data;
|
||||
context.data_size = data_size;
|
||||
|
||||
|
@ -2809,28 +2838,46 @@ static int shader_parse_root_signature(const char *data, unsigned int data_size,
|
|||
read_uint32(&ptr, &v_1_0->flags);
|
||||
TRACE("Flags %#x.\n", v_1_0->flags);
|
||||
|
||||
if (compatibility_hash)
|
||||
{
|
||||
struct vkd3d_shader_code code = { data, data_size };
|
||||
*compatibility_hash = vkd3d_shader_hash(&code);
|
||||
}
|
||||
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
static int rts0_handler(const char *data, DWORD data_size, DWORD tag, void *context)
|
||||
{
|
||||
struct vkd3d_versioned_root_signature_desc *desc = context;
|
||||
struct vkd3d_shader_code *payload = context;
|
||||
|
||||
if (tag != TAG_RTS0)
|
||||
return VKD3D_OK;
|
||||
|
||||
return shader_parse_root_signature(data, data_size, desc);
|
||||
payload->code = data;
|
||||
payload->size = data_size;
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature)
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
struct vkd3d_shader_code raw_payload;
|
||||
int ret;
|
||||
|
||||
TRACE("dxbc {%p, %zu}, root_signature %p.\n", dxbc->code, dxbc->size, root_signature);
|
||||
|
||||
memset(root_signature, 0, sizeof(*root_signature));
|
||||
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, root_signature)) < 0)
|
||||
memset(&raw_payload, 0, sizeof(raw_payload));
|
||||
|
||||
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, &raw_payload)) < 0)
|
||||
return ret;
|
||||
|
||||
if (!raw_payload.code)
|
||||
return VKD3D_ERROR;
|
||||
|
||||
if ((ret = vkd3d_shader_parse_root_signature_raw(raw_payload.code, raw_payload.size,
|
||||
root_signature, compatibility_hash)) < 0)
|
||||
{
|
||||
vkd3d_shader_free_root_signature(root_signature);
|
||||
return ret;
|
||||
|
|
|
@ -77,9 +77,26 @@ static unsigned dxil_resource_flags_from_kind(dxil_spv_resource_kind kind, bool
|
|||
}
|
||||
}
|
||||
|
||||
static bool dxil_resource_is_global_heap(const dxil_spv_d3d_binding *d3d_binding)
|
||||
{
|
||||
return d3d_binding->register_index == UINT32_MAX &&
|
||||
d3d_binding->register_space == UINT32_MAX &&
|
||||
d3d_binding->range_size == UINT32_MAX;
|
||||
}
|
||||
|
||||
static bool vkd3d_shader_resource_binding_is_global_heap(const struct vkd3d_shader_resource_binding *binding)
|
||||
{
|
||||
return binding->register_index == UINT32_MAX &&
|
||||
binding->register_space == UINT32_MAX &&
|
||||
binding->register_count == UINT32_MAX;
|
||||
}
|
||||
|
||||
static bool dxil_resource_is_in_range(const struct vkd3d_shader_resource_binding *binding,
|
||||
const dxil_spv_d3d_binding *d3d_binding)
|
||||
{
|
||||
if (vkd3d_shader_resource_binding_is_global_heap(binding) && dxil_resource_is_global_heap(d3d_binding))
|
||||
return true;
|
||||
|
||||
if (binding->register_space != d3d_binding->register_space)
|
||||
return false;
|
||||
if (d3d_binding->register_index < binding->register_index)
|
||||
|
@ -143,19 +160,28 @@ static dxil_spv_bool dxil_remap_inner(
|
|||
else if (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS)
|
||||
{
|
||||
vk_binding->bindless.use_heap = DXIL_SPV_TRUE;
|
||||
vk_binding->bindless.heap_root_offset = binding->descriptor_offset +
|
||||
d3d_binding->register_index - binding->register_index;
|
||||
vk_binding->root_constant_index = binding->descriptor_table + remap->descriptor_table_offset_words;
|
||||
vk_binding->set = binding->binding.set;
|
||||
vk_binding->binding = binding->binding.binding;
|
||||
|
||||
if (vk_binding->root_constant_index < 2 * remap->num_root_descriptors)
|
||||
if (dxil_resource_is_global_heap(d3d_binding))
|
||||
{
|
||||
ERR("Bindless push constant table offset is impossible. %u < 2 * %u\n",
|
||||
vk_binding->root_constant_index, remap->num_root_descriptors);
|
||||
return DXIL_SPV_FALSE;
|
||||
vk_binding->bindless.heap_root_offset = 0; /* No constant offset. */
|
||||
vk_binding->root_constant_index = UINT32_MAX; /* No push offset. */
|
||||
}
|
||||
else
|
||||
{
|
||||
vk_binding->bindless.heap_root_offset = binding->descriptor_offset +
|
||||
d3d_binding->register_index - binding->register_index;
|
||||
vk_binding->root_constant_index = binding->descriptor_table + remap->descriptor_table_offset_words;
|
||||
|
||||
if (vk_binding->root_constant_index < 2 * remap->num_root_descriptors)
|
||||
{
|
||||
ERR("Bindless push constant table offset is impossible. %u < 2 * %u\n",
|
||||
vk_binding->root_constant_index, remap->num_root_descriptors);
|
||||
return DXIL_SPV_FALSE;
|
||||
}
|
||||
vk_binding->root_constant_index -= 2 * remap->num_root_descriptors;
|
||||
}
|
||||
vk_binding->root_constant_index -= 2 * remap->num_root_descriptors;
|
||||
|
||||
/* Acceleration structures are mapped to SSBO uvec2[] array instead of normal heap. */
|
||||
if (d3d_binding->kind == DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE)
|
||||
|
@ -487,9 +513,10 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
dxil_spv_parsed_blob blob = NULL;
|
||||
dxil_spv_compiled_spirv compiled;
|
||||
dxil_spv_shader_stage stage;
|
||||
unsigned int i, max_size;
|
||||
unsigned int i, j, max_size;
|
||||
vkd3d_shader_hash_t hash;
|
||||
int ret = VKD3D_OK;
|
||||
uint32_t quirks;
|
||||
void *code;
|
||||
|
||||
dxil_spv_set_thread_log_callback(vkd3d_dxil_log_callback, NULL);
|
||||
|
@ -499,9 +526,10 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
spirv->meta.hash = hash;
|
||||
if (vkd3d_shader_replace(hash, &spirv->code, &spirv->size))
|
||||
{
|
||||
spirv->meta.replaced = true;
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
|
||||
return ret;
|
||||
}
|
||||
quirks = vkd3d_shader_compile_arguments_select_quirks(compiler_args, hash);
|
||||
|
||||
dxil_spv_begin_thread_allocator_context();
|
||||
|
||||
|
@ -703,6 +731,63 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT)
|
||||
{
|
||||
static const dxil_spv_option_shader_i8_dot helper =
|
||||
{ { DXIL_SPV_OPTION_SHADER_I8_DOT }, DXIL_SPV_TRUE };
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support SHADER_I8_DOT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT)
|
||||
{
|
||||
dxil_spv_option_scalar_block_layout helper =
|
||||
{ { DXIL_SPV_OPTION_SCALAR_BLOCK_LAYOUT }, DXIL_SPV_TRUE };
|
||||
|
||||
for (j = 0; j < compiler_args->target_extension_count; j++)
|
||||
{
|
||||
if (compiler_args->target_extensions[j] ==
|
||||
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS)
|
||||
{
|
||||
helper.supports_per_component_robustness = DXIL_SPV_TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support SCALAR_BLOCK_LAYOUT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR)
|
||||
{
|
||||
static const dxil_spv_option_barycentric_khr helper =
|
||||
{ { DXIL_SPV_OPTION_BARYCENTRIC_KHR }, DXIL_SPV_TRUE };
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support BARYCENTRIC_KHR.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
|
||||
{
|
||||
static const dxil_spv_option_min_precision_native_16bit helper =
|
||||
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (compiler_args->dual_source_blending)
|
||||
|
@ -749,6 +834,18 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
}
|
||||
}
|
||||
|
||||
if (quirks & VKD3D_SHADER_QUIRK_INVARIANT_POSITION)
|
||||
{
|
||||
const dxil_spv_option_invariant_position helper =
|
||||
{ { DXIL_SPV_OPTION_INVARIANT_POSITION }, DXIL_SPV_TRUE };
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support INVARIANT_POSITION.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
remap_userdata.shader_interface_info = shader_interface_info;
|
||||
remap_userdata.shader_interface_local_info = NULL;
|
||||
remap_userdata.num_root_descriptors = num_root_descriptors;
|
||||
|
@ -786,14 +883,16 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
memcpy(code, compiled.data, compiled.size);
|
||||
spirv->code = code;
|
||||
spirv->size = compiled.size;
|
||||
spirv->meta.uses_subgroup_size = dxil_spv_converter_uses_subgroup_size(converter) == DXIL_SPV_TRUE;
|
||||
if (dxil_spv_converter_uses_subgroup_size(converter) == DXIL_SPV_TRUE)
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE;
|
||||
dxil_spv_converter_get_compute_workgroup_dimensions(converter,
|
||||
&spirv->meta.cs_workgroup_size[0],
|
||||
&spirv->meta.cs_workgroup_size[1],
|
||||
&spirv->meta.cs_workgroup_size[2]);
|
||||
dxil_spv_converter_get_patch_vertex_count(converter, &spirv->meta.patch_vertex_count);
|
||||
spirv->meta.uses_native_16bit_operations = dxil_spv_converter_uses_shader_feature(converter,
|
||||
DXIL_SPV_SHADER_FEATURE_NATIVE_16BIT_OPERATIONS) == DXIL_SPV_TRUE;
|
||||
dxil_spv_converter_get_compute_required_wave_size(converter, &spirv->meta.cs_required_wave_size);
|
||||
if (dxil_spv_converter_uses_shader_feature(converter, DXIL_SPV_SHADER_FEATURE_NATIVE_16BIT_OPERATIONS) == DXIL_SPV_TRUE)
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS;
|
||||
|
||||
vkd3d_shader_dump_spirv_shader(hash, spirv);
|
||||
|
||||
|
@ -838,7 +937,7 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
{
|
||||
if (vkd3d_shader_replace_export(hash, &spirv->code, &spirv->size, demangled_export))
|
||||
{
|
||||
spirv->meta.replaced = true;
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
|
||||
vkd3d_free(demangled_export);
|
||||
return ret;
|
||||
}
|
||||
|
@ -1129,6 +1228,64 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT)
|
||||
{
|
||||
static const dxil_spv_option_shader_i8_dot helper =
|
||||
{ { DXIL_SPV_OPTION_SHADER_I8_DOT }, DXIL_SPV_TRUE };
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support SHADER_I8_DOT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT)
|
||||
{
|
||||
dxil_spv_option_scalar_block_layout helper =
|
||||
{ { DXIL_SPV_OPTION_SCALAR_BLOCK_LAYOUT }, DXIL_SPV_TRUE };
|
||||
|
||||
for (j = 0; j < compiler_args->target_extension_count; j++)
|
||||
{
|
||||
if (compiler_args->target_extensions[j] ==
|
||||
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS)
|
||||
{
|
||||
helper.supports_per_component_robustness = DXIL_SPV_TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support SCALAR_BLOCK_LAYOUT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_RAY_TRACING_PRIMITIVE_CULLING)
|
||||
{
|
||||
/* Only relevant for ray tracing pipelines. Ray query requires support for PrimitiveCulling feature,
|
||||
* and the SPIR-V capability is implicitly enabled. */
|
||||
static const dxil_spv_option_shader_ray_tracing_primitive_culling helper =
|
||||
{ { DXIL_SPV_OPTION_SHADER_RAY_TRACING_PRIMITIVE_CULLING }, DXIL_SPV_TRUE };
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support RAY_TRACING_PRIMITIVE_CULLING.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
|
||||
{
|
||||
static const dxil_spv_option_min_precision_native_16bit helper =
|
||||
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1166,9 +1323,10 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
memcpy(code, compiled.data, compiled.size);
|
||||
spirv->code = code;
|
||||
spirv->size = compiled.size;
|
||||
spirv->meta.uses_subgroup_size = dxil_spv_converter_uses_subgroup_size(converter) == DXIL_SPV_TRUE;
|
||||
spirv->meta.uses_native_16bit_operations = dxil_spv_converter_uses_shader_feature(converter,
|
||||
DXIL_SPV_SHADER_FEATURE_NATIVE_16BIT_OPERATIONS) == DXIL_SPV_TRUE;
|
||||
if (dxil_spv_converter_uses_subgroup_size(converter) == DXIL_SPV_TRUE)
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE;
|
||||
if (dxil_spv_converter_uses_shader_feature(converter, DXIL_SPV_SHADER_FEATURE_NATIVE_16BIT_OPERATIONS) == DXIL_SPV_TRUE)
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS;
|
||||
|
||||
if (demangled_export)
|
||||
vkd3d_shader_dump_spirv_shader_export(hash, spirv, demangled_export);
|
||||
|
@ -1194,6 +1352,31 @@ void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_ent
|
|||
vkd3d_free(entry_points);
|
||||
}
|
||||
|
||||
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count)
|
||||
{
|
||||
size_t i, j;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
if (subobjects[i].kind == VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION)
|
||||
{
|
||||
for (j = 0; j < subobjects[i].data.association.NumExports; j++)
|
||||
vkd3d_free((void*)subobjects[i].data.association.pExports[j]);
|
||||
vkd3d_free((void*)subobjects[i].data.association.pExports);
|
||||
vkd3d_free((void*)subobjects[i].data.association.SubobjectToAssociate);
|
||||
}
|
||||
else if (subobjects[i].kind == VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP)
|
||||
{
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.HitGroupExport);
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.AnyHitShaderImport);
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.ClosestHitShaderImport);
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.IntersectionShaderImport);
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_free(subobjects);
|
||||
}
|
||||
|
||||
static VkShaderStageFlagBits convert_stage(dxil_spv_shader_stage stage)
|
||||
{
|
||||
/* Only interested in RT entry_points. There is no way yet to use lib_6_3+ for non-RT. */
|
||||
|
@ -1229,6 +1412,7 @@ static bool vkd3d_dxil_build_entry(struct vkd3d_shader_library_entry_point *entr
|
|||
if (!entry->plain_entry_point)
|
||||
{
|
||||
vkd3d_free(entry->mangled_entry_point);
|
||||
entry->mangled_entry_point = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1237,20 +1421,95 @@ static bool vkd3d_dxil_build_entry(struct vkd3d_shader_library_entry_point *entr
|
|||
return true;
|
||||
}
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points(
|
||||
static void vkd3d_shader_dxil_copy_subobject(unsigned int identifier,
|
||||
struct vkd3d_shader_library_subobject *subobject,
|
||||
const dxil_spv_rdat_subobject *dxil_subobject)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* Reuse same enums as DXIL. */
|
||||
subobject->kind = (enum vkd3d_shader_subobject_kind)dxil_subobject->kind;
|
||||
subobject->name = dxil_subobject->subobject_name;
|
||||
subobject->dxil_identifier = identifier;
|
||||
|
||||
switch (dxil_subobject->kind)
|
||||
{
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE:
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE:
|
||||
subobject->data.payload.data = dxil_subobject->payload;
|
||||
subobject->data.payload.size = dxil_subobject->payload_size;
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG:
|
||||
/* Normalize the kind. */
|
||||
subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
|
||||
subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
|
||||
subobject->data.pipeline_config.Flags = 0;
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1:
|
||||
subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
|
||||
subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
|
||||
subobject->data.pipeline_config.Flags = dxil_subobject->args[1];
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG:
|
||||
subobject->data.shader_config.MaxPayloadSizeInBytes = dxil_subobject->args[0];
|
||||
subobject->data.shader_config.MaxAttributeSizeInBytes = dxil_subobject->args[1];
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_HIT_GROUP:
|
||||
/* Enum aliases. */
|
||||
subobject->data.hit_group.Type = (D3D12_HIT_GROUP_TYPE)dxil_subobject->hit_group_type;
|
||||
assert(dxil_subobject->num_exports == 3);
|
||||
/* Implementation simplifies a lot if we can reuse the D3D12 type here. */
|
||||
subobject->data.hit_group.HitGroupExport = vkd3d_dup_entry_point(dxil_subobject->subobject_name);
|
||||
subobject->data.hit_group.AnyHitShaderImport = dxil_subobject->exports[0] && *dxil_subobject->exports[0] != '\0' ?
|
||||
vkd3d_dup_entry_point(dxil_subobject->exports[0]) : NULL;
|
||||
subobject->data.hit_group.ClosestHitShaderImport = dxil_subobject->exports[1] && *dxil_subobject->exports[1] != '\0' ?
|
||||
vkd3d_dup_entry_point(dxil_subobject->exports[1]) : NULL;
|
||||
subobject->data.hit_group.IntersectionShaderImport = dxil_subobject->exports[2] && *dxil_subobject->exports[2] != '\0' ?
|
||||
vkd3d_dup_entry_point(dxil_subobject->exports[2]) : NULL;
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_STATE_OBJECT_CONFIG:
|
||||
subobject->data.object_config.Flags = dxil_subobject->args[0];
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION:
|
||||
assert(dxil_subobject->num_exports >= 1);
|
||||
subobject->data.association.SubobjectToAssociate = vkd3d_dup_entry_point(dxil_subobject->exports[0]);
|
||||
subobject->data.association.pExports = vkd3d_malloc((dxil_subobject->num_exports - 1) * sizeof(LPCWSTR));
|
||||
subobject->data.association.NumExports = dxil_subobject->num_exports - 1;
|
||||
for (i = 1; i < dxil_subobject->num_exports; i++)
|
||||
subobject->data.association.pExports[i - 1] = vkd3d_dup_entry_point(dxil_subobject->exports[i]);
|
||||
break;
|
||||
|
||||
default:
|
||||
FIXME("Unrecognized RDAT subobject type: %u.\n", dxil_subobject->kind);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
|
||||
const D3D12_DXIL_LIBRARY_DESC *library_desc,
|
||||
unsigned int identifier,
|
||||
struct vkd3d_shader_library_entry_point **entry_points,
|
||||
size_t *entry_point_size, size_t *entry_point_count)
|
||||
size_t *entry_point_size, size_t *entry_point_count,
|
||||
struct vkd3d_shader_library_subobject **subobjects,
|
||||
size_t *subobjects_size, size_t *subobjects_count)
|
||||
{
|
||||
struct vkd3d_shader_library_entry_point new_entry;
|
||||
struct vkd3d_shader_library_subobject *subobject;
|
||||
dxil_spv_parsed_blob blob = NULL;
|
||||
struct vkd3d_shader_code code;
|
||||
dxil_spv_rdat_subobject sub;
|
||||
dxil_spv_shader_stage stage;
|
||||
const char *mangled_entry;
|
||||
char *ascii_entry = NULL;
|
||||
vkd3d_shader_hash_t hash;
|
||||
unsigned int count, i;
|
||||
unsigned int count, i, j;
|
||||
unsigned int rdat_count;
|
||||
int ret = VKD3D_OK;
|
||||
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
|
@ -1271,6 +1530,8 @@ int vkd3d_shader_dxil_append_library_entry_points(
|
|||
goto end;
|
||||
}
|
||||
|
||||
rdat_count = dxil_spv_parsed_blob_get_num_rdat_subobjects(blob);
|
||||
|
||||
if (library_desc->NumExports)
|
||||
{
|
||||
for (i = 0; i < library_desc->NumExports; i++)
|
||||
|
@ -1280,24 +1541,44 @@ int vkd3d_shader_dxil_append_library_entry_points(
|
|||
else
|
||||
ascii_entry = vkd3d_strdup_w_utf8(library_desc->pExports[i].Name, 0);
|
||||
|
||||
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
|
||||
if (stage == DXIL_SPV_STAGE_UNKNOWN)
|
||||
/* An export can point to a subobject or an entry point. */
|
||||
for (j = 0; j < rdat_count; j++)
|
||||
{
|
||||
ret = VKD3D_ERROR_INVALID_ARGUMENT;
|
||||
goto end;
|
||||
dxil_spv_parsed_blob_get_rdat_subobject(blob, j, &sub);
|
||||
/* Subobject names are not mangled. */
|
||||
if (strcmp(sub.subobject_name, ascii_entry) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
new_entry.real_entry_point = ascii_entry;
|
||||
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
|
||||
new_entry.mangled_entry_point = NULL;
|
||||
new_entry.identifier = identifier;
|
||||
new_entry.stage = convert_stage(stage);
|
||||
ascii_entry = NULL;
|
||||
if (j < rdat_count)
|
||||
{
|
||||
vkd3d_array_reserve((void**)subobjects, subobjects_size,
|
||||
*subobjects_count + 1, sizeof(**subobjects));
|
||||
subobject = &(*subobjects)[*subobjects_count];
|
||||
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
|
||||
*subobjects_count += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
|
||||
if (stage == DXIL_SPV_STAGE_UNKNOWN)
|
||||
{
|
||||
ret = VKD3D_ERROR_INVALID_ARGUMENT;
|
||||
goto end;
|
||||
}
|
||||
|
||||
vkd3d_array_reserve((void**)entry_points, entry_point_size,
|
||||
*entry_point_count + 1, sizeof(new_entry));
|
||||
(*entry_points)[(*entry_point_count)++] = new_entry;
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
new_entry.real_entry_point = ascii_entry;
|
||||
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
|
||||
new_entry.mangled_entry_point = NULL;
|
||||
new_entry.identifier = identifier;
|
||||
new_entry.stage = convert_stage(stage);
|
||||
ascii_entry = NULL;
|
||||
|
||||
vkd3d_array_reserve((void**)entry_points, entry_point_size,
|
||||
*entry_point_count + 1, sizeof(new_entry));
|
||||
(*entry_points)[(*entry_point_count)++] = new_entry;
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1330,6 +1611,21 @@ int vkd3d_shader_dxil_append_library_entry_points(
|
|||
(*entry_points)[(*entry_point_count)++] = new_entry;
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
}
|
||||
|
||||
if (rdat_count)
|
||||
{
|
||||
/* All subobjects are also exported. */
|
||||
vkd3d_array_reserve((void**)subobjects, subobjects_size,
|
||||
*subobjects_count + rdat_count, sizeof(**subobjects));
|
||||
|
||||
for (i = 0; i < rdat_count; i++)
|
||||
{
|
||||
dxil_spv_parsed_blob_get_rdat_subobject(blob, i, &sub);
|
||||
subobject = &(*subobjects)[*subobjects_count];
|
||||
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
|
||||
*subobjects_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
end:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -20,6 +20,8 @@
|
|||
|
||||
#include "vkd3d_shader_private.h"
|
||||
|
||||
#include "vkd3d_platform.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
|
@ -81,13 +83,13 @@ err:
|
|||
bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *size)
|
||||
{
|
||||
static bool enabled = true;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
char filename[1024];
|
||||
const char *path;
|
||||
|
||||
if (!enabled)
|
||||
return false;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_OVERRIDE")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return false;
|
||||
|
@ -100,13 +102,13 @@ bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *s
|
|||
bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, size_t *size, const char *export)
|
||||
{
|
||||
static bool enabled = true;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
char filename[1024];
|
||||
const char *path;
|
||||
|
||||
if (!enabled)
|
||||
return false;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_OVERRIDE")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return false;
|
||||
|
@ -119,12 +121,12 @@ bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, si
|
|||
void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader, const char *ext)
|
||||
{
|
||||
static bool enabled = true;
|
||||
const char *path;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return;
|
||||
|
@ -136,12 +138,12 @@ void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shade
|
|||
void vkd3d_shader_dump_spirv_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader)
|
||||
{
|
||||
static bool enabled = true;
|
||||
const char *path;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return;
|
||||
|
@ -154,13 +156,13 @@ void vkd3d_shader_dump_spirv_shader_export(vkd3d_shader_hash_t hash, const struc
|
|||
const char *export)
|
||||
{
|
||||
static bool enabled = true;
|
||||
const char *path;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
char tag[1024];
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return;
|
||||
|
@ -349,7 +351,7 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
|
|||
spirv->meta.hash = hash;
|
||||
if (vkd3d_shader_replace(hash, &spirv->code, &spirv->size))
|
||||
{
|
||||
spirv->meta.replaced = true;
|
||||
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
|
@ -475,6 +477,7 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_info *
|
|||
const struct vkd3d_shader_register *reg)
|
||||
{
|
||||
scan_info->has_side_effects = true;
|
||||
scan_info->has_uav_counter = true;
|
||||
vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
|
||||
reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER);
|
||||
}
|
||||
|
@ -672,6 +675,14 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
|
|||
return shader_parse_input_signature(dxbc->code, dxbc->size, signature);
|
||||
}
|
||||
|
||||
int vkd3d_shader_parse_output_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_shader_signature *signature)
|
||||
{
|
||||
TRACE("dxbc {%p, %zu}, signature %p.\n", dxbc->code, dxbc->size, signature);
|
||||
|
||||
return shader_parse_output_signature(dxbc->code, dxbc->size, signature);
|
||||
}
|
||||
|
||||
struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element(
|
||||
const struct vkd3d_shader_signature *signature, const char *semantic_name,
|
||||
unsigned int semantic_index, unsigned int stream_index)
|
||||
|
@ -704,12 +715,36 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
|
|||
|
||||
vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader)
|
||||
{
|
||||
vkd3d_shader_hash_t h = 0xcbf29ce484222325ull;
|
||||
vkd3d_shader_hash_t h = hash_fnv1_init();
|
||||
const uint8_t *code = shader->code;
|
||||
size_t i, n;
|
||||
|
||||
for (i = 0, n = shader->size; i < n; i++)
|
||||
h = (h * 0x100000001b3ull) ^ code[i];
|
||||
h = hash_fnv1_iterate_u8(h, code[i]);
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
/* Picks the quirk mask that applies to the shader identified by shader_hash.
 *
 * Returns 0 when compile_args is NULL or carries no quirk table.
 * Otherwise the first table entry whose shader_hash matches supplies the
 * per-shader quirks; if no entry matches, default_quirks is used instead.
 * In both cases global_quirks is OR-ed into the result. */
uint32_t vkd3d_shader_compile_arguments_select_quirks(
        const struct vkd3d_shader_compile_arguments *compile_args, vkd3d_shader_hash_t shader_hash)
{
    uint32_t selected;
    unsigned int idx;

    if (!compile_args || !compile_args->quirks)
        return 0;

    /* Start from the fallback and override it with the first matching entry. */
    selected = compile_args->quirks->default_quirks;
    for (idx = 0; idx < compile_args->quirks->num_hashes; idx++)
    {
        if (compile_args->quirks->hashes[idx].shader_hash == shader_hash)
        {
            selected = compile_args->quirks->hashes[idx].quirks;
            break;
        }
    }

    return selected | compile_args->quirks->global_quirks;
}
|
||||
|
||||
/* Reports the revision number of the shader compiler.
 *
 * The number is intended to be incremented by hand whenever the shader
 * compiler changes, and may be removed in the future. It is of limited use
 * for pipeline cache invalidation, because the vkd3d-proton Git hash would
 * mostly cover that case already. */
uint64_t vkd3d_shader_get_revision(void)
{
    const uint64_t revision = 1;

    return revision;
}
|
||||
|
|
|
@ -781,6 +781,8 @@ void free_shader_desc(struct vkd3d_shader_desc *desc);
|
|||
|
||||
int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
|
||||
struct vkd3d_shader_signature *signature);
|
||||
int shader_parse_output_signature(const void *dxbc, size_t dxbc_length,
|
||||
struct vkd3d_shader_signature *signature);
|
||||
|
||||
struct vkd3d_dxbc_compiler;
|
||||
|
||||
|
@ -917,6 +919,4 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
const struct vkd3d_shader_interface_info *shader_interface_info,
|
||||
const struct vkd3d_shader_compile_arguments *compiler_args);
|
||||
|
||||
vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader);
|
||||
|
||||
#endif /* __VKD3D_SHADER_PRIVATE_H */
|
||||
|
|
|
@ -3,12 +3,12 @@ LIBRARY vkd3d-proton-utils-3.dll
|
|||
EXPORTS
|
||||
D3D12CreateDevice @101
|
||||
D3D12GetDebugInterface @102
|
||||
D3D12CreateRootSignatureDeserializer @107
|
||||
D3D12CreateVersionedRootSignatureDeserializer @108
|
||||
D3D12CreateRootSignatureDeserializer
|
||||
D3D12CreateVersionedRootSignatureDeserializer
|
||||
|
||||
D3D12EnableExperimentalFeatures @110
|
||||
D3D12SerializeRootSignature @115
|
||||
D3D12SerializeVersionedRootSignature @116
|
||||
D3D12EnableExperimentalFeatures
|
||||
D3D12SerializeRootSignature
|
||||
D3D12SerializeVersionedRootSignature
|
||||
|
||||
vkd3d_create_event
|
||||
vkd3d_wait_event
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "vkd3d_private.h"
|
||||
|
||||
#define RT_TRACE TRACE
|
||||
|
||||
void vkd3d_acceleration_structure_build_info_cleanup(
|
||||
struct vkd3d_acceleration_structure_build_info *info)
|
||||
{
|
||||
|
@ -71,21 +73,34 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
VkAccelerationStructureBuildGeometryInfoKHR *build_info;
|
||||
VkAccelerationStructureGeometryAabbsDataKHR *aabbs;
|
||||
const D3D12_RAYTRACING_GEOMETRY_DESC *geom_desc;
|
||||
bool have_triangles, have_aabbs;
|
||||
unsigned int i;
|
||||
|
||||
RT_TRACE("Converting inputs.\n");
|
||||
RT_TRACE("=====================\n");
|
||||
|
||||
build_info = &info->build_info;
|
||||
memset(build_info, 0, sizeof(*build_info));
|
||||
build_info->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
|
||||
|
||||
if (desc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
|
||||
{
|
||||
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
|
||||
RT_TRACE("Top level build.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
|
||||
RT_TRACE("Bottom level build.\n");
|
||||
}
|
||||
|
||||
build_info->flags = d3d12_build_flags_to_vk(desc->Flags);
|
||||
|
||||
if (desc->Flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
|
||||
{
|
||||
RT_TRACE("BUILD_FLAG_PERFORM_UPDATE.\n");
|
||||
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
|
||||
}
|
||||
else
|
||||
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
|
||||
|
||||
|
@ -108,9 +123,15 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
info->primitive_counts = info->primitive_counts_stack;
|
||||
info->primitive_counts[0] = desc->NumDescs;
|
||||
build_info->geometryCount = 1;
|
||||
RT_TRACE(" ArrayOfPointers: %u.\n",
|
||||
desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? 1 : 0);
|
||||
RT_TRACE(" NumDescs: %u.\n", info->primitive_counts[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
have_triangles = false;
|
||||
have_aabbs = false;
|
||||
|
||||
if (desc->NumDescs <= VKD3D_BUILD_INFO_STACK_COUNT)
|
||||
{
|
||||
memset(info->geometries, 0, sizeof(*info->geometries) * desc->NumDescs);
|
||||
|
@ -128,32 +149,57 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
for (i = 0; i < desc->NumDescs; i++)
|
||||
{
|
||||
info->geometries[i].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
|
||||
RT_TRACE(" Geom %u:\n", i);
|
||||
|
||||
if (desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
|
||||
{
|
||||
geom_desc = desc->ppGeometryDescs[i];
|
||||
RT_TRACE(" ArrayOfPointers\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
geom_desc = &desc->pGeometryDescs[i];
|
||||
RT_TRACE(" PointerToArray\n");
|
||||
}
|
||||
|
||||
info->geometries[i].flags = d3d12_geometry_flags_to_vk(geom_desc->Flags);
|
||||
RT_TRACE(" Flags = #%x\n", geom_desc->Flags);
|
||||
|
||||
switch (geom_desc->Type)
|
||||
{
|
||||
case D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES:
|
||||
/* Runtime validates this. */
|
||||
if (have_aabbs)
|
||||
{
|
||||
ERR("Cannot mix and match geometry types in a BLAS.\n");
|
||||
return false;
|
||||
}
|
||||
have_triangles = true;
|
||||
|
||||
info->geometries[i].geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
|
||||
triangles = &info->geometries[i].geometry.triangles;
|
||||
triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
|
||||
triangles->indexData.deviceAddress = geom_desc->Triangles.IndexBuffer;
|
||||
if (geom_desc->Triangles.IndexBuffer)
|
||||
if (geom_desc->Triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
|
||||
{
|
||||
if (!geom_desc->Triangles.IndexBuffer)
|
||||
WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
|
||||
|
||||
triangles->indexType =
|
||||
geom_desc->Triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
|
||||
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
|
||||
info->primitive_counts[i] = geom_desc->Triangles.IndexCount / 3;
|
||||
RT_TRACE(" Indexed : Index count = %u (%u bits)\n",
|
||||
geom_desc->Triangles.IndexCount,
|
||||
triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
|
||||
RT_TRACE(" Vertex count: %u\n", geom_desc->Triangles.VertexCount);
|
||||
RT_TRACE(" IBO VA: %"PRIx64".\n", geom_desc->Triangles.IndexBuffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
info->primitive_counts[i] = geom_desc->Triangles.VertexCount / 3;
|
||||
triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
|
||||
RT_TRACE(" Triangle list : Vertex count: %u\n", geom_desc->Triangles.VertexCount);
|
||||
}
|
||||
|
||||
triangles->maxVertex = max(1, geom_desc->Triangles.VertexCount) - 1;
|
||||
|
@ -161,21 +207,37 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->Triangles.VertexFormat);
|
||||
triangles->vertexData.deviceAddress = geom_desc->Triangles.VertexBuffer.StartAddress;
|
||||
triangles->transformData.deviceAddress = geom_desc->Triangles.Transform3x4;
|
||||
|
||||
RT_TRACE(" Transform3x4: %s\n", geom_desc->Triangles.Transform3x4 ? "on" : "off");
|
||||
RT_TRACE(" Vertex format: %s\n", debug_dxgi_format(geom_desc->Triangles.VertexFormat));
|
||||
RT_TRACE(" VBO VA: %"PRIx64"\n", geom_desc->Triangles.VertexBuffer.StartAddress);
|
||||
RT_TRACE(" Vertex stride: %"PRIu64" bytes\n", geom_desc->Triangles.VertexBuffer.StrideInBytes);
|
||||
break;
|
||||
|
||||
case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
|
||||
/* Runtime validates this. */
|
||||
if (have_triangles)
|
||||
{
|
||||
ERR("Cannot mix and match geometry types in a BLAS.\n");
|
||||
return false;
|
||||
}
|
||||
have_aabbs = true;
|
||||
|
||||
info->geometries[i].geometryType = VK_GEOMETRY_TYPE_AABBS_KHR;
|
||||
aabbs = &info->geometries[i].geometry.aabbs;
|
||||
aabbs->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR;
|
||||
aabbs->stride = geom_desc->AABBs.AABBs.StrideInBytes;
|
||||
aabbs->data.deviceAddress = geom_desc->AABBs.AABBs.StartAddress;
|
||||
info->primitive_counts[i] = geom_desc->AABBs.AABBCount;
|
||||
RT_TRACE(" AABB stride: %"PRIu64" bytes\n", geom_desc->AABBs.AABBs.StrideInBytes);
|
||||
break;
|
||||
|
||||
default:
|
||||
FIXME("Unsupported geometry type %u.\n", geom_desc->Type);
|
||||
return false;
|
||||
}
|
||||
|
||||
RT_TRACE(" Primitive count %u.\n", info->primitive_counts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,6 +251,8 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
}
|
||||
|
||||
build_info->pGeometries = info->geometries;
|
||||
|
||||
RT_TRACE("=====================\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -242,12 +306,18 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
|
|||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE;
|
||||
stride = sizeof(uint64_t);
|
||||
}
|
||||
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE &&
|
||||
list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
|
||||
{
|
||||
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR;
|
||||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE;
|
||||
stride = sizeof(uint64_t);
|
||||
}
|
||||
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
|
||||
{
|
||||
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
|
||||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE;
|
||||
stride = sizeof(uint64_t);
|
||||
FIXME("NumBottomLevelPointers will always return 0.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -278,9 +348,31 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
|
|||
|
||||
if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
|
||||
{
|
||||
/* TODO: We'll need some way to store these values for later use and copy them here instead. */
|
||||
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
|
||||
sizeof(uint64_t), 0));
|
||||
if (list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
|
||||
{
|
||||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS;
|
||||
if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
|
||||
type_index, &vk_query_pool, &vk_query_index))
|
||||
{
|
||||
ERR("Failed to allocate query.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
|
||||
|
||||
VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
|
||||
1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
|
||||
VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
|
||||
vk_query_pool, vk_query_index, 1,
|
||||
vk_buffer, offset + sizeof(uint64_t), stride,
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
|
||||
}
|
||||
else
|
||||
{
|
||||
FIXME("NumBottomLevelPointers will always return 0.\n");
|
||||
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
|
||||
sizeof(uint64_t), 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,655 @@
|
|||
/*
|
||||
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
|
||||
#include "vkd3d_private.h"
|
||||
#include "vkd3d_debug.h"
|
||||
#include "vkd3d_common.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Number of trace contexts; all of them are allocated up front.
 * This only costs host memory. */
#define MAX_COMMAND_LISTS (32 * 1024)

/* A checkpoint marker is a pointer-sized integer packing two fields:
 *
 *     marker = counter * MAX_COMMAND_LISTS + context
 *
 * so the context index is the remainder and the counter is the quotient
 * when dividing by MAX_COMMAND_LISTS. The context must therefore be smaller
 * than MAX_COMMAND_LISTS for the round trip to work.
 * With 32-bit pointers only the bits above the context field are left for
 * the counter, which is questionable, but not something we care about. */
#define NV_ENCODE_CHECKPOINT(context, counter) ((void*) ((uintptr_t)MAX_COMMAND_LISTS * (counter) + (uintptr_t)(context)))
#define NV_CHECKPOINT_CONTEXT(ptr) ((uint32_t)((uintptr_t)(ptr) % MAX_COMMAND_LISTS))
#define NV_CHECKPOINT_COUNTER(ptr) ((uint32_t)((uintptr_t)(ptr) / MAX_COMMAND_LISTS))
|
||||
|
||||
static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_command_type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER:
|
||||
return "top_marker";
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER:
|
||||
return "bottom_marker";
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH:
|
||||
return "set_shader_hash";
|
||||
case VKD3D_BREADCRUMB_COMMAND_DRAW:
|
||||
return "draw";
|
||||
case VKD3D_BREADCRUMB_COMMAND_DRAW_INDEXED:
|
||||
return "draw_indexed";
|
||||
case VKD3D_BREADCRUMB_COMMAND_DISPATCH:
|
||||
return "dispatch";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT:
|
||||
return "execute_indirect";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE:
|
||||
return "execute_indirect_template";
|
||||
case VKD3D_BREADCRUMB_COMMAND_COPY:
|
||||
return "copy";
|
||||
case VKD3D_BREADCRUMB_COMMAND_RESOLVE:
|
||||
return "resolve";
|
||||
case VKD3D_BREADCRUMB_COMMAND_WBI:
|
||||
return "wbi";
|
||||
case VKD3D_BREADCRUMB_COMMAND_RESOLVE_QUERY:
|
||||
return "resolve_query";
|
||||
case VKD3D_BREADCRUMB_COMMAND_GATHER_VIRTUAL_QUERY:
|
||||
return "gather_virtual_query";
|
||||
case VKD3D_BREADCRUMB_COMMAND_BUILD_RTAS:
|
||||
return "build_rtas";
|
||||
case VKD3D_BREADCRUMB_COMMAND_COPY_RTAS:
|
||||
return "copy_rtas";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EMIT_RTAS_POSTBUILD:
|
||||
return "emit_rtas_postbuild";
|
||||
case VKD3D_BREADCRUMB_COMMAND_TRACE_RAYS:
|
||||
return "trace_rays";
|
||||
case VKD3D_BREADCRUMB_COMMAND_BARRIER:
|
||||
return "barrier";
|
||||
case VKD3D_BREADCRUMB_COMMAND_AUX32:
|
||||
return "aux32";
|
||||
case VKD3D_BREADCRUMB_COMMAND_AUX64:
|
||||
return "aux64";
|
||||
case VKD3D_BREADCRUMB_COMMAND_VBO:
|
||||
return "vbo";
|
||||
case VKD3D_BREADCRUMB_COMMAND_IBO:
|
||||
return "ibo";
|
||||
case VKD3D_BREADCRUMB_COMMAND_ROOT_DESC:
|
||||
return "root_desc";
|
||||
case VKD3D_BREADCRUMB_COMMAND_ROOT_CONST:
|
||||
return "root_const";
|
||||
case VKD3D_BREADCRUMB_COMMAND_TAG:
|
||||
return "tag";
|
||||
|
||||
default:
|
||||
return "?";
|
||||
}
|
||||
}
|
||||
|
||||
HRESULT vkd3d_breadcrumb_tracer_init(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_RESOURCE_DESC1 resource_desc;
|
||||
VkMemoryPropertyFlags memory_props;
|
||||
HRESULT hr;
|
||||
int rc;
|
||||
|
||||
memset(tracer, 0, sizeof(*tracer));
|
||||
|
||||
if ((rc = pthread_mutex_init(&tracer->lock, NULL)))
|
||||
return hresult_from_errno(rc);
|
||||
|
||||
if (device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
INFO("Enabling AMD_buffer_marker breadcrumbs.\n");
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Width = MAX_COMMAND_LISTS * sizeof(struct vkd3d_breadcrumb_counter);
|
||||
resource_desc.Height = 1;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
resource_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
resource_desc.SampleDesc.Count = 1;
|
||||
resource_desc.SampleDesc.Quality = 0;
|
||||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||
|
||||
if (FAILED(hr = vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
|
||||
&resource_desc, &tracer->host_buffer)))
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
|
||||
memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||
|
||||
/* If device faults in the middle of execution we will never get the chance to flush device caches.
|
||||
* Make sure that breadcrumbs are always written directly out.
|
||||
* This is the primary usecase for the device coherent/uncached extension after all ...
|
||||
* Don't make this a hard requirement since buffer markers might be implicitly coherent on some
|
||||
* implementations (Turnip?). */
|
||||
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
|
||||
{
|
||||
memory_props |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
|
||||
VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
|
||||
}
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_buffer_memory(device, tracer->host_buffer,
|
||||
memory_props, &tracer->host_buffer_memory)))
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, tracer->host_buffer_memory.vk_memory,
|
||||
0, VK_WHOLE_SIZE,
|
||||
0, (void**)&tracer->mapped)) != VK_SUCCESS)
|
||||
{
|
||||
hr = E_OUTOFMEMORY;
|
||||
goto err;
|
||||
}
|
||||
|
||||
memset(tracer->mapped, 0, sizeof(*tracer->mapped) * MAX_COMMAND_LISTS);
|
||||
}
|
||||
else if (device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
INFO("Enabling NV_device_diagnostics_checkpoints breadcrumbs.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
ERR("Breadcrumbs require support for either AMD_buffer_marker or NV_device_diagnostics_checkpoints.\n");
|
||||
hr = E_FAIL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
tracer->trace_contexts = vkd3d_calloc(MAX_COMMAND_LISTS, sizeof(*tracer->trace_contexts));
|
||||
tracer->trace_context_index = 0;
|
||||
|
||||
return S_OK;
|
||||
|
||||
err:
|
||||
vkd3d_breadcrumb_tracer_cleanup(tracer, device);
|
||||
return hr;
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_cleanup(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
|
||||
if (device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, tracer->host_buffer, NULL));
|
||||
vkd3d_free_device_memory(device, &tracer->host_buffer_memory);
|
||||
}
|
||||
|
||||
vkd3d_free(tracer->trace_contexts);
|
||||
pthread_mutex_destroy(&tracer->lock);
|
||||
}
|
||||
|
||||
unsigned int vkd3d_breadcrumb_tracer_allocate_command_list(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
struct d3d12_command_list *list, struct d3d12_command_allocator *allocator)
|
||||
{
|
||||
unsigned int index = UINT32_MAX;
|
||||
unsigned int iteration_count;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&tracer->lock)))
|
||||
{
|
||||
ERR("Failed to lock mutex, rc %d.\n", rc);
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
/* Since this is a ring, this is extremely likely to succeed on first attempt. */
|
||||
for (iteration_count = 0; iteration_count < MAX_COMMAND_LISTS; iteration_count++)
|
||||
{
|
||||
tracer->trace_context_index = (tracer->trace_context_index + 1) % MAX_COMMAND_LISTS;
|
||||
if (!tracer->trace_contexts[tracer->trace_context_index].locked)
|
||||
{
|
||||
tracer->trace_contexts[tracer->trace_context_index].locked = 1;
|
||||
index = tracer->trace_context_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&tracer->lock);
|
||||
|
||||
if (index == UINT32_MAX)
|
||||
{
|
||||
ERR("Failed to allocate new index for command list.\n");
|
||||
return index;
|
||||
}
|
||||
|
||||
TRACE("Allocating breadcrumb context %u for list %p.\n", index, list);
|
||||
list->breadcrumb_context_index = index;
|
||||
|
||||
/* Need to clear this on a fresh allocation rather than release, since we can end up releasing a command list
|
||||
* before we observe the device lost. */
|
||||
tracer->trace_contexts[index].command_count = 0;
|
||||
tracer->trace_contexts[index].counter = 0;
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
memset(&tracer->mapped[index], 0, sizeof(tracer->mapped[index]));
|
||||
|
||||
vkd3d_array_reserve((void**)&allocator->breadcrumb_context_indices, &allocator->breadcrumb_context_index_size,
|
||||
allocator->breadcrumb_context_index_count + 1,
|
||||
sizeof(*allocator->breadcrumb_context_indices));
|
||||
allocator->breadcrumb_context_indices[allocator->breadcrumb_context_index_count++] = index;
|
||||
return index;
|
||||
}
|
||||
|
||||
/* Command allocator keeps a list of allocated breadcrumb command lists. */
|
||||
void vkd3d_breadcrumb_tracer_release_command_lists(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
const unsigned int *indices, size_t indices_count)
|
||||
{
|
||||
unsigned int index;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
if (!indices_count)
|
||||
return;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&tracer->lock)))
|
||||
{
|
||||
ERR("Failed to lock mutex, rc %d.\n", rc);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < indices_count; i++)
|
||||
{
|
||||
index = indices[i];
|
||||
if (index != UINT32_MAX)
|
||||
tracer->trace_contexts[index].locked = 0;
|
||||
TRACE("Releasing breadcrumb context %u.\n", index);
|
||||
}
|
||||
pthread_mutex_unlock(&tracer->lock);
|
||||
}
|
||||
|
||||
static void vkd3d_breadcrumb_tracer_report_command_list(
|
||||
const struct vkd3d_breadcrumb_command_list_trace_context *context,
|
||||
uint32_t begin_marker,
|
||||
uint32_t end_marker)
|
||||
{
|
||||
const struct vkd3d_breadcrumb_command *cmd;
|
||||
bool observed_begin_cmd = false;
|
||||
bool observed_end_cmd = false;
|
||||
unsigned int i;
|
||||
|
||||
if (end_marker == 0)
|
||||
{
|
||||
ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
|
||||
observed_begin_cmd = true;
|
||||
}
|
||||
|
||||
/* We can assume that possible culprit commands lie between the end_marker
|
||||
* and top_marker. */
|
||||
for (i = 0; i < context->command_count; i++)
|
||||
{
|
||||
cmd = &context->commands[i];
|
||||
|
||||
/* If there is a command which sets TOP_OF_PIPE, but we haven't observed the marker yet,
|
||||
* the command processor hasn't gotten there yet (most likely ...), so that should be the
|
||||
* natural end-point. */
|
||||
if (!observed_end_cmd &&
|
||||
cmd->type == VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER &&
|
||||
cmd->count > begin_marker)
|
||||
{
|
||||
observed_end_cmd = true;
|
||||
ERR(" ===== Potential crash region END =====\n");
|
||||
}
|
||||
|
||||
if (cmd->type == VKD3D_BREADCRUMB_COMMAND_AUX32)
|
||||
{
|
||||
ERR(" Set arg: %u (#%x)\n", cmd->word_32bit, cmd->word_32bit);
|
||||
}
|
||||
else if (cmd->type == VKD3D_BREADCRUMB_COMMAND_AUX64)
|
||||
{
|
||||
ERR(" Set arg: %"PRIu64" (#%"PRIx64")\n", cmd->word_64bit, cmd->word_64bit);
|
||||
}
|
||||
else if (cmd->type == VKD3D_BREADCRUMB_COMMAND_TAG)
|
||||
{
|
||||
ERR(" Tag: %s\n", cmd->tag);
|
||||
}
|
||||
else
|
||||
{
|
||||
ERR(" Command: %s\n", vkd3d_breadcrumb_command_type_to_str(cmd->type));
|
||||
|
||||
switch (cmd->type)
|
||||
{
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER:
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER:
|
||||
ERR(" marker: %u\n", cmd->count);
|
||||
break;
|
||||
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH:
|
||||
ERR(" hash: %016"PRIx64", stage: %x\n", cmd->shader.hash, cmd->shader.stage);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We have proved we observed this command is complete.
|
||||
* Some command after this signal is at fault. */
|
||||
if (!observed_begin_cmd &&
|
||||
cmd->type == VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER &&
|
||||
cmd->count == end_marker)
|
||||
{
|
||||
observed_begin_cmd = true;
|
||||
ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vkd3d_breadcrumb_tracer_report_command_list_amd(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
unsigned int context_index)
|
||||
{
|
||||
const struct vkd3d_breadcrumb_command_list_trace_context *context;
|
||||
uint32_t begin_marker;
|
||||
uint32_t end_marker;
|
||||
|
||||
context = &tracer->trace_contexts[context_index];
|
||||
|
||||
/* Unused, cannot be the cause. */
|
||||
if (context->counter == 0)
|
||||
return;
|
||||
|
||||
begin_marker = tracer->mapped[context_index].begin_marker;
|
||||
end_marker = tracer->mapped[context_index].end_marker;
|
||||
|
||||
/* Never executed, cannot be the cause. */
|
||||
if (begin_marker == 0 && end_marker == 0)
|
||||
return;
|
||||
|
||||
/* Successfully retired, cannot be the cause. */
|
||||
if (begin_marker == UINT32_MAX && end_marker == UINT32_MAX)
|
||||
return;
|
||||
|
||||
/* Edge case if we re-submitted a command list,
|
||||
* but it ends up crashing before we hit any BOTTOM_OF_PIPE
|
||||
* marker. Normalize the inputs such that end_marker <= begin_marker. */
|
||||
if (begin_marker > 0 && end_marker == UINT32_MAX)
|
||||
end_marker = 0;
|
||||
|
||||
ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
|
||||
context_index, begin_marker, end_marker);
|
||||
vkd3d_breadcrumb_tracer_report_command_list(context, begin_marker, end_marker);
|
||||
ERR("Done analyzing command list.\n");
|
||||
}
|
||||
|
||||
static void vkd3d_breadcrumb_tracer_report_queue_nv(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
struct d3d12_device *device,
|
||||
VkQueue vk_queue)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
uint32_t begin_marker, end_marker;
|
||||
uint32_t checkpoint_context_index;
|
||||
VkCheckpointDataNV *checkpoints;
|
||||
uint32_t checkpoint_marker;
|
||||
uint32_t checkpoint_count;
|
||||
uint32_t context_index;
|
||||
uint32_t i;
|
||||
|
||||
VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, NULL));
|
||||
if (checkpoint_count == 0)
|
||||
return;
|
||||
|
||||
checkpoints = vkd3d_calloc(checkpoint_count, sizeof(VkCheckpointDataNV));
|
||||
for (i = 0; i < checkpoint_count; i++)
|
||||
checkpoints[i].sType = VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV;
|
||||
VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, checkpoints));
|
||||
|
||||
context_index = UINT32_MAX;
|
||||
begin_marker = 0;
|
||||
end_marker = 0;
|
||||
|
||||
for (i = 0; i < checkpoint_count; i++)
|
||||
{
|
||||
checkpoint_context_index = NV_CHECKPOINT_CONTEXT(checkpoints[i].pCheckpointMarker);
|
||||
checkpoint_marker = NV_CHECKPOINT_COUNTER(checkpoints[i].pCheckpointMarker);
|
||||
|
||||
if (context_index != checkpoint_context_index && context_index != UINT32_MAX)
|
||||
{
|
||||
FIXME("Markers have different contexts. Execution is likely split across multiple command buffers?\n");
|
||||
context_index = UINT32_MAX;
|
||||
break;
|
||||
}
|
||||
|
||||
context_index = checkpoint_context_index;
|
||||
|
||||
if (checkpoints[i].stage == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT && checkpoint_marker > begin_marker)
|
||||
{
|
||||
/* We want to find the latest TOP_OF_PIPE_BIT. Then we prove that command processor got to that point. */
|
||||
begin_marker = checkpoint_marker;
|
||||
}
|
||||
else if (checkpoints[i].stage == VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT && checkpoint_marker > end_marker)
|
||||
{
|
||||
/* We want to find the latest BOTTOM_OF_PIPE_BIT. Then we prove that we got that far. */
|
||||
end_marker = checkpoint_marker;
|
||||
}
|
||||
else if (checkpoints[i].stage != VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT &&
|
||||
checkpoints[i].stage != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
|
||||
{
|
||||
FIXME("Unexpected checkpoint pipeline stage. #%x\n", checkpoints[i].stage);
|
||||
context_index = UINT32_MAX;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (context_index != UINT32_MAX && begin_marker != 0 && end_marker != 0 && end_marker != UINT32_MAX)
|
||||
{
|
||||
ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
|
||||
context_index, begin_marker, end_marker);
|
||||
vkd3d_breadcrumb_tracer_report_command_list(&tracer->trace_contexts[context_index], begin_marker, end_marker);
|
||||
ERR("Done analyzing command list.\n");
|
||||
}
|
||||
|
||||
vkd3d_free(checkpoints);
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_report_device_lost(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
struct vkd3d_queue_family_info *queue_family_info;
|
||||
VkQueue vk_queue;
|
||||
unsigned int i;
|
||||
|
||||
ERR("Device lost observed, analyzing breadcrumbs ...\n");
|
||||
|
||||
if (device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
/* AMD path, buffer marker. */
|
||||
for (i = 0; i < MAX_COMMAND_LISTS; i++)
|
||||
vkd3d_breadcrumb_tracer_report_command_list_amd(tracer, i);
|
||||
}
|
||||
else if (device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
/* vkGetQueueCheckpointDataNV does not require us to synchronize access to the queue. */
|
||||
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
for (i = 0; i < queue_family_info->queue_count; i++)
|
||||
{
|
||||
vk_queue = queue_family_info->queues[i]->vk_queue;
|
||||
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
|
||||
}
|
||||
|
||||
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_COMPUTE);
|
||||
for (i = 0; i < queue_family_info->queue_count; i++)
|
||||
{
|
||||
vk_queue = queue_family_info->queues[i]->vk_queue;
|
||||
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
|
||||
}
|
||||
|
||||
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_COPY);
|
||||
for (i = 0; i < queue_family_info->queue_count; i++)
|
||||
{
|
||||
vk_queue = queue_family_info->queues[i]->vk_queue;
|
||||
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
|
||||
}
|
||||
}
|
||||
|
||||
ERR("Done analyzing breadcrumbs ...\n");
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_begin_command_list(struct d3d12_command_list *list)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
struct vkd3d_breadcrumb_command cmd;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
trace->counter++;
|
||||
|
||||
cmd.count = trace->counter;
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
|
||||
trace->counter));
|
||||
}
|
||||
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
/* A checkpoint is implicitly a top and bottom marker. */
|
||||
cmd.count = trace->counter;
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
|
||||
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
|
||||
}
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_add_command(struct d3d12_command_list *list,
|
||||
const struct vkd3d_breadcrumb_command *command)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
|
||||
TRACE("Adding command (%s) to context %u.\n",
|
||||
vkd3d_breadcrumb_command_type_to_str(command->type), context);
|
||||
|
||||
vkd3d_array_reserve((void**)&trace->commands, &trace->command_size,
|
||||
trace->command_count + 1, sizeof(*trace->commands));
|
||||
trace->commands[trace->command_count++] = *command;
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_signal(struct d3d12_command_list *list)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
struct vkd3d_breadcrumb_command cmd;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, end_marker),
|
||||
trace->counter));
|
||||
|
||||
trace->counter++;
|
||||
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
|
||||
trace->counter));
|
||||
}
|
||||
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
trace->counter++;
|
||||
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
|
||||
}
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_end_command_list(struct d3d12_command_list *list)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
struct vkd3d_breadcrumb_command cmd;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
trace->counter = UINT32_MAX;
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
|
||||
trace->counter));
|
||||
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, end_marker),
|
||||
trace->counter));
|
||||
}
|
||||
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
|
||||
}
|
||||
|
||||
cmd.count = trace->counter;
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
}
|
|
@ -261,12 +261,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_bundle_QueryInterface(d3d12_command_list_
|
|||
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3)
|
||||
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4)
|
||||
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5)
|
||||
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6)
|
||||
|| IsEqualGUID(iid, &IID_ID3D12CommandList)
|
||||
|| IsEqualGUID(iid, &IID_ID3D12DeviceChild)
|
||||
|| IsEqualGUID(iid, &IID_ID3D12Object)
|
||||
|| IsEqualGUID(iid, &IID_IUnknown))
|
||||
{
|
||||
ID3D12GraphicsCommandList5_AddRef(iface);
|
||||
ID3D12GraphicsCommandList6_AddRef(iface);
|
||||
*object = iface;
|
||||
return S_OK;
|
||||
}
|
||||
|
@ -429,7 +430,7 @@ static void d3d12_bundle_exec_draw_instanced(d3d12_command_list_iface *list, con
|
|||
{
|
||||
const struct d3d12_draw_instanced_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_DrawInstanced(list, args->vertex_count,
|
||||
ID3D12GraphicsCommandList6_DrawInstanced(list, args->vertex_count,
|
||||
args->instance_count, args->first_vertex, args->first_instance);
|
||||
}
|
||||
|
||||
|
@ -466,7 +467,7 @@ static void d3d12_bundle_exec_draw_indexed_instanced(d3d12_command_list_iface *l
|
|||
{
|
||||
const struct d3d12_draw_indexed_instanced_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_DrawIndexedInstanced(list, args->index_count,
|
||||
ID3D12GraphicsCommandList6_DrawIndexedInstanced(list, args->index_count,
|
||||
args->instance_count, args->first_index, args->vertex_offset,
|
||||
args->first_instance);
|
||||
}
|
||||
|
@ -501,7 +502,7 @@ static void d3d12_bundle_exec_dispatch(d3d12_command_list_iface *list, const voi
|
|||
{
|
||||
const struct d3d12_dispatch_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_Dispatch(list, args->x, args->y, args->z);
|
||||
ID3D12GraphicsCommandList6_Dispatch(list, args->x, args->y, args->z);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_Dispatch(d3d12_command_list_iface *iface,
|
||||
|
@ -569,7 +570,7 @@ static void d3d12_bundle_exec_ia_set_primitive_topology(d3d12_command_list_iface
|
|||
{
|
||||
const struct d3d12_ia_set_primitive_topology_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_IASetPrimitiveTopology(list, args->topology);
|
||||
ID3D12GraphicsCommandList6_IASetPrimitiveTopology(list, args->topology);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_IASetPrimitiveTopology(d3d12_command_list_iface *iface,
|
||||
|
@ -606,7 +607,7 @@ static void d3d12_bundle_exec_om_set_blend_factor(d3d12_command_list_iface *list
|
|||
{
|
||||
const struct d3d12_om_set_blend_factor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_OMSetBlendFactor(list, args->blend_factor);
|
||||
ID3D12GraphicsCommandList6_OMSetBlendFactor(list, args->blend_factor);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_OMSetBlendFactor(d3d12_command_list_iface *iface,
|
||||
|
@ -634,7 +635,7 @@ static void d3d12_bundle_exec_om_set_stencil_ref(d3d12_command_list_iface *list,
|
|||
{
|
||||
const struct d3d12_om_set_stencil_ref_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_OMSetStencilRef(list, args->stencil_ref);
|
||||
ID3D12GraphicsCommandList6_OMSetStencilRef(list, args->stencil_ref);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_OMSetStencilRef(d3d12_command_list_iface *iface,
|
||||
|
@ -659,7 +660,7 @@ static void d3d12_bundle_exec_set_pipeline_state(d3d12_command_list_iface *list,
|
|||
{
|
||||
const struct d3d12_set_pipeline_state_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetPipelineState(list, args->pipeline_state);
|
||||
ID3D12GraphicsCommandList6_SetPipelineState(list, args->pipeline_state);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetPipelineState(d3d12_command_list_iface *iface,
|
||||
|
@ -703,7 +704,7 @@ static void d3d12_bundle_exec_set_compute_root_signature(d3d12_command_list_ifac
|
|||
{
|
||||
const struct d3d12_set_root_signature_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRootSignature(list, args->root_signature);
|
||||
ID3D12GraphicsCommandList6_SetComputeRootSignature(list, args->root_signature);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootSignature(d3d12_command_list_iface *iface,
|
||||
|
@ -722,7 +723,7 @@ static void d3d12_bundle_exec_set_graphics_root_signature(d3d12_command_list_ifa
|
|||
{
|
||||
const struct d3d12_set_root_signature_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRootSignature(list, args->root_signature);
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRootSignature(list, args->root_signature);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootSignature(d3d12_command_list_iface *iface,
|
||||
|
@ -748,7 +749,7 @@ static void d3d12_bundle_exec_set_compute_root_descriptor_table(d3d12_command_li
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_table_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
|
||||
ID3D12GraphicsCommandList6_SetComputeRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface,
|
||||
|
@ -769,7 +770,7 @@ static void d3d12_bundle_exec_set_graphics_root_descriptor_table(d3d12_command_l
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_table_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootDescriptorTable(d3d12_command_list_iface *iface,
|
||||
|
@ -798,7 +799,7 @@ static void d3d12_bundle_exec_set_compute_root_32bit_constant(d3d12_command_list
|
|||
{
|
||||
const struct d3d12_set_root_32bit_constant_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
|
||||
ID3D12GraphicsCommandList6_SetComputeRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRoot32BitConstant(d3d12_command_list_iface *iface,
|
||||
|
@ -820,7 +821,7 @@ static void d3d12_bundle_exec_set_graphics_root_32bit_constant(d3d12_command_lis
|
|||
{
|
||||
const struct d3d12_set_root_32bit_constant_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRoot32BitConstant(d3d12_command_list_iface *iface,
|
||||
|
@ -851,7 +852,7 @@ static void d3d12_bundle_exec_set_compute_root_32bit_constants(d3d12_command_lis
|
|||
{
|
||||
const struct d3d12_set_root_32bit_constants_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRoot32BitConstants(list, args->parameter_index,
|
||||
ID3D12GraphicsCommandList6_SetComputeRoot32BitConstants(list, args->parameter_index,
|
||||
args->constant_count, args->data, args->offset);
|
||||
}
|
||||
|
||||
|
@ -879,7 +880,7 @@ static void d3d12_bundle_exec_set_graphics_root_32bit_constants(d3d12_command_li
|
|||
{
|
||||
const struct d3d12_set_root_32bit_constants_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRoot32BitConstants(list, args->parameter_index,
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRoot32BitConstants(list, args->parameter_index,
|
||||
args->constant_count, args->data, args->offset);
|
||||
}
|
||||
|
||||
|
@ -914,7 +915,7 @@ static void d3d12_bundle_exec_set_compute_root_cbv(d3d12_command_list_iface *lis
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRootConstantBufferView(list, args->parameter_index, args->address);
|
||||
ID3D12GraphicsCommandList6_SetComputeRootConstantBufferView(list, args->parameter_index, args->address);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootConstantBufferView(
|
||||
|
@ -935,7 +936,7 @@ static void d3d12_bundle_exec_set_graphics_root_cbv(d3d12_command_list_iface *li
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRootConstantBufferView(list, args->parameter_index, args->address);
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRootConstantBufferView(list, args->parameter_index, args->address);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootConstantBufferView(
|
||||
|
@ -956,7 +957,7 @@ static void d3d12_bundle_exec_set_compute_root_srv(d3d12_command_list_iface *lis
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRootShaderResourceView(list, args->parameter_index, args->address);
|
||||
ID3D12GraphicsCommandList6_SetComputeRootShaderResourceView(list, args->parameter_index, args->address);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootShaderResourceView(
|
||||
|
@ -977,7 +978,7 @@ static void d3d12_bundle_exec_set_graphics_root_srv(d3d12_command_list_iface *li
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRootShaderResourceView(list, args->parameter_index, args->address);
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRootShaderResourceView(list, args->parameter_index, args->address);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootShaderResourceView(
|
||||
|
@ -998,7 +999,7 @@ static void d3d12_bundle_exec_set_compute_root_uav(d3d12_command_list_iface *lis
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetComputeRootUnorderedAccessView(list, args->parameter_index, args->address);
|
||||
ID3D12GraphicsCommandList6_SetComputeRootUnorderedAccessView(list, args->parameter_index, args->address);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootUnorderedAccessView(
|
||||
|
@ -1019,7 +1020,7 @@ static void d3d12_bundle_exec_set_graphics_root_uav(d3d12_command_list_iface *li
|
|||
{
|
||||
const struct d3d12_set_root_descriptor_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetGraphicsRootUnorderedAccessView(list, args->parameter_index, args->address);
|
||||
ID3D12GraphicsCommandList6_SetGraphicsRootUnorderedAccessView(list, args->parameter_index, args->address);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootUnorderedAccessView(
|
||||
|
@ -1044,14 +1045,14 @@ struct d3d12_ia_set_index_buffer_command
|
|||
|
||||
static void d3d12_bundle_exec_ia_set_index_buffer_null(d3d12_command_list_iface *list, const void *args_v)
|
||||
{
|
||||
ID3D12GraphicsCommandList5_IASetIndexBuffer(list, NULL);
|
||||
ID3D12GraphicsCommandList6_IASetIndexBuffer(list, NULL);
|
||||
}
|
||||
|
||||
static void d3d12_bundle_exec_ia_set_index_buffer(d3d12_command_list_iface *list, const void *args_v)
|
||||
{
|
||||
const struct d3d12_ia_set_index_buffer_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_IASetIndexBuffer(list, &args->view);
|
||||
ID3D12GraphicsCommandList6_IASetIndexBuffer(list, &args->view);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_IASetIndexBuffer(d3d12_command_list_iface *iface,
|
||||
|
@ -1201,7 +1202,7 @@ static void d3d12_bundle_exec_set_marker(d3d12_command_list_iface *list, const v
|
|||
{
|
||||
const struct d3d12_debug_marker_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetMarker(list, args->metadata, args->data, args->data_size);
|
||||
ID3D12GraphicsCommandList6_SetMarker(list, args->metadata, args->data, args->data_size);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetMarker(d3d12_command_list_iface *iface,
|
||||
|
@ -1222,7 +1223,7 @@ static void d3d12_bundle_exec_begin_event(d3d12_command_list_iface *list, const
|
|||
{
|
||||
const struct d3d12_debug_marker_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_BeginEvent(list, args->metadata, args->data, args->data_size);
|
||||
ID3D12GraphicsCommandList6_BeginEvent(list, args->metadata, args->data, args->data_size);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_BeginEvent(d3d12_command_list_iface *iface,
|
||||
|
@ -1241,7 +1242,7 @@ static void STDMETHODCALLTYPE d3d12_bundle_BeginEvent(d3d12_command_list_iface *
|
|||
|
||||
static void d3d12_bundle_exec_end_event(d3d12_command_list_iface *list, const void *args_v)
|
||||
{
|
||||
ID3D12GraphicsCommandList5_EndEvent(list);
|
||||
ID3D12GraphicsCommandList6_EndEvent(list);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_EndEvent(d3d12_command_list_iface *iface)
|
||||
|
@ -1268,7 +1269,7 @@ static void d3d12_bundle_exec_execute_indirect(d3d12_command_list_iface *list, c
|
|||
{
|
||||
const struct d3d12_execute_indirect_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_ExecuteIndirect(list, args->signature, args->max_count,
|
||||
ID3D12GraphicsCommandList6_ExecuteIndirect(list, args->signature, args->max_count,
|
||||
args->arg_buffer, args->arg_offset, args->count_buffer, args->count_offset);
|
||||
}
|
||||
|
||||
|
@ -1330,7 +1331,7 @@ static void d3d12_bundle_exec_om_set_depth_bounds(d3d12_command_list_iface *list
|
|||
{
|
||||
const struct d3d12_om_set_depth_bounds_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_OMSetDepthBounds(list, args->min, args->max);
|
||||
ID3D12GraphicsCommandList6_OMSetDepthBounds(list, args->min, args->max);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_OMSetDepthBounds(d3d12_command_list_iface *iface,
|
||||
|
@ -1359,7 +1360,7 @@ static void d3d12_bundle_exec_set_sample_positions(d3d12_command_list_iface *lis
|
|||
const struct d3d12_set_sample_positions_command *args = args_v;
|
||||
|
||||
/* The sample position array is non-const but does not get written to */
|
||||
ID3D12GraphicsCommandList5_SetSamplePositions(list, args->sample_count,
|
||||
ID3D12GraphicsCommandList6_SetSamplePositions(list, args->sample_count,
|
||||
args->pixel_count, (D3D12_SAMPLE_POSITION*)args->positions);
|
||||
}
|
||||
|
||||
|
@ -1402,7 +1403,7 @@ static void d3d12_bundle_exec_set_view_instance_mask(d3d12_command_list_iface *l
|
|||
{
|
||||
const struct d3d12_set_view_instance_mask_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetViewInstanceMask(list, args->mask);
|
||||
ID3D12GraphicsCommandList6_SetViewInstanceMask(list, args->mask);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetViewInstanceMask(d3d12_command_list_iface *iface, UINT mask)
|
||||
|
@ -1428,7 +1429,7 @@ static void d3d12_bundle_exec_write_buffer_immediate(d3d12_command_list_iface *l
|
|||
{
|
||||
const struct d3d12_write_buffer_immediate_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_WriteBufferImmediate(list, args->count, args->parameters, args->modes);
|
||||
ID3D12GraphicsCommandList6_WriteBufferImmediate(list, args->count, args->parameters, args->modes);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_WriteBufferImmediate(d3d12_command_list_iface *iface,
|
||||
|
@ -1524,7 +1525,7 @@ static void d3d12_bundle_exec_set_pipeline_state1(d3d12_command_list_iface *list
|
|||
{
|
||||
const struct d3d12_set_pipeline_state1_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_SetPipelineState1(list, args->state_object);
|
||||
ID3D12GraphicsCommandList6_SetPipelineState1(list, args->state_object);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_SetPipelineState1(d3d12_command_list_iface *iface,
|
||||
|
@ -1549,7 +1550,7 @@ static void d3d12_bundle_exec_dispatch_rays(d3d12_command_list_iface *list, cons
|
|||
{
|
||||
const struct d3d12_dispatch_rays_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_DispatchRays(list, &args->desc);
|
||||
ID3D12GraphicsCommandList6_DispatchRays(list, &args->desc);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_DispatchRays(d3d12_command_list_iface *iface,
|
||||
|
@ -1575,14 +1576,14 @@ static void d3d12_bundle_exec_rs_set_shading_rate(d3d12_command_list_iface *list
|
|||
{
|
||||
const struct d3d12_rs_set_shading_rate_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_RSSetShadingRate(list, args->base, args->combiners);
|
||||
ID3D12GraphicsCommandList6_RSSetShadingRate(list, args->base, args->combiners);
|
||||
}
|
||||
|
||||
static void d3d12_bundle_exec_rs_set_shading_rate_base(d3d12_command_list_iface *list, const void *args_v)
|
||||
{
|
||||
const struct d3d12_rs_set_shading_rate_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_RSSetShadingRate(list, args->base, NULL);
|
||||
ID3D12GraphicsCommandList6_RSSetShadingRate(list, args->base, NULL);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_RSSetShadingRate(d3d12_command_list_iface *iface,
|
||||
|
@ -1612,7 +1613,7 @@ static void d3d12_bundle_exec_rs_set_shading_rate_image(d3d12_command_list_iface
|
|||
{
|
||||
const struct d3d12_rs_set_shading_rate_image_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList5_RSSetShadingRateImage(list, args->image);
|
||||
ID3D12GraphicsCommandList6_RSSetShadingRateImage(list, args->image);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_RSSetShadingRateImage(d3d12_command_list_iface *iface,
|
||||
|
@ -1627,7 +1628,27 @@ static void STDMETHODCALLTYPE d3d12_bundle_RSSetShadingRateImage(d3d12_command_l
|
|||
args->image = image;
|
||||
}
|
||||
|
||||
static CONST_VTBL struct ID3D12GraphicsCommandList5Vtbl d3d12_bundle_vtbl =
|
||||
static void d3d12_bundle_exec_dispatch_mesh(d3d12_command_list_iface *list, const void *args_v)
|
||||
{
|
||||
const struct d3d12_dispatch_command *args = args_v;
|
||||
|
||||
ID3D12GraphicsCommandList6_DispatchMesh(list, args->x, args->y, args->z);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_bundle_DispatchMesh(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z)
|
||||
{
|
||||
struct d3d12_bundle *bundle = impl_from_ID3D12GraphicsCommandList(iface);
|
||||
struct d3d12_dispatch_command *args;
|
||||
|
||||
TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z);
|
||||
|
||||
args = d3d12_bundle_add_command(bundle, &d3d12_bundle_exec_dispatch_mesh, sizeof(*args));
|
||||
args->x = x;
|
||||
args->y = y;
|
||||
args->z = z;
|
||||
}
|
||||
|
||||
static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_bundle_vtbl =
|
||||
{
|
||||
/* IUnknown methods */
|
||||
d3d12_bundle_QueryInterface,
|
||||
|
@ -1718,6 +1739,8 @@ static CONST_VTBL struct ID3D12GraphicsCommandList5Vtbl d3d12_bundle_vtbl =
|
|||
/* ID3D12GraphicsCommandList5 methods */
|
||||
d3d12_bundle_RSSetShadingRate,
|
||||
d3d12_bundle_RSSetShadingRateImage,
|
||||
/* ID3D12GraphicsCommandList6 methods */
|
||||
d3d12_bundle_DispatchMesh,
|
||||
};
|
||||
|
||||
HRESULT d3d12_bundle_create(struct d3d12_device *device,
|
||||
|
|
2969
libs/vkd3d/cache.c
2969
libs/vkd3d/cache.c
File diff suppressed because it is too large
Load Diff
4970
libs/vkd3d/command.c
4970
libs/vkd3d/command.c
File diff suppressed because it is too large
Load Diff
|
@ -459,7 +459,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage_profiled(
|
|||
COMMAND_LIST_PROFILED_CALL(RSSetShadingRateImage, iface, image);
|
||||
}
|
||||
|
||||
static CONST_VTBL struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl_profiled =
|
||||
static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh_profiled(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z)
|
||||
{
|
||||
COMMAND_LIST_PROFILED_CALL(DispatchMesh, iface, x, y, z);
|
||||
}
|
||||
|
||||
static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl_profiled =
|
||||
{
|
||||
/* IUnknown methods */
|
||||
d3d12_command_list_QueryInterface,
|
||||
|
@ -550,6 +555,8 @@ static CONST_VTBL struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl_
|
|||
/* ID3D12GraphicsCommandList5 methods */
|
||||
d3d12_command_list_RSSetShadingRate_profiled,
|
||||
d3d12_command_list_RSSetShadingRateImage_profiled,
|
||||
/* ID3D12GraphicsCommandList6 methods */
|
||||
d3d12_command_list_DispatchMesh_profiled,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "vkd3d_private.h"
|
||||
#include "vkd3d_debug.h"
|
||||
#include "vkd3d_common.h"
|
||||
#include "vkd3d_platform.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
|
||||
|
@ -53,22 +54,199 @@ void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
|
|||
info->map_entries[3].size = sizeof(uint32_t);
|
||||
}
|
||||
|
||||
#define READ_RING_WORD(off) ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)]
|
||||
#define READ_RING_WORD_ACQUIRE(off) \
|
||||
vkd3d_atomic_uint32_load_explicit(&ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)], \
|
||||
vkd3d_memory_order_acquire)
|
||||
#define DEBUG_CHANNEL_WORD_COOKIE 0xdeadca70u
|
||||
#define DEBUG_CHANNEL_WORD_MASK 0xfffffff0u
|
||||
|
||||
static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token token)
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32: return "RootConst";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO: return "IBO VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI: return "IBO VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE: return "IBO Size";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT: return "IBO Type";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO: return "VBO VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI: return "VBO VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE: return "VBO Size";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE: return "VBO Stride";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO: return "ROOT VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI: return "ROOT VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT: return "Vertex Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT: return "Index Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT: return "Instance Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX: return "First Index";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX: return "First Vertex";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE: return "First Instance";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET: return "Vertex Offset";
|
||||
default: return "???";
|
||||
}
|
||||
}
|
||||
|
||||
static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token token)
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
|
||||
uint32_t word_offset, uint32_t message_word_count)
|
||||
{
|
||||
uint32_t i, debug_instance, debug_thread_id[3], fmt;
|
||||
char message_buffer[4096];
|
||||
uint64_t shader_hash;
|
||||
size_t len, avail;
|
||||
|
||||
if (message_word_count < 8)
|
||||
{
|
||||
ERR("Message word count %u is invalid.\n", message_word_count);
|
||||
return false;
|
||||
}
|
||||
|
||||
shader_hash = (uint64_t)READ_RING_WORD(word_offset + 1) | ((uint64_t)READ_RING_WORD(word_offset + 2) << 32);
|
||||
debug_instance = READ_RING_WORD(word_offset + 3);
|
||||
for (i = 0; i < 3; i++)
|
||||
debug_thread_id[i] = READ_RING_WORD(word_offset + 4 + i);
|
||||
fmt = READ_RING_WORD(word_offset + 7);
|
||||
|
||||
word_offset += 8;
|
||||
message_word_count -= 8;
|
||||
|
||||
if (shader_hash == 0)
|
||||
{
|
||||
/* We got this from our internal debug shaders. Pretty-print.
|
||||
* Make sure the log is sortable for easier debug.
|
||||
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
|
||||
* but that is overengineering at this time ... */
|
||||
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
|
||||
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
if (message_word_count == 2)
|
||||
{
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
snprintf(message_buffer + len, avail, "DrawCount %u, MaxDrawCount %u",
|
||||
READ_RING_WORD(word_offset + 0),
|
||||
READ_RING_WORD(word_offset + 1));
|
||||
}
|
||||
else if (message_word_count == 4)
|
||||
{
|
||||
union { uint32_t u32; float f32; int32_t s32; } value;
|
||||
enum vkd3d_patch_command_token token;
|
||||
uint32_t dst_offset;
|
||||
uint32_t src_offset;
|
||||
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
token = READ_RING_WORD(word_offset + 0);
|
||||
dst_offset = READ_RING_WORD(word_offset + 1);
|
||||
src_offset = READ_RING_WORD(word_offset + 2);
|
||||
value.u32 = READ_RING_WORD(word_offset + 3);
|
||||
|
||||
if (vkd3d_patch_command_token_is_hex(token))
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- #%08x",
|
||||
vkd3d_patch_command_token_str(token), value.u32);
|
||||
}
|
||||
else if (token == VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32)
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- {hex #%08x, s32 %d, f32 %f}",
|
||||
vkd3d_patch_command_token_str(token), value.u32, value.s32, value.f32);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- %d",
|
||||
vkd3d_patch_command_token_str(token), value.s32);
|
||||
}
|
||||
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
snprintf(message_buffer + len, avail, " (dst offset %u, src offset %u)", dst_offset, src_offset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %010u, ID (%u, %u, %u):",
|
||||
shader_hash, debug_instance,
|
||||
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
for (i = 0; i < message_word_count; i++)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f32;
|
||||
uint32_t u32;
|
||||
int32_t i32;
|
||||
} u;
|
||||
const char *delim;
|
||||
u.u32 = READ_RING_WORD(word_offset + i);
|
||||
|
||||
len = strlen(message_buffer);
|
||||
if (len + 1 >= sizeof(message_buffer))
|
||||
break;
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
delim = i == 0 ? " " : ", ";
|
||||
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
|
||||
switch ((fmt >> (2u * i)) & 3u)
|
||||
{
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
|
||||
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_I32:
|
||||
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_F32:
|
||||
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
|
||||
break;
|
||||
|
||||
default:
|
||||
snprintf(message_buffer + len, avail, "%s????", delim);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INFO("%s\n", message_buffer);
|
||||
return true;
|
||||
}
|
||||
|
||||
void *vkd3d_shader_debug_ring_thread_main(void *arg)
|
||||
{
|
||||
uint32_t last_counter, new_counter, count, i, j, message_word_count, debug_instance, debug_thread_id[3], fmt;
|
||||
uint32_t last_counter, new_counter, count, i, cookie_word_count;
|
||||
volatile const uint32_t *ring_counter; /* Atomic updated by the GPU. */
|
||||
struct vkd3d_shader_debug_ring *ring;
|
||||
struct d3d12_device *device = arg;
|
||||
const uint32_t *ring_counter;
|
||||
const uint32_t *ring_base;
|
||||
char message_buffer[4096];
|
||||
bool is_active = true;
|
||||
uint64_t shader_hash;
|
||||
uint32_t *ring_base;
|
||||
uint32_t word_count;
|
||||
size_t ring_mask;
|
||||
|
||||
ring = &device->debug_ring;
|
||||
ring_mask = ring->ring_size - 1;
|
||||
ring_counter = ring->mapped;
|
||||
ring_base = ring_counter + (ring->ring_offset / sizeof(uint32_t));
|
||||
ring_mask = (ring->ring_size / sizeof(uint32_t)) - 1;
|
||||
ring_counter = ring->mapped_control_block;
|
||||
ring_base = ring->mapped_ring;
|
||||
last_counter = 0;
|
||||
|
||||
vkd3d_set_thread_name("debug-ring");
|
||||
|
@ -82,88 +260,93 @@ void *vkd3d_shader_debug_ring_thread_main(void *arg)
|
|||
pthread_mutex_unlock(&ring->ring_lock);
|
||||
|
||||
new_counter = *ring_counter;
|
||||
|
||||
if (last_counter != new_counter)
|
||||
{
|
||||
count = (new_counter - last_counter) & ring_mask;
|
||||
|
||||
/* Assume that each iteration can safely use 1/4th of the buffer to avoid WAR hazards. */
|
||||
if ((new_counter - last_counter) > (ring->ring_size / 16))
|
||||
if (count > (ring->ring_size / 16))
|
||||
{
|
||||
ERR("Debug ring is probably too small (%u new words this iteration), increase size to avoid risk of dropping messages.\n",
|
||||
new_counter - last_counter);
|
||||
count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count; )
|
||||
{
|
||||
#define READ_RING_WORD(off) ring_base[((off) + i + last_counter) & ring_mask]
|
||||
message_word_count = READ_RING_WORD(0);
|
||||
if (i + message_word_count > count)
|
||||
break;
|
||||
if (message_word_count < 8 || message_word_count > 16 + 8)
|
||||
break;
|
||||
/* The debug ring shader has "release" semantics for the word count write,
|
||||
* so just make sure the reads don't get reordered here. */
|
||||
cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
|
||||
word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
|
||||
|
||||
shader_hash = (uint64_t)READ_RING_WORD(1) | ((uint64_t)READ_RING_WORD(2) << 32);
|
||||
debug_instance = READ_RING_WORD(3);
|
||||
for (j = 0; j < 3; j++)
|
||||
debug_thread_id[j] = READ_RING_WORD(4 + j);
|
||||
fmt = READ_RING_WORD(7);
|
||||
|
||||
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %u, ID (%u, %u, %u):",
|
||||
shader_hash, debug_instance,
|
||||
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
i += 8;
|
||||
message_word_count -= 8;
|
||||
|
||||
for (j = 0; j < message_word_count; j++)
|
||||
if (cookie_word_count == 0)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f32;
|
||||
uint32_t u32;
|
||||
int32_t i32;
|
||||
} u;
|
||||
const char *delim;
|
||||
size_t len, avail;
|
||||
u.u32 = READ_RING_WORD(j);
|
||||
|
||||
len = strlen(message_buffer);
|
||||
if (len + 1 >= sizeof(message_buffer))
|
||||
break;
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
delim = j == 0 ? " " : ", ";
|
||||
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
|
||||
switch ((fmt >> (2u * j)) & 3u)
|
||||
{
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
|
||||
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_I32:
|
||||
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_F32:
|
||||
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
|
||||
break;
|
||||
|
||||
default:
|
||||
snprintf(message_buffer + len, avail, "%s????", delim);
|
||||
break;
|
||||
}
|
||||
ERR("Message was allocated, but write did not complete. last_counter = %u, rewrite new_counter = %u -> %u\n",
|
||||
last_counter, new_counter, last_counter + i);
|
||||
/* Rewind the counter, and try again later. */
|
||||
new_counter = last_counter + i;
|
||||
break;
|
||||
}
|
||||
|
||||
INFO("%s\n", message_buffer);
|
||||
/* If something is written here, it must be a cookie. */
|
||||
if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) != DEBUG_CHANNEL_WORD_COOKIE)
|
||||
{
|
||||
ERR("Invalid message work cookie detected, 0x%x.\n", cookie_word_count);
|
||||
break;
|
||||
}
|
||||
|
||||
#undef READ_RING_WORD
|
||||
i += message_word_count;
|
||||
if (i + word_count > count)
|
||||
{
|
||||
ERR("Message word count %u is out of bounds (i = %u, count = %u).\n",
|
||||
word_count, i, count);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
|
||||
break;
|
||||
|
||||
i += word_count;
|
||||
}
|
||||
}
|
||||
last_counter = new_counter;
|
||||
|
||||
/* Make sure to clear out any messages we read so that when the ring gets around to
|
||||
* this point again, we can detect unwritten memory.
|
||||
* This relies on having a ring that is large enough, but in practice, if we just make the ring
|
||||
* large enough, there is nothing to worry about. */
|
||||
while (last_counter != new_counter)
|
||||
{
|
||||
ring_base[last_counter & ring_mask] = 0;
|
||||
last_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
if (ring->device_lost)
|
||||
{
|
||||
INFO("Device lost detected, attempting to fish for clues.\n");
|
||||
new_counter = *ring_counter;
|
||||
if (last_counter != new_counter)
|
||||
{
|
||||
count = (new_counter - last_counter) & ring_mask;
|
||||
for (i = 0; i < count; )
|
||||
{
|
||||
cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
|
||||
word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
|
||||
|
||||
/* This is considered a message if it has the marker and a word count that is in-range. */
|
||||
if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) == DEBUG_CHANNEL_WORD_COOKIE &&
|
||||
i + word_count <= count &&
|
||||
vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
|
||||
{
|
||||
i += word_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Keep going. */
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
INFO("Done fishing for clues ...\n");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
@ -174,20 +357,21 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
|
|||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
const char *env;
|
||||
D3D12_RESOURCE_DESC1 resource_desc;
|
||||
VkMemoryPropertyFlags memory_props;
|
||||
char env[VKD3D_PATH_MAX];
|
||||
|
||||
memset(ring, 0, sizeof(*ring));
|
||||
if (!(env = getenv("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2")))
|
||||
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2", env, sizeof(env)))
|
||||
return S_OK;
|
||||
|
||||
ring->active = true;
|
||||
|
||||
ring->ring_size = (size_t)1 << strtoul(env, NULL, 0);
|
||||
// Reserve 4k to be used as a control block of some sort.
|
||||
ring->ring_offset = 4096;
|
||||
ring->control_block_size = 4096;
|
||||
|
||||
WARN("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
|
||||
INFO("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
|
||||
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
{
|
||||
|
@ -201,7 +385,7 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
|
|||
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Width = ring->ring_offset + ring->ring_size;
|
||||
resource_desc.Width = ring->ring_size;
|
||||
resource_desc.Height = 1;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
|
@ -212,33 +396,71 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
|
|||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
|
||||
if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
|
||||
&resource_desc, &ring->host_buffer)))
|
||||
&resource_desc, &ring->host_buffer)))
|
||||
goto err_free_buffers;
|
||||
|
||||
memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
|
||||
/* If we're doing breadcrumb debugging, we also need to be able to read debug ring messages
|
||||
* from a crash, so we cannot rely on being able to copy the device payload back to host.
|
||||
* Use PCI-e BAR + UNCACHED + DEVICE_COHERENT if we must. */
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
|
||||
{
|
||||
INFO("Using debug ring with breadcrumbs, opting in to device uncached payload buffer.\n");
|
||||
/* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
|
||||
* host reads, so make extra sure. */
|
||||
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
|
||||
{
|
||||
memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
|
||||
INFO("Enabling uncached device memory for debug ring.\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (FAILED(vkd3d_allocate_buffer_memory(device, ring->host_buffer,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||
&ring->host_buffer_memory)))
|
||||
memory_props, &ring->host_buffer_memory)))
|
||||
goto err_free_buffers;
|
||||
|
||||
ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer) + ring->ring_offset;
|
||||
|
||||
resource_desc.Width = ring->ring_offset;
|
||||
resource_desc.Width = ring->control_block_size;
|
||||
memset(&heap_properties, 0, sizeof(heap_properties));
|
||||
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
|
||||
if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
|
||||
&resource_desc, &ring->device_atomic_buffer)))
|
||||
&resource_desc, &ring->device_atomic_buffer)))
|
||||
goto err_free_buffers;
|
||||
|
||||
memory_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
|
||||
{
|
||||
/* Expect crashes since we won't have time to flush caches.
|
||||
* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
|
||||
* host reads, so make extra sure. */
|
||||
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
|
||||
memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
|
||||
}
|
||||
|
||||
if (FAILED(vkd3d_allocate_buffer_memory(device, ring->device_atomic_buffer,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &ring->device_atomic_buffer_memory)))
|
||||
memory_props, &ring->device_atomic_buffer_memory)))
|
||||
goto err_free_buffers;
|
||||
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory, 0, VK_WHOLE_SIZE, 0, &ring->mapped)) != VK_SUCCESS)
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory.vk_memory,
|
||||
0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_ring)) != VK_SUCCESS)
|
||||
goto err_free_buffers;
|
||||
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, ring->device_atomic_buffer_memory.vk_memory,
|
||||
0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_control_block)) != VK_SUCCESS)
|
||||
goto err_free_buffers;
|
||||
|
||||
ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer);
|
||||
ring->atomic_device_address = vkd3d_get_buffer_device_address(device, ring->device_atomic_buffer);
|
||||
|
||||
memset(ring->mapped_control_block, 0, ring->control_block_size);
|
||||
memset(ring->mapped_ring, 0, ring->ring_size);
|
||||
|
||||
if (pthread_mutex_init(&ring->ring_lock, NULL) != 0)
|
||||
goto err_free_buffers;
|
||||
if (pthread_cond_init(&ring->ring_cond, NULL) != 0)
|
||||
|
@ -259,8 +481,8 @@ err_destroy_cond:
|
|||
err_free_buffers:
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, ring->host_buffer, NULL));
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, ring->device_atomic_buffer, NULL));
|
||||
VK_CALL(vkFreeMemory(device->vk_device, ring->host_buffer_memory, NULL));
|
||||
VK_CALL(vkFreeMemory(device->vk_device, ring->device_atomic_buffer_memory, NULL));
|
||||
vkd3d_free_device_memory(device, &ring->host_buffer_memory);
|
||||
vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
|
||||
memset(ring, 0, sizeof(*ring));
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
@ -282,38 +504,28 @@ void vkd3d_shader_debug_ring_cleanup(struct vkd3d_shader_debug_ring *ring,
|
|||
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, ring->host_buffer, NULL));
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, ring->device_atomic_buffer, NULL));
|
||||
VK_CALL(vkFreeMemory(device->vk_device, ring->host_buffer_memory, NULL));
|
||||
VK_CALL(vkFreeMemory(device->vk_device, ring->device_atomic_buffer_memory, NULL));
|
||||
vkd3d_free_device_memory(device, &ring->host_buffer_memory);
|
||||
vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
|
||||
}
|
||||
|
||||
void vkd3d_shader_debug_ring_end_command_buffer(struct d3d12_command_list *list)
|
||||
static pthread_mutex_t debug_ring_teardown_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
void vkd3d_shader_debug_ring_kick(struct vkd3d_shader_debug_ring *ring, struct d3d12_device *device, bool device_lost)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
VkBufferCopy buffer_copy;
|
||||
VkMemoryBarrier barrier;
|
||||
|
||||
if (list->device->debug_ring.active &&
|
||||
list->has_replaced_shaders &&
|
||||
(list->type == D3D12_COMMAND_LIST_TYPE_DIRECT || list->type == D3D12_COMMAND_LIST_TYPE_COMPUTE))
|
||||
if (device_lost)
|
||||
{
|
||||
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
|
||||
barrier.pNext = NULL;
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||
|
||||
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
|
||||
1, &barrier, 0, NULL, 0, NULL));
|
||||
|
||||
buffer_copy.size = list->device->debug_ring.ring_offset;
|
||||
buffer_copy.dstOffset = 0;
|
||||
buffer_copy.srcOffset = 0;
|
||||
|
||||
VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
|
||||
list->device->debug_ring.device_atomic_buffer,
|
||||
list->device->debug_ring.host_buffer,
|
||||
1, &buffer_copy));
|
||||
|
||||
/* Host barrier is taken care of automatically. */
|
||||
/* Need a global lock here since multiple threads can observe device lost at the same time. */
|
||||
pthread_mutex_lock(&debug_ring_teardown_lock);
|
||||
{
|
||||
ring->device_lost = true;
|
||||
/* We're going to die or hang after this most likely, so make sure we get to see all messages the
|
||||
* GPU had to write. Just cleanup now. */
|
||||
vkd3d_shader_debug_ring_cleanup(ring, device);
|
||||
}
|
||||
pthread_mutex_unlock(&debug_ring_teardown_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
pthread_cond_signal(&ring->ring_cond);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ struct vkd3d_descriptor_qa_global_info
|
|||
struct vkd3d_descriptor_qa_global_buffer_data *data;
|
||||
VkDescriptorBufferInfo descriptor;
|
||||
VkBuffer vk_buffer;
|
||||
VkDeviceMemory vk_memory;
|
||||
struct vkd3d_device_memory_allocation device_allocation;
|
||||
unsigned int num_cookies;
|
||||
|
||||
pthread_t ring_thread;
|
||||
|
@ -76,10 +76,10 @@ static const char *debug_descriptor_type(vkd3d_descriptor_qa_flags type_flags)
|
|||
|
||||
static void vkd3d_descriptor_debug_init_once(void)
|
||||
{
|
||||
const char *env;
|
||||
char env[VKD3D_PATH_MAX];
|
||||
vkd3d_get_env_var("VKD3D_DESCRIPTOR_QA_LOG", env, sizeof(env));
|
||||
|
||||
env = getenv("VKD3D_DESCRIPTOR_QA_LOG");
|
||||
if (env)
|
||||
if (strlen(env) > 0)
|
||||
{
|
||||
INFO("Enabling VKD3D_DESCRIPTOR_QA_LOG\n");
|
||||
descriptor_debug_file = fopen(env, "w");
|
||||
|
@ -197,7 +197,7 @@ HRESULT vkd3d_descriptor_debug_alloc_global_info(
|
|||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct vkd3d_descriptor_qa_global_info *global_info;
|
||||
D3D12_RESOURCE_DESC buffer_desc;
|
||||
D3D12_RESOURCE_DESC1 buffer_desc;
|
||||
D3D12_HEAP_PROPERTIES heap_info;
|
||||
D3D12_HEAP_FLAGS heap_flags;
|
||||
VkResult vr;
|
||||
|
@ -232,13 +232,13 @@ HRESULT vkd3d_descriptor_debug_alloc_global_info(
|
|||
|
||||
if (FAILED(hr = vkd3d_allocate_buffer_memory(device, global_info->vk_buffer,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
&global_info->vk_memory)))
|
||||
&global_info->device_allocation)))
|
||||
{
|
||||
vkd3d_descriptor_debug_free_global_info(global_info, device);
|
||||
return hr;
|
||||
}
|
||||
|
||||
if ((vr = VK_CALL(vkMapMemory(device->vk_device, global_info->vk_memory,
|
||||
if ((vr = VK_CALL(vkMapMemory(device->vk_device, global_info->device_allocation.vk_memory,
|
||||
0, VK_WHOLE_SIZE, 0, (void**)&global_info->data))))
|
||||
{
|
||||
ERR("Failed to map buffer, vr %d.\n", vr);
|
||||
|
@ -289,7 +289,7 @@ void vkd3d_descriptor_debug_free_global_info(
|
|||
pthread_cond_destroy(&global_info->ring_cond);
|
||||
}
|
||||
|
||||
VK_CALL(vkFreeMemory(device->vk_device, global_info->vk_memory, NULL));
|
||||
vkd3d_free_device_memory(device, &global_info->device_allocation);
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, global_info->vk_buffer, NULL));
|
||||
vkd3d_free(global_info);
|
||||
}
|
||||
|
@ -395,7 +395,7 @@ void vkd3d_descriptor_debug_unregister_heap(uint64_t cookie)
|
|||
}
|
||||
|
||||
void vkd3d_descriptor_debug_register_resource_cookie(struct vkd3d_descriptor_qa_global_info *global_info,
|
||||
uint64_t cookie, const D3D12_RESOURCE_DESC *desc)
|
||||
uint64_t cookie, const D3D12_RESOURCE_DESC1 *desc)
|
||||
{
|
||||
const char *fmt;
|
||||
DECL_BUFFER();
|
||||
|
@ -453,7 +453,7 @@ void vkd3d_descriptor_debug_register_allocation_cookie(
|
|||
struct vkd3d_descriptor_qa_global_info *global_info,
|
||||
uint64_t cookie, const struct vkd3d_allocate_memory_info *info)
|
||||
{
|
||||
D3D12_RESOURCE_DESC desc;
|
||||
D3D12_RESOURCE_DESC1 desc;
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
|
|
2338
libs/vkd3d/device.c
2338
libs/vkd3d/device.c
File diff suppressed because it is too large
Load Diff
|
@ -215,7 +215,31 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState_profiled(d3d12
|
|||
DEVICE_PROFILED_CALL_HRESULT(CreatePipelineState, iface, desc, riid, pipeline_state);
|
||||
}
|
||||
|
||||
static CONST_VTBL struct ID3D12Device6Vtbl d3d12_device_vtbl_profiled =
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2_profiled(d3d12_device_iface *iface,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc,
|
||||
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
|
||||
ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource)
|
||||
{
|
||||
DEVICE_PROFILED_CALL_HRESULT(CreateCommittedResource2, iface, heap_properties, heap_flags,
|
||||
desc, initial_state, optimized_clear_value, protected_session, iid, resource);
|
||||
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1_profiled(d3d12_device_iface *iface,
|
||||
ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *desc,
|
||||
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
|
||||
REFIID iid, void **resource)
|
||||
{
|
||||
DEVICE_PROFILED_CALL_HRESULT(CreatePlacedResource1, iface, heap, heap_offset,
|
||||
desc, initial_state, optimized_clear_value, iid, resource);
|
||||
}
|
||||
|
||||
static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView_profiled(d3d12_device_iface *iface,
|
||||
ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
|
||||
{
|
||||
DEVICE_PROFILED_CALL(CreateSamplerFeedbackUnorderedAccessView, iface, target_resource, feedback_resource, descriptor);
|
||||
}
|
||||
|
||||
CONST_VTBL struct ID3D12Device9Vtbl d3d12_device_vtbl_profiled =
|
||||
{
|
||||
/* IUnknown methods */
|
||||
d3d12_device_QueryInterface,
|
||||
|
@ -292,6 +316,19 @@ static CONST_VTBL struct ID3D12Device6Vtbl d3d12_device_vtbl_profiled =
|
|||
d3d12_device_CheckDriverMatchingIdentifier,
|
||||
/* ID3D12Device6 methods */
|
||||
d3d12_device_SetBackgroundProcessingMode,
|
||||
/* ID3D12Device7 methods */
|
||||
d3d12_device_AddToStateObject,
|
||||
d3d12_device_CreateProtectedResourceSession1,
|
||||
/* ID3D12Device8 methods */
|
||||
d3d12_device_GetResourceAllocationInfo2,
|
||||
d3d12_device_CreateCommittedResource2_profiled,
|
||||
d3d12_device_CreatePlacedResource1_profiled,
|
||||
d3d12_device_CreateSamplerFeedbackUnorderedAccessView_profiled,
|
||||
d3d12_device_GetCopyableFootprints1,
|
||||
/* ID3D12Device9 methods */
|
||||
d3d12_device_CreateShaderCacheSession,
|
||||
d3d12_device_ShaderCacheControl,
|
||||
d3d12_device_CreateCommandQueue1,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -154,20 +154,22 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3
|
|||
{
|
||||
VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
struct d3d12_desc *sampler_desc;
|
||||
struct d3d12_desc_split sampler_desc;
|
||||
struct d3d12_desc_split srv_desc;
|
||||
struct d3d12_device *device;
|
||||
struct d3d12_desc *srv_desc;
|
||||
|
||||
TRACE("iface %p, srv_handle %#x, sampler_handle %#x, cuda_texture_handle %p.\n", iface, srv_handle, sampler_handle, cuda_texture_handle);
|
||||
|
||||
TRACE("iface %p, srv_handle %zu, sampler_handle %zu, cuda_texture_handle %p.\n",
|
||||
iface, srv_handle.ptr, sampler_handle.ptr, cuda_texture_handle);
|
||||
|
||||
if (!cuda_texture_handle)
|
||||
return E_INVALIDARG;
|
||||
|
||||
device = d3d12_device_from_ID3D12DeviceExt(iface);
|
||||
srv_desc = d3d12_desc_from_cpu_handle(srv_handle);
|
||||
sampler_desc = d3d12_desc_from_cpu_handle(sampler_handle);
|
||||
srv_desc = d3d12_desc_decode_va(srv_handle.ptr);
|
||||
sampler_desc = d3d12_desc_decode_va(sampler_handle.ptr);
|
||||
|
||||
imageViewHandleInfo.imageView = srv_desc->info.view->vk_image_view;
|
||||
imageViewHandleInfo.sampler = sampler_desc->info.view->vk_sampler;
|
||||
imageViewHandleInfo.imageView = srv_desc.view->info.view->vk_image_view;
|
||||
imageViewHandleInfo.sampler = sampler_desc.view->info.view->vk_sampler;
|
||||
imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
|
||||
vk_procs = &device->vk_procs;
|
||||
|
@ -180,17 +182,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3
|
|||
{
|
||||
VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
struct d3d12_desc_split uav_desc;
|
||||
struct d3d12_device *device;
|
||||
struct d3d12_desc *uav_desc;
|
||||
|
||||
TRACE("iface %p, uav_handle %#x, cuda_surface_handle %p.\n", iface, uav_handle, cuda_surface_handle);
|
||||
|
||||
TRACE("iface %p, uav_handle %zu, cuda_surface_handle %p.\n", iface, uav_handle.ptr, cuda_surface_handle);
|
||||
if (!cuda_surface_handle)
|
||||
return E_INVALIDARG;
|
||||
|
||||
device = d3d12_device_from_ID3D12DeviceExt(iface);
|
||||
uav_desc = d3d12_desc_from_cpu_handle(uav_handle);
|
||||
uav_desc = d3d12_desc_decode_va(uav_handle.ptr);
|
||||
|
||||
imageViewHandleInfo.imageView = uav_desc->info.view->vk_image_view;
|
||||
imageViewHandleInfo.imageView = uav_desc.view->info.view->vk_image_view;
|
||||
imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
|
||||
vk_procs = &device->vk_procs;
|
||||
|
|
|
@ -23,11 +23,6 @@
|
|||
#include "vkd3d_private.h"
|
||||
|
||||
/* ID3D12Heap */
|
||||
static inline struct d3d12_heap *impl_from_ID3D12Heap(d3d12_heap_iface *iface)
|
||||
{
|
||||
return CONTAINING_RECORD(iface, struct d3d12_heap, ID3D12Heap_iface);
|
||||
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_heap_QueryInterface(d3d12_heap_iface *iface,
|
||||
REFIID iid, void **object)
|
||||
{
|
||||
|
@ -53,7 +48,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_QueryInterface(d3d12_heap_iface *ifa
|
|||
|
||||
static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(d3d12_heap_iface *iface)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
ULONG refcount = InterlockedIncrement(&heap->refcount);
|
||||
|
||||
TRACE("%p increasing refcount to %u.\n", heap, refcount);
|
||||
|
@ -73,13 +68,13 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
|
|||
static void d3d12_heap_set_name(struct d3d12_heap *heap, const char *name)
|
||||
{
|
||||
if (!heap->allocation.chunk)
|
||||
vkd3d_set_vk_object_name(heap->device, (uint64_t)heap->allocation.vk_memory,
|
||||
vkd3d_set_vk_object_name(heap->device, (uint64_t)heap->allocation.device_allocation.vk_memory,
|
||||
VK_OBJECT_TYPE_DEVICE_MEMORY, name);
|
||||
}
|
||||
|
||||
static ULONG STDMETHODCALLTYPE d3d12_heap_Release(d3d12_heap_iface *iface)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
ULONG refcount = InterlockedDecrement(&heap->refcount);
|
||||
|
||||
TRACE("%p decreasing refcount to %u.\n", heap, refcount);
|
||||
|
@ -93,7 +88,7 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(d3d12_heap_iface *iface)
|
|||
static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(d3d12_heap_iface *iface,
|
||||
REFGUID guid, UINT *data_size, void *data)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
|
||||
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);
|
||||
|
||||
|
@ -103,7 +98,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(d3d12_heap_iface *ifa
|
|||
static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateData(d3d12_heap_iface *iface,
|
||||
REFGUID guid, UINT data_size, const void *data)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
|
||||
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);
|
||||
|
||||
|
@ -114,7 +109,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateData(d3d12_heap_iface *ifa
|
|||
static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateDataInterface(d3d12_heap_iface *iface,
|
||||
REFGUID guid, const IUnknown *data)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
|
||||
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
|
||||
|
||||
|
@ -124,7 +119,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateDataInterface(d3d12_heap_i
|
|||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_heap_GetDevice(d3d12_heap_iface *iface, REFIID iid, void **device)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
|
||||
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
|
||||
|
||||
|
@ -134,7 +129,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_GetDevice(d3d12_heap_iface *iface, R
|
|||
static D3D12_HEAP_DESC * STDMETHODCALLTYPE d3d12_heap_GetDesc(d3d12_heap_iface *iface,
|
||||
D3D12_HEAP_DESC *desc)
|
||||
{
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
|
||||
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
|
||||
|
||||
TRACE("iface %p, desc %p.\n", iface, desc);
|
||||
|
||||
|
@ -150,7 +145,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_GetProtectedResourceSession(d3d12_he
|
|||
return E_NOTIMPL;
|
||||
}
|
||||
|
||||
static CONST_VTBL struct ID3D12Heap1Vtbl d3d12_heap_vtbl =
|
||||
CONST_VTBL struct ID3D12Heap1Vtbl d3d12_heap_vtbl =
|
||||
{
|
||||
/* IUnknown methods */
|
||||
d3d12_heap_QueryInterface,
|
||||
|
@ -169,21 +164,33 @@ static CONST_VTBL struct ID3D12Heap1Vtbl d3d12_heap_vtbl =
|
|||
d3d12_heap_GetProtectedResourceSession,
|
||||
};
|
||||
|
||||
static struct d3d12_heap *unsafe_impl_from_ID3D12Heap1(ID3D12Heap1 *iface)
|
||||
HRESULT d3d12_device_validate_custom_heap_type(struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties)
|
||||
{
|
||||
if (!iface)
|
||||
return NULL;
|
||||
assert(iface->lpVtbl == &d3d12_heap_vtbl);
|
||||
return impl_from_ID3D12Heap(iface);
|
||||
if (heap_properties->Type != D3D12_HEAP_TYPE_CUSTOM)
|
||||
return S_OK;
|
||||
|
||||
if (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN
|
||||
|| (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_L1
|
||||
&& (is_cpu_accessible_heap(heap_properties) || d3d12_device_is_uma(device, NULL))))
|
||||
{
|
||||
WARN("Invalid memory pool preference.\n");
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
if (heap_properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_UNKNOWN)
|
||||
{
|
||||
WARN("Must have explicit CPU page property for CUSTOM heap type.\n");
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface)
|
||||
static HRESULT validate_heap_desc(struct d3d12_device *device, const D3D12_HEAP_DESC *desc)
|
||||
{
|
||||
return unsafe_impl_from_ID3D12Heap1((ID3D12Heap1 *)iface);
|
||||
}
|
||||
HRESULT hr;
|
||||
|
||||
static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc)
|
||||
{
|
||||
if (!desc->SizeInBytes)
|
||||
{
|
||||
WARN("Invalid size %"PRIu64".\n", desc->SizeInBytes);
|
||||
|
@ -203,6 +210,9 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc)
|
|||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
if (FAILED(hr = d3d12_device_validate_custom_heap_type(device, &desc->Properties)))
|
||||
return hr;
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -225,11 +235,12 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev
|
|||
if (!heap->desc.Alignment)
|
||||
heap->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
|
||||
if (FAILED(hr = validate_heap_desc(&heap->desc)))
|
||||
if (FAILED(hr = validate_heap_desc(device, &heap->desc)))
|
||||
return hr;
|
||||
|
||||
alloc_info.heap_desc = heap->desc;
|
||||
alloc_info.host_ptr = host_address;
|
||||
alloc_info.extra_allocation_flags = 0;
|
||||
|
||||
if (FAILED(hr = vkd3d_private_store_init(&heap->private_store)))
|
||||
return hr;
|
||||
|
|
|
@ -24,34 +24,25 @@
|
|||
static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator *allocator,
|
||||
struct d3d12_device *device, const struct vkd3d_memory_allocation *allocation);
|
||||
|
||||
static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties)
|
||||
{
|
||||
if (properties->Type == D3D12_HEAP_TYPE_DEFAULT)
|
||||
return false;
|
||||
if (properties->Type == D3D12_HEAP_TYPE_CUSTOM)
|
||||
{
|
||||
return properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE
|
||||
|| properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint32_t vkd3d_select_memory_types(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags)
|
||||
{
|
||||
const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties;
|
||||
uint32_t type_mask = (1 << memory_info->memoryTypeCount) - 1;
|
||||
const struct vkd3d_memory_info_domain *domain_info;
|
||||
|
||||
domain_info = d3d12_device_get_memory_info_domain(device, heap_properties);
|
||||
|
||||
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
|
||||
type_mask &= device->memory_info.buffer_type_mask;
|
||||
type_mask &= domain_info->buffer_type_mask;
|
||||
|
||||
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES))
|
||||
type_mask &= device->memory_info.sampled_type_mask;
|
||||
type_mask &= domain_info->sampled_type_mask;
|
||||
|
||||
/* Render targets are not allowed on UPLOAD and READBACK heaps */
|
||||
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) &&
|
||||
heap_properties->Type != D3D12_HEAP_TYPE_UPLOAD &&
|
||||
heap_properties->Type != D3D12_HEAP_TYPE_READBACK)
|
||||
type_mask &= device->memory_info.rt_ds_type_mask;
|
||||
type_mask &= domain_info->rt_ds_type_mask;
|
||||
|
||||
if (!type_mask)
|
||||
ERR("No memory type found for heap flags %#x.\n", heap_flags);
|
||||
|
@ -75,6 +66,7 @@ static uint32_t vkd3d_find_memory_types_with_flags(struct d3d12_device *device,
|
|||
|
||||
static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, VkMemoryPropertyFlags *type_flags)
|
||||
{
|
||||
HRESULT hr;
|
||||
switch (heap_properties->Type)
|
||||
{
|
||||
case D3D12_HEAP_TYPE_DEFAULT:
|
||||
|
@ -83,7 +75,9 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
|
|||
|
||||
case D3D12_HEAP_TYPE_UPLOAD:
|
||||
*type_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_UPLOAD_HVV)
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED)
|
||||
*type_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||
else if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV))
|
||||
*type_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
break;
|
||||
|
||||
|
@ -92,13 +86,8 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
|
|||
break;
|
||||
|
||||
case D3D12_HEAP_TYPE_CUSTOM:
|
||||
if (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN
|
||||
|| (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_L1
|
||||
&& (is_cpu_accessible_heap(heap_properties) || d3d12_device_is_uma(device, NULL))))
|
||||
{
|
||||
WARN("Invalid memory pool preference.\n");
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
if (FAILED(hr = d3d12_device_validate_custom_heap_type(device, heap_properties)))
|
||||
return hr;
|
||||
|
||||
switch (heap_properties->CPUPageProperty)
|
||||
{
|
||||
|
@ -107,13 +96,13 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
|
|||
break;
|
||||
case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE:
|
||||
*type_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED)
|
||||
*type_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||
break;
|
||||
case D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE:
|
||||
*type_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
break;
|
||||
case D3D12_CPU_PAGE_PROPERTY_UNKNOWN:
|
||||
default:
|
||||
WARN("Invalid CPU page property.\n");
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
break;
|
||||
|
@ -128,7 +117,7 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
|
|||
|
||||
static HRESULT vkd3d_create_global_buffer(struct d3d12_device *device, VkDeviceSize size, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, VkBuffer *vk_buffer)
|
||||
{
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
D3D12_RESOURCE_DESC1 resource_desc;
|
||||
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
|
@ -149,14 +138,53 @@ static HRESULT vkd3d_create_global_buffer(struct d3d12_device *device, VkDeviceS
|
|||
return vkd3d_create_buffer(device, heap_properties, heap_flags, &resource_desc, vk_buffer);
|
||||
}
|
||||
|
||||
void vkd3d_free_device_memory(struct d3d12_device *device, const struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkDeviceSize *type_current;
|
||||
bool budget_sensitive;
|
||||
|
||||
if (allocation->vk_memory == VK_NULL_HANDLE)
|
||||
{
|
||||
/* Deferred heap. Return early to skip confusing log messages. */
|
||||
return;
|
||||
}
|
||||
|
||||
VK_CALL(vkFreeMemory(device->vk_device, allocation->vk_memory, NULL));
|
||||
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << allocation->vk_memory_type));
|
||||
if (budget_sensitive)
|
||||
{
|
||||
type_current = &device->memory_info.type_current[allocation->vk_memory_type];
|
||||
pthread_mutex_lock(&device->memory_info.budget_lock);
|
||||
assert(*type_current >= allocation->size);
|
||||
*type_current -= allocation->size;
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Freeing memory of type %u, new total allocated size %"PRIu64" MiB.\n",
|
||||
allocation->vk_memory_type, *type_current / (1024 * 1024));
|
||||
}
|
||||
pthread_mutex_unlock(&device->memory_info.budget_lock);
|
||||
}
|
||||
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Freeing memory of type %u, %"PRIu64" KiB.\n",
|
||||
allocation->vk_memory_type, allocation->size / 1024);
|
||||
}
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
|
||||
VkDeviceSize size, VkMemoryPropertyFlags type_flags, uint32_t type_mask,
|
||||
void *pNext, VkDeviceMemory *vk_memory, uint32_t *vk_memory_type)
|
||||
void *pNext, struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
const VkPhysicalDeviceMemoryProperties *memory_props = &device->memory_properties;
|
||||
const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct vkd3d_memory_info *memory_info = &device->memory_info;
|
||||
VkMemoryAllocateInfo allocate_info;
|
||||
VkDeviceSize *type_current;
|
||||
VkDeviceSize *type_budget;
|
||||
bool budget_sensitive;
|
||||
VkResult vr;
|
||||
|
||||
/* buffer_mask / sampled_mask etc will generally take care of this,
|
||||
* but for certain fallback scenarios where we select other memory
|
||||
|
@ -171,15 +199,60 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
|
|||
{
|
||||
uint32_t type_index = vkd3d_bitmask_iter32(&type_mask);
|
||||
|
||||
if ((memory_info->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
|
||||
if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
|
||||
continue;
|
||||
|
||||
allocate_info.memoryTypeIndex = type_index;
|
||||
|
||||
if (VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, vk_memory)) == VK_SUCCESS)
|
||||
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << type_index));
|
||||
if (budget_sensitive)
|
||||
{
|
||||
if (vk_memory_type)
|
||||
*vk_memory_type = type_index;
|
||||
type_budget = &memory_info->type_budget[type_index];
|
||||
type_current = &memory_info->type_current[type_index];
|
||||
pthread_mutex_lock(&memory_info->budget_lock);
|
||||
if (*type_current + size > *type_budget)
|
||||
{
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
|
||||
type_index, *type_current, size, *type_budget);
|
||||
}
|
||||
pthread_mutex_unlock(&memory_info->budget_lock);
|
||||
|
||||
/* If we're out of DEVICE budget, don't try other types. */
|
||||
if (type_flags & optional_flags)
|
||||
return E_OUTOFMEMORY;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
|
||||
|
||||
if (budget_sensitive)
|
||||
{
|
||||
if (vr == VK_SUCCESS)
|
||||
{
|
||||
*type_current += size;
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
|
||||
type_index, *type_current / (1024 * 1024));
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&memory_info->budget_lock);
|
||||
}
|
||||
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
|
||||
(vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
|
||||
type_index, allocate_info.allocationSize / 1024);
|
||||
}
|
||||
|
||||
if (vr == VK_SUCCESS)
|
||||
{
|
||||
allocation->vk_memory_type = type_index;
|
||||
allocation->size = size;
|
||||
return S_OK;
|
||||
}
|
||||
else if (type_flags & optional_flags)
|
||||
|
@ -196,22 +269,48 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
|
|||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
static bool vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(
|
||||
const struct VkPhysicalDeviceMemoryProperties *props, uint32_t type_mask)
|
||||
{
|
||||
uint32_t heap_mask = 0;
|
||||
if (!type_mask)
|
||||
return false;
|
||||
while (type_mask)
|
||||
heap_mask |= 1u << props->memoryTypes[vkd3d_bitmask_iter32(&type_mask)].heapIndex;
|
||||
return !!(heap_mask & (heap_mask - 1u));
|
||||
}
|
||||
|
||||
HRESULT vkd3d_allocate_device_memory(struct d3d12_device *device,
|
||||
VkDeviceSize size, VkMemoryPropertyFlags type_flags, uint32_t type_mask,
|
||||
void *pNext, VkDeviceMemory *vk_memory, uint32_t *vk_memory_type)
|
||||
void *pNext, struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
HRESULT hr;
|
||||
|
||||
hr = vkd3d_try_allocate_device_memory(device, size, type_flags,
|
||||
type_mask, pNext, vk_memory, vk_memory_type);
|
||||
type_mask, pNext, allocation);
|
||||
|
||||
if (FAILED(hr) && (type_flags & optional_flags))
|
||||
{
|
||||
WARN("Memory allocation failed, falling back to system memory.\n");
|
||||
hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
type_flags & ~optional_flags, type_mask, pNext,
|
||||
vk_memory, vk_memory_type);
|
||||
if (vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(&device->memory_properties, type_mask))
|
||||
{
|
||||
WARN("Memory allocation failed, falling back to system memory.\n");
|
||||
hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
type_flags & ~optional_flags, type_mask, pNext, allocation);
|
||||
}
|
||||
else if (device->memory_properties.memoryHeapCount > 1)
|
||||
{
|
||||
/* It might be the case (NV with RT/DS heap) that we just cannot fall back in any meaningful way.
|
||||
* E.g. there exists no memory type that is not DEVICE_LOCAL and covers both RT and DS.
|
||||
* For this case, we have no choice but to not allocate,
|
||||
* and defer actual memory allocation to CreatePlacedResource() time.
|
||||
* NVIDIA bug reference for fixing this case: 2175829. */
|
||||
WARN("Memory allocation failed, but it is not possible to fallback to system memory here. Deferring allocation.\n");
|
||||
return hr;
|
||||
}
|
||||
|
||||
/* If we fail to allocate, and only have one heap to work with (iGPU),
|
||||
* falling back is meaningless, just fail. */
|
||||
}
|
||||
|
||||
if (FAILED(hr))
|
||||
|
@ -225,37 +324,42 @@ HRESULT vkd3d_allocate_device_memory(struct d3d12_device *device,
|
|||
|
||||
static HRESULT vkd3d_import_host_memory(struct d3d12_device *device, void *host_address,
|
||||
VkDeviceSize size, VkMemoryPropertyFlags type_flags, uint32_t type_mask,
|
||||
void *pNext, VkDeviceMemory *vk_memory, uint32_t *vk_memory_type)
|
||||
void *pNext, struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
VkImportMemoryHostPointerInfoEXT import_info;
|
||||
HRESULT hr;
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
|
||||
import_info.pNext = pNext;
|
||||
import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
|
||||
import_info.pHostPointer = host_address;
|
||||
|
||||
if (FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
type_flags, type_mask, &import_info, vk_memory, vk_memory_type)))
|
||||
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK) ||
|
||||
FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
type_flags, type_mask, &import_info, allocation)))
|
||||
{
|
||||
WARN("Failed to import host memory, hr %#x.\n", hr);
|
||||
if (FAILED(hr))
|
||||
WARN("Failed to import host memory, hr %#x.\n", hr);
|
||||
/* If we failed, fall back to a host-visible allocation. Generally
|
||||
* the app will access the memory through the main host pointer,
|
||||
* so it's fine. */
|
||||
hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
type_mask, &import_info, vk_memory, vk_memory_type);
|
||||
type_mask, pNext, allocation);
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
|
||||
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation,
|
||||
struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
|
||||
{
|
||||
if (device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
allocation->resource.va = vkd3d_get_buffer_device_address(device, allocation->resource.vk_buffer);
|
||||
else
|
||||
else if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
|
||||
else
|
||||
allocation->resource.va = 0xdeadbeef;
|
||||
|
||||
if (!allocation->resource.va)
|
||||
{
|
||||
|
@ -263,7 +367,9 @@ static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocatio
|
|||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
|
||||
/* Internal scratch buffers are not visible to application so we never have to map it back to VkBuffer. */
|
||||
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -347,10 +453,12 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
|
|||
|
||||
if ((allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS) && allocation->resource.va)
|
||||
{
|
||||
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
|
||||
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
|
||||
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
{
|
||||
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
|
||||
}
|
||||
}
|
||||
|
||||
if (allocation->resource.view_map)
|
||||
|
@ -362,7 +470,7 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
|
|||
if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, allocation->resource.vk_buffer, NULL));
|
||||
|
||||
VK_CALL(vkFreeMemory(device->vk_device, allocation->vk_memory, NULL));
|
||||
vkd3d_free_device_memory(device, &allocation->device_allocation);
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device,
|
||||
|
@ -372,6 +480,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
VkMemoryRequirements memory_requirements;
|
||||
VkMemoryAllocateFlagsInfo flags_info;
|
||||
VkMemoryPropertyFlags type_flags;
|
||||
VkBindBufferMemoryInfo bind_info;
|
||||
void *host_ptr = info->host_ptr;
|
||||
uint32_t type_mask;
|
||||
VkResult vr;
|
||||
|
@ -389,6 +498,12 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
if (FAILED(hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags)))
|
||||
return hr;
|
||||
|
||||
/* Mask out optional memory properties as needed.
|
||||
* This is relevant for chunk allocator fallbacks
|
||||
* since the info->memory_requirements already encodes
|
||||
* only HOST_VISIBLE types and we use NO_FALLBACK allocation mode. */
|
||||
type_flags &= ~info->optional_memory_properties;
|
||||
|
||||
if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
|
||||
{
|
||||
/* If requested, create a buffer covering the entire allocation
|
||||
|
@ -410,8 +525,14 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
memory_requirements = info->memory_requirements;
|
||||
}
|
||||
|
||||
type_mask = vkd3d_select_memory_types(device, &info->heap_properties,
|
||||
info->heap_flags) & memory_requirements.memoryTypeBits;
|
||||
/* If an allocation is a dedicated fallback allocation,
|
||||
* we must not look at heap_flags, since we might end up noping out
|
||||
* the memory types we want to allocate with. */
|
||||
type_mask = memory_requirements.memoryTypeBits;
|
||||
if (info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED)
|
||||
type_mask &= device->memory_info.global_mask;
|
||||
else
|
||||
type_mask &= vkd3d_select_memory_types(device, &info->heap_properties, info->heap_flags);
|
||||
|
||||
/* Allocate actual backing storage */
|
||||
flags_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
|
||||
|
@ -434,22 +555,33 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
|
||||
allocation->flags |= VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH;
|
||||
if (!(host_ptr = vkd3d_allocate_write_watch_pointer(&info->heap_properties, memory_requirements.size)))
|
||||
{
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, allocation->resource.vk_buffer, NULL));
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
}
|
||||
|
||||
if (host_ptr)
|
||||
{
|
||||
hr = vkd3d_import_host_memory(device, host_ptr, memory_requirements.size,
|
||||
type_flags, type_mask, &flags_info, &allocation->vk_memory, &allocation->vk_memory_type);
|
||||
type_flags, type_mask, &flags_info, &allocation->device_allocation);
|
||||
}
|
||||
else if (info->flags & VKD3D_ALLOCATION_FLAG_NO_FALLBACK)
|
||||
{
|
||||
hr = vkd3d_try_allocate_device_memory(device, memory_requirements.size, type_flags,
|
||||
type_mask, &flags_info, &allocation->device_allocation);
|
||||
}
|
||||
else
|
||||
{
|
||||
hr = vkd3d_allocate_device_memory(device, memory_requirements.size, type_flags,
|
||||
type_mask, &flags_info, &allocation->vk_memory, &allocation->vk_memory_type);
|
||||
type_mask, &flags_info, &allocation->device_allocation);
|
||||
}
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, allocation->resource.vk_buffer, NULL));
|
||||
return hr;
|
||||
}
|
||||
|
||||
/* Map memory if the allocation was requested to be host-visible,
|
||||
* but do not map if the allocation was meant to be device-local
|
||||
|
@ -465,7 +597,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
{
|
||||
allocation->flags |= VKD3D_ALLOCATION_FLAG_CPU_ACCESS;
|
||||
|
||||
if ((vr = VK_CALL(vkMapMemory(device->vk_device, allocation->vk_memory,
|
||||
if ((vr = VK_CALL(vkMapMemory(device->vk_device, allocation->device_allocation.vk_memory,
|
||||
0, VK_WHOLE_SIZE, 0, &allocation->cpu_address))))
|
||||
{
|
||||
ERR("Failed to map memory, vr %d.\n", vr);
|
||||
|
@ -477,8 +609,13 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
/* Bind memory to global or dedicated buffer as needed */
|
||||
if (allocation->resource.vk_buffer)
|
||||
{
|
||||
if ((vr = VK_CALL(vkBindBufferMemory(device->vk_device,
|
||||
allocation->resource.vk_buffer, allocation->vk_memory, 0))) < 0)
|
||||
bind_info.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO;
|
||||
bind_info.pNext = NULL;
|
||||
bind_info.buffer = allocation->resource.vk_buffer;
|
||||
bind_info.memory = allocation->device_allocation.vk_memory;
|
||||
bind_info.memoryOffset = 0;
|
||||
|
||||
if ((vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))) < 0)
|
||||
{
|
||||
ERR("Failed to bind buffer memory, vr %d.\n", vr);
|
||||
vkd3d_memory_allocation_free(allocation, device, allocator);
|
||||
|
@ -501,7 +638,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
|
|||
allocation->resource.cookie, info);
|
||||
|
||||
TRACE("Created allocation %p on memory type %u (%"PRIu64" bytes).\n",
|
||||
allocation, allocation->vk_memory_type, allocation->resource.size);
|
||||
allocation, allocation->device_allocation.vk_memory_type, allocation->resource.size);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -928,6 +1065,13 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
|
|||
* rather than rewriting the command buffer or dispatching the clear */
|
||||
vk_cmd_buffer = clear_queue->vk_command_buffers[clear_queue->command_buffer_index];
|
||||
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
|
||||
{
|
||||
INFO("Submitting clear command list.\n");
|
||||
for (i = 0; i < clear_queue->allocations_count; i++)
|
||||
INFO("Clearing allocation %zu: %"PRIu64".\n", i, clear_queue->allocations[i]->resource.size);
|
||||
}
|
||||
|
||||
vkd3d_memory_allocator_wait_clear_semaphore(allocator, device,
|
||||
clear_queue->next_signal_value - VKD3D_MEMORY_CLEAR_COMMAND_BUFFER_COUNT, UINT64_MAX);
|
||||
|
||||
|
@ -982,6 +1126,8 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
|
|||
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
|
||||
vkd3d_queue_release(allocator->vkd3d_queue);
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (vr < 0)
|
||||
{
|
||||
ERR("Failed to submit command buffer, vr %d.\n", vr);
|
||||
|
@ -1011,6 +1157,7 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
|
|||
for (i = 0; i < queue_family->queue_count; i++)
|
||||
{
|
||||
vkd3d_queue_add_wait(queue_family->queues[i],
|
||||
NULL,
|
||||
clear_queue->vk_semaphore,
|
||||
clear_queue->next_signal_value);
|
||||
}
|
||||
|
@ -1124,8 +1271,9 @@ static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator
|
|||
vkd3d_memory_allocator_wait_clear_semaphore(allocator, device, wait_value, UINT64_MAX);
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
|
||||
static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask,
|
||||
VkMemoryPropertyFlags optional_properties, struct vkd3d_memory_chunk **chunk)
|
||||
{
|
||||
struct vkd3d_allocate_memory_info alloc_info;
|
||||
struct vkd3d_memory_chunk *object;
|
||||
|
@ -1137,6 +1285,8 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
|
|||
alloc_info.memory_requirements.memoryTypeBits = type_mask;
|
||||
alloc_info.heap_properties = *heap_properties;
|
||||
alloc_info.heap_flags = heap_flags;
|
||||
alloc_info.flags = VKD3D_ALLOCATION_FLAG_NO_FALLBACK;
|
||||
alloc_info.optional_memory_properties = optional_properties;
|
||||
|
||||
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
|
||||
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
|
||||
|
@ -1157,6 +1307,7 @@ static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *a
|
|||
|
||||
static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
|
||||
struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
|
||||
VkMemoryPropertyFlags optional_properties,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
|
||||
struct vkd3d_memory_allocation *allocation)
|
||||
{
|
||||
|
@ -1179,7 +1330,7 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
|
|||
continue;
|
||||
|
||||
/* Filter out unsupported memory types */
|
||||
if (!(type_mask & (1u << chunk->allocation.vk_memory_type)))
|
||||
if (!(type_mask & (1u << chunk->allocation.device_allocation.vk_memory_type)))
|
||||
continue;
|
||||
|
||||
if (SUCCEEDED(hr = vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation)))
|
||||
|
@ -1188,8 +1339,8 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
|
|||
|
||||
/* Try allocating a new chunk on one of the supported memory types
|
||||
* before the caller falls back to potentially slower memory */
|
||||
if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
|
||||
heap_flags & heap_flag_mask, memory_requirements->memoryTypeBits, &chunk)))
|
||||
if (FAILED(hr = vkd3d_memory_allocator_try_add_chunk(allocator, device, heap_properties,
|
||||
heap_flags & heap_flag_mask, type_mask, optional_properties, &chunk)))
|
||||
return hr;
|
||||
|
||||
return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
|
||||
|
@ -1198,6 +1349,9 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
|
|||
void vkd3d_free_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
|
||||
const struct vkd3d_memory_allocation *allocation)
|
||||
{
|
||||
if (allocation->device_allocation.vk_memory == VK_NULL_HANDLE)
|
||||
return;
|
||||
|
||||
if (allocation->clear_semaphore_value)
|
||||
vkd3d_memory_allocator_wait_allocation(allocator, device, allocation);
|
||||
|
||||
|
@ -1233,13 +1387,14 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
|
|||
pthread_mutex_lock(&allocator->mutex);
|
||||
|
||||
hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
|
||||
&memory_requirements, optional_mask, &info->heap_properties,
|
||||
&memory_requirements, optional_mask, 0, &info->heap_properties,
|
||||
info->heap_flags, allocation);
|
||||
|
||||
if (FAILED(hr) && (required_mask & ~optional_mask))
|
||||
{
|
||||
hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
|
||||
&memory_requirements, required_mask & ~optional_mask,
|
||||
optional_flags,
|
||||
&info->heap_properties, info->heap_flags, allocation);
|
||||
}
|
||||
|
||||
|
@ -1247,13 +1402,35 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
|
|||
return hr;
|
||||
}
|
||||
|
||||
/* Tells whether the given Vulkan driver is on the allow-list of drivers
 * that are treated as implicitly clearing memory.
 *
 * driver_id: Driver identifier to classify.
 *
 * Returns true only for the drivers enumerated below; any other
 * driver yields false. */
static inline bool vkd3d_driver_implicitly_clears(VkDriverId driver_id)
{
    /* Known to pass test_stress_suballocation which hits this path. */
    return driver_id == VK_DRIVER_ID_MESA_RADV ||
            driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
            driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
}
|
||||
|
||||
HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
|
||||
const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
|
||||
{
|
||||
bool implementation_implicitly_clears;
|
||||
bool needs_clear;
|
||||
bool suballocate;
|
||||
HRESULT hr;
|
||||
|
||||
if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
|
||||
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)))
|
||||
suballocate = !info->pNext && !info->host_ptr &&
|
||||
info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
|
||||
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)) &&
|
||||
!(info->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH);
|
||||
|
||||
if (suballocate)
|
||||
hr = vkd3d_suballocate_memory(device, allocator, info, allocation);
|
||||
else
|
||||
hr = vkd3d_memory_allocation_init(allocation, device, allocator, info);
|
||||
|
@ -1261,16 +1438,51 @@ HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_a
|
|||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
if (!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED))
|
||||
/* If we're allocating Vulkan memory directly,
|
||||
* we can rely on the driver doing this for us.
|
||||
* This is relying on implementation details.
|
||||
* RADV definitely does this, and it seems like NV also does it.
|
||||
* TODO: an extension for this would be nice. */
|
||||
implementation_implicitly_clears =
|
||||
vkd3d_driver_implicitly_clears(device->device_info.driver_properties.driverID) &&
|
||||
!suballocate;
|
||||
|
||||
needs_clear = !implementation_implicitly_clears &&
|
||||
!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) &&
|
||||
!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR);
|
||||
|
||||
if (needs_clear)
|
||||
vkd3d_memory_allocator_clear_allocation(allocator, device, allocation);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
static bool vkd3d_heap_allocation_accept_deferred_resource_placements(struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags)
|
||||
{
|
||||
uint32_t type_mask;
|
||||
|
||||
/* Normally, if a memory allocation fails, we consider it an error, but there are some exceptions
|
||||
* where we can defer memory allocation, like CreateHeap where fallback system memory type is not available.
|
||||
* In this case, we will defer memory allocation until CreatePlacedResource() time, and we should
|
||||
* accept that a memory allocation failed. */
|
||||
|
||||
/* Only accept deferrals for DEFAULT / CPU_NOT_AVAILABLE heaps.
|
||||
* If we're going for host memory, we have nowhere left to fall back to either way. */
|
||||
if (is_cpu_accessible_heap(heap_properties))
|
||||
return false;
|
||||
|
||||
type_mask = vkd3d_select_memory_types(device, heap_properties, heap_flags);
|
||||
return device->memory_properties.memoryHeapCount > 1 &&
|
||||
!vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(&device->memory_properties, type_mask);
|
||||
}
|
||||
|
||||
HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
|
||||
const struct vkd3d_allocate_heap_memory_info *info, struct vkd3d_memory_allocation *allocation)
|
||||
{
|
||||
struct vkd3d_allocate_heap_memory_info heap_info;
|
||||
struct vkd3d_allocate_memory_info alloc_info;
|
||||
HRESULT hr;
|
||||
|
||||
memset(&alloc_info, 0, sizeof(alloc_info));
|
||||
alloc_info.memory_requirements.memoryTypeBits = ~0u;
|
||||
|
@ -1280,18 +1492,52 @@ HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_mem
|
|||
alloc_info.heap_flags = info->heap_desc.Flags;
|
||||
alloc_info.host_ptr = info->host_ptr;
|
||||
|
||||
alloc_info.flags |= info->extra_allocation_flags;
|
||||
if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
|
||||
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
|
||||
|
||||
return vkd3d_allocate_memory(device, allocator, &alloc_info, allocation);
|
||||
if (is_cpu_accessible_heap(&info->heap_desc.Properties))
|
||||
{
|
||||
if (info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)
|
||||
{
|
||||
/* If the heap was only designed to handle images, the heap is useless,
|
||||
* and we can force everything to go through committed path. */
|
||||
memset(allocation, 0, sizeof(*allocation));
|
||||
return S_OK;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* CPU visible textures are never placed on a heap directly,
|
||||
* since LINEAR images have alignment / size requirements
|
||||
* that are vastly different from OPTIMAL ones.
|
||||
* We can place buffers however. */
|
||||
heap_info = *info;
|
||||
info = &heap_info;
|
||||
heap_info.heap_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
}
|
||||
}
|
||||
|
||||
hr = vkd3d_allocate_memory(device, allocator, &alloc_info, allocation);
|
||||
if (hr == E_OUTOFMEMORY && vkd3d_heap_allocation_accept_deferred_resource_placements(device,
|
||||
&info->heap_desc.Properties, info->heap_desc.Flags))
|
||||
{
|
||||
/* It's okay and sometimes expected that we fail here.
|
||||
* Defer allocation until CreatePlacedResource(). */
|
||||
memset(allocation, 0, sizeof(*allocation));
|
||||
hr = S_OK;
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
HRESULT vkd3d_allocate_buffer_memory(struct d3d12_device *device, VkBuffer vk_buffer,
|
||||
VkMemoryPropertyFlags type_flags, VkDeviceMemory *vk_memory)
|
||||
VkMemoryPropertyFlags type_flags,
|
||||
struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkMemoryRequirements memory_requirements;
|
||||
VkMemoryAllocateFlagsInfo flags_info;
|
||||
VkBindBufferMemoryInfo bind_info;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -1305,30 +1551,44 @@ HRESULT vkd3d_allocate_buffer_memory(struct d3d12_device *device, VkBuffer vk_bu
|
|||
VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, vk_buffer, &memory_requirements));
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_device_memory(device, memory_requirements.size,
|
||||
type_flags, memory_requirements.memoryTypeBits, &flags_info, vk_memory, NULL)))
|
||||
type_flags, memory_requirements.memoryTypeBits, &flags_info, allocation)))
|
||||
return hr;
|
||||
|
||||
if (FAILED(vr = VK_CALL(vkBindBufferMemory(device->vk_device, vk_buffer, *vk_memory, 0))))
|
||||
bind_info.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO;
|
||||
bind_info.pNext = NULL;
|
||||
bind_info.buffer = vk_buffer;
|
||||
bind_info.memory = allocation->vk_memory;
|
||||
bind_info.memoryOffset = 0;
|
||||
|
||||
if (FAILED(vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))))
|
||||
return hresult_from_vk_result(vr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
HRESULT vkd3d_allocate_image_memory(struct d3d12_device *device, VkImage vk_image,
|
||||
VkMemoryPropertyFlags type_flags, VkDeviceMemory *vk_memory)
|
||||
VkMemoryPropertyFlags type_flags,
|
||||
struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkMemoryRequirements memory_requirements;
|
||||
VkBindImageMemoryInfo bind_info;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &memory_requirements));
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_device_memory(device, memory_requirements.size,
|
||||
type_flags, memory_requirements.memoryTypeBits, NULL, vk_memory, NULL)))
|
||||
type_flags, memory_requirements.memoryTypeBits, NULL, allocation)))
|
||||
return hr;
|
||||
|
||||
if (FAILED(vr = VK_CALL(vkBindImageMemory(device->vk_device, vk_image, *vk_memory, 0))))
|
||||
bind_info.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
|
||||
bind_info.pNext = NULL;
|
||||
bind_info.image = vk_image;
|
||||
bind_info.memory = allocation->vk_memory;
|
||||
bind_info.memoryOffset = 0;
|
||||
|
||||
if (FAILED(vr = VK_CALL(vkBindImageMemory2KHR(device->vk_device, 1, &bind_info))))
|
||||
return hresult_from_vk_result(vr);
|
||||
|
||||
return hr;
|
||||
|
|
|
@ -19,6 +19,7 @@ vkd3d_shaders =[
|
|||
|
||||
'shaders/fs_copy_image_float.frag',
|
||||
'shaders/fs_copy_image_uint.frag',
|
||||
'shaders/fs_copy_image_stencil.frag',
|
||||
|
||||
'shaders/gs_fullscreen.geom',
|
||||
'shaders/vs_fullscreen.vert',
|
||||
|
@ -26,6 +27,8 @@ vkd3d_shaders =[
|
|||
|
||||
'shaders/vs_swapchain_fullscreen.vert',
|
||||
'shaders/fs_swapchain_fullscreen.frag',
|
||||
'shaders/cs_execute_indirect_patch.comp',
|
||||
'shaders/cs_execute_indirect_patch_debug_ring.comp',
|
||||
]
|
||||
|
||||
vkd3d_src = [
|
||||
|
@ -38,7 +41,6 @@ vkd3d_src = [
|
|||
'heap.c',
|
||||
'memory.c',
|
||||
'meta.c',
|
||||
'platform.c',
|
||||
'resource.c',
|
||||
'state.c',
|
||||
'utils.c',
|
||||
|
@ -61,6 +63,14 @@ if enable_descriptor_qa
|
|||
vkd3d_src += ['descriptor_debug.c']
|
||||
endif
|
||||
|
||||
if enable_breadcrumbs
|
||||
vkd3d_src += ['breadcrumbs.c']
|
||||
endif
|
||||
|
||||
if vkd3d_platform == 'windows'
|
||||
vkd3d_src += ['shared_metadata.c']
|
||||
endif
|
||||
|
||||
if not enable_d3d12
|
||||
vkd3d_lib = shared_library('vkd3d-proton', vkd3d_src, glsl_generator.process(vkd3d_shaders), vkd3d_build, vkd3d_version,
|
||||
dependencies : [ vkd3d_common_dep, vkd3d_shader_dep ] + vkd3d_extra_libs,
|
||||
|
|
|
@ -137,73 +137,8 @@ static VkResult vkd3d_meta_create_compute_pipeline(struct d3d12_device *device,
|
|||
return vr;
|
||||
}
|
||||
|
||||
/* Creates a render pass with one subpass and one attachment for meta operations.
 *
 * The attachment is loaded and stored (color/depth as well as stencil) and
 * stays in the given layout for the whole pass. Depending on the aspect mask
 * of the format it is bound either as the depth-stencil attachment or as the
 * only color attachment.
 *
 * device:         Device used to create the render pass.
 * samples:        Sample count of the attachment.
 * format:         Attachment format; must not be NULL.
 * layout:         Initial, final and in-subpass layout of the attachment.
 * vk_render_pass: Receives the created render pass handle.
 *
 * Returns the VkResult of vkCreateRenderPass2KHR; negative values are logged. */
static VkResult vkd3d_meta_create_render_pass(struct d3d12_device *device, VkSampleCountFlagBits samples,
        const struct vkd3d_format *format, VkImageLayout layout, VkRenderPass *vk_render_pass)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkAttachmentDescription2KHR target_desc;
    VkRenderPassCreateInfo2KHR create_info;
    VkAttachmentReference2KHR target_ref;
    VkSubpassDescription2KHR subpass;
    VkResult vr;

    assert(format);

    /* The one and only attachment: contents are preserved across the pass. */
    target_desc.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
    target_desc.pNext = NULL;
    target_desc.flags = 0;
    target_desc.format = format->vk_format;
    target_desc.samples = samples;
    target_desc.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    target_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    target_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    target_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
    target_desc.initialLayout = layout;
    target_desc.finalLayout = layout;

    target_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
    target_ref.pNext = NULL;
    target_ref.attachment = 0;
    target_ref.layout = layout;
    target_ref.aspectMask = 0; /* input attachment aspect mask */

    subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
    subpass.pNext = NULL;
    subpass.flags = 0;
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    subpass.viewMask = 0;
    subpass.inputAttachmentCount = 0;
    subpass.pInputAttachments = NULL;
    subpass.pResolveAttachments = NULL;
    subpass.preserveAttachmentCount = 0;
    subpass.pPreserveAttachments = NULL;

    /* Formats with a depth and/or stencil aspect go to the depth-stencil
     * slot; everything else is bound as color attachment 0. */
    if ((format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
    {
        subpass.colorAttachmentCount = 0;
        subpass.pColorAttachments = NULL;
        subpass.pDepthStencilAttachment = &target_ref;
    }
    else
    {
        subpass.colorAttachmentCount = 1;
        subpass.pColorAttachments = &target_ref;
        subpass.pDepthStencilAttachment = NULL;
    }

    create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
    create_info.pNext = NULL;
    create_info.flags = 0;
    create_info.attachmentCount = 1;
    create_info.pAttachments = &target_desc;
    create_info.subpassCount = 1;
    create_info.pSubpasses = &subpass;
    create_info.dependencyCount = 0;
    create_info.pDependencies = NULL;
    create_info.correlatedViewMaskCount = 0;
    create_info.pCorrelatedViewMasks = NULL;

    vr = VK_CALL(vkCreateRenderPass2KHR(device->vk_device, &create_info, NULL, vk_render_pass));
    if (vr < 0)
    {
        ERR("Failed to create render pass, vr %d.\n", vr);
    }

    return vr;
}
|
||||
|
||||
static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
VkPipelineLayout layout, VkRenderPass render_pass,
|
||||
VkPipelineLayout layout, VkFormat color_format, VkFormat ds_format, VkImageAspectFlags vk_aspect_mask,
|
||||
VkShaderModule vs_module, VkShaderModule fs_module,
|
||||
VkSampleCountFlagBits samples, const VkPipelineDepthStencilStateCreateInfo *ds_state,
|
||||
const VkPipelineColorBlendStateCreateInfo *cb_state, const VkSpecializationInfo *spec_info,
|
||||
|
@ -213,6 +148,7 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
|
|||
VkPipelineShaderStageCreateInfo shader_stages[3];
|
||||
VkPipelineInputAssemblyStateCreateInfo ia_state;
|
||||
VkPipelineRasterizationStateCreateInfo rs_state;
|
||||
VkPipelineRenderingCreateInfoKHR rendering_info;
|
||||
VkPipelineVertexInputStateCreateInfo vi_state;
|
||||
VkPipelineMultisampleStateCreateInfo ms_state;
|
||||
VkPipelineViewportStateCreateInfo vp_state;
|
||||
|
@ -279,8 +215,16 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
|
|||
dyn_state.dynamicStateCount = ARRAY_SIZE(dynamic_states);
|
||||
dyn_state.pDynamicStates = dynamic_states;
|
||||
|
||||
rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR;
|
||||
rendering_info.pNext = NULL;
|
||||
rendering_info.viewMask = 0;
|
||||
rendering_info.colorAttachmentCount = color_format && (vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) ? 1 : 0;
|
||||
rendering_info.pColorAttachmentFormats = color_format ? &color_format : NULL;
|
||||
rendering_info.depthAttachmentFormat = (vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) ? ds_format : VK_FORMAT_UNDEFINED;
|
||||
rendering_info.stencilAttachmentFormat = (vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) ? ds_format : VK_FORMAT_UNDEFINED;
|
||||
|
||||
pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
||||
pipeline_info.pNext = NULL;
|
||||
pipeline_info.pNext = &rendering_info;
|
||||
pipeline_info.flags = 0;
|
||||
pipeline_info.stageCount = 0;
|
||||
pipeline_info.pStages = shader_stages;
|
||||
|
@ -294,7 +238,7 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
|
|||
pipeline_info.pColorBlendState = cb_state;
|
||||
pipeline_info.pDynamicState = &dyn_state;
|
||||
pipeline_info.layout = layout;
|
||||
pipeline_info.renderPass = render_pass;
|
||||
pipeline_info.renderPass = VK_NULL_HANDLE;
|
||||
pipeline_info.subpass = 0;
|
||||
pipeline_info.basePipelineHandle = VK_NULL_HANDLE;
|
||||
pipeline_info.basePipelineIndex = -1;
|
||||
|
@ -606,6 +550,16 @@ HRESULT vkd3d_copy_image_ops_init(struct vkd3d_copy_image_ops *meta_copy_image_o
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (device->vk_info.EXT_shader_stencil_export)
|
||||
{
|
||||
if ((vr = vkd3d_meta_create_shader_module(device, SPIRV_CODE(fs_copy_image_stencil),
|
||||
&meta_copy_image_ops->vk_fs_stencil_module)) < 0)
|
||||
{
|
||||
ERR("Failed to create shader modules, vr %d.\n", vr);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail:
|
||||
|
@ -623,7 +577,6 @@ void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_image_o
|
|||
{
|
||||
struct vkd3d_copy_image_pipeline *pipeline = &meta_copy_image_ops->pipelines[i];
|
||||
|
||||
VK_CALL(vkDestroyRenderPass(device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
|
||||
}
|
||||
|
||||
|
@ -631,95 +584,21 @@ void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_image_o
|
|||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_copy_image_ops->vk_pipeline_layout, NULL));
|
||||
VK_CALL(vkDestroyShaderModule(device->vk_device, meta_copy_image_ops->vk_fs_float_module, NULL));
|
||||
VK_CALL(vkDestroyShaderModule(device->vk_device, meta_copy_image_ops->vk_fs_uint_module, NULL));
|
||||
VK_CALL(vkDestroyShaderModule(device->vk_device, meta_copy_image_ops->vk_fs_stencil_module, NULL));
|
||||
|
||||
pthread_mutex_destroy(&meta_copy_image_ops->mutex);
|
||||
|
||||
vkd3d_free(meta_copy_image_ops->pipelines);
|
||||
}
|
||||
|
||||
/* Creates the render pass used by a swapchain blit pipeline.
 *
 * The pass has one single-sampled color attachment of format key->format,
 * loaded with key->load_op and always stored. The attachment starts in
 * UNDEFINED layout, is rendered to in COLOR_ATTACHMENT_OPTIMAL and ends in
 * PRESENT_SRC_KHR. One external -> subpass 0 dependency is declared on the
 * color attachment output stage.
 *
 * device:      Device used to create the render pass.
 * key:         Pipeline key supplying the attachment format and load op.
 * render_pass: Receives the created render pass handle.
 *
 * Returns the VkResult of vkCreateRenderPass2KHR unchanged. */
static VkResult vkd3d_meta_create_swapchain_render_pass(struct d3d12_device *device,
        const struct vkd3d_swapchain_pipeline_key *key, VkRenderPass *render_pass)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkAttachmentDescription2KHR color_desc;
    VkSubpassDependency2KHR external_dep;
    VkRenderPassCreateInfo2KHR create_info;
    VkAttachmentReference2KHR color_ref;
    VkSubpassDescription2KHR subpass;
    VkResult vr;

    /* Swapchain image: previous contents are discarded at the layout level,
     * the result is left ready for presentation. */
    color_desc.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
    color_desc.pNext = NULL;
    color_desc.flags = 0;
    color_desc.format = key->format;
    color_desc.samples = VK_SAMPLE_COUNT_1_BIT;
    color_desc.loadOp = key->load_op;
    color_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    color_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    color_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    color_desc.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    color_desc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

    color_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
    color_ref.pNext = NULL;
    color_ref.attachment = 0;
    color_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    color_ref.aspectMask = 0; /* input attachment aspect mask */

    subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
    subpass.pNext = NULL;
    subpass.flags = 0;
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    subpass.viewMask = 0;
    subpass.inputAttachmentCount = 0;
    subpass.pInputAttachments = NULL;
    subpass.colorAttachmentCount = 1;
    subpass.pColorAttachments = &color_ref;
    subpass.pResolveAttachments = NULL;
    subpass.pDepthStencilAttachment = NULL;
    subpass.preserveAttachmentCount = 0;
    subpass.pPreserveAttachments = NULL;

    /* External -> subpass 0 dependency on color attachment output. */
    external_dep.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
    external_dep.pNext = NULL;
    external_dep.srcSubpass = VK_SUBPASS_EXTERNAL;
    external_dep.dstSubpass = 0;
    external_dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    external_dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    external_dep.srcAccessMask = 0;
    external_dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    external_dep.dependencyFlags = 0;
    external_dep.viewOffset = 0;

    create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
    create_info.pNext = NULL;
    create_info.flags = 0;
    create_info.attachmentCount = 1;
    create_info.pAttachments = &color_desc;
    create_info.subpassCount = 1;
    create_info.pSubpasses = &subpass;
    create_info.dependencyCount = 1;
    create_info.pDependencies = &external_dep;
    create_info.correlatedViewMaskCount = 0;
    create_info.pCorrelatedViewMasks = NULL;

    vr = VK_CALL(vkCreateRenderPass2KHR(device->vk_device, &create_info, NULL, render_pass));
    return vr;
}
|
||||
|
||||
static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_pipeline *pipeline)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs;
|
||||
struct vkd3d_swapchain_ops *meta_swapchain_ops = &meta_ops->swapchain;
|
||||
VkPipelineColorBlendAttachmentState blend_att;
|
||||
VkPipelineColorBlendStateCreateInfo cb_state;
|
||||
VkResult vr;
|
||||
|
||||
if ((vr = vkd3d_meta_create_swapchain_render_pass(meta_ops->device, key, &pipeline->vk_render_pass)))
|
||||
{
|
||||
ERR("Failed to create render pass, vr %d.\n", vr);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
memset(&cb_state, 0, sizeof(cb_state));
|
||||
memset(&blend_att, 0, sizeof(blend_att));
|
||||
cb_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
||||
|
@ -732,14 +611,11 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_
|
|||
VK_COLOR_COMPONENT_A_BIT;
|
||||
|
||||
if ((vr = vkd3d_meta_create_graphics_pipeline(meta_ops,
|
||||
meta_swapchain_ops->vk_pipeline_layouts[key->filter], pipeline->vk_render_pass,
|
||||
meta_swapchain_ops->vk_pipeline_layouts[key->filter], key->format, VK_FORMAT_UNDEFINED, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
meta_swapchain_ops->vk_vs_module, meta_swapchain_ops->vk_fs_module, 1,
|
||||
NULL, &cb_state,
|
||||
NULL, &pipeline->vk_pipeline)) < 0)
|
||||
{
|
||||
VK_CALL(vkDestroyRenderPass(meta_ops->device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
pipeline->key = *key;
|
||||
return S_OK;
|
||||
|
@ -748,7 +624,6 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_
|
|||
static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_pipeline *pipeline)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs;
|
||||
struct vkd3d_copy_image_ops *meta_copy_image_ops = &meta_ops->copy_image;
|
||||
VkPipelineColorBlendAttachmentState blend_attachment;
|
||||
VkPipelineDepthStencilStateCreateInfo ds_state;
|
||||
|
@ -793,13 +668,30 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta
|
|||
ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
|
||||
ds_state.pNext = NULL;
|
||||
ds_state.flags = 0;
|
||||
ds_state.depthTestEnable = VK_TRUE;
|
||||
ds_state.depthWriteEnable = VK_TRUE;
|
||||
ds_state.depthTestEnable = (key->dst_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_TRUE : VK_FALSE;
|
||||
ds_state.depthWriteEnable = ds_state.depthTestEnable;
|
||||
ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
|
||||
ds_state.depthBoundsTestEnable = VK_FALSE;
|
||||
ds_state.stencilTestEnable = VK_FALSE;
|
||||
memset(&ds_state.front, 0, sizeof(ds_state.front));
|
||||
memset(&ds_state.back, 0, sizeof(ds_state.back));
|
||||
|
||||
if (key->dst_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
{
|
||||
ds_state.stencilTestEnable = VK_TRUE;
|
||||
ds_state.front.reference = 0;
|
||||
ds_state.front.writeMask = 0xff;
|
||||
ds_state.front.compareMask = 0xff;
|
||||
ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
|
||||
ds_state.front.failOp = VK_STENCIL_OP_KEEP;
|
||||
ds_state.front.depthFailOp = VK_STENCIL_OP_KEEP;
|
||||
ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
|
||||
ds_state.back = ds_state.front;
|
||||
}
|
||||
else
|
||||
{
|
||||
ds_state.stencilTestEnable = VK_FALSE;
|
||||
memset(&ds_state.front, 0, sizeof(ds_state.front));
|
||||
memset(&ds_state.back, 0, sizeof(ds_state.back));
|
||||
}
|
||||
|
||||
ds_state.minDepthBounds = 0.0f;
|
||||
ds_state.maxDepthBounds = 1.0f;
|
||||
|
||||
|
@ -818,25 +710,32 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta
|
|||
cb_state.pAttachments = &blend_attachment;
|
||||
memset(&cb_state.blendConstants, 0, sizeof(cb_state.blendConstants));
|
||||
|
||||
if ((vr = vkd3d_meta_create_render_pass(meta_ops->device,
|
||||
key->sample_count, key->format, key->layout, &pipeline->vk_render_pass)) < 0)
|
||||
return hresult_from_vk_result(vr);
|
||||
|
||||
/* Special path when copying stencil -> color. */
|
||||
if (key->format->vk_format == VK_FORMAT_R8_UINT)
|
||||
{
|
||||
/* Special path when copying stencil -> color. */
|
||||
vk_module = meta_copy_image_ops->vk_fs_uint_module;
|
||||
}
|
||||
else if (key->dst_aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
{
|
||||
/* FragStencilRef path. */
|
||||
vk_module = meta_copy_image_ops->vk_fs_stencil_module;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Depth or float color path. */
|
||||
vk_module = meta_copy_image_ops->vk_fs_float_module;
|
||||
}
|
||||
|
||||
if ((vr = vkd3d_meta_create_graphics_pipeline(meta_ops,
|
||||
meta_copy_image_ops->vk_pipeline_layout, pipeline->vk_render_pass,
|
||||
meta_copy_image_ops->vk_pipeline_layout,
|
||||
has_depth_target ? VK_FORMAT_UNDEFINED : key->format->vk_format,
|
||||
has_depth_target ? key->format->vk_format : VK_FORMAT_UNDEFINED,
|
||||
key->format->vk_aspect_mask,
|
||||
VK_NULL_HANDLE, vk_module, key->sample_count,
|
||||
has_depth_target ? &ds_state : NULL, has_depth_target ? NULL : &cb_state,
|
||||
&spec_info, &pipeline->vk_pipeline)) < 0)
|
||||
{
|
||||
VK_CALL(vkDestroyRenderPass(meta_ops->device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
pipeline->key = *key;
|
||||
return S_OK;
|
||||
|
@ -866,7 +765,6 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
|
||||
if (!memcmp(key, &pipeline->key, sizeof(*key)))
|
||||
{
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
pthread_mutex_unlock(&meta_copy_image_ops->mutex);
|
||||
return S_OK;
|
||||
|
@ -888,7 +786,6 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
return hr;
|
||||
}
|
||||
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
|
||||
pthread_mutex_unlock(&meta_copy_image_ops->mutex);
|
||||
|
@ -1049,7 +946,6 @@ void vkd3d_swapchain_ops_cleanup(struct vkd3d_swapchain_ops *meta_swapchain_ops,
|
|||
{
|
||||
struct vkd3d_swapchain_pipeline *pipeline = &meta_swapchain_ops->pipelines[i];
|
||||
|
||||
VK_CALL(vkDestroyRenderPass(device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
|
||||
}
|
||||
|
||||
|
@ -1090,7 +986,6 @@ HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
|
||||
if (!memcmp(key, &pipeline->key, sizeof(*key)))
|
||||
{
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
pthread_mutex_unlock(&meta_swapchain_ops->mutex);
|
||||
return S_OK;
|
||||
|
@ -1112,7 +1007,6 @@ HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
return hr;
|
||||
}
|
||||
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
|
||||
pthread_mutex_unlock(&meta_swapchain_ops->mutex);
|
||||
|
@ -1323,6 +1217,144 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
info->data_size = predicate_ops->data_sizes[command_type];
|
||||
}
|
||||
|
||||
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
VkPushConstantRange push_constant_range;
|
||||
VkResult vr;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL)))
|
||||
return hresult_from_errno(rc);
|
||||
|
||||
push_constant_range.offset = 0;
|
||||
push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args);
|
||||
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||
&push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0)
|
||||
{
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
meta_indirect_ops->pipelines_count = 0;
|
||||
meta_indirect_ops->pipelines_size = 0;
|
||||
meta_indirect_ops->pipelines = NULL;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
/* Specialization constant payload used when building the debug-ring variant of the
 * execute-indirect patch pipeline: the debug ring constants come first, followed by
 * the workgroup size, which is mapped to its own constant ID via offsetof(). */
struct vkd3d_meta_execute_indirect_spec_constant_data
|
||||
{
|
||||
struct vkd3d_shader_debug_ring_spec_constants constants;
|
||||
uint32_t workgroup_size_x;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
|
||||
{
|
||||
struct vkd3d_meta_execute_indirect_spec_constant_data execute_indirect_spec_constants;
|
||||
VkSpecializationMapEntry map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES + 1];
|
||||
struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
|
||||
struct vkd3d_shader_debug_ring_spec_info debug_ring_info;
|
||||
|
||||
VkSpecializationInfo spec;
|
||||
HRESULT hr = S_OK;
|
||||
VkResult vr;
|
||||
bool debug;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
{
|
||||
if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
|
||||
{
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
debug = meta_ops->device->debug_ring.active;
|
||||
|
||||
/* If we have debug ring, we can dump indirect command buffer data to the ring as well.
|
||||
* Vital for debugging broken execute indirect data with templates. */
|
||||
if (debug)
|
||||
{
|
||||
vkd3d_shader_debug_ring_init_spec_constant(meta_ops->device, &debug_ring_info,
|
||||
0 /* Reserve this hash for internal debug streams. */);
|
||||
|
||||
memset(&execute_indirect_spec_constants, 0, sizeof(execute_indirect_spec_constants));
|
||||
execute_indirect_spec_constants.constants = debug_ring_info.constants;
|
||||
execute_indirect_spec_constants.workgroup_size_x = patch_command_count;
|
||||
|
||||
memcpy(map_entry, debug_ring_info.map_entries, sizeof(debug_ring_info.map_entries));
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].constantID = 4;
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].offset =
|
||||
offsetof(struct vkd3d_meta_execute_indirect_spec_constant_data, workgroup_size_x);
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = map_entry;
|
||||
spec.pData = &execute_indirect_spec_constants;
|
||||
spec.mapEntryCount = ARRAY_SIZE(map_entry);
|
||||
spec.dataSize = sizeof(execute_indirect_spec_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
map_entry[0].constantID = 0;
|
||||
map_entry[0].offset = 0;
|
||||
map_entry[0].size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = map_entry;
|
||||
spec.pData = &patch_command_count;
|
||||
spec.mapEntryCount = 1;
|
||||
spec.dataSize = sizeof(patch_command_count);
|
||||
}
|
||||
|
||||
vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
|
||||
meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines));
|
||||
|
||||
meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;
|
||||
|
||||
vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
|
||||
debug ? sizeof(cs_execute_indirect_patch_debug_ring) : sizeof(cs_execute_indirect_patch),
|
||||
debug ? cs_execute_indirect_patch_debug_ring : cs_execute_indirect_patch,
|
||||
meta_indirect_ops->vk_pipeline_layout, &spec,
|
||||
&meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);
|
||||
|
||||
if (vr)
|
||||
{
|
||||
hr = hresult_from_vk_result(vr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
|
||||
meta_indirect_ops->pipelines_count++;
|
||||
|
||||
out:
|
||||
pthread_mutex_unlock(&meta_indirect_ops->mutex);
|
||||
return hr;
|
||||
}
|
||||
|
||||
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
}
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1348,8 +1380,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
|
|||
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
|
||||
goto fail_predicate_ops;
|
||||
|
||||
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
|
||||
goto fail_execute_indirect_ops;
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail_execute_indirect_ops:
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
fail_predicate_ops:
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
fail_query_ops:
|
||||
|
@ -1366,6 +1403,7 @@ fail_common:
|
|||
|
||||
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -42,6 +42,7 @@ static vkd3d_shader_hash_t renderdoc_capture_shader_hash;
|
|||
static uint32_t *renderdoc_capture_counts;
|
||||
static size_t renderdoc_capture_counts_count;
|
||||
static bool vkd3d_renderdoc_is_active;
|
||||
static bool vkd3d_renderdoc_global_capture;
|
||||
|
||||
static void vkd3d_renderdoc_init_capture_count_list(const char *env)
|
||||
{
|
||||
|
@ -49,6 +50,13 @@ static void vkd3d_renderdoc_init_capture_count_list(const char *env)
|
|||
uint32_t count;
|
||||
char *endp;
|
||||
|
||||
if (strcmp(env, "-1") == 0)
|
||||
{
|
||||
INFO("Doing one big capture of the entire lifetime of a device.\n");
|
||||
vkd3d_renderdoc_global_capture = true;
|
||||
return;
|
||||
}
|
||||
|
||||
while (*env != '\0')
|
||||
{
|
||||
errno = 0;
|
||||
|
@ -92,9 +100,9 @@ static bool vkd3d_renderdoc_enable_submit_counter(uint32_t counter)
|
|||
|
||||
static void vkd3d_renderdoc_init_once(void)
|
||||
{
|
||||
char counts[VKD3D_PATH_MAX];
|
||||
pRENDERDOC_GetAPI get_api;
|
||||
const char *counts;
|
||||
const char *env;
|
||||
char env[VKD3D_PATH_MAX];
|
||||
|
||||
#ifdef _WIN32
|
||||
HMODULE renderdoc;
|
||||
|
@ -104,19 +112,19 @@ static void vkd3d_renderdoc_init_once(void)
|
|||
void *fn_ptr;
|
||||
#endif
|
||||
|
||||
env = getenv("VKD3D_AUTO_CAPTURE_SHADER");
|
||||
counts = getenv("VKD3D_AUTO_CAPTURE_COUNTS");
|
||||
vkd3d_get_env_var("VKD3D_AUTO_CAPTURE_SHADER", env, sizeof(env));
|
||||
vkd3d_get_env_var("VKD3D_AUTO_CAPTURE_COUNTS", counts, sizeof(counts));
|
||||
|
||||
if (!env && !counts)
|
||||
if (strlen(env) == 0 && strlen(counts) == 0)
|
||||
{
|
||||
WARN("VKD3D_AUTO_CAPTURE_SHADER or VKD3D_AUTO_CAPTURE_COUNTS is not set, RenderDoc auto capture will not be enabled.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!counts)
|
||||
if (strlen(counts) == 0)
|
||||
WARN("VKD3D_AUTO_CAPTURE_COUNTS is not set, will assume that only the first submission is captured.\n");
|
||||
|
||||
if (env)
|
||||
if (strlen(env) > 0)
|
||||
renderdoc_capture_shader_hash = strtoull(env, NULL, 16);
|
||||
|
||||
if (renderdoc_capture_shader_hash)
|
||||
|
@ -124,7 +132,7 @@ static void vkd3d_renderdoc_init_once(void)
|
|||
else
|
||||
INFO("Enabling RenderDoc capture for all shaders.\n");
|
||||
|
||||
if (counts)
|
||||
if (strlen(counts) > 0)
|
||||
vkd3d_renderdoc_init_capture_count_list(counts);
|
||||
else
|
||||
{
|
||||
|
@ -180,6 +188,11 @@ bool vkd3d_renderdoc_active(void)
|
|||
return vkd3d_renderdoc_is_active;
|
||||
}
|
||||
|
||||
/* Reports whether "global capture" mode is on, i.e. the capture count list was given as "-1"
 * and a single capture covering the whole lifetime of a device was requested. */
bool vkd3d_renderdoc_global_capture_enabled(void)
|
||||
{
|
||||
return vkd3d_renderdoc_global_capture;
|
||||
}
|
||||
|
||||
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash)
|
||||
{
|
||||
return (renderdoc_capture_shader_hash == hash) || (renderdoc_capture_shader_hash == 0);
|
||||
|
@ -190,9 +203,12 @@ bool vkd3d_renderdoc_begin_capture(void *instance)
|
|||
static uint32_t overall_counter;
|
||||
uint32_t counter;
|
||||
|
||||
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
|
||||
if (!vkd3d_renderdoc_enable_submit_counter(counter))
|
||||
return false;
|
||||
if (!vkd3d_renderdoc_global_capture)
|
||||
{
|
||||
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
|
||||
if (!vkd3d_renderdoc_enable_submit_counter(counter))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (renderdoc_api)
|
||||
renderdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL);
|
||||
|
@ -215,11 +231,14 @@ void vkd3d_renderdoc_command_list_check_capture(struct d3d12_command_list *list,
|
|||
{
|
||||
unsigned int i;
|
||||
|
||||
if (vkd3d_renderdoc_global_capture_enabled())
|
||||
return;
|
||||
|
||||
if (vkd3d_renderdoc_active() && state)
|
||||
{
|
||||
if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
|
||||
{
|
||||
if (vkd3d_renderdoc_should_capture_shader_hash(state->compute.meta.hash))
|
||||
if (vkd3d_renderdoc_should_capture_shader_hash(state->compute.code.meta.hash))
|
||||
{
|
||||
WARN("Triggering RenderDoc capture for this command list.\n");
|
||||
list->debug_capture = true;
|
||||
|
@ -229,7 +248,7 @@ void vkd3d_renderdoc_command_list_check_capture(struct d3d12_command_list *list,
|
|||
{
|
||||
for (i = 0; i < state->graphics.stage_count; i++)
|
||||
{
|
||||
if (vkd3d_renderdoc_should_capture_shader_hash(state->graphics.stage_meta[i].hash))
|
||||
if (vkd3d_renderdoc_should_capture_shader_hash(state->graphics.code[i].meta.hash))
|
||||
{
|
||||
WARN("Triggering RenderDoc capture for this command list.\n");
|
||||
list->debug_capture = true;
|
||||
|
@ -246,6 +265,9 @@ bool vkd3d_renderdoc_command_queue_begin_capture(struct d3d12_command_queue *com
|
|||
VkDebugUtilsLabelEXT capture_label;
|
||||
bool debug_capture;
|
||||
|
||||
if (vkd3d_renderdoc_global_capture_enabled())
|
||||
return false;
|
||||
|
||||
debug_capture = vkd3d_renderdoc_begin_capture(command_queue->device->vkd3d_instance->vk_instance);
|
||||
if (debug_capture && !vkd3d_renderdoc_loaded_api())
|
||||
{
|
||||
|
@ -273,6 +295,9 @@ void vkd3d_renderdoc_command_queue_end_capture(struct d3d12_command_queue *comma
|
|||
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
|
||||
VkDebugUtilsLabelEXT capture_label;
|
||||
|
||||
if (vkd3d_renderdoc_global_capture_enabled())
|
||||
return;
|
||||
|
||||
if (!vkd3d_renderdoc_loaded_api())
|
||||
{
|
||||
/* Magic fallback which lets us bridge the Wine barrier over to Linux RenderDoc. */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,67 @@
|
|||
#version 450
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require

// Workgroup width is supplied through specialization constant 0.
// Each invocation in a workgroup handles one patch command.
layout(local_size_x_id = 0) in;

// One patch command: copy a single 32-bit word from src_offset to dst_offset.
// Offsets index uint arrays, i.e. they are in units of 32-bit words.
struct Command
{
    uint type;
    uint src_offset;
    uint dst_offset;
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
    Command commands[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer
{
    uint values[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer
{
    uint values[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount
{
    uint count;
};

layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite
{
    uint count;
};

layout(push_constant) uniform Registers
{
    Commands commands_va;
    SrcBuffer src_buffer_va;
    DstBuffer dst_buffer_va;
    // Kept as a raw uvec2 so it can be compared against zero (no indirect count).
    uvec2 indirect_count_va;
    IndirectCountWrite dst_indirect_count_va;
    uint src_stride;
    uint dst_stride;
};

void main()
{
    Command cmd = commands_va.commands[gl_LocalInvocationIndex];

    // One workgroup per draw; the dispatch size is the upper bound on draws.
    uint draw_id = gl_WorkGroupID.x;
    uint max_draws = gl_NumWorkGroups.x;

    bool has_indirect_count = any(notEqual(indirect_count_va, uvec2(0)));
    if (has_indirect_count)
    {
        // Clamp to the GPU-provided count and publish the clamped value once.
        max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
        if (draw_id == 0u)
            dst_indirect_count_va.count = max_draws;
    }

    // Draws beyond the effective count are left unpatched.
    if (draw_id >= max_draws)
        return;

    uint src_offset = src_stride * draw_id + cmd.src_offset;
    uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
    uint src_value = src_buffer_va.values[src_offset];
    dst_buffer_va.values[dst_offset] = src_value;
}
|
|
@ -0,0 +1,83 @@
|
|||
#version 450
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require
#extension GL_GOOGLE_include_directive : require
#include "../../../include/shader-debug/debug_channel.h"

// Workgroup width is supplied through specialization constant 4 in this
// debug variant. Each invocation in a workgroup handles one patch command.
layout(local_size_x_id = 4) in;

// One patch command: copy a single 32-bit word from src_offset to dst_offset.
// Offsets index uint arrays, i.e. they are in units of 32-bit words.
struct Command
{
    uint type;
    uint src_offset;
    uint dst_offset;
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
    Command commands[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer
{
    uint values[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer
{
    uint values[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount
{
    uint count;
};

layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite
{
    uint count;
};

layout(push_constant) uniform Registers
{
    Commands commands_va;
    SrcBuffer src_buffer_va;
    DstBuffer dst_buffer_va;
    // Kept as a raw uvec2 so it can be compared against zero (no indirect count).
    uvec2 indirect_count_va;
    IndirectCountWrite dst_indirect_count_va;
    uint src_stride;
    uint dst_stride;

    // Debug metadata here
    // A debug_tag of zero disables all debug channel output for this dispatch.
    uint debug_tag;
    uint implicit_instance;
};

void main()
{
    if (debug_tag != 0u)
        DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance);

    Command cmd = commands_va.commands[gl_LocalInvocationIndex];

    // One workgroup per draw; the dispatch size is the upper bound on draws.
    uint draw_id = gl_WorkGroupID.x;
    uint max_draws = gl_NumWorkGroups.x;

    bool has_indirect_count = any(notEqual(indirect_count_va, uvec2(0)));
    if (has_indirect_count)
    {
        // Clamp to the GPU-provided count and publish the clamped value once.
        max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
        if (draw_id == 0u)
            dst_indirect_count_va.count = max_draws;
    }

    // Report the effective and dispatched draw counts from the first workgroup only.
    if (debug_tag != 0u && draw_id == 0u)
        DEBUG_CHANNEL_MSG_UNIFORM(int(max_draws), int(gl_NumWorkGroups.x));

    // Draws beyond the effective count are left unpatched.
    if (draw_id >= max_draws)
        return;

    uint src_offset = src_stride * draw_id + cmd.src_offset;
    uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
    uint src_value = src_buffer_va.values[src_offset];

    // Log every patched word before it is written.
    if (debug_tag != 0u)
        DEBUG_CHANNEL_MSG(cmd.type, dst_offset, src_offset, src_value);

    dst_buffer_va.values[dst_offset] = src_value;
}
|
|
@ -0,0 +1,28 @@
|
|||
#version 450

#extension GL_EXT_samplerless_texture_functions : enable
#extension GL_ARB_shader_stencil_export : enable

#define MODE_1D 0
#define MODE_2D 1
#define MODE_MS 2

// Selects which of the aliased source bindings below is actually read.
layout(constant_id = 0) const uint c_mode = MODE_2D;

// All three views alias binding 0; only the one matching c_mode is used.
layout(binding = 0) uniform utexture1DArray tex_1d;
layout(binding = 0) uniform utexture2DArray tex_2d;
layout(binding = 0) uniform utexture2DMSArray tex_ms;

layout(push_constant)
uniform u_info_t {
    // Added to the fragment position to obtain the source texel.
    ivec2 offset;
} u_info;

// Copies an unsigned integer texel into the stencil aspect of the target by
// exporting it as the per-fragment stencil reference value.
void main() {
    ivec3 coord = ivec3(u_info.offset + ivec2(gl_FragCoord.xy), gl_Layer);

    // Start from a defined value so an unexpected c_mode cannot export an
    // undefined stencil reference.
    uint value = 0u;

    if (c_mode == MODE_1D)
        value = texelFetch(tex_1d, coord.xz, 0).r;
    else if (c_mode == MODE_2D)
        value = texelFetch(tex_2d, coord, 0).r;
    else if (c_mode == MODE_MS)
        value = texelFetch(tex_ms, coord, gl_SampleID).r;

    gl_FragStencilRefARB = int(value);
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright 2021 Derek Lesho for Codeweavers
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "vkd3d_private.h"
|
||||
|
||||
#include "winioctl.h"
|
||||
|
||||
#define IOCTL_SHARED_GPU_RESOURCE_SET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 4, METHOD_BUFFERED, FILE_WRITE_ACCESS)
|
||||
#define IOCTL_SHARED_GPU_RESOURCE_GET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 5, METHOD_BUFFERED, FILE_READ_ACCESS)
|
||||
#define IOCTL_SHARED_GPU_RESOURCE_OPEN CTL_CODE(FILE_DEVICE_VIDEO, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
|
||||
|
||||
/*
 * Sends `buf_size` bytes from `buf` to the object behind `handle` using the
 * IOCTL_SHARED_GPU_RESOURCE_SET_METADATA control code.
 *
 * Returns true if the ioctl succeeded, false otherwise.
 */
bool vkd3d_set_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size)
{
    /* Required by DeviceIoControl for synchronous calls; the value itself is not used. */
    DWORD bytes_returned;
    bool ok;

    ok = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_SET_METADATA,
            buf, buf_size, NULL, 0, &bytes_returned, NULL);
    return ok;
}
|
||||
|
||||
/*
 * Reads metadata from the object behind `handle` into `buf` (at most
 * `buf_size` bytes) using the IOCTL_SHARED_GPU_RESOURCE_GET_METADATA control
 * code.
 *
 * If `metadata_size` is non-NULL it receives the byte count reported by the
 * ioctl; this is 0 when the call fails without reporting a size.
 *
 * Returns true if the ioctl succeeded, false otherwise.
 */
bool vkd3d_get_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size, uint32_t *metadata_size)
{
    /* Initialized so that a failing ioctl which never writes the count
     * cannot leak an indeterminate value into *metadata_size. */
    DWORD ret_size = 0;
    bool ret;

    ret = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_GET_METADATA, NULL, 0, buf, buf_size, &ret_size, NULL);

    if (metadata_size)
        *metadata_size = ret_size;

    return ret;
}
|
||||
|
||||
/*
 * Opens the "\\.\SharedGpuResource" device and binds the new handle to the
 * shared resource identified by `kmt_handle` via IOCTL_SHARED_GPU_RESOURCE_OPEN.
 *
 * Returns the opened handle on success, which the caller must close with
 * CloseHandle(), or INVALID_HANDLE_VALUE on failure.
 */
HANDLE vkd3d_open_kmt_handle(HANDLE kmt_handle)
{
    struct
    {
        unsigned int kmt_handle;
        /* The following parameter represents a larger sized string for a dynamically
         * allocated struct for use when opening an object by name. Here it is only
         * an empty, NUL-terminated name. */
        WCHAR name[1];
    } shared_resource_open;
    /* DeviceIoControl requires a non-NULL lpBytesReturned when lpOverlapped is NULL. */
    DWORD ret_size = 0;
    HANDLE nt_handle;

    nt_handle = CreateFileA("\\\\.\\SharedGpuResource", GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (nt_handle == INVALID_HANDLE_VALUE)
        return nt_handle;

    /* Only the low 32 bits of the handle value are passed to the ioctl. */
    shared_resource_open.kmt_handle = (ULONG_PTR)kmt_handle;
    shared_resource_open.name[0] = 0;
    if (!DeviceIoControl(nt_handle, IOCTL_SHARED_GPU_RESOURCE_OPEN, &shared_resource_open, sizeof(shared_resource_open), NULL, 0, &ret_size, NULL))
    {
        /* Do not leak the device handle when the open request is rejected. */
        CloseHandle(nt_handle);
        return INVALID_HANDLE_VALUE;
    }
    return nt_handle;
}
|
2006
libs/vkd3d/state.c
2006
libs/vkd3d/state.c
File diff suppressed because it is too large
Load Diff
|
@ -180,7 +180,6 @@ struct d3d12_swapchain
|
|||
VkImage vk_images[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkImage vk_swapchain_images[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkImageView vk_swapchain_image_views[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkFramebuffer vk_framebuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkCommandBuffer vk_cmd_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
bool vk_acquire_semaphores_signaled[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkSemaphore vk_acquire_semaphores[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
|
@ -227,7 +226,7 @@ static inline const struct vkd3d_vk_device_procs* d3d12_swapchain_procs(struct d
|
|||
return &swapchain->command_queue->device->vk_procs;
|
||||
}
|
||||
|
||||
static inline struct ID3D12Device6* d3d12_swapchain_device_iface(struct d3d12_swapchain* swapchain)
|
||||
static inline struct ID3D12Device9* d3d12_swapchain_device_iface(struct d3d12_swapchain* swapchain)
|
||||
{
|
||||
return &swapchain->command_queue->device->ID3D12Device_iface;
|
||||
}
|
||||
|
@ -805,11 +804,16 @@ static BOOL d3d12_swapchain_is_present_mode_supported(struct d3d12_swapchain *sw
|
|||
return supported;
|
||||
}
|
||||
|
||||
static BOOL d3d12_swapchain_has_user_images(struct d3d12_swapchain *swapchain)
|
||||
static bool d3d12_swapchain_has_user_images(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
return !!swapchain->vk_images[0];
|
||||
}
|
||||
|
||||
/* Returns true once the swapchain's descriptor pool has been created, i.e. descriptors.pool
 * is no longer VK_NULL_HANDLE. */
static bool d3d12_swapchain_has_user_descriptors(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
return swapchain->descriptors.pool != VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_get_user_graphics_pipeline(struct d3d12_swapchain *swapchain, VkFormat format)
|
||||
{
|
||||
struct d3d12_device *device = d3d12_swapchain_device(swapchain);
|
||||
|
@ -817,8 +821,6 @@ static HRESULT d3d12_swapchain_get_user_graphics_pipeline(struct d3d12_swapchain
|
|||
HRESULT hr;
|
||||
|
||||
key.bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
|
||||
key.load_op = swapchain->desc.Scaling == DXGI_SCALING_NONE ?
|
||||
VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
key.filter = swapchain->desc.Scaling == DXGI_SCALING_NONE ? VK_FILTER_NEAREST : VK_FILTER_LINEAR;
|
||||
key.format = format;
|
||||
|
||||
|
@ -927,20 +929,18 @@ static HRESULT d3d12_swapchain_create_user_descriptors(struct d3d12_swapchain *s
|
|||
static HRESULT d3d12_swapchain_create_user_buffers(struct d3d12_swapchain *swapchain, VkFormat vk_format)
|
||||
{
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
D3D12_RESOURCE_DESC1 resource_desc;
|
||||
struct d3d12_resource* object;
|
||||
HRESULT hr;
|
||||
UINT i;
|
||||
|
||||
if (d3d12_swapchain_has_user_images(swapchain))
|
||||
return S_OK;
|
||||
|
||||
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
heap_properties.CreationNodeMask = 1;
|
||||
heap_properties.VisibleNodeMask = 1;
|
||||
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
resource_desc.Alignment = 0;
|
||||
resource_desc.Width = swapchain->desc.Width;
|
||||
|
@ -953,31 +953,38 @@ static HRESULT d3d12_swapchain_create_user_buffers(struct d3d12_swapchain *swapc
|
|||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
|
||||
for (i = 0; i < swapchain->desc.BufferCount; i++)
|
||||
if (!d3d12_swapchain_has_user_images(swapchain))
|
||||
{
|
||||
if (FAILED(hr = d3d12_resource_create_committed(d3d12_swapchain_device(swapchain),
|
||||
&resource_desc, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_RESOURCE_STATE_PRESENT, NULL, &object)))
|
||||
for (i = 0; i < swapchain->desc.BufferCount; i++)
|
||||
{
|
||||
ERR("Failed to create image for swapchain buffer");
|
||||
return hr;
|
||||
if (FAILED(hr = d3d12_resource_create_committed(d3d12_swapchain_device(swapchain),
|
||||
&resource_desc, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_RESOURCE_STATE_PRESENT, NULL, NULL, &object)))
|
||||
{
|
||||
ERR("Failed to create image for swapchain buffer");
|
||||
return hr;
|
||||
}
|
||||
|
||||
swapchain->vk_images[i] = object->res.vk_image;
|
||||
swapchain->buffers[i] = (ID3D12Resource *)&object->ID3D12Resource_iface;
|
||||
|
||||
vkd3d_resource_incref(swapchain->buffers[i]);
|
||||
ID3D12Resource_Release(swapchain->buffers[i]);
|
||||
|
||||
/* It is technically possible to just start presenting images without rendering to them.
|
||||
* The initial resource state for swapchain images is PRESENT.
|
||||
* Since presentable images are dedicated allocations, we can safely queue a transition into common state
|
||||
* right away. We will also drain the queue when we release the images, so there is no risk of early delete. */
|
||||
vkd3d_enqueue_initial_transition(&swapchain->command_queue->ID3D12CommandQueue_iface, swapchain->buffers[i]);
|
||||
}
|
||||
|
||||
swapchain->vk_images[i] = object->res.vk_image;
|
||||
swapchain->buffers[i] = (ID3D12Resource *)&object->ID3D12Resource_iface;
|
||||
|
||||
vkd3d_resource_incref(swapchain->buffers[i]);
|
||||
ID3D12Resource_Release(swapchain->buffers[i]);
|
||||
|
||||
/* It is technically possible to just start presenting images without rendering to them.
|
||||
* The initial resource state for swapchain images is PRESENT.
|
||||
* Since presentable images are dedicated allocations, we can safely queue a transition into common state
|
||||
* right away. We will also drain the queue when we release the images, so there is no risk of early delete. */
|
||||
vkd3d_enqueue_initial_transition(&swapchain->command_queue->ID3D12CommandQueue_iface, swapchain->buffers[i]);
|
||||
}
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_user_descriptors(swapchain, vk_format)))
|
||||
return hr;
|
||||
/* If we don't have a swapchain pipeline layout yet (0x0 surface on first frame),
|
||||
* we cannot allocate any descriptors yet. We'll create the descriptors eventually
|
||||
* when we get a proper swapchain working. */
|
||||
if (!d3d12_swapchain_has_user_descriptors(swapchain) && swapchain->pipeline.vk_set_layout)
|
||||
if (FAILED(hr = d3d12_swapchain_create_user_descriptors(swapchain, vk_format)))
|
||||
return hr;
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
@ -986,16 +993,13 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
VkCommandBuffer vk_cmd_buffer, unsigned int dst_index, unsigned int src_index)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkSubpassBeginInfoKHR subpass_begin_info;
|
||||
VkSubpassEndInfoKHR subpass_end_info;
|
||||
VkRenderingAttachmentInfoKHR attachment_info;
|
||||
VkCommandBufferBeginInfo begin_info;
|
||||
VkRenderPassBeginInfo rp_info;
|
||||
VkClearValue clear_value;
|
||||
VkImageMemoryBarrier image_barrier;
|
||||
VkRenderingInfoKHR rendering_info;
|
||||
VkViewport viewport;
|
||||
VkResult vr;
|
||||
|
||||
memset(&clear_value, 0, sizeof(clear_value));
|
||||
|
||||
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
begin_info.pNext = NULL;
|
||||
begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
@ -1007,39 +1011,28 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
return vr;
|
||||
}
|
||||
|
||||
rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
||||
rp_info.pNext = NULL;
|
||||
rp_info.renderPass = swapchain->pipeline.vk_render_pass;
|
||||
rp_info.framebuffer = swapchain->vk_framebuffers[dst_index];
|
||||
|
||||
rp_info.renderArea.offset.x = 0;
|
||||
rp_info.renderArea.offset.y = 0;
|
||||
rp_info.renderArea.extent.width = swapchain->vk_swapchain_width;
|
||||
rp_info.renderArea.extent.height = swapchain->vk_swapchain_height;
|
||||
|
||||
subpass_begin_info.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR;
|
||||
subpass_begin_info.pNext = NULL;
|
||||
subpass_begin_info.contents = VK_SUBPASS_CONTENTS_INLINE;
|
||||
|
||||
subpass_end_info.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR;
|
||||
subpass_end_info.pNext = NULL;
|
||||
memset(&attachment_info, 0, sizeof(attachment_info));
|
||||
attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
|
||||
attachment_info.imageView = swapchain->vk_swapchain_image_views[dst_index];
|
||||
attachment_info.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
|
||||
if (swapchain->desc.Scaling == DXGI_SCALING_NONE)
|
||||
{
|
||||
rp_info.clearValueCount = 1;
|
||||
rp_info.pClearValues = &clear_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
rp_info.clearValueCount = 0;
|
||||
rp_info.pClearValues = NULL;
|
||||
}
|
||||
attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
|
||||
memset(&rendering_info, 0, sizeof(rendering_info));
|
||||
rendering_info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR;
|
||||
rendering_info.renderArea.extent.width = swapchain->vk_swapchain_width;
|
||||
rendering_info.renderArea.extent.height = swapchain->vk_swapchain_height;
|
||||
rendering_info.layerCount = 1;
|
||||
rendering_info.colorAttachmentCount = 1;
|
||||
rendering_info.pColorAttachments = &attachment_info;
|
||||
|
||||
viewport.x = viewport.y = 0.0f;
|
||||
viewport.minDepth = 0.0f;
|
||||
viewport.maxDepth = 1.0f;
|
||||
|
||||
VK_CALL(vkCmdBeginRenderPass2KHR(vk_cmd_buffer, &rp_info, &subpass_begin_info));
|
||||
if (swapchain->desc.Scaling == DXGI_SCALING_NONE)
|
||||
{
|
||||
viewport.width = (float)swapchain->desc.Width;
|
||||
|
@ -1051,15 +1044,48 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
viewport.height = swapchain->vk_swapchain_height;
|
||||
}
|
||||
|
||||
VK_CALL(vkCmdSetViewport(vk_cmd_buffer, 0, 1, &viewport));
|
||||
VK_CALL(vkCmdSetScissor(vk_cmd_buffer, 0, 1, &rp_info.renderArea));
|
||||
image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
image_barrier.pNext = NULL;
|
||||
image_barrier.srcAccessMask = 0;
|
||||
image_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
image_barrier.image = swapchain->vk_swapchain_images[dst_index];
|
||||
image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
image_barrier.subresourceRange.baseMipLevel = 0;
|
||||
image_barrier.subresourceRange.levelCount = 1;
|
||||
image_barrier.subresourceRange.baseArrayLayer = 0;
|
||||
image_barrier.subresourceRange.layerCount = 1;
|
||||
|
||||
if (attachment_info.loadOp != VK_ATTACHMENT_LOAD_OP_DONT_CARE)
|
||||
image_barrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
|
||||
|
||||
VK_CALL(vkCmdPipelineBarrier(vk_cmd_buffer,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
0, 0, NULL, 0, NULL, 1, &image_barrier));
|
||||
|
||||
VK_CALL(vkCmdBeginRenderingKHR(vk_cmd_buffer, &rendering_info));
|
||||
VK_CALL(vkCmdSetViewport(vk_cmd_buffer, 0, 1, &viewport));
|
||||
VK_CALL(vkCmdSetScissor(vk_cmd_buffer, 0, 1, &rendering_info.renderArea));
|
||||
VK_CALL(vkCmdBindPipeline(vk_cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, swapchain->pipeline.vk_pipeline));
|
||||
VK_CALL(vkCmdBindDescriptorSets(vk_cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
swapchain->pipeline.vk_pipeline_layout, 0, 1, &swapchain->descriptors.sets[src_index],
|
||||
0, NULL));
|
||||
VK_CALL(vkCmdDraw(vk_cmd_buffer, 3, 1, 0, 0));
|
||||
VK_CALL(vkCmdEndRenderPass2KHR(vk_cmd_buffer, &subpass_end_info));
|
||||
VK_CALL(vkCmdEndRenderingKHR(vk_cmd_buffer));
|
||||
|
||||
image_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
image_barrier.dstAccessMask = 0;
|
||||
image_barrier.oldLayout = image_barrier.newLayout;
|
||||
image_barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
|
||||
VK_CALL(vkCmdPipelineBarrier(vk_cmd_buffer,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
0, 0, NULL, 0, NULL, 1, &image_barrier));
|
||||
|
||||
if ((vr = VK_CALL(vkEndCommandBuffer(vk_cmd_buffer))) < 0)
|
||||
WARN("Failed to end command buffer, vr %d.\n", vr);
|
||||
|
@ -1067,7 +1093,7 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
return vr;
|
||||
}
|
||||
|
||||
static void d3d12_swapchain_destroy_framebuffers(struct d3d12_swapchain *swapchain)
|
||||
static void d3d12_swapchain_destroy_views(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
|
||||
|
@ -1076,30 +1102,18 @@ static void d3d12_swapchain_destroy_framebuffers(struct d3d12_swapchain *swapcha
|
|||
for (i = 0; i < swapchain->buffer_count; i++)
|
||||
{
|
||||
VK_CALL(vkDestroyImageView(vk_device, swapchain->vk_swapchain_image_views[i], NULL));
|
||||
VK_CALL(vkDestroyFramebuffer(vk_device, swapchain->vk_framebuffers[i], NULL));
|
||||
swapchain->vk_swapchain_image_views[i] = VK_NULL_HANDLE;
|
||||
swapchain->vk_framebuffers[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_create_framebuffers(struct d3d12_swapchain *swapchain, VkFormat format)
|
||||
static HRESULT d3d12_swapchain_create_views(struct d3d12_swapchain *swapchain, VkFormat format)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
|
||||
VkImageViewCreateInfo image_view_info;
|
||||
VkFramebufferCreateInfo fb_info;
|
||||
unsigned int i;
|
||||
VkResult vr;
|
||||
|
||||
fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
|
||||
fb_info.pNext = NULL;
|
||||
fb_info.flags = 0;
|
||||
fb_info.renderPass = swapchain->pipeline.vk_render_pass;
|
||||
fb_info.width = swapchain->vk_swapchain_width;
|
||||
fb_info.height = swapchain->vk_swapchain_height;
|
||||
fb_info.layers = 1;
|
||||
fb_info.attachmentCount = 1;
|
||||
|
||||
image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
image_view_info.pNext = NULL;
|
||||
image_view_info.flags = 0;
|
||||
|
@ -1120,9 +1134,6 @@ static HRESULT d3d12_swapchain_create_framebuffers(struct d3d12_swapchain *swapc
|
|||
image_view_info.image = swapchain->vk_swapchain_images[i];
|
||||
if ((vr = VK_CALL(vkCreateImageView(vk_device, &image_view_info, NULL, &swapchain->vk_swapchain_image_views[i]))))
|
||||
return hresult_from_vk_result(vr);
|
||||
fb_info.pAttachments = &swapchain->vk_swapchain_image_views[i];
|
||||
if ((vr = VK_CALL(vkCreateFramebuffer(vk_device, &fb_info, NULL, &swapchain->vk_framebuffers[i]))))
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
|
@ -1224,8 +1235,6 @@ static HRESULT d3d12_swapchain_create_buffers(struct d3d12_swapchain *swapchain,
|
|||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
d3d12_swapchain_destroy_framebuffers(swapchain);
|
||||
|
||||
if ((vr = VK_CALL(vkGetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, NULL))) < 0)
|
||||
{
|
||||
WARN("Failed to get Vulkan swapchain images, vr %d.\n", vr);
|
||||
|
@ -1254,7 +1263,7 @@ static HRESULT d3d12_swapchain_create_buffers(struct d3d12_swapchain *swapchain,
|
|||
|
||||
if (queue_desc.Type == D3D12_COMMAND_LIST_TYPE_DIRECT)
|
||||
{
|
||||
if (FAILED(hr = d3d12_swapchain_create_framebuffers(swapchain, vk_swapchain_format)))
|
||||
if (FAILED(hr = d3d12_swapchain_create_views(swapchain, vk_swapchain_format)))
|
||||
return hr;
|
||||
}
|
||||
|
||||
|
@ -1300,25 +1309,30 @@ static VkResult d3d12_swapchain_unsignal_acquire_semaphore(struct d3d12_swapchai
|
|||
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))))
|
||||
{
|
||||
ERR("Failed to submit unsignal operation, vr %d\n", vr);
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
goto end;
|
||||
}
|
||||
|
||||
swapchain->vk_acquire_semaphores_signaled[frame_id] = false;
|
||||
|
||||
if (vk_fence)
|
||||
{
|
||||
if ((vr = VK_CALL(vkWaitForFences(swapchain->command_queue->device->vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX))))
|
||||
ERR("Failed to wait for fences, vr %d\n", vr);
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
}
|
||||
|
||||
end:
|
||||
VK_CALL(vkDestroyFence(vk_device, vk_fence, NULL));
|
||||
return vr;
|
||||
}
|
||||
|
||||
static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, BOOL destroy_user_buffers)
|
||||
static void d3d12_swapchain_destroy_resources(struct d3d12_swapchain *swapchain, bool destroy_user_buffers)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkQueue vk_queue;
|
||||
unsigned int i;
|
||||
VkResult vr;
|
||||
|
||||
if (swapchain->command_queue)
|
||||
{
|
||||
|
@ -1331,7 +1345,8 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
|
|||
if (swapchain->vk_acquire_semaphores_signaled[i])
|
||||
d3d12_swapchain_unsignal_acquire_semaphore(swapchain, vk_queue, i, false);
|
||||
|
||||
VK_CALL(vkQueueWaitIdle(vk_queue));
|
||||
vr = VK_CALL(vkQueueWaitIdle(vk_queue));
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
}
|
||||
|
@ -1341,18 +1356,20 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < swapchain->desc.BufferCount; ++i)
|
||||
{
|
||||
if (swapchain->buffers[i] && destroy_user_buffers)
|
||||
{
|
||||
vkd3d_resource_decref(swapchain->buffers[i]);
|
||||
swapchain->buffers[i] = NULL;
|
||||
swapchain->vk_images[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
if (destroy_user_buffers)
|
||||
{
|
||||
for (i = 0; i < swapchain->desc.BufferCount; ++i)
|
||||
{
|
||||
if (swapchain->buffers[i])
|
||||
{
|
||||
vkd3d_resource_decref(swapchain->buffers[i]);
|
||||
swapchain->buffers[i] = NULL;
|
||||
swapchain->vk_images[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
d3d12_swapchain_destroy_user_descriptors(swapchain);
|
||||
}
|
||||
|
||||
if (swapchain->command_queue && swapchain->command_queue->device->vk_device)
|
||||
{
|
||||
|
@ -1371,6 +1388,8 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
|
|||
VK_CALL(vkDestroyCommandPool(swapchain->command_queue->device->vk_device, swapchain->vk_cmd_pool, NULL));
|
||||
swapchain->vk_cmd_pool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
d3d12_swapchain_destroy_views(swapchain);
|
||||
}
|
||||
|
||||
static bool d3d12_swapchain_has_nonzero_surface_size(struct d3d12_swapchain *swapchain)
|
||||
|
@ -1390,7 +1409,7 @@ static bool d3d12_swapchain_has_nonzero_surface_size(struct d3d12_swapchain *swa
|
|||
return surface_caps.maxImageExtent.width != 0 && surface_caps.maxImageExtent.height != 0;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *swapchain)
|
||||
static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *swapchain, bool force_surface_lost)
|
||||
{
|
||||
VkPhysicalDevice vk_physical_device = d3d12_swapchain_device(swapchain)->vk_physical_device;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
|
@ -1400,6 +1419,8 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
VkFormat vk_format, vk_swapchain_format;
|
||||
unsigned int width, height, image_count;
|
||||
VkSurfaceCapabilitiesKHR surface_caps;
|
||||
unsigned int override_image_count;
|
||||
char count_env[VKD3D_PATH_MAX];
|
||||
VkSwapchainKHR vk_swapchain;
|
||||
VkImageUsageFlags usage;
|
||||
VkResult vr;
|
||||
|
@ -1415,8 +1436,37 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
swapchain->vk_surface, &swapchain->desc, &vk_swapchain_format)))
|
||||
return hr;
|
||||
|
||||
if ((vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device,
|
||||
swapchain->vk_surface, &surface_caps))) < 0)
|
||||
if (force_surface_lost)
|
||||
{
|
||||
/* If we cannot successfully present after 2 attempts, we must assume the swapchain
|
||||
* is in an unstable state with many resizes happening async. Until things stabilize,
|
||||
* force a dummy swapchain for now so that we can make forward progress.
|
||||
* When we don't have a proper swapchain, we will attempt again next present. */
|
||||
vr = VK_ERROR_SURFACE_LOST_KHR;
|
||||
}
|
||||
else
|
||||
{
|
||||
vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device,
|
||||
swapchain->vk_surface, &surface_caps));
|
||||
}
|
||||
|
||||
if (vr == VK_ERROR_SURFACE_LOST_KHR)
|
||||
{
|
||||
/* We already handle the scenario where swapchain is 0x0 and we fallback to pure user
|
||||
* swapchain. Do something similar here. */
|
||||
WARN("Surface is lost, synthesizing a fake surface_caps so we can keep presenting into the aether.\n");
|
||||
memset(&surface_caps, 0, sizeof(surface_caps));
|
||||
surface_caps.minImageCount = 2;
|
||||
surface_caps.currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
|
||||
surface_caps.maxImageArrayLayers = 1;
|
||||
surface_caps.supportedCompositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
|
||||
surface_caps.supportedUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
surface_caps.supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
|
||||
vr = VK_SUCCESS;
|
||||
}
|
||||
|
||||
if (vr)
|
||||
{
|
||||
WARN("Failed to get surface capabilities, vr %d.\n", vr);
|
||||
return hresult_from_vk_result(vr);
|
||||
|
@ -1425,6 +1475,15 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
/* Need to account for the front buffer, so + 1 */
|
||||
image_count = swapchain->desc.BufferCount + 1;
|
||||
image_count = max(image_count, surface_caps.minImageCount);
|
||||
|
||||
vkd3d_get_env_var("VKD3D_SWAPCHAIN_IMAGES", count_env, sizeof(count_env));
|
||||
if (strlen(count_env) > 0)
|
||||
{
|
||||
override_image_count = strtoul(count_env, NULL, 0);
|
||||
image_count = max(image_count, override_image_count);
|
||||
INFO("Overriding swapchain images to %u.\n", image_count);
|
||||
}
|
||||
|
||||
if (surface_caps.maxImageCount)
|
||||
image_count = min(image_count, surface_caps.maxImageCount);
|
||||
|
||||
|
@ -1544,23 +1603,12 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
if (FAILED(hr = d3d12_swapchain_create_user_buffers(swapchain, vk_format)))
|
||||
return hr;
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
|
||||
d3d12_swapchain_destroy_framebuffers(swapchain);
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
swapchain->buffer_count = 0;
|
||||
return S_OK;
|
||||
}
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_recreate_vulkan_swapchain(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain)))
|
||||
ERR("Failed to recreate Vulkan swapchain, hr %#x.\n", hr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
static inline struct d3d12_swapchain *d3d12_swapchain_from_IDXGISwapChain(dxgi_swapchain_iface *iface)
|
||||
{
|
||||
return CONTAINING_RECORD(iface, struct d3d12_swapchain, IDXGISwapChain_iface);
|
||||
|
@ -1606,8 +1654,7 @@ static void d3d12_swapchain_destroy(struct d3d12_swapchain *swapchain)
|
|||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, TRUE);
|
||||
d3d12_swapchain_destroy_framebuffers(swapchain);
|
||||
d3d12_swapchain_destroy_resources(swapchain, true);
|
||||
|
||||
if (swapchain->frame_latency_event)
|
||||
CloseHandle(swapchain->frame_latency_event);
|
||||
|
@ -1706,7 +1753,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_swapchain_GetDevice(dxgi_swapchain_iface
|
|||
|
||||
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
|
||||
|
||||
return ID3D12Device6_QueryInterface(d3d12_swapchain_device_iface(swapchain), iid, device);
|
||||
return ID3D12Device9_QueryInterface(d3d12_swapchain_device_iface(swapchain), iid, device);
|
||||
}
|
||||
|
||||
/* IDXGISwapChain methods */
|
||||
|
@ -1739,9 +1786,9 @@ static HRESULT d3d12_swapchain_set_sync_interval(struct d3d12_swapchain *swapcha
|
|||
return S_OK;
|
||||
}
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
swapchain->present_mode = present_mode;
|
||||
return d3d12_swapchain_recreate_vulkan_swapchain(swapchain);
|
||||
return d3d12_swapchain_create_vulkan_swapchain(swapchain, false);
|
||||
}
|
||||
|
||||
static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain, VkQueue vk_queue)
|
||||
|
@ -1759,6 +1806,11 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
if (swapchain->vk_swapchain == VK_NULL_HANDLE)
|
||||
return VK_SUCCESS;
|
||||
|
||||
/* If we know we're already suboptimal, e.g. observed in present or acquire after present,
|
||||
* just recreate the swapchain right away. */
|
||||
if (swapchain->is_suboptimal)
|
||||
return VK_ERROR_OUT_OF_DATE_KHR;
|
||||
|
||||
if (swapchain->vk_image_index == INVALID_VK_IMAGE_INDEX)
|
||||
{
|
||||
/* If we hit SUBOPTIMAL path last AcquireNextImageKHR, we will have a pending acquire we did not
|
||||
|
@ -1772,13 +1824,15 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->vk_acquire_semaphores[swapchain->frame_id],
|
||||
VK_NULL_HANDLE, &swapchain->vk_image_index));
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (vr >= 0)
|
||||
{
|
||||
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
|
||||
/* If we have observed suboptimal once, guarantees that we keep observing it
|
||||
* until we have recreated the swapchain. */
|
||||
if (swapchain->is_suboptimal)
|
||||
vr = VK_SUBOPTIMAL_KHR;
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
swapchain->is_suboptimal = true;
|
||||
}
|
||||
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
|
@ -1811,6 +1865,7 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
VK_TRUE, UINT64_MAX))))
|
||||
{
|
||||
ERR("Failed to wait for fence.\n");
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
return vr;
|
||||
}
|
||||
|
||||
|
@ -1839,6 +1894,7 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, swapchain->vk_blit_fences[swapchain->vk_image_index]))) < 0)
|
||||
{
|
||||
ERR("Failed to blit swapchain buffer, vr %d.\n", vr);
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
return vr;
|
||||
}
|
||||
|
||||
|
@ -1852,12 +1908,11 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->frame_id = (swapchain->frame_id + 1) % swapchain->buffer_count;
|
||||
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
|
||||
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
swapchain->is_suboptimal = true;
|
||||
|
||||
/* If we have observed suboptimal once, guarantees that we keep observing it
|
||||
* until we have recreated the swapchain. */
|
||||
if (swapchain->is_suboptimal)
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
swapchain->is_suboptimal = true;
|
||||
else if (swapchain->is_suboptimal)
|
||||
vr = VK_SUBOPTIMAL_KHR;
|
||||
|
||||
/* Could get SUBOPTIMAL here. Defer acquiring if we hit that path.
|
||||
|
@ -1870,6 +1925,8 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->vk_acquire_semaphores[swapchain->frame_id], VK_NULL_HANDLE,
|
||||
&swapchain->vk_image_index));
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (vr >= 0)
|
||||
{
|
||||
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
|
||||
|
@ -1888,9 +1945,13 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
|
||||
}
|
||||
}
|
||||
|
||||
/* Not being able to successfully acquire here is okay, we'll defer the acquire to next frame. */
|
||||
vr = VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
return vr;
|
||||
}
|
||||
|
||||
|
@ -1929,15 +1990,17 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
|
|||
return E_FAIL;
|
||||
}
|
||||
|
||||
/* We must have some kind of forward progress here. Keep trying until we exhaust all possible avenues. */
|
||||
vr = d3d12_swapchain_queue_present(swapchain, vk_queue);
|
||||
if (vr == VK_ERROR_OUT_OF_DATE_KHR)
|
||||
if (vr < 0)
|
||||
{
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
|
||||
TRACE("Recreating Vulkan swapchain.\n");
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
|
||||
if (FAILED(hr = d3d12_swapchain_recreate_vulkan_swapchain(swapchain)))
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, false)))
|
||||
return hr;
|
||||
|
||||
if (!(vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
|
||||
|
@ -1947,7 +2010,22 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
|
|||
}
|
||||
|
||||
if ((vr = d3d12_swapchain_queue_present(swapchain, vk_queue)) < 0)
|
||||
ERR("Failed to present after recreating swapchain, vr %d.\n", vr);
|
||||
{
|
||||
ERR("Failed to present after recreating swapchain, vr %d. Attempting fallback swapchain.\n", vr);
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, true)))
|
||||
return hr;
|
||||
|
||||
if (!(vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
|
||||
{
|
||||
ERR("Failed to acquire Vulkan queue.\n");
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
if ((vr = d3d12_swapchain_queue_present(swapchain, vk_queue)) < 0)
|
||||
ERR("Failed to present even after creating dummy swapchain, vr %d. This should not be possible.\n", vr);
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
|
@ -1967,7 +2045,7 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
|
|||
return hr;
|
||||
}
|
||||
|
||||
if (FAILED(hr = d3d12_fence_set_event_on_completion(unsafe_impl_from_ID3D12Fence(swapchain->frame_latency_fence),
|
||||
if (FAILED(hr = d3d12_fence_set_event_on_completion(impl_from_ID3D12Fence(swapchain->frame_latency_fence),
|
||||
swapchain->frame_number, swapchain->frame_latency_event, VKD3D_WAITING_EVENT_TYPE_SEMAPHORE)))
|
||||
{
|
||||
ERR("Failed to enqueue frame latency event, hr %#x.\n", hr);
|
||||
|
@ -2227,9 +2305,9 @@ static HRESULT d3d12_swapchain_resize_buffers(struct d3d12_swapchain *swapchain,
|
|||
&& desc->Format == new_desc.Format && desc->BufferCount == new_desc.BufferCount)
|
||||
return S_OK;
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, TRUE);
|
||||
d3d12_swapchain_destroy_resources(swapchain, true);
|
||||
swapchain->desc = new_desc;
|
||||
return d3d12_swapchain_recreate_vulkan_swapchain(swapchain);
|
||||
return d3d12_swapchain_create_vulkan_swapchain(swapchain, false);
|
||||
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_swapchain_ResizeBuffers(dxgi_swapchain_iface *iface,
|
||||
|
@ -2810,7 +2888,7 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
|
|||
ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface);
|
||||
d3d12_device_add_ref(queue->device);
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain)))
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, false)))
|
||||
{
|
||||
d3d12_swapchain_destroy(swapchain);
|
||||
return hr;
|
||||
|
@ -2827,7 +2905,7 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
|
|||
if (swapchain_desc->Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)
|
||||
swapchain->frame_latency = 1;
|
||||
|
||||
if (FAILED(hr = ID3D12Device6_CreateFence(d3d12_swapchain_device_iface(swapchain), DXGI_MAX_SWAP_CHAIN_BUFFERS,
|
||||
if (FAILED(hr = ID3D12Device9_CreateFence(d3d12_swapchain_device_iface(swapchain), DXGI_MAX_SWAP_CHAIN_BUFFERS,
|
||||
0, &IID_ID3D12Fence, (void **)&swapchain->frame_latency_fence)))
|
||||
{
|
||||
WARN("Failed to create frame latency fence, hr %#x.\n", hr);
|
||||
|
|
|
@ -153,133 +153,258 @@ static const struct vkd3d_format vkd3d_depth_stencil_formats[] =
|
|||
#undef SINT
|
||||
#undef UINT
|
||||
|
||||
static const struct vkd3d_format_compatibility_info
|
||||
static const struct dxgi_format_compatibility_list
|
||||
{
|
||||
DXGI_FORMAT format;
|
||||
DXGI_FORMAT typeless_format;
|
||||
DXGI_FORMAT image_format;
|
||||
DXGI_FORMAT view_formats[VKD3D_MAX_COMPATIBLE_FORMAT_COUNT];
|
||||
DXGI_FORMAT uint_format; /* for ClearUAVUint */
|
||||
}
|
||||
vkd3d_format_compatibility_info[] =
|
||||
dxgi_format_compatibility_list[] =
|
||||
{
|
||||
/* DXGI_FORMAT_R32G32B32A32_TYPELESS */
|
||||
{DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32A32_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_TYPELESS},
|
||||
/* DXGI_FORMAT_R32G32B32_TYPELESS */
|
||||
{DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_TYPELESS},
|
||||
/* DXGI_FORMAT_R16G16B16A16_TYPELESS */
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_TYPELESS},
|
||||
/* DXGI_FORMAT_R32G32_TYPELESS */
|
||||
{DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G32_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_TYPELESS},
|
||||
/* DXGI_FORMAT_R32G8X24_TYPELESS */
|
||||
{DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_R32G8X24_TYPELESS},
|
||||
{DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS},
|
||||
{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS},
|
||||
/* DXGI_FORMAT_R10G10B10A2_TYPELESS */
|
||||
{DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS},
|
||||
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_TYPELESS},
|
||||
/* DXGI_FORMAT_R8G8B8A8_TYPELESS */
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_TYPELESS},
|
||||
/* DXGI_FORMAT_R16G16_TYPELESS */
|
||||
{DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_TYPELESS},
|
||||
{DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_TYPELESS},
|
||||
/* DXGI_FORMAT_R32_TYPELESS */
|
||||
{DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_TYPELESS},
|
||||
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_TYPELESS},
|
||||
{DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_TYPELESS},
|
||||
{DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_TYPELESS},
|
||||
/* DXGI_FORMAT_R24G8_TYPELESS */
|
||||
{DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_R24G8_TYPELESS},
|
||||
{DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R24G8_TYPELESS},
|
||||
{DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24G8_TYPELESS},
|
||||
/* DXGI_FORMAT_R8G8_TYPELESS */
|
||||
{DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_TYPELESS},
|
||||
{DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_TYPELESS},
|
||||
/* DXGI_FORMAT_R16_TYPELESS */
|
||||
{DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_TYPELESS},
|
||||
{DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_TYPELESS},
|
||||
{DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_TYPELESS},
|
||||
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_TYPELESS},
|
||||
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_TYPELESS},
|
||||
{DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_TYPELESS},
|
||||
/* DXGI_FORMAT_R8_TYPELESS */
|
||||
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_TYPELESS},
|
||||
{DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_TYPELESS},
|
||||
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_TYPELESS},
|
||||
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_TYPELESS},
|
||||
/* DXGI_FORMAT_BC1_TYPELESS */
|
||||
{DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC1_TYPELESS},
|
||||
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_TYPELESS},
|
||||
/* DXGI_FORMAT_BC2_TYPELESS */
|
||||
{DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS},
|
||||
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_TYPELESS},
|
||||
/* DXGI_FORMAT_BC3_TYPELESS */
|
||||
{DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS},
|
||||
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_TYPELESS},
|
||||
/* DXGI_FORMAT_BC4_TYPELESS */
|
||||
{DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_TYPELESS},
|
||||
{DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC4_TYPELESS},
|
||||
/* DXGI_FORMAT_BC5_TYPELESS */
|
||||
{DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_TYPELESS},
|
||||
{DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_BC5_TYPELESS},
|
||||
/* DXGI_FORMAT_BC6H_TYPELESS */
|
||||
{DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_TYPELESS},
|
||||
{DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC6H_TYPELESS},
|
||||
/* DXGI_FORMAT_BC7_TYPELESS */
|
||||
{DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_BC7_TYPELESS},
|
||||
{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_TYPELESS},
|
||||
/* DXGI_FORMAT_B8G8R8A8_TYPELESS */
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8A8_TYPELESS},
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS},
|
||||
/* DXGI_FORMAT_B8G8R8X8_TYPELESS */
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS},
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_B8G8R8X8_TYPELESS},
|
||||
{DXGI_FORMAT_R32G32B32A32_TYPELESS,
|
||||
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT},
|
||||
DXGI_FORMAT_R32G32B32A32_UINT},
|
||||
{DXGI_FORMAT_R32G32B32A32_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R32G32B32A32_UINT},
|
||||
{DXGI_FORMAT_R32G32B32A32_UINT,
|
||||
{DXGI_FORMAT_R32G32B32A32_SINT},
|
||||
DXGI_FORMAT_R32G32B32A32_UINT},
|
||||
{DXGI_FORMAT_R32G32B32A32_SINT,
|
||||
{DXGI_FORMAT_R32G32B32A32_UINT},
|
||||
DXGI_FORMAT_R32G32B32A32_UINT},
|
||||
|
||||
{DXGI_FORMAT_R32G32B32_TYPELESS,
|
||||
{DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT},
|
||||
DXGI_FORMAT_R32G32B32_UINT},
|
||||
{DXGI_FORMAT_R32G32B32_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R32G32B32_UINT},
|
||||
{DXGI_FORMAT_R32G32B32_UINT,
|
||||
{DXGI_FORMAT_R32G32B32_SINT},
|
||||
DXGI_FORMAT_R32G32B32_UINT},
|
||||
{DXGI_FORMAT_R32G32B32_SINT,
|
||||
{DXGI_FORMAT_R32G32B32_UINT},
|
||||
DXGI_FORMAT_R32G32B32_UINT},
|
||||
|
||||
{DXGI_FORMAT_R16G16B16A16_TYPELESS,
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT},
|
||||
DXGI_FORMAT_R16G16B16A16_UINT},
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R16G16B16A16_UINT},
|
||||
{DXGI_FORMAT_R16G16B16A16_UINT,
|
||||
{DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM},
|
||||
DXGI_FORMAT_R16G16B16A16_UINT},
|
||||
{DXGI_FORMAT_R16G16B16A16_SINT,
|
||||
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM},
|
||||
DXGI_FORMAT_R16G16B16A16_UINT},
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM,
|
||||
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT},
|
||||
DXGI_FORMAT_R16G16B16A16_UINT},
|
||||
{DXGI_FORMAT_R16G16B16A16_SNORM,
|
||||
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT},
|
||||
DXGI_FORMAT_R16G16B16A16_UINT},
|
||||
|
||||
{DXGI_FORMAT_R32G32_TYPELESS,
|
||||
{DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT},
|
||||
DXGI_FORMAT_R32G32_UINT},
|
||||
{DXGI_FORMAT_R32G32_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R32G32_UINT},
|
||||
{DXGI_FORMAT_R32G32_UINT,
|
||||
{DXGI_FORMAT_R32G32_SINT},
|
||||
DXGI_FORMAT_R32G32_UINT},
|
||||
{DXGI_FORMAT_R32G32_SINT,
|
||||
{DXGI_FORMAT_R32G32_UINT},
|
||||
DXGI_FORMAT_R32G32_UINT},
|
||||
|
||||
{DXGI_FORMAT_R10G10B10A2_TYPELESS,
|
||||
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT},
|
||||
DXGI_FORMAT_R10G10B10A2_UINT},
|
||||
{DXGI_FORMAT_R10G10B10A2_UINT,
|
||||
{DXGI_FORMAT_R10G10B10A2_UNORM},
|
||||
DXGI_FORMAT_R10G10B10A2_UINT},
|
||||
{DXGI_FORMAT_R10G10B10A2_UNORM,
|
||||
{DXGI_FORMAT_R10G10B10A2_UINT},
|
||||
DXGI_FORMAT_R10G10B10A2_UINT},
|
||||
|
||||
{DXGI_FORMAT_R11G11B10_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R32_UINT},
|
||||
|
||||
{DXGI_FORMAT_R8G8_TYPELESS,
|
||||
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM},
|
||||
DXGI_FORMAT_R8G8_UINT},
|
||||
{DXGI_FORMAT_R8G8_UINT,
|
||||
{DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM},
|
||||
DXGI_FORMAT_R8G8_UINT},
|
||||
{DXGI_FORMAT_R8G8_SINT,
|
||||
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM},
|
||||
DXGI_FORMAT_R8G8_UINT},
|
||||
{DXGI_FORMAT_R8G8_UNORM,
|
||||
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT},
|
||||
DXGI_FORMAT_R8G8_UINT},
|
||||
{DXGI_FORMAT_R8G8_SNORM,
|
||||
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT},
|
||||
DXGI_FORMAT_R8G8_UINT},
|
||||
|
||||
{DXGI_FORMAT_R8G8B8A8_TYPELESS,
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_SNORM},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT,
|
||||
{DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_SNORM},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_R8G8B8A8_SINT,
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_SNORM},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM,
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_R8G8B8A8_SNORM,
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
|
||||
{DXGI_FORMAT_R16G16_TYPELESS,
|
||||
{DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM},
|
||||
DXGI_FORMAT_R16G16_UINT},
|
||||
{DXGI_FORMAT_R16G16_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R16G16_UINT},
|
||||
{DXGI_FORMAT_R16G16_UINT,
|
||||
{DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM},
|
||||
DXGI_FORMAT_R16G16_UINT},
|
||||
{DXGI_FORMAT_R16G16_SINT,
|
||||
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM},
|
||||
DXGI_FORMAT_R16G16_UINT},
|
||||
{DXGI_FORMAT_R16G16_UNORM,
|
||||
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT},
|
||||
DXGI_FORMAT_R16G16_UINT},
|
||||
{DXGI_FORMAT_R16G16_SNORM,
|
||||
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT},
|
||||
DXGI_FORMAT_R16G16_UINT},
|
||||
|
||||
{DXGI_FORMAT_R32_TYPELESS,
|
||||
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT},
|
||||
DXGI_FORMAT_R32_UINT},
|
||||
{DXGI_FORMAT_R32_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R32_UINT},
|
||||
{DXGI_FORMAT_R32_UINT,
|
||||
{DXGI_FORMAT_R32_SINT},
|
||||
DXGI_FORMAT_R32_UINT},
|
||||
{DXGI_FORMAT_R32_SINT,
|
||||
{DXGI_FORMAT_R32_UINT},
|
||||
DXGI_FORMAT_R32_UINT},
|
||||
|
||||
{DXGI_FORMAT_R16_TYPELESS,
|
||||
{DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM},
|
||||
DXGI_FORMAT_R16_UINT},
|
||||
{DXGI_FORMAT_R16_FLOAT, {DXGI_FORMAT_UNKNOWN},
|
||||
DXGI_FORMAT_R16_UINT},
|
||||
{DXGI_FORMAT_R16_UINT,
|
||||
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM},
|
||||
DXGI_FORMAT_R16_UINT},
|
||||
{DXGI_FORMAT_R16_SINT,
|
||||
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM},
|
||||
DXGI_FORMAT_R16_UINT},
|
||||
{DXGI_FORMAT_R16_UNORM,
|
||||
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT},
|
||||
DXGI_FORMAT_R16_UINT},
|
||||
{DXGI_FORMAT_R16_SNORM,
|
||||
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT},
|
||||
DXGI_FORMAT_R16_UINT},
|
||||
|
||||
{DXGI_FORMAT_R8_TYPELESS,
|
||||
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_A8_UNORM},
|
||||
DXGI_FORMAT_R8_UINT},
|
||||
{DXGI_FORMAT_R8_UINT,
|
||||
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_A8_UNORM},
|
||||
DXGI_FORMAT_R8_UINT},
|
||||
{DXGI_FORMAT_R8_SINT,
|
||||
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_A8_UNORM},
|
||||
DXGI_FORMAT_R8_UINT},
|
||||
{DXGI_FORMAT_R8_UNORM,
|
||||
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_A8_UNORM},
|
||||
DXGI_FORMAT_R8_UINT},
|
||||
{DXGI_FORMAT_R8_SNORM,
|
||||
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT},
|
||||
DXGI_FORMAT_R8_UINT},
|
||||
{DXGI_FORMAT_A8_UNORM,
|
||||
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UNORM},
|
||||
DXGI_FORMAT_R8_UINT},
|
||||
|
||||
{DXGI_FORMAT_B8G8R8A8_TYPELESS,
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM,
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB,
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
|
||||
{DXGI_FORMAT_B8G8R8X8_TYPELESS,
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM,
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM_SRGB},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM_SRGB,
|
||||
{DXGI_FORMAT_B8G8R8X8_UNORM},
|
||||
DXGI_FORMAT_R8G8B8A8_UINT},
|
||||
|
||||
{DXGI_FORMAT_BC1_TYPELESS,
|
||||
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC1_UNORM,
|
||||
{DXGI_FORMAT_BC1_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC1_UNORM_SRGB,
|
||||
{DXGI_FORMAT_BC1_UNORM}},
|
||||
|
||||
{DXGI_FORMAT_BC2_TYPELESS,
|
||||
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC2_UNORM,
|
||||
{DXGI_FORMAT_BC2_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC2_UNORM_SRGB,
|
||||
{DXGI_FORMAT_BC2_UNORM}},
|
||||
|
||||
{DXGI_FORMAT_BC3_TYPELESS,
|
||||
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC3_UNORM,
|
||||
{DXGI_FORMAT_BC3_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC3_UNORM_SRGB,
|
||||
{DXGI_FORMAT_BC3_UNORM}},
|
||||
|
||||
{DXGI_FORMAT_BC4_TYPELESS,
|
||||
{DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM}},
|
||||
{DXGI_FORMAT_BC5_TYPELESS,
|
||||
{DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM}},
|
||||
{DXGI_FORMAT_BC6H_TYPELESS,
|
||||
{DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16}},
|
||||
|
||||
{DXGI_FORMAT_BC7_TYPELESS,
|
||||
{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC7_UNORM,
|
||||
{DXGI_FORMAT_BC7_UNORM_SRGB}},
|
||||
{DXGI_FORMAT_BC7_UNORM_SRGB,
|
||||
{DXGI_FORMAT_BC7_UNORM}},
|
||||
};
|
||||
|
||||
static bool dxgi_format_is_depth_stencil(DXGI_FORMAT dxgi_format)
|
||||
void vkd3d_format_compatibility_list_add_format(struct vkd3d_format_compatibility_list *list, VkFormat vk_format)
|
||||
{
|
||||
unsigned int i;
|
||||
bool found = false;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i)
|
||||
for (i = 0; i < list->format_count && !found; i++)
|
||||
found = list->vk_formats[i] == vk_format;
|
||||
|
||||
if (!found)
|
||||
{
|
||||
const struct vkd3d_format *current = &vkd3d_formats[i];
|
||||
|
||||
if (current->dxgi_format == dxgi_format)
|
||||
return current->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
assert(list->format_count < ARRAY_SIZE(list->vk_formats));
|
||||
list->vk_formats[list->format_count++] = vk_format;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i)
|
||||
{
|
||||
if (vkd3d_depth_stencil_formats[i].dxgi_format == dxgi_format)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* FIXME: This table should be generated at compile-time. */
|
||||
static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device)
|
||||
{
|
||||
struct vkd3d_format_compatibility_list *lists, *current_list;
|
||||
const struct vkd3d_format_compatibility_info *current;
|
||||
DXGI_FORMAT dxgi_format;
|
||||
VkFormat vk_format;
|
||||
struct vkd3d_format_compatibility_list *lists, *dst;
|
||||
const struct dxgi_format_compatibility_list *src;
|
||||
unsigned int count;
|
||||
unsigned int i, j;
|
||||
|
||||
|
@ -289,62 +414,25 @@ static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device
|
|||
if (!device->vk_info.KHR_image_format_list)
|
||||
return S_OK;
|
||||
|
||||
count = 1;
|
||||
dxgi_format = vkd3d_format_compatibility_info[0].typeless_format;
|
||||
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
|
||||
{
|
||||
DXGI_FORMAT typeless_format = vkd3d_format_compatibility_info[i].typeless_format;
|
||||
|
||||
if (dxgi_format != typeless_format)
|
||||
{
|
||||
++count;
|
||||
dxgi_format = typeless_format;
|
||||
}
|
||||
}
|
||||
count = 0;
|
||||
for (i = 0; i < ARRAY_SIZE(dxgi_format_compatibility_list); ++i)
|
||||
count = max(count, dxgi_format_compatibility_list[i].image_format + 1);
|
||||
|
||||
if (!(lists = vkd3d_calloc(count, sizeof(*lists))))
|
||||
return E_OUTOFMEMORY;
|
||||
|
||||
count = 0;
|
||||
current_list = lists;
|
||||
current_list->typeless_format = vkd3d_format_compatibility_info[0].typeless_format;
|
||||
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
|
||||
for (i = 0; i < ARRAY_SIZE(dxgi_format_compatibility_list); ++i)
|
||||
{
|
||||
current = &vkd3d_format_compatibility_info[i];
|
||||
src = &dxgi_format_compatibility_list[i];
|
||||
dst = &lists[src->image_format];
|
||||
|
||||
if (current_list->typeless_format != current->typeless_format)
|
||||
{
|
||||
/* Avoid empty format lists. */
|
||||
if (current_list->format_count)
|
||||
{
|
||||
++current_list;
|
||||
++count;
|
||||
}
|
||||
dst->uint_format = src->uint_format;
|
||||
dst->vk_formats[dst->format_count++] = vkd3d_get_vk_format(src->image_format);
|
||||
|
||||
current_list->typeless_format = current->typeless_format;
|
||||
}
|
||||
|
||||
/* In Vulkan, each depth-stencil format is only compatible with itself. */
|
||||
if (dxgi_format_is_depth_stencil(current->format))
|
||||
continue;
|
||||
|
||||
if (!(vk_format = vkd3d_get_vk_format(current->format)))
|
||||
continue;
|
||||
|
||||
for (j = 0; j < current_list->format_count; ++j)
|
||||
{
|
||||
if (current_list->vk_formats[j] == vk_format)
|
||||
break;
|
||||
}
|
||||
|
||||
if (j >= current_list->format_count)
|
||||
{
|
||||
assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT);
|
||||
current_list->vk_formats[current_list->format_count++] = vk_format;
|
||||
}
|
||||
for (j = 0; j < ARRAY_SIZE(src->view_formats) && src->view_formats[j]; j++)
|
||||
vkd3d_format_compatibility_list_add_format(dst, vkd3d_get_vk_format(src->view_formats[j]));
|
||||
}
|
||||
if (current_list->format_count)
|
||||
++count;
|
||||
|
||||
|
||||
device->format_compatibility_list_count = count;
|
||||
device->format_compatibility_lists = lists;
|
||||
|
@ -525,48 +613,6 @@ VkFormat vkd3d_internal_get_vk_format(const struct d3d12_device *device, DXGI_FO
|
|||
return VK_FORMAT_UNDEFINED;
|
||||
}
|
||||
|
||||
DXGI_FORMAT vkd3d_get_typeless_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format)
|
||||
{
|
||||
const struct vkd3d_format *format = vkd3d_get_format(device, dxgi_format, true);
|
||||
unsigned int i;
|
||||
|
||||
if (!format)
|
||||
return DXGI_FORMAT_UNKNOWN;
|
||||
|
||||
if (format->type == VKD3D_FORMAT_TYPE_TYPELESS)
|
||||
return dxgi_format;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
|
||||
{
|
||||
if (vkd3d_format_compatibility_info[i].format == dxgi_format)
|
||||
return vkd3d_format_compatibility_info[i].typeless_format;
|
||||
}
|
||||
|
||||
return DXGI_FORMAT_UNKNOWN;
|
||||
}
|
||||
|
||||
const struct vkd3d_format *vkd3d_find_uint_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format)
|
||||
{
|
||||
DXGI_FORMAT typeless_format = DXGI_FORMAT_UNKNOWN;
|
||||
const struct vkd3d_format *vkd3d_format;
|
||||
unsigned int i;
|
||||
|
||||
if (!(typeless_format = vkd3d_get_typeless_format(device, dxgi_format)))
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
|
||||
{
|
||||
if (vkd3d_format_compatibility_info[i].typeless_format != typeless_format)
|
||||
continue;
|
||||
|
||||
vkd3d_format = vkd3d_get_format(device, vkd3d_format_compatibility_info[i].format, false);
|
||||
if (vkd3d_format->type == VKD3D_FORMAT_TYPE_UINT)
|
||||
return vkd3d_format;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void vkd3d_format_copy_data(const struct vkd3d_format *format, const uint8_t *src,
|
||||
unsigned int src_row_pitch, unsigned int src_slice_pitch, uint8_t *dst, unsigned int dst_row_pitch,
|
||||
unsigned int dst_slice_pitch, unsigned int w, unsigned int h, unsigned int d)
|
||||
|
@ -626,6 +672,7 @@ bool is_valid_feature_level(D3D_FEATURE_LEVEL feature_level)
|
|||
{
|
||||
static const D3D_FEATURE_LEVEL valid_feature_levels[] =
|
||||
{
|
||||
D3D_FEATURE_LEVEL_12_2,
|
||||
D3D_FEATURE_LEVEL_12_1,
|
||||
D3D_FEATURE_LEVEL_12_0,
|
||||
D3D_FEATURE_LEVEL_11_1,
|
||||
|
@ -971,6 +1018,16 @@ HRESULT hresult_from_errno(int rc)
|
|||
|
||||
HRESULT hresult_from_vk_result(VkResult vr)
|
||||
{
|
||||
/* Wine tends to dispatch Vulkan calls to their own syscall stack.
|
||||
* Crashes are captured and return this magic VkResult.
|
||||
* Report it explicitly here so it's easier to debug when it happens. */
|
||||
if (vr == -1073741819)
|
||||
{
|
||||
ERR("Detected segfault in Wine syscall handler.\n");
|
||||
/* HACK: For ad-hoc debugging can also trigger backtrace printing here. */
|
||||
return E_POINTER;
|
||||
}
|
||||
|
||||
switch (vr)
|
||||
{
|
||||
case VK_SUCCESS:
|
||||
|
@ -980,6 +1037,9 @@ HRESULT hresult_from_vk_result(VkResult vr)
|
|||
/* fall-through */
|
||||
case VK_ERROR_OUT_OF_HOST_MEMORY:
|
||||
return E_OUTOFMEMORY;
|
||||
case VK_ERROR_VALIDATION_FAILED_EXT:
|
||||
/* NV driver sometimes returns this on invalid API usage. */
|
||||
return E_INVALIDARG;
|
||||
default:
|
||||
FIXME("Unhandled VkResult %d.\n", vr);
|
||||
/* fall-through */
|
||||
|
|
|
@ -52,7 +52,7 @@ void vkd3d_descriptor_debug_unregister_heap(uint64_t cookie);
|
|||
|
||||
void vkd3d_descriptor_debug_register_resource_cookie(
|
||||
struct vkd3d_descriptor_qa_global_info *global_info,
|
||||
uint64_t cookie, const D3D12_RESOURCE_DESC *desc);
|
||||
uint64_t cookie, const D3D12_RESOURCE_DESC1 *desc);
|
||||
void vkd3d_descriptor_debug_register_allocation_cookie(
|
||||
struct vkd3d_descriptor_qa_global_info *global_info,
|
||||
uint64_t cookie, const struct vkd3d_allocate_memory_info *info);
|
||||
|
|
|
@ -165,15 +165,28 @@ static CONST_VTBL struct ID3D12RootSignatureDeserializerVtbl d3d12_root_signatur
|
|||
|
||||
static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
enum vkd3d_root_signature_version target_version)
|
||||
enum vkd3d_root_signature_version target_version,
|
||||
bool raw_payload,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
struct vkd3d_versioned_root_signature_desc desc, converted_desc;
|
||||
int ret;
|
||||
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc)) < 0)
|
||||
if (raw_payload)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return ret;
|
||||
if ((ret = vkd3d_shader_parse_root_signature_raw(dxbc->code, dxbc->size, &desc, compatibility_hash)) < 0)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc, compatibility_hash)) < 0)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (desc.version == target_version)
|
||||
|
@ -197,15 +210,27 @@ static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code
|
|||
}
|
||||
|
||||
int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc)
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0);
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0, false,
|
||||
compatibility_hash);
|
||||
}
|
||||
|
||||
int vkd3d_parse_root_signature_v_1_1(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc)
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1);
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, false,
|
||||
compatibility_hash);
|
||||
}
|
||||
|
||||
int vkd3d_parse_root_signature_v_1_1_from_raw_payload(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, true,
|
||||
compatibility_hash);
|
||||
}
|
||||
|
||||
static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signature_deserializer *deserializer,
|
||||
|
@ -216,7 +241,7 @@ static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signatur
|
|||
deserializer->ID3D12RootSignatureDeserializer_iface.lpVtbl = &d3d12_root_signature_deserializer_vtbl;
|
||||
deserializer->refcount = 1;
|
||||
|
||||
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d)) < 0)
|
||||
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
|
||||
return hresult_from_vkd3d_result(ret);
|
||||
|
||||
return S_OK;
|
||||
|
@ -394,7 +419,7 @@ static HRESULT d3d12_versioned_root_signature_deserializer_init(struct d3d12_ver
|
|||
deserializer->ID3D12VersionedRootSignatureDeserializer_iface.lpVtbl = &d3d12_versioned_root_signature_deserializer_vtbl;
|
||||
deserializer->refcount = 1;
|
||||
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d)) < 0)
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return hresult_from_vkd3d_result(ret);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -26,6 +26,7 @@
|
|||
bool vkd3d_renderdoc_active(void);
|
||||
bool vkd3d_renderdoc_loaded_api(void);
|
||||
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash);
|
||||
bool vkd3d_renderdoc_global_capture_enabled(void);
|
||||
|
||||
bool vkd3d_renderdoc_begin_capture(void *instance);
|
||||
void vkd3d_renderdoc_end_capture(void *instance);
|
||||
|
|
|
@ -45,11 +45,14 @@ enum vkd3d_meta_copy_mode
|
|||
#include <cs_resolve_binary_queries.h>
|
||||
#include <cs_resolve_predicate.h>
|
||||
#include <cs_resolve_query.h>
|
||||
#include <cs_execute_indirect_patch.h>
|
||||
#include <cs_execute_indirect_patch_debug_ring.h>
|
||||
#include <vs_fullscreen_layer.h>
|
||||
#include <vs_fullscreen.h>
|
||||
#include <gs_fullscreen.h>
|
||||
#include <fs_copy_image_float.h>
|
||||
#include <fs_copy_image_uint.h>
|
||||
#include <fs_copy_image_stencil.h>
|
||||
#include <vs_swapchain_fullscreen.h>
|
||||
#include <fs_swapchain_fullscreen.h>
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ VK_INSTANCE_PFN(vkEnumeratePhysicalDevices)
|
|||
VK_INSTANCE_PFN(vkGetDeviceProcAddr)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceFormatProperties)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceFormatProperties2)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceImageFormatProperties)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceMemoryProperties)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties)
|
||||
|
@ -48,6 +49,7 @@ VK_INSTANCE_PFN(vkGetPhysicalDeviceQueueFamilyProperties)
|
|||
VK_INSTANCE_PFN(vkGetPhysicalDeviceSparseImageFormatProperties)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures2)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties2)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceExternalSemaphoreProperties)
|
||||
|
||||
/* VK_EXT_debug_utils */
|
||||
VK_INSTANCE_EXT_PFN(vkCreateDebugUtilsMessengerEXT)
|
||||
|
@ -60,22 +62,14 @@ VK_DEVICE_PFN(vkAllocateCommandBuffers)
|
|||
VK_DEVICE_PFN(vkAllocateDescriptorSets)
|
||||
VK_DEVICE_PFN(vkAllocateMemory)
|
||||
VK_DEVICE_PFN(vkBeginCommandBuffer)
|
||||
VK_DEVICE_PFN(vkBindBufferMemory)
|
||||
VK_DEVICE_PFN(vkBindImageMemory)
|
||||
VK_DEVICE_PFN(vkCmdBeginQuery)
|
||||
VK_DEVICE_PFN(vkCmdBeginRenderPass)
|
||||
VK_DEVICE_PFN(vkCmdBindDescriptorSets)
|
||||
VK_DEVICE_PFN(vkCmdBindIndexBuffer)
|
||||
VK_DEVICE_PFN(vkCmdBindPipeline)
|
||||
VK_DEVICE_PFN(vkCmdBindVertexBuffers)
|
||||
VK_DEVICE_PFN(vkCmdBlitImage)
|
||||
VK_DEVICE_PFN(vkCmdClearAttachments)
|
||||
VK_DEVICE_PFN(vkCmdClearColorImage)
|
||||
VK_DEVICE_PFN(vkCmdClearDepthStencilImage)
|
||||
VK_DEVICE_PFN(vkCmdCopyBuffer)
|
||||
VK_DEVICE_PFN(vkCmdCopyBufferToImage)
|
||||
VK_DEVICE_PFN(vkCmdCopyImage)
|
||||
VK_DEVICE_PFN(vkCmdCopyImageToBuffer)
|
||||
VK_DEVICE_PFN(vkCmdCopyQueryPoolResults)
|
||||
VK_DEVICE_PFN(vkCmdDispatch)
|
||||
VK_DEVICE_PFN(vkCmdDispatchIndirect)
|
||||
|
@ -84,7 +78,6 @@ VK_DEVICE_PFN(vkCmdDrawIndexed)
|
|||
VK_DEVICE_PFN(vkCmdDrawIndexedIndirect)
|
||||
VK_DEVICE_PFN(vkCmdDrawIndirect)
|
||||
VK_DEVICE_PFN(vkCmdEndQuery)
|
||||
VK_DEVICE_PFN(vkCmdEndRenderPass)
|
||||
VK_DEVICE_PFN(vkCmdExecuteCommands)
|
||||
VK_DEVICE_PFN(vkCmdFillBuffer)
|
||||
VK_DEVICE_PFN(vkCmdNextSubpass)
|
||||
|
@ -92,7 +85,6 @@ VK_DEVICE_PFN(vkCmdPipelineBarrier)
|
|||
VK_DEVICE_PFN(vkCmdPushConstants)
|
||||
VK_DEVICE_PFN(vkCmdResetEvent)
|
||||
VK_DEVICE_PFN(vkCmdResetQueryPool)
|
||||
VK_DEVICE_PFN(vkCmdResolveImage)
|
||||
VK_DEVICE_PFN(vkCmdSetBlendConstants)
|
||||
VK_DEVICE_PFN(vkCmdSetDepthBias)
|
||||
VK_DEVICE_PFN(vkCmdSetDepthBounds)
|
||||
|
@ -121,7 +113,6 @@ VK_DEVICE_PFN(vkCreateImageView)
|
|||
VK_DEVICE_PFN(vkCreatePipelineCache)
|
||||
VK_DEVICE_PFN(vkCreatePipelineLayout)
|
||||
VK_DEVICE_PFN(vkCreateQueryPool)
|
||||
VK_DEVICE_PFN(vkCreateRenderPass)
|
||||
VK_DEVICE_PFN(vkCreateSampler)
|
||||
VK_DEVICE_PFN(vkCreateSemaphore)
|
||||
VK_DEVICE_PFN(vkCreateShaderModule)
|
||||
|
@ -139,7 +130,6 @@ VK_DEVICE_PFN(vkDestroyPipeline)
|
|||
VK_DEVICE_PFN(vkDestroyPipelineCache)
|
||||
VK_DEVICE_PFN(vkDestroyPipelineLayout)
|
||||
VK_DEVICE_PFN(vkDestroyQueryPool)
|
||||
VK_DEVICE_PFN(vkDestroyRenderPass)
|
||||
VK_DEVICE_PFN(vkDestroySampler)
|
||||
VK_DEVICE_PFN(vkDestroySemaphore)
|
||||
VK_DEVICE_PFN(vkDestroyShaderModule)
|
||||
|
@ -163,7 +153,6 @@ VK_DEVICE_PFN(vkGetImageSparseMemoryRequirements2)
|
|||
VK_DEVICE_PFN(vkGetImageSubresourceLayout)
|
||||
VK_DEVICE_PFN(vkGetPipelineCacheData)
|
||||
VK_DEVICE_PFN(vkGetQueryPoolResults)
|
||||
VK_DEVICE_PFN(vkGetRenderAreaGranularity)
|
||||
VK_DEVICE_PFN(vkInvalidateMappedMemoryRanges)
|
||||
VK_DEVICE_PFN(vkMapMemory)
|
||||
VK_DEVICE_PFN(vkMergePipelineCaches)
|
||||
|
@ -218,11 +207,34 @@ VK_DEVICE_EXT_PFN(vkCmdCopyAccelerationStructureKHR)
|
|||
VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceFragmentShadingRatesKHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetFragmentShadingRateKHR)
|
||||
|
||||
/* VK_KHR_create_renderpass2 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdBeginRenderPass2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdEndRenderPass2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdNextSubpass2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCreateRenderPass2KHR)
|
||||
/* VK_KHR_bind_memory2 */
|
||||
VK_DEVICE_EXT_PFN(vkBindBufferMemory2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkBindImageMemory2KHR)
|
||||
|
||||
/* VK_KHR_copy_commands2 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdBlitImage2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdCopyBuffer2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdCopyBufferToImage2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdCopyImage2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdCopyImageToBuffer2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdResolveImage2KHR)
|
||||
|
||||
/* VK_KHR_maintenance4 */
|
||||
VK_DEVICE_EXT_PFN(vkGetDeviceBufferMemoryRequirementsKHR)
|
||||
VK_DEVICE_EXT_PFN(vkGetDeviceImageMemoryRequirementsKHR)
|
||||
VK_DEVICE_EXT_PFN(vkGetDeviceImageSparseMemoryRequirementsKHR)
|
||||
|
||||
#ifdef VK_KHR_external_memory_win32
|
||||
/* VK_KHR_external_memory_win32 */
|
||||
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandleKHR)
|
||||
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandlePropertiesKHR)
|
||||
#endif
|
||||
|
||||
#ifdef VK_KHR_external_semaphore_win32
|
||||
/* VK_KHR_external_semaphore_win32 */
|
||||
VK_DEVICE_EXT_PFN(vkGetSemaphoreWin32HandleKHR)
|
||||
VK_DEVICE_EXT_PFN(vkImportSemaphoreWin32HandleKHR)
|
||||
#endif
|
||||
|
||||
/* VK_EXT_calibrated_timestamps */
|
||||
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
|
||||
|
@ -251,6 +263,9 @@ VK_DEVICE_EXT_PFN(vkCmdSetPrimitiveTopologyEXT)
|
|||
VK_DEVICE_EXT_PFN(vkCmdSetScissorWithCountEXT)
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetViewportWithCountEXT)
|
||||
|
||||
/* VK_EXT_extended_dynamic_state2 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetPrimitiveRestartEnableEXT)
|
||||
|
||||
/* VK_EXT_external_memory_host */
|
||||
VK_DEVICE_EXT_PFN(vkGetMemoryHostPointerPropertiesEXT)
|
||||
|
||||
|
@ -274,9 +289,20 @@ VK_DEVICE_EXT_PFN(vkGetSwapchainImagesKHR)
|
|||
VK_DEVICE_EXT_PFN(vkAcquireNextImageKHR)
|
||||
VK_DEVICE_EXT_PFN(vkQueuePresentKHR)
|
||||
|
||||
/* VK_KHR_dynamic_rendering */
|
||||
VK_DEVICE_EXT_PFN(vkCmdBeginRenderingKHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdEndRenderingKHR)
|
||||
|
||||
/* VK_KHR_ray_tracing_maintenance1 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdTraceRaysIndirect2KHR)
|
||||
|
||||
/* VK_AMD_buffer_marker */
|
||||
VK_DEVICE_EXT_PFN(vkCmdWriteBufferMarkerAMD)
|
||||
|
||||
/* VK_NV_device_diagnostic_checkpoints */
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetCheckpointNV)
|
||||
VK_DEVICE_EXT_PFN(vkGetQueueCheckpointDataNV)
|
||||
|
||||
/* VK_NVX_binary_import */
|
||||
VK_DEVICE_EXT_PFN(vkCreateCuModuleNVX)
|
||||
VK_DEVICE_EXT_PFN(vkCreateCuFunctionNVX)
|
||||
|
@ -288,6 +314,16 @@ VK_DEVICE_EXT_PFN(vkCmdCuLaunchKernelNVX)
|
|||
VK_DEVICE_EXT_PFN(vkGetImageViewHandleNVX)
|
||||
VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)
|
||||
|
||||
/* VK_VALVE_descriptor_set_host_mapping */
|
||||
VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE)
|
||||
VK_DEVICE_EXT_PFN(vkGetDescriptorSetHostMappingVALVE)
|
||||
|
||||
/* VK_NV_device_generated_commands */
|
||||
VK_DEVICE_EXT_PFN(vkCreateIndirectCommandsLayoutNV)
|
||||
VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV)
|
||||
VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV)
|
||||
VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV)
|
||||
|
||||
#undef VK_INSTANCE_PFN
|
||||
#undef VK_INSTANCE_EXT_PFN
|
||||
#undef VK_DEVICE_PFN
|
||||
|
|
11
meson.build
11
meson.build
|
@ -1,4 +1,4 @@
|
|||
project('vkd3d-proton', ['c'], version : '2.4', meson_version : '>= 0.49', default_options : [
|
||||
project('vkd3d-proton', ['c'], version : '2.6', meson_version : '>= 0.49', default_options : [
|
||||
'warning_level=2',
|
||||
])
|
||||
|
||||
|
@ -64,6 +64,11 @@ if not enable_trace
|
|||
add_project_arguments('-DVKD3D_NO_TRACE_MESSAGES', language : 'c')
|
||||
endif
|
||||
|
||||
enable_breadcrumbs = enable_trace
|
||||
if enable_breadcrumbs
|
||||
add_project_arguments('-DVKD3D_ENABLE_BREADCRUMBS', language : 'c')
|
||||
endif
|
||||
|
||||
vkd3d_external_includes = [ './subprojects/Vulkan-Headers/include', './subprojects/SPIRV-Headers/include' ]
|
||||
vkd3d_public_includes = [ './include' ] + vkd3d_external_includes
|
||||
vkd3d_private_includes = [ './include/private' ] + vkd3d_public_includes
|
||||
|
@ -78,8 +83,8 @@ idl_generator = generator(idl_compiler,
|
|||
arguments : [ '-h', '-o', '@OUTPUT@', '@INPUT@' ])
|
||||
|
||||
glsl_compiler = find_program('glslangValidator')
|
||||
glsl_args = [ '-V', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
|
||||
if run_command(glsl_compiler, [ '--quiet', '--version' ]).returncode() == 0
|
||||
glsl_args = [ '-V', '--target-env', 'vulkan1.1', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
|
||||
if run_command(glsl_compiler, [ '--quiet', '--version' ], check : false).returncode() == 0
|
||||
glsl_args += [ '--quiet' ]
|
||||
endif
|
||||
glsl_generator = generator(glsl_compiler,
|
||||
|
|
|
@ -75,17 +75,35 @@ def main():
|
|||
parser.add_argument('--per-iteration', action = 'store_true', help = 'Represent ticks in terms of ticks / iteration. Cannot be used with --divider.')
|
||||
parser.add_argument('--name', nargs = '+', type = str, help = 'Only display data for certain counters.')
|
||||
parser.add_argument('--sort', type = str, default = 'none', help = 'Sorts input data according to "iterations" or "ticks".')
|
||||
parser.add_argument('--delta', type = str, help = 'Subtract iterations and timing from other profile blob.')
|
||||
parser.add_argument('profile', help = 'The profile binary blob.')
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.profile:
|
||||
raise AssertionError('Need profile folder.')
|
||||
|
||||
delta_map = {}
|
||||
if args.delta is not None:
|
||||
with open(args.delta, 'rb') as f:
|
||||
for block in iter(lambda: f.read(64), b''):
|
||||
if is_valid_block(block):
|
||||
b = parse_block(block)
|
||||
delta_map[b.name] = b
|
||||
|
||||
blocks = []
|
||||
with open(args.profile, 'rb') as f:
|
||||
for block in iter(lambda: f.read(64), b''):
|
||||
if is_valid_block(block):
|
||||
blocks.append(parse_block(block))
|
||||
b = parse_block(block)
|
||||
if b.name in delta_map:
|
||||
d = delta_map[b.name]
|
||||
b = ProfileCase(ticks = b.ticks - d.ticks,
|
||||
iterations = b.iterations - d.iterations,
|
||||
name = b.name)
|
||||
if b.iterations < 0 or b.ticks < 0:
|
||||
raise AssertionError('After subtracting, iterations or ticks became negative.')
|
||||
if b.iterations > 0:
|
||||
blocks.append(b)
|
||||
|
||||
if args.divider is not None:
|
||||
if args.per_iteration:
|
||||
|
@ -114,11 +132,11 @@ def main():
|
|||
print(' Iterations:', block.iterations)
|
||||
|
||||
if args.divider is not None:
|
||||
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "us")
|
||||
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "Kcycles")
|
||||
elif args.per_iteration:
|
||||
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "us")
|
||||
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
|
||||
else:
|
||||
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "us")
|
||||
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit f9e1ffdcc1c123b79dd9f6002b418d9703d98904
|
||||
Subproject commit ae217c17809fadb232ec94b29304b4afcd417bb4
|
|
@ -1 +1 @@
|
|||
Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6
|
||||
Subproject commit 245d25ce8c3337919dc7916d0e62e31a0d8748ab
|
|
@ -1 +1 @@
|
|||
Subproject commit f63e45cf6509278c82a4515ed94b0850a438af46
|
||||
Subproject commit 9f2fd6356c14376ab5b88518d6dd4e6787084525
|
|
@ -22,6 +22,441 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "d3d12_crosstest.h"
|
||||
|
||||
void test_bindless_heap_sm66(void)
|
||||
{
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
D3D12_FEATURE_DATA_SHADER_MODEL shader_model;
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
||||
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
|
||||
D3D12_ROOT_PARAMETER root_parameters[1];
|
||||
ID3D12DescriptorHeap *cpu_resource_heap;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle;
|
||||
unsigned int descriptor_size_sampler;
|
||||
ID3D12RootSignature *root_signature;
|
||||
ID3D12DescriptorHeap *resource_heap;
|
||||
ID3D12Resource *input_textures[256];
|
||||
ID3D12DescriptorHeap *sampler_heap;
|
||||
unsigned int i, descriptor_size;
|
||||
D3D12_SAMPLER_DESC sampler_desc;
|
||||
ID3D12DescriptorHeap *heaps[2];
|
||||
ID3D12Resource *output_buffer;
|
||||
ID3D12Resource *input_buffer;
|
||||
struct resource_readback rb;
|
||||
struct test_context context;
|
||||
ID3D12PipelineState *pso;
|
||||
ID3D12Device *device;
|
||||
FLOAT clear_values[4];
|
||||
unsigned int x, y;
|
||||
D3D12_RECT rect;
|
||||
HRESULT hr;
|
||||
|
||||
static const BYTE cs_code[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer heap_offsets : register(b0)
|
||||
{
|
||||
uint src_uav_tex_offset;
|
||||
uint src_srv_tex_offset;
|
||||
uint src_srv_buffer_offset;
|
||||
uint dst_uav_buffer_offset;
|
||||
};
|
||||
|
||||
struct Dummy { uint v; };
|
||||
|
||||
[numthreads(64, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
uint input_value = 0;
|
||||
|
||||
if (thr % 2 == 0)
|
||||
{
|
||||
Texture2D<float> src = ResourceDescriptorHeap[NonUniformResourceIndex(src_srv_tex_offset + thr)];
|
||||
SamplerState samp = SamplerDescriptorHeap[NonUniformResourceIndex(thr)];
|
||||
input_value += uint(src.SampleLevel(samp, float2(1.25, 1.25), 0.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
RWTexture2D<float> src = ResourceDescriptorHeap[NonUniformResourceIndex(src_uav_tex_offset + thr)];
|
||||
input_value += uint(src[int2(0, 0)]);
|
||||
}
|
||||
|
||||
// Test different descriptor types.
|
||||
if (thr % 16 == 0)
|
||||
{
|
||||
ConstantBuffer<Dummy> src = ResourceDescriptorHeap[NonUniformResourceIndex(src_srv_buffer_offset + thr)];
|
||||
input_value += src.v;
|
||||
}
|
||||
else if (thr % 4 == 0)
|
||||
{
|
||||
ByteAddressBuffer src = ResourceDescriptorHeap[NonUniformResourceIndex(src_srv_buffer_offset + thr)];
|
||||
input_value += src.Load(0);
|
||||
}
|
||||
else if (thr % 4 == 1)
|
||||
{
|
||||
Buffer<uint> src = ResourceDescriptorHeap[NonUniformResourceIndex(src_srv_buffer_offset + thr)];
|
||||
input_value += src[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
StructuredBuffer<uint> dst = ResourceDescriptorHeap[NonUniformResourceIndex(src_srv_buffer_offset + thr)];
|
||||
input_value += dst[0];
|
||||
}
|
||||
|
||||
// Test different descriptor types.
|
||||
if (thr % 4 == 0)
|
||||
{
|
||||
RWByteAddressBuffer dst = ResourceDescriptorHeap[NonUniformResourceIndex(dst_uav_buffer_offset + thr)];
|
||||
dst.Store(0, input_value);
|
||||
}
|
||||
else if (thr % 4 == 1)
|
||||
{
|
||||
RWBuffer<uint> dst = ResourceDescriptorHeap[NonUniformResourceIndex(dst_uav_buffer_offset + thr)];
|
||||
dst[0] = input_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
RWStructuredBuffer<uint> dst = ResourceDescriptorHeap[NonUniformResourceIndex(dst_uav_buffer_offset + thr)];
|
||||
dst[0] = input_value;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x3a, 0x81, 0x6c, 0xe3, 0x19, 0x63, 0x28, 0xb7, 0x92, 0x22, 0xd1, 0x43, 0xf7, 0xac, 0x2d, 0x5a, 0x01, 0x00, 0x00, 0x00, 0x4c, 0x0a, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x60, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf1, 0x83, 0xbb, 0x3e,
|
||||
0xbd, 0xf9, 0x98, 0xb8, 0x80, 0xde, 0x7f, 0x96, 0x9b, 0x08, 0xdb, 0x6f, 0x44, 0x58, 0x49, 0x4c, 0x58, 0x09, 0x00, 0x00, 0x66, 0x00, 0x05, 0x00, 0x56, 0x02, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x06, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x4d, 0x02, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02,
|
||||
0x42, 0x92, 0x0b, 0x42, 0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90,
|
||||
0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
|
||||
0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04, 0x13, 0x23, 0xa4, 0x84,
|
||||
0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8c, 0x8c, 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0xa4, 0xc1, 0x08, 0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0xc3, 0x30, 0x10,
|
||||
0x31, 0x47, 0x80, 0x90, 0x71, 0xcf, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0x1f, 0x02, 0xcd, 0xb0, 0x10, 0x28, 0x38, 0xe6, 0x08, 0x82, 0x52, 0x20, 0xc3, 0x90, 0x24, 0xa4, 0x1c, 0x35, 0x5c, 0xfe,
|
||||
0x84, 0x3d, 0x84, 0xe4, 0x73, 0x1b, 0x55, 0xac, 0xc4, 0xe4, 0x23, 0xb7, 0x8d, 0x88, 0x61, 0x18, 0x86, 0x42, 0x30, 0x03, 0x32, 0x50, 0x33, 0x03, 0x70, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92,
|
||||
0xbf, 0x12, 0xd2, 0x4a, 0x4c, 0x7e, 0x71, 0xdb, 0xa8, 0x78, 0x9e, 0xe7, 0x19, 0x0a, 0x03, 0x0d, 0x08, 0xf2, 0x3c, 0xcf, 0x33, 0x0c, 0xc3, 0x43, 0x51, 0x51, 0xa0, 0x01, 0x19, 0x86, 0x61, 0x18,
|
||||
0x86, 0x81, 0xa6, 0x9b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x95, 0x90, 0x56, 0x62, 0xf2, 0x91, 0xdb, 0x46, 0xc5, 0x30, 0x0c, 0xc3, 0x50, 0x8e, 0x6a, 0x40, 0x86, 0xc1, 0x18, 0xc8, 0x2a,
|
||||
0x0b, 0x30, 0x20, 0xc3, 0x30, 0x0c, 0xc3, 0x60, 0x0c, 0x84, 0x15, 0x05, 0x18, 0x90, 0x61, 0x18, 0x86, 0x61, 0x30, 0x48, 0x2b, 0x45, 0x35, 0x20, 0xc3, 0x40, 0xdc, 0x6d, 0xc3, 0xe5, 0x4f, 0xd8,
|
||||
0x43, 0x48, 0xfe, 0x4a, 0x48, 0x0e, 0x15, 0x09, 0x44, 0x1a, 0x39, 0x0f, 0x11, 0x4d, 0x08, 0x21, 0x21, 0x61, 0x18, 0x0a, 0x81, 0x0c, 0x88, 0x47, 0xdf, 0x41, 0xc3, 0xe5, 0x4f, 0xd8, 0x43, 0x48,
|
||||
0xfe, 0x4a, 0x48, 0x1b, 0xd2, 0x0c, 0x88, 0x18, 0x86, 0xc1, 0x28, 0x05, 0x32, 0x84, 0x43, 0x42, 0xe2, 0x40, 0xc0, 0x31, 0x07, 0x01, 0x3c, 0xbf, 0x53, 0x14, 0x12, 0x31, 0x49, 0x88, 0x18, 0x86,
|
||||
0x61, 0xa0, 0x72, 0x8e, 0x00, 0x14, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50,
|
||||
0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
|
||||
0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07,
|
||||
0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0,
|
||||
0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x14, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x34,
|
||||
0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x89, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x33, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xc7, 0x02, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x06, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43,
|
||||
0x1e, 0x0d, 0x08, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x1c, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x3e, 0x20, 0x00, 0x02, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x88, 0x01, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x81, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
|
||||
0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0xa0, 0x18, 0x46, 0x00, 0x0a, 0xa4, 0x10, 0x0a, 0x30, 0x80, 0xce, 0x02, 0x04, 0x04, 0x04, 0xc4, 0x20, 0x73, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
|
||||
0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
|
||||
0x10, 0x04, 0x13, 0x84, 0x81, 0x99, 0x20, 0x0c, 0xcd, 0x06, 0x61, 0x20, 0x28, 0x8c, 0xcd, 0x6d, 0x18, 0x0c, 0x82, 0x98, 0x20, 0x0c, 0xce, 0x04, 0xc1, 0x0c, 0x26, 0x02, 0x13, 0x84, 0xe1, 0xd9,
|
||||
0x80, 0x20, 0x89, 0x82, 0x20, 0xc3, 0x02, 0x6c, 0x08, 0x98, 0x0d, 0x04, 0x00, 0x34, 0xc0, 0x04, 0x41, 0x00, 0x48, 0xb4, 0x85, 0xa5, 0xb9, 0x4d, 0x10, 0xce, 0x40, 0x9a, 0x20, 0x0c, 0xd0, 0x04,
|
||||
0x61, 0x88, 0x36, 0x0c, 0xd3, 0x30, 0x6c, 0x20, 0x90, 0x48, 0xa2, 0x36, 0x14, 0x0f, 0x04, 0x38, 0x55, 0x15, 0x36, 0x36, 0xbb, 0x36, 0x97, 0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85,
|
||||
0x0c, 0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b, 0x12, 0x10, 0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2,
|
||||
0xc8, 0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x46, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2, 0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2, 0xb9, 0x29, 0x41, 0x53, 0x87, 0x0c, 0xcf, 0xa5, 0xcc,
|
||||
0x8d, 0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd, 0x8d, 0x6e, 0x6e, 0x4a, 0x50, 0x01, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66,
|
||||
0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e,
|
||||
0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b,
|
||||
0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0,
|
||||
0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83,
|
||||
0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76,
|
||||
0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30,
|
||||
0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43,
|
||||
0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4,
|
||||
0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90,
|
||||
0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0xc3, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0xc0, 0x1d, 0xde, 0xc1, 0x1d, 0xea, 0xc1, 0x1d, 0xd2, 0xc1, 0x1c, 0xde, 0x41, 0x1e, 0xda, 0x61, 0x06, 0x13, 0x91, 0x03,
|
||||
0x3e, 0xb8, 0x81, 0x38, 0xc8, 0x43, 0x39, 0x84, 0xc3, 0x3a, 0xb8, 0x81, 0x38, 0xc8, 0x03, 0x00, 0x71, 0x20, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0xa6, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x3e, 0xd0,
|
||||
0x34, 0xce, 0x04, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0x58, 0x02, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x01, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0, 0x23, 0xb7, 0x6d, 0x08, 0xd5, 0x70, 0xf9, 0xce,
|
||||
0xe3, 0x07, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb7, 0x6d, 0x03, 0xdb, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x01, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0x50, 0x12, 0x06, 0x20, 0x60,
|
||||
0x3e, 0x72, 0xdb, 0xb6, 0xd0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0xc4, 0x17, 0x39, 0xcc, 0x86, 0x34, 0x03, 0xd2, 0x18, 0x26, 0xa0, 0x0d, 0x97, 0xef,
|
||||
0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0xc4, 0x17, 0x39, 0x4c, 0x48, 0x00, 0x8f, 0x19, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e,
|
||||
0x30, 0xf8, 0xc8, 0x6d, 0xdb, 0x01, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb7, 0x6d, 0x04, 0xd5, 0x70, 0xf9, 0xce, 0xe3, 0x4b,
|
||||
0x00, 0xf3, 0x2c, 0x44, 0x49, 0x54, 0xc4, 0xe2, 0x17, 0xb7, 0x6d, 0x05, 0xd5, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0xc4, 0x35, 0x51, 0x11, 0x51, 0x3a, 0xc0, 0xe0, 0x17, 0xb7, 0x6d, 0x01, 0xd2, 0x70,
|
||||
0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0xbb, 0x00, 0x00, 0x00, 0x13, 0x04, 0x50, 0x2c,
|
||||
0x10, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x34, 0xcc, 0x00, 0x14, 0xec, 0x40, 0xd1, 0x0e, 0x94, 0x5d, 0x69, 0x94, 0x24, 0x41, 0x59, 0x0a, 0x94, 0x22, 0x41, 0x61, 0x0a, 0x94, 0xec, 0x40,
|
||||
0xe9, 0x06, 0x14, 0x41, 0x71, 0x14, 0x5f, 0x11, 0x02, 0x15, 0x61, 0x40, 0x79, 0x94, 0x41, 0x59, 0x14, 0x45, 0x21, 0x06, 0x14, 0x46, 0x59, 0x02, 0x15, 0x25, 0x50, 0x29, 0x06, 0x14, 0x26, 0x10,
|
||||
0x19, 0x25, 0x50, 0x04, 0xe5, 0x41, 0x49, 0x0d, 0x8c, 0x00, 0xd0, 0x33, 0x46, 0x00, 0x82, 0x20, 0xa8, 0x7f, 0x33, 0x00, 0x23, 0x00, 0xe4, 0xcd, 0x21, 0x60, 0xcf, 0x1c, 0xc2, 0x97, 0xcd, 0x21,
|
||||
0x80, 0x81, 0x33, 0x87, 0x20, 0x06, 0xd9, 0x1c, 0x02, 0x06, 0xcd, 0x21, 0x98, 0x81, 0x33, 0x87, 0x70, 0x06, 0xdb, 0x1c, 0x42, 0x1a, 0x40, 0x73, 0x08, 0x6a, 0xe0, 0xcc, 0x21, 0xac, 0xc1, 0x36,
|
||||
0x87, 0xd0, 0x06, 0x10, 0x85, 0x73, 0x10, 0x8e, 0xe3, 0xbc, 0x01, 0x00, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x20, 0x06, 0x77, 0x50, 0x06, 0x01, 0x1d, 0x40, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
|
||||
0xf0, 0xe5, 0x81, 0x1b, 0x04, 0xcd, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x10, 0xa3, 0x60, 0x06, 0x76, 0x50, 0x41, 0x1e, 0xe8, 0x70, 0x43, 0x80, 0x07, 0x60, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08,
|
||||
0x06, 0x0d, 0x29, 0xc0, 0x01, 0x91, 0x07, 0xb3, 0x0c, 0x81, 0x20, 0x8c, 0x26, 0x04, 0x41, 0x05, 0x05, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x81, 0x82, 0x0a, 0x74, 0x10, 0x60, 0xd9, 0x88, 0xc1,
|
||||
0x01, 0x80, 0x20, 0x18, 0x7c, 0xa1, 0x60, 0x07, 0x01, 0x35, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x8a, 0x2a, 0xd8, 0x01, 0xb2, 0x6d, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0x8d, 0x02, 0x1e,
|
||||
0x04, 0xd5, 0x88, 0xc1, 0x03, 0x80, 0x20, 0x18, 0x44, 0xaa, 0xd0, 0x06, 0x43, 0xb0, 0x6d, 0x9a, 0x06, 0x0a, 0xa0, 0xa0, 0x07, 0xd9, 0x68, 0x42, 0x00, 0xcc, 0x12, 0x0c, 0xa3, 0x09, 0x09, 0x50,
|
||||
0x41, 0x03, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xa0, 0xc0, 0x02, 0x1f, 0x04, 0x60, 0x10, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xf0, 0xa5, 0x82, 0x1f, 0x04, 0xda, 0x88, 0x41, 0x03, 0x80,
|
||||
0x20, 0x18, 0x4c, 0xaf, 0x10, 0x07, 0x01, 0x28, 0x98, 0x82, 0x29, 0x80, 0x02, 0x28, 0x80, 0x02, 0x28, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0xc1, 0x30, 0x50, 0xf1, 0x38, 0x81, 0x20, 0x5c, 0x30, 0x4c,
|
||||
0x51, 0x74, 0xa0, 0xc3, 0x0d, 0xc1, 0x2a, 0x80, 0x41, 0x59, 0x75, 0xa0, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xd0, 0xe0, 0x02, 0x29, 0x60, 0xad, 0x30, 0x9a, 0x10, 0x08, 0x15, 0x64, 0x30, 0x62,
|
||||
0x80, 0x00, 0x20, 0x08, 0x06, 0x0a, 0x2f, 0xa0, 0x42, 0xc0, 0x06, 0x6d, 0x30, 0xcb, 0x40, 0x14, 0xc6, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xb5, 0xa0, 0x0a, 0x41, 0x19, 0x8c, 0x18, 0x1c,
|
||||
0x00, 0x08, 0x82, 0x41, 0xd3, 0x0b, 0xa9, 0x10, 0xc8, 0xc2, 0x68, 0x42, 0x00, 0xcc, 0x12, 0x28, 0xc3, 0x0d, 0x08, 0x2d, 0x80, 0xc1, 0x2c, 0x83, 0x71, 0x04, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60,
|
||||
0xf0, 0xe5, 0x82, 0x2b, 0x14, 0x68, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x96, 0x2f, 0xa8, 0x42, 0x60, 0x0b, 0xb0, 0x60, 0x07, 0xb4, 0x30, 0x9a, 0x10, 0x00, 0xb3, 0x04, 0xca, 0x70, 0x03,
|
||||
0xa3, 0x0b, 0x60, 0x30, 0xcb, 0x80, 0x24, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xbd, 0x20, 0x0b, 0xc9, 0x1a, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xf7, 0x0b, 0xa0, 0x10, 0xe8,
|
||||
0x02, 0x2d, 0x8c, 0x26, 0x04, 0xc0, 0x2c, 0x81, 0x32, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x1f, 0x38, 0xd4, 0x02, 0xd3, 0x06, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0x60, 0x95, 0x43, 0x2c, 0x04,
|
||||
0xbd, 0xd0, 0x0b, 0x7d, 0xb0, 0x0b, 0xa3, 0x09, 0x01, 0x30, 0x4b, 0xa0, 0x0c, 0x94, 0x0c, 0x7e, 0x61, 0x98, 0x41, 0x22, 0x1a, 0x88, 0x19, 0x10, 0x03, 0x25, 0x83, 0x64, 0x10, 0x89, 0x82, 0x68,
|
||||
0x44, 0x05, 0x18, 0x0c, 0x37, 0x0c, 0xe2, 0x00, 0x06, 0x23, 0x06, 0x07, 0x00, 0x82, 0x60, 0xd0, 0xb8, 0x83, 0x2e, 0xb8, 0xc1, 0x38, 0x8c, 0x26, 0x04, 0x43, 0x05, 0x6f, 0x00, 0x23, 0x06, 0x08,
|
||||
0x00, 0x82, 0x60, 0xa0, 0xc8, 0x83, 0x2f, 0x04, 0xa2, 0x30, 0x0a, 0xb3, 0x0c, 0x0b, 0x53, 0x8c, 0x18, 0x1c, 0x00, 0x08, 0x82, 0xc1, 0xb7, 0x0e, 0xe0, 0x10, 0xdc, 0xc1, 0x88, 0x81, 0x03, 0x80,
|
||||
0x20, 0x18, 0x60, 0xef, 0xa0, 0x0b, 0x01, 0x3a, 0x88, 0xc3, 0x21, 0x0e, 0xe2, 0x20, 0x0e, 0xa8, 0x60, 0x0e, 0xb3, 0x04, 0xcf, 0x70, 0x43, 0xa2, 0x0e, 0x60, 0x30, 0xcb, 0xd0, 0x38, 0xc1, 0x88,
|
||||
0xc1, 0x01, 0x80, 0x20, 0x18, 0x7c, 0xed, 0x20, 0x0e, 0x03, 0x1e, 0x8c, 0x18, 0x34, 0x00, 0x08, 0x82, 0x81, 0x06, 0x0f, 0xac, 0x10, 0xa8, 0x03, 0x39, 0x24, 0x49, 0x92, 0xa0, 0xc2, 0x2c, 0xc1,
|
||||
0x33, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x9f, 0x3b, 0x8c, 0x03, 0x81, 0x07, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0x80, 0xc9, 0x43, 0x2f, 0x04, 0xeb, 0xb0, 0x0e, 0x4a, 0x39, 0x94, 0x43, 0x39,
|
||||
0xac, 0x42, 0x3a, 0xcc, 0x12, 0x3c, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE cs_code_dxil = SHADER_BYTECODE(cs_code);
|
||||
uint32_t initial_buffer_data[ARRAY_SIZE(input_textures)];
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
device = context.device;
|
||||
command_list = context.list;
|
||||
descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
descriptor_size_sampler = ID3D12Device_GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
|
||||
|
||||
shader_model.HighestShaderModel = D3D_SHADER_MODEL_6_6;
|
||||
hr = ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_SHADER_MODEL,
|
||||
&shader_model, sizeof(shader_model));
|
||||
if (FAILED(hr) || shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6)
|
||||
{
|
||||
skip("Shader model 6.6 not supported by device.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
|
||||
root_signature_desc.Flags =
|
||||
D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED |
|
||||
D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED;
|
||||
root_signature_desc.pParameters = root_parameters;
|
||||
root_signature_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
root_parameters[0].Constants.ShaderRegister = 0;
|
||||
root_parameters[0].Constants.RegisterSpace = 0;
|
||||
root_parameters[0].Constants.Num32BitValues = 4;
|
||||
|
||||
hr = create_root_signature(device, &root_signature_desc, &root_signature);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
|
||||
pso = create_compute_pipeline_state(device, root_signature, cs_code_dxil);
|
||||
cpu_resource_heap = create_cpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
ARRAY_SIZE(input_textures) * root_parameters[0].Constants.Num32BitValues);
|
||||
resource_heap = create_gpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
ARRAY_SIZE(input_textures) * root_parameters[0].Constants.Num32BitValues);
|
||||
sampler_heap = create_gpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, ARRAY_SIZE(input_textures));
|
||||
|
||||
heaps[0] = resource_heap;
|
||||
heaps[1] = sampler_heap;
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, ARRAY_SIZE(heaps), heaps);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_textures); i++)
|
||||
{
|
||||
input_textures[i] = create_default_texture2d(device, 2, 2, 1, 1,
|
||||
DXGI_FORMAT_R32_FLOAT,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
memset(&uav_desc, 0, sizeof(uav_desc));
|
||||
uav_desc.Format = DXGI_FORMAT_R32_FLOAT;
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
|
||||
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(resource_heap);
|
||||
cpu_handle.ptr += i * descriptor_size;
|
||||
ID3D12Device_CreateUnorderedAccessView(device, input_textures[i], NULL, &uav_desc, cpu_handle);
|
||||
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_resource_heap);
|
||||
cpu_handle.ptr += i * descriptor_size;
|
||||
ID3D12Device_CreateUnorderedAccessView(device, input_textures[i], NULL, &uav_desc, cpu_handle);
|
||||
|
||||
gpu_handle = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(resource_heap);
|
||||
gpu_handle.ptr += i * descriptor_size;
|
||||
ID3D12Device_CreateUnorderedAccessView(device, input_textures[i], NULL, &uav_desc, cpu_handle);
|
||||
|
||||
memset(clear_values, 0, sizeof(clear_values));
|
||||
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
for (x = 0; x < 2; x++)
|
||||
{
|
||||
clear_values[0] = i + 1 + 1000 * (y * 2 + x);
|
||||
set_rect(&rect, x, y, x + 1, y + 1);
|
||||
ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(command_list, gpu_handle, cpu_handle,
|
||||
input_textures[i], clear_values, 1, &rect);
|
||||
}
|
||||
}
|
||||
|
||||
memset(&srv_desc, 0, sizeof(srv_desc));
|
||||
srv_desc.Format = DXGI_FORMAT_R32_FLOAT;
|
||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
||||
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
srv_desc.Texture2D.MipLevels = 1;
|
||||
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(resource_heap);
|
||||
cpu_handle.ptr += (i + 1 * ARRAY_SIZE(input_textures)) * descriptor_size;
|
||||
ID3D12Device_CreateShaderResourceView(device, input_textures[i], &srv_desc, cpu_handle);
|
||||
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
transition_resource_state(command_list, input_textures[i],
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
|
||||
|
||||
memset(&sampler_desc, 0, sizeof(sampler_desc));
|
||||
sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
|
||||
sampler_desc.AddressU = i % 4 == 0 ? D3D12_TEXTURE_ADDRESS_MODE_CLAMP : D3D12_TEXTURE_ADDRESS_MODE_WRAP;
|
||||
sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
||||
sampler_desc.AddressW = sampler_desc.AddressU;
|
||||
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(sampler_heap);
|
||||
cpu_handle.ptr += i * descriptor_size_sampler;
|
||||
ID3D12Device_CreateSampler(device, &sampler_desc, cpu_handle);
|
||||
}
|
||||
else
|
||||
uav_barrier(command_list, input_textures[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_textures); i++)
|
||||
initial_buffer_data[i] = i + 10000;
|
||||
input_buffer = create_upload_buffer(device, sizeof(initial_buffer_data), initial_buffer_data);
|
||||
|
||||
output_buffer = create_default_buffer(device, ARRAY_SIZE(input_textures) * sizeof(uint32_t),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_textures); i++)
|
||||
{
|
||||
memset(&srv_desc, 0, sizeof(srv_desc));
|
||||
|
||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(resource_heap);
|
||||
cpu_handle.ptr += (i + 2 * ARRAY_SIZE(input_textures)) * descriptor_size;
|
||||
|
||||
if (i % 16 == 0)
|
||||
{
|
||||
memset(&cbv_desc, 0, sizeof(cbv_desc));
|
||||
cbv_desc.SizeInBytes = 256;
|
||||
cbv_desc.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(input_buffer) + (i % 32 == 0 ? 256 : 0);
|
||||
ID3D12Device_CreateConstantBufferView(device, &cbv_desc, cpu_handle);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i % 4 == 0)
|
||||
{
|
||||
srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
srv_desc.Buffer.NumElements = 4;
|
||||
srv_desc.Buffer.FirstElement = i;
|
||||
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
|
||||
}
|
||||
else if (i % 4 == 1)
|
||||
{
|
||||
srv_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
srv_desc.Buffer.NumElements = 1;
|
||||
srv_desc.Buffer.FirstElement = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
srv_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srv_desc.Buffer.StructureByteStride = 4;
|
||||
srv_desc.Buffer.FirstElement = i;
|
||||
srv_desc.Buffer.NumElements = 1;
|
||||
}
|
||||
|
||||
ID3D12Device_CreateShaderResourceView(device, input_buffer, &srv_desc, cpu_handle);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_textures); i++)
|
||||
{
|
||||
memset(&uav_desc, 0, sizeof(uav_desc));
|
||||
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
if (i % 4 == 0)
|
||||
{
|
||||
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uav_desc.Buffer.NumElements = 4;
|
||||
uav_desc.Buffer.FirstElement = i;
|
||||
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
}
|
||||
else if (i % 4 == 1)
|
||||
{
|
||||
uav_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
uav_desc.Buffer.NumElements = 1;
|
||||
uav_desc.Buffer.FirstElement = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
uav_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
uav_desc.Buffer.StructureByteStride = 4;
|
||||
uav_desc.Buffer.FirstElement = i;
|
||||
uav_desc.Buffer.NumElements = 1;
|
||||
}
|
||||
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(resource_heap);
|
||||
cpu_handle.ptr += (i + 3 * ARRAY_SIZE(input_textures)) * descriptor_size;
|
||||
ID3D12Device_CreateUnorderedAccessView(device, output_buffer, NULL, &uav_desc, cpu_handle);
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, pso);
|
||||
for (i = 0; i < root_parameters[0].Constants.Num32BitValues; i++)
|
||||
ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(command_list, 0, i * ARRAY_SIZE(input_textures), i);
|
||||
ID3D12GraphicsCommandList_Dispatch(command_list, ARRAY_SIZE(input_textures) / 64, 1, 1);
|
||||
|
||||
transition_resource_state(command_list, output_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(output_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, command_list);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_textures); i++)
|
||||
{
|
||||
UINT value, expected;
|
||||
value = get_readback_uint(&rb, i, 0, 0);
|
||||
expected = 0;
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
expected += i + 1; /* SRV texture reads. */
|
||||
if (i % 4 == 0)
|
||||
expected += 3000; /* CLAMP sampler used, we'll sample pixel (1, 1). */
|
||||
else
|
||||
expected += 2000; /* WRAP, CLAMP used, sample pixel (0, 1). */
|
||||
}
|
||||
else
|
||||
{
|
||||
expected += i + 1; /* UAV texture reads. */
|
||||
}
|
||||
|
||||
if (i % 16 == 0)
|
||||
{
|
||||
/* CBV reads. */
|
||||
if (i % 32 == 0)
|
||||
expected += 64 + 10000;
|
||||
else
|
||||
expected += 10000;
|
||||
}
|
||||
else
|
||||
expected += i + 10000; /* Buffer reads. */
|
||||
ok(expected == value, "Value %u mismatch, expected %u, got %u.\n", i, expected, value);
|
||||
}
|
||||
|
||||
release_resource_readback(&rb);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_textures); i++)
|
||||
ID3D12Resource_Release(input_textures[i]);
|
||||
ID3D12Resource_Release(input_buffer);
|
||||
ID3D12Resource_Release(output_buffer);
|
||||
ID3D12DescriptorHeap_Release(cpu_resource_heap);
|
||||
ID3D12DescriptorHeap_Release(resource_heap);
|
||||
ID3D12DescriptorHeap_Release(sampler_heap);
|
||||
ID3D12RootSignature_Release(root_signature);
|
||||
ID3D12PipelineState_Release(pso);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
static void test_bindless_srv(bool use_dxil)
|
||||
{
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
|
|
|
@ -141,7 +141,7 @@ void test_clear_depth_stencil_view(void)
|
|||
void test_clear_render_target_view(void)
|
||||
{
|
||||
static const unsigned int array_expected_colors[] = {0xff00ff00, 0xff0000ff, 0xffff0000};
|
||||
static const struct vec4 array_colors[] =
|
||||
static const float array_colors[][4] =
|
||||
{
|
||||
{0.0f, 1.0f, 0.0f, 1.0f},
|
||||
{1.0f, 0.0f, 0.0f, 1.0f},
|
||||
|
@ -324,8 +324,7 @@ void test_clear_render_target_view(void)
|
|||
rtv_desc.Texture2DArray.ArraySize = 1;
|
||||
|
||||
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, &array_colors[i].x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, resource,
|
||||
|
@ -355,8 +354,7 @@ void test_clear_render_target_view(void)
|
|||
rtv_desc.Texture2DMSArray.ArraySize = 1;
|
||||
|
||||
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, &array_colors[i].x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, resource,
|
||||
|
@ -634,6 +632,7 @@ void test_clear_unordered_access_view_buffer(void)
|
|||
|
||||
void test_clear_unordered_access_view_image(void)
|
||||
{
|
||||
D3D12_FEATURE_DATA_FORMAT_SUPPORT format_support;
|
||||
unsigned int expected_colour, actual_colour;
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
ID3D12DescriptorHeap *cpu_heap, *gpu_heap;
|
||||
|
@ -703,6 +702,7 @@ void test_clear_unordered_access_view_image(void)
|
|||
{DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x04030201},
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {{0}}, {0x123, 0, 0, 0}, 0x00000023},
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x04030201},
|
||||
{DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}}, {0, 0, 0, 0}, 0x00000000},
|
||||
{DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x00c01001},
|
||||
/* Test float clears with formats. */
|
||||
{DXGI_FORMAT_R16G16_UNORM, 1, 1, 0, 0, 1, 0, {{0}},
|
||||
|
@ -716,6 +716,9 @@ void test_clear_unordered_access_view_image(void)
|
|||
{DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}},
|
||||
{0x3f000000 /* 1.0f */, 0 /* 0.0f */, 0xbf800000 /* -1.0f */, 0x3f000000 /* 1.0f */},
|
||||
0x00000380, true},
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM, 1, 1, 0, 0, 1, 0, {{0}},
|
||||
{0, 0, 0x3f000080 /* 0.5f + epsilon */, 0x3f800000 /* 1.0f */}, 0xff000080, true},
|
||||
{DXGI_FORMAT_B8G8R8A8_UNORM, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x04010203},
|
||||
};
|
||||
|
||||
static const struct
|
||||
|
@ -755,6 +758,17 @@ void test_clear_unordered_access_view_image(void)
|
|||
if (tests[i].image_layers > 1 && !uav_dimensions[d].is_layered)
|
||||
continue;
|
||||
|
||||
memset(&format_support, 0, sizeof(format_support));
|
||||
format_support.Format = tests[i].format;
|
||||
|
||||
if (FAILED(hr = ID3D12Device_CheckFeatureSupport(device,
|
||||
D3D12_FEATURE_FORMAT_SUPPORT, &format_support, sizeof(format_support))) ||
|
||||
!(format_support.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW))
|
||||
{
|
||||
skip("Format %u not supported.\n", tests[i].format);
|
||||
continue;
|
||||
}
|
||||
|
||||
resource_desc.Dimension = uav_dimensions[d].resource_dim;
|
||||
resource_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
resource_desc.Width = IMAGE_SIZE;
|
||||
|
|
|
@ -1159,8 +1159,8 @@ void test_bundle_state_inheritance(void)
|
|||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
|
||||
|
||||
ID3D12CommandAllocator_Release(bundle_allocator);
|
||||
ID3D12GraphicsCommandList_Release(bundle);
|
||||
ID3D12CommandAllocator_Release(bundle_allocator);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
|
@ -1449,6 +1449,721 @@ void test_vbv_stride_edge_cases(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_execute_indirect_state(void)
|
||||
{
|
||||
static const struct vec4 values = { 1000.0f, 2000.0f, 3000.0f, 4000.0f };
|
||||
D3D12_INDIRECT_ARGUMENT_DESC indirect_argument_descs[2];
|
||||
D3D12_COMMAND_SIGNATURE_DESC command_signature_desc;
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12CommandSignature *command_signature;
|
||||
D3D12_SO_DECLARATION_ENTRY so_entries[1];
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_ROOT_PARAMETER root_parameters[4];
|
||||
ID3D12RootSignature *root_signatures[2];
|
||||
ID3D12Resource *argument_buffer_late;
|
||||
D3D12_STREAM_OUTPUT_BUFFER_VIEW sov;
|
||||
ID3D12Resource *streamout_buffer;
|
||||
D3D12_VERTEX_BUFFER_VIEW vbvs[2];
|
||||
ID3D12Resource *argument_buffer;
|
||||
struct test_context_desc desc;
|
||||
ID3D12PipelineState *psos[2];
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
D3D12_INDEX_BUFFER_VIEW ibv;
|
||||
ID3D12CommandQueue *queue;
|
||||
const UINT so_stride = 16;
|
||||
ID3D12Resource *vbo[3];
|
||||
ID3D12Resource *ibo[2];
|
||||
unsigned int i, j, k;
|
||||
ID3D12Resource *cbv;
|
||||
ID3D12Resource *srv;
|
||||
ID3D12Resource *uav;
|
||||
HRESULT hr;
|
||||
|
||||
static const D3D12_INPUT_ELEMENT_DESC layout_desc[] =
|
||||
{
|
||||
{"COLOR", 0, DXGI_FORMAT_R32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
/* One entry of the tests[] table. DECL_TEST(name, pso_index, needs_root_sig)
 * fills the fields in declaration order:
 *   indirect_arguments      - the name_sig array of D3D12_INDIRECT_ARGUMENT_DESC.
 *   indirect_argument_count - ARRAY_SIZE(name_sig).
 *   argument_buffer_data    - the name_data array.
 *   argument_buffer_size    - sizeof(name_data), i.e. the whole array in bytes.
 *   api_max_count           - ARRAY_SIZE(name_data), i.e. the number of elements.
 *   expected_output         - the name_expected array of vec4 values.
 *   expected_output_count   - ARRAY_SIZE(name_expected).
 *   stride                  - sizeof(*name_data), the size of one name_data element.
 *   pso_index, needs_root_sig - passed through unchanged from the macro arguments;
 *                               presumably index psos[] / gate root signature binding.
 *                               TODO confirm against the loop that consumes tests[]. */
struct test
|
||||
{
|
||||
const D3D12_INDIRECT_ARGUMENT_DESC *indirect_arguments;
|
||||
uint32_t indirect_argument_count;
|
||||
const void *argument_buffer_data;
|
||||
size_t argument_buffer_size;
|
||||
uint32_t api_max_count;
|
||||
const struct vec4 *expected_output;
|
||||
uint32_t expected_output_count;
|
||||
uint32_t stride;
|
||||
uint32_t pso_index;
|
||||
bool needs_root_sig;
|
||||
};
|
||||
|
||||
/* Modify root parameters. */
|
||||
struct root_constant_data
|
||||
{
|
||||
float constants[2];
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 2 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
|
||||
};
|
||||
|
||||
static const struct root_constant_data root_constant_data[] =
|
||||
{
|
||||
{
|
||||
.constants = { 100.0f, 500.0f },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.constants = { 200.0f, 800.0f },
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
|
||||
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 root_constant_expected[] =
|
||||
{
|
||||
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
|
||||
};
|
||||
|
||||
/* Modify root parameters, but with a very large root signature to test boundary conditions. */
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_spill_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 44 + 1, .Num32BitValuesToSet = 2 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
|
||||
};
|
||||
|
||||
static const struct root_constant_data root_constant_spill_data[] =
|
||||
{
|
||||
{
|
||||
.constants = { 100.0f, 500.0f },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.constants = { 200.0f, 800.0f },
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
|
||||
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 root_constant_spill_expected[] =
|
||||
{
|
||||
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
|
||||
};
|
||||
|
||||
/* Modify VBOs. */
|
||||
struct indirect_vbo_data
|
||||
{
|
||||
D3D12_VERTEX_BUFFER_VIEW view[2];
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_sig[3] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
/* Fill buffer locations later. */
|
||||
struct indirect_vbo_data indirect_vbo_data[] =
|
||||
{
|
||||
{
|
||||
.view = { { 0, 64, 8 }, { 0, 64, 16 } },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 2 }
|
||||
},
|
||||
{
|
||||
/* Test indirectly binding NULL descriptor and 0 stride. */
|
||||
.view = { { 0, 0, 0 }, { 0, 64, 0 } },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
}
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_vbo_expected[] =
|
||||
{
|
||||
{ 1064.0f, 2128.0f, 3000.0f, 4000.0f },
|
||||
{ 1066.0f, 2132.0f, 3000.0f, 4000.0f },
|
||||
{ 1064.0f, 2128.0f, 3000.0f, 4001.0f },
|
||||
{ 1066.0f, 2132.0f, 3000.0f, 4001.0f },
|
||||
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. We seem to get null descriptor instead. */
|
||||
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. */
|
||||
};
|
||||
|
||||
/* Modify just one VBO. */
|
||||
struct indirect_vbo_one_data
|
||||
{
|
||||
D3D12_VERTEX_BUFFER_VIEW view;
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_one_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
/* Fill buffer locations later. */
|
||||
struct indirect_vbo_one_data indirect_vbo_one_data[] =
|
||||
{
|
||||
{
|
||||
.view = { 0, 64, 8 },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
}
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_vbo_one_expected[] =
|
||||
{
|
||||
{ 1128.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1130.0f, 2065.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* Indirect IBO */
|
||||
struct indirect_ibo_data
|
||||
{
|
||||
D3D12_INDEX_BUFFER_VIEW view;
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_ibo_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
struct indirect_ibo_data indirect_ibo_data[] =
|
||||
{
|
||||
{
|
||||
.view = { 0, 0, DXGI_FORMAT_R32_UINT },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.view = { 0, 64, DXGI_FORMAT_R16_UINT },
|
||||
.indexed = { .IndexCountPerInstance = 4, .InstanceCount = 1 }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_ibo_expected[] =
|
||||
{
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1016.0f, 2080.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1017.0f, 2081.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* Indirect root arguments */
|
||||
struct indirect_root_descriptor_data
|
||||
{
|
||||
D3D12_GPU_VIRTUAL_ADDRESS cbv;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS srv;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS uav;
|
||||
D3D12_DRAW_ARGUMENTS array;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_root_descriptor_sig[4] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW, .ShaderResourceView = { .RootParameterIndex = 2 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, .UnorderedAccessView = { .RootParameterIndex = 3 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
|
||||
};
|
||||
|
||||
struct indirect_root_descriptor_data indirect_root_descriptor_data[] =
|
||||
{
|
||||
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
|
||||
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_root_descriptor_expected[] =
|
||||
{
|
||||
{ 1000.0f, 2064.0f, 3000.0f + 64.0f, 4000.0f + 2.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f + 128.0f, 4000.0f + 3.0f },
|
||||
};
|
||||
|
||||
/* Test packing rules.
|
||||
* 64-bit aligned values are tightly packed with 32-bit alignment when they are in indirect command buffers. */
|
||||
struct indirect_alignment_data
|
||||
{
|
||||
float value;
|
||||
uint32_t cbv_va[2];
|
||||
D3D12_DRAW_ARGUMENTS arrays;
|
||||
};
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_alignment_sig[3] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
|
||||
};
|
||||
|
||||
struct indirect_alignment_data indirect_alignment_data[] =
|
||||
{
|
||||
{
|
||||
.value = 5.0f,
|
||||
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.value = 6.0f,
|
||||
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_alignment_expected[] =
|
||||
{
|
||||
{ 1000.0f, 69.0f, 3064.0f, 4000.0f },
|
||||
{ 1000.0f, 70.0f, 3128.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* DECL_TEST(t, pso_index, needs_root_sig) expands to a struct test initializer.
 * It relies on a naming convention: for a test named t, the arrays t_sig, t_data
 * and t_expected must all be declared above. sizeof(t_data) and ARRAY_SIZE(t_data)
 * are taken on the array itself, and sizeof(*(t_data)) gives the size of one element.
 * The macro is only needed to build tests[] and is undefined right after. */
#define DECL_TEST(t, pso_index, needs_root_sig) { t##_sig, ARRAY_SIZE(t##_sig), t##_data, sizeof(t##_data), ARRAY_SIZE(t##_data), \
|
||||
t##_expected, ARRAY_SIZE(t##_expected), sizeof(*(t##_data)), pso_index, needs_root_sig }
|
||||
/* The first six entries use pso_index 0; the last two use pso_index 1.
 * indirect_root_descriptor is listed once with each pso_index. */
const struct test tests[] =
|
||||
{
|
||||
DECL_TEST(root_constant, 0, true),
|
||||
DECL_TEST(indirect_vbo, 0, false),
|
||||
DECL_TEST(indirect_vbo_one, 0, false),
|
||||
DECL_TEST(indirect_ibo, 0, false),
|
||||
DECL_TEST(indirect_root_descriptor, 0, true),
|
||||
DECL_TEST(indirect_alignment, 0, true),
|
||||
DECL_TEST(root_constant_spill, 1, true),
|
||||
DECL_TEST(indirect_root_descriptor, 1, true),
|
||||
};
|
||||
#undef DECL_TEST
|
||||
|
||||
uint32_t ibo_data[ARRAY_SIZE(ibo)][64];
|
||||
float vbo_data[ARRAY_SIZE(vbo)][64];
|
||||
float generic_data[4096];
|
||||
|
||||
static const DWORD vs_code_small_cbv[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer RootCBV : register(b0)
|
||||
{
|
||||
float a;
|
||||
};
|
||||
|
||||
StructuredBuffer<float> RootSRV : register(t0);
|
||||
|
||||
cbuffer RootConstants : register(b0, space1)
|
||||
{
|
||||
float4 root;
|
||||
};
|
||||
|
||||
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
|
||||
{
|
||||
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x33b7b302, 0x34259b9b, 0x3e8568d9, 0x5a5e0c3e, 0x00000001, 0x00000268, 0x00000003,
|
||||
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
|
||||
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
|
||||
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
|
||||
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
|
||||
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x070000a2, 0x00307e46,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
|
||||
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
|
||||
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
|
||||
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x0030800a, 0x00000001, 0x00000000, 0x00000000, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
|
||||
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x00000000, 0x0b000000, 0x00102042, 0x00000000,
|
||||
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x00000000,
|
||||
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
|
||||
0x00000000, 0x0100003e,
|
||||
};
|
||||
|
||||
static const DWORD vs_code_large_cbv[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer RootCBV : register(b0)
|
||||
{
|
||||
float a;
|
||||
};
|
||||
|
||||
StructuredBuffer<float> RootSRV : register(t0);
|
||||
|
||||
cbuffer RootConstants : register(b0, space1)
|
||||
{
|
||||
// Cannot use arrays for root constants in D3D12.
|
||||
float4 pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
|
||||
float4 root;
|
||||
};
|
||||
|
||||
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
|
||||
{
|
||||
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x99a057e8, 0x20344569, 0x434f8a7a, 0xf9171e08, 0x00000001, 0x00000268, 0x00000003,
|
||||
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
|
||||
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
|
||||
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
|
||||
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
|
||||
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x0000000c, 0x00000001, 0x070000a2, 0x00307e46,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
|
||||
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
|
||||
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
|
||||
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x0030800a, 0x00000001, 0x00000000, 0x0000000b, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
|
||||
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x0000000b, 0x0b000000, 0x00102042, 0x00000000,
|
||||
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x0000000b,
|
||||
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
|
||||
0x0000000b, 0x0100003e,
|
||||
};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_root_signature = true;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(ibo); j++)
|
||||
for (i = 0; i < ARRAY_SIZE(ibo_data[j]); i++)
|
||||
ibo_data[j][i] = j * 16 + i;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(vbo); j++)
|
||||
for (i = 0; i < ARRAY_SIZE(vbo_data[j]); i++)
|
||||
vbo_data[j][i] = (float)(j * ARRAY_SIZE(vbo_data[j]) + i);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(generic_data); i++)
|
||||
generic_data[i] = (float)i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ibo); i++)
|
||||
ibo[i] = create_upload_buffer(context.device, sizeof(ibo_data[i]), ibo_data[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(vbo); i++)
|
||||
vbo[i] = create_upload_buffer(context.device, sizeof(vbo_data[i]), vbo_data[i]);
|
||||
cbv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
|
||||
srv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
|
||||
uav = create_default_buffer(context.device, sizeof(generic_data),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
indirect_vbo_data[0].view[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
|
||||
indirect_vbo_data[0].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
|
||||
indirect_vbo_data[1].view[0].BufferLocation = 0;
|
||||
indirect_vbo_data[1].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]) + 64;
|
||||
|
||||
indirect_vbo_one_data[0].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
|
||||
indirect_vbo_one_data[1].view.BufferLocation = 0;
|
||||
|
||||
indirect_ibo_data[1].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[1]);
|
||||
|
||||
indirect_root_descriptor_data[0].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 256;
|
||||
indirect_root_descriptor_data[0].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 8;
|
||||
indirect_root_descriptor_data[0].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 4;
|
||||
indirect_root_descriptor_data[1].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 512;
|
||||
indirect_root_descriptor_data[1].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 12;
|
||||
indirect_root_descriptor_data[1].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 8;
|
||||
|
||||
memcpy(indirect_alignment_data[0].cbv_va, &indirect_root_descriptor_data[0].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
|
||||
memcpy(indirect_alignment_data[1].cbv_va, &indirect_root_descriptor_data[1].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
|
||||
|
||||
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
|
||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
|
||||
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
|
||||
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
root_signature_desc.pParameters = root_parameters;
|
||||
root_signature_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
root_parameters[0].Constants.RegisterSpace = 1;
|
||||
root_parameters[0].Constants.Num32BitValues = 4;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
root_parameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[0]);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
root_parameters[0].Constants.Num32BitValues = 48;
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[1]);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
|
||||
memset(so_entries, 0, sizeof(so_entries));
|
||||
so_entries[0].ComponentCount = 4;
|
||||
so_entries[0].SemanticName = "SV_Position";
|
||||
|
||||
memset(&pso_desc, 0, sizeof(pso_desc));
|
||||
pso_desc.VS.pShaderBytecode = vs_code_small_cbv;
|
||||
pso_desc.VS.BytecodeLength = sizeof(vs_code_small_cbv);
|
||||
pso_desc.StreamOutput.NumStrides = 1;
|
||||
pso_desc.StreamOutput.pBufferStrides = &so_stride;
|
||||
pso_desc.StreamOutput.pSODeclaration = so_entries;
|
||||
pso_desc.StreamOutput.NumEntries = ARRAY_SIZE(so_entries);
|
||||
pso_desc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;
|
||||
pso_desc.pRootSignature = root_signatures[0];
|
||||
pso_desc.SampleDesc.Count = 1;
|
||||
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
|
||||
pso_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc);
|
||||
pso_desc.InputLayout.pInputElementDescs = layout_desc;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[0]);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
pso_desc.VS.pShaderBytecode = vs_code_large_cbv;
|
||||
pso_desc.VS.BytecodeLength = sizeof(vs_code_large_cbv);
|
||||
pso_desc.pRootSignature = root_signatures[1];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[1]);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
|
||||
/* Verify sanity checks.
|
||||
* As per validation layers, there must be exactly one command in the signature.
|
||||
* It must come last. Verify that we check for this. */
|
||||
memset(&command_signature_desc, 0, sizeof(command_signature_desc));
|
||||
command_signature_desc.NumArgumentDescs = 1;
|
||||
command_signature_desc.pArgumentDescs = indirect_argument_descs;
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_VERTEX_BUFFER_VIEW);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
command_signature_desc.NumArgumentDescs = 2;
|
||||
command_signature_desc.pArgumentDescs = indirect_argument_descs;
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_VERTEX_BUFFER_VIEW);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
struct vec4 expect_reset_state[2];
|
||||
const struct vec4 *expect, *v;
|
||||
uint32_t expected_output_size;
|
||||
uint32_t clear_vbo_mask;
|
||||
bool root_cbv;
|
||||
uint32_t size;
|
||||
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
command_signature_desc.ByteStride = tests[i].stride;
|
||||
command_signature_desc.pArgumentDescs = tests[i].indirect_arguments;
|
||||
command_signature_desc.NumArgumentDescs = tests[i].indirect_argument_count;
|
||||
command_signature_desc.NodeMask = 0;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc,
|
||||
tests[i].needs_root_sig ? root_signatures[tests[i].pso_index] : NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
|
||||
/* Updating root CBV requires push BDA path, which we don't enable on NV by default yet. */
|
||||
root_cbv = false;
|
||||
for (j = 0; j < tests[i].indirect_argument_count; j++)
|
||||
{
|
||||
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW)
|
||||
{
|
||||
root_cbv = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
if (root_cbv && is_nvidia_device(context.device))
|
||||
skip("Creating indirect root CBV update failed. If the GPU is NVIDIA, try VKD3D_CONFIG=force_raw_va_cbv.\n");
|
||||
else
|
||||
skip("Failed creating command signature, skipping test.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
argument_buffer = create_upload_buffer(context.device, 256 * 1024, NULL);
|
||||
argument_buffer_late = create_default_buffer(context.device, 256 * 1024,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
|
||||
#define UNALIGNED_ARGUMENT_BUFFER_OFFSET (64 * 1024 + 4)
|
||||
#define UNALIGNED_COUNT_BUFFER_OFFSET (128 * 1024 + 4)
|
||||
#define ALIGNED_COUNT_BUFFER_OFFSET (128 * 1024 + 4 * 1024)
|
||||
{
|
||||
uint8_t *ptr;
|
||||
ID3D12Resource_Map(argument_buffer, 0, NULL, (void**)&ptr);
|
||||
memcpy(ptr, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
|
||||
memcpy(ptr + UNALIGNED_ARGUMENT_BUFFER_OFFSET, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
|
||||
memcpy(ptr + UNALIGNED_COUNT_BUFFER_OFFSET, &tests[i].api_max_count, sizeof(tests[i].api_max_count));
|
||||
memcpy(ptr + ALIGNED_COUNT_BUFFER_OFFSET, &tests[i].api_max_count, sizeof(tests[i].api_max_count));
|
||||
ID3D12Resource_Unmap(argument_buffer, 0, NULL);
|
||||
}
|
||||
|
||||
streamout_buffer = create_default_buffer(context.device, 64 * 1024,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, root_signatures[tests[i].pso_index]);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, psos[tests[i].pso_index]);
|
||||
sov.SizeInBytes = 64 * 1024 - sizeof(struct vec4);
|
||||
sov.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer) + sizeof(struct vec4);
|
||||
sov.BufferFilledSizeLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer);
|
||||
ID3D12GraphicsCommandList_SOSetTargets(command_list, 0, 1, &sov);
|
||||
|
||||
/* Set up default rendering state. */
|
||||
ibv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[0]);
|
||||
ibv.SizeInBytes = sizeof(ibo_data[0]);
|
||||
ibv.Format = DXGI_FORMAT_R32_UINT;
|
||||
vbvs[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]);
|
||||
vbvs[0].SizeInBytes = sizeof(vbo_data[0]);
|
||||
vbvs[0].StrideInBytes = 4;
|
||||
vbvs[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
|
||||
vbvs[1].SizeInBytes = sizeof(vbo_data[1]);
|
||||
vbvs[1].StrideInBytes = 4;
|
||||
|
||||
ID3D12GraphicsCommandList_IASetIndexBuffer(command_list, &ibv);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_POINTLIST);
|
||||
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, 2, vbvs);
|
||||
|
||||
for (j = 0; j < (tests[i].pso_index ? 12 : 1); j++)
|
||||
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &values, 4 * j);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
|
||||
ID3D12Resource_GetGPUVirtualAddress(srv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav));
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, tests[i].api_max_count,
|
||||
argument_buffer, 0, NULL, 0);
|
||||
/* Test equivalent call with indirect count. */
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
|
||||
argument_buffer, UNALIGNED_ARGUMENT_BUFFER_OFFSET,
|
||||
argument_buffer, UNALIGNED_COUNT_BUFFER_OFFSET);
|
||||
/* Test equivalent, but now with late transition to INDIRECT. */
|
||||
ID3D12GraphicsCommandList_CopyResource(command_list, argument_buffer_late, argument_buffer);
|
||||
transition_resource_state(command_list, argument_buffer_late, D3D12_RESOURCE_STATE_COPY_DEST,
|
||||
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
|
||||
argument_buffer_late, 0, argument_buffer_late, ALIGNED_COUNT_BUFFER_OFFSET);
|
||||
|
||||
/* Root descriptors which are part of the state block are cleared to NULL. Recover them here
|
||||
* since attempting to draw next test will crash GPU. */
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
|
||||
ID3D12Resource_GetGPUVirtualAddress(srv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav));
|
||||
|
||||
/* Other state is cleared to 0. */
|
||||
|
||||
ID3D12GraphicsCommandList_DrawInstanced(command_list, 2, 1, 0, 0);
|
||||
transition_resource_state(command_list, streamout_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(streamout_buffer, DXGI_FORMAT_R32G32B32A32_FLOAT, &rb, queue, command_list);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
expected_output_size = (tests[i].expected_output_count * 3 + 2) * sizeof(struct vec4);
|
||||
size = get_readback_uint(&rb, 0, 0, 0);
|
||||
ok(size == expected_output_size, "Expected size %u, got %u.\n", expected_output_size, size);
|
||||
|
||||
for (j = 0; j < tests[i].expected_output_count; j++)
|
||||
{
|
||||
expect = &tests[i].expected_output[j];
|
||||
v = get_readback_vec4(&rb, j + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (direct count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
|
||||
v = get_readback_vec4(&rb, j + tests[i].expected_output_count + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (indirect count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
|
||||
v = get_readback_vec4(&rb, j + 2 * tests[i].expected_output_count + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (late latch) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
}
|
||||
|
||||
clear_vbo_mask = 0;
|
||||
expect_reset_state[0] = values;
|
||||
|
||||
/* Root constant state is cleared to zero if it's part of the signature. */
|
||||
for (j = 0; j < tests[i].indirect_argument_count; j++)
|
||||
{
|
||||
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT)
|
||||
{
|
||||
for (k = 0; k < tests[i].indirect_arguments[j].Constant.Num32BitValuesToSet; k++)
|
||||
(&expect_reset_state[0].x)[(tests[i].indirect_arguments[j].Constant.DestOffsetIn32BitValues + k) % 4] = 0.0f;
|
||||
}
|
||||
else if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW)
|
||||
clear_vbo_mask |= 1u << tests[i].indirect_arguments[j].VertexBuffer.Slot;
|
||||
}
|
||||
|
||||
expect_reset_state[1] = expect_reset_state[0];
|
||||
|
||||
/* VBO/IBO state is cleared to zero if it's part of the signature.
|
||||
* A NULL IBO should be seen as a IBO which only reads 0 index. */
|
||||
if (!(clear_vbo_mask & (1u << 0)))
|
||||
expect_reset_state[1].x += 1.0f;
|
||||
|
||||
if (!(clear_vbo_mask & (1u << 1)))
|
||||
{
|
||||
expect_reset_state[0].y += 64.0f;
|
||||
expect_reset_state[1].y += 65.0f;
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
v = get_readback_vec4(&rb, j + 1 + 3 * tests[i].expected_output_count, 0);
|
||||
expect = &expect_reset_state[j];
|
||||
ok(compare_vec4(v, expect, 0), "Post-reset element %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
}
|
||||
|
||||
ID3D12CommandSignature_Release(command_signature);
|
||||
ID3D12Resource_Release(argument_buffer);
|
||||
ID3D12Resource_Release(argument_buffer_late);
|
||||
ID3D12Resource_Release(streamout_buffer);
|
||||
release_resource_readback(&rb);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
ID3D12PipelineState_Release(psos[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(root_signatures); i++)
|
||||
ID3D12RootSignature_Release(root_signatures[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(vbo); i++)
|
||||
ID3D12Resource_Release(vbo[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(ibo); i++)
|
||||
ID3D12Resource_Release(ibo[i]);
|
||||
ID3D12Resource_Release(cbv);
|
||||
ID3D12Resource_Release(srv);
|
||||
ID3D12Resource_Release(uav);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_execute_indirect(void)
|
||||
{
|
||||
ID3D12Resource *argument_buffer, *count_buffer, *uav;
|
||||
|
@ -2860,9 +3575,9 @@ void test_conditional_rendering(void)
|
|||
|
||||
void test_write_buffer_immediate(void)
|
||||
{
|
||||
D3D12_WRITEBUFFERIMMEDIATE_PARAMETER parameters[2];
|
||||
D3D12_WRITEBUFFERIMMEDIATE_PARAMETER parameters[3];
|
||||
ID3D12GraphicsCommandList2 *command_list2;
|
||||
D3D12_WRITEBUFFERIMMEDIATE_MODE modes[2];
|
||||
D3D12_WRITEBUFFERIMMEDIATE_MODE modes[3];
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
struct resource_readback rb;
|
||||
struct test_context context;
|
||||
|
@ -2872,7 +3587,7 @@ void test_write_buffer_immediate(void)
|
|||
unsigned int value;
|
||||
HRESULT hr;
|
||||
|
||||
static const unsigned int data_values[] = {0xdeadbeef, 0xf00baa};
|
||||
static const unsigned int data_values[] = {0xdeadbeef, 0xf00baa, 0xdeadbeef, 0xf00baa};
|
||||
|
||||
if (!init_test_context(&context, NULL))
|
||||
return;
|
||||
|
@ -2897,6 +3612,8 @@ void test_write_buffer_immediate(void)
|
|||
parameters[0].Value = 0x1020304;
|
||||
parameters[1].Dest = parameters[0].Dest + sizeof(data_values[0]);
|
||||
parameters[1].Value = 0xc0d0e0f;
|
||||
parameters[2].Dest = parameters[0].Dest + sizeof(data_values[0]) * 3;
|
||||
parameters[2].Value = 0x5060708;
|
||||
ID3D12GraphicsCommandList2_WriteBufferImmediate(command_list2, ARRAY_SIZE(parameters), parameters, NULL);
|
||||
hr = ID3D12GraphicsCommandList_Close(command_list);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
@ -2909,13 +3626,19 @@ void test_write_buffer_immediate(void)
|
|||
ok(value == parameters[0].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[0].Value);
|
||||
value = get_readback_uint(&rb, 1, 0, 0);
|
||||
ok(value == parameters[1].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[1].Value);
|
||||
value = get_readback_uint(&rb, 2, 0, 0);
|
||||
ok(value == data_values[2], "Got unexpected value %#x, expected %#x.\n", value, data_values[2]);
|
||||
value = get_readback_uint(&rb, 3, 0, 0);
|
||||
ok(value == parameters[2].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[2].Value);
|
||||
release_resource_readback(&rb);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
parameters[0].Value = 0x2030405;
|
||||
parameters[1].Value = 0xb0c0d0e;
|
||||
modes[0] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_IN;
|
||||
modes[1] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT;
|
||||
parameters[2].Value = 0x708090a;
|
||||
modes[0] = D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT;
|
||||
modes[1] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_IN;
|
||||
modes[2] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT;
|
||||
ID3D12GraphicsCommandList2_WriteBufferImmediate(command_list2, ARRAY_SIZE(parameters), parameters, modes);
|
||||
hr = ID3D12GraphicsCommandList_Close(command_list);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
@ -2928,6 +3651,8 @@ void test_write_buffer_immediate(void)
|
|||
ok(value == parameters[0].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[0].Value);
|
||||
value = get_readback_uint(&rb, 1, 0, 0);
|
||||
ok(value == parameters[1].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[1].Value);
|
||||
value = get_readback_uint(&rb, 3, 0, 0);
|
||||
ok(value == parameters[2].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[2].Value);
|
||||
release_resource_readback(&rb);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
|
@ -3094,6 +3819,69 @@ void test_aliasing_barrier(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
static void test_discard_resource_uav_type(bool compute_queue)
|
||||
{
|
||||
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav;
|
||||
struct test_context context;
|
||||
ID3D12DescriptorHeap *gpu;
|
||||
ID3D12DescriptorHeap *cpu;
|
||||
ID3D12Resource *resource;
|
||||
|
||||
if (compute_queue)
|
||||
{
|
||||
/* Creates a COMPUTE list instead of DIRECT. */
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!init_test_context(&context, NULL))
|
||||
return;
|
||||
}
|
||||
|
||||
/* In compute lists, we can discard UAV enabled resources,
|
||||
* and the resource must be in UAV state. */
|
||||
|
||||
resource = create_default_texture2d(context.device, 4, 4, 1, 1, DXGI_FORMAT_R32_FLOAT,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
gpu = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
|
||||
cpu = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
|
||||
|
||||
memset(&uav, 0, sizeof(uav));
|
||||
uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
|
||||
uav.Format = DXGI_FORMAT_R32_FLOAT;
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu));
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu));
|
||||
|
||||
ID3D12GraphicsCommandList_DiscardResource(context.list, resource, NULL);
|
||||
ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(context.list,
|
||||
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu),
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu),
|
||||
resource, white, 0, NULL);
|
||||
|
||||
transition_resource_state(context.list, resource,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(resource, 0, context.queue, context.list, 1.0f, 0);
|
||||
|
||||
ID3D12DescriptorHeap_Release(gpu);
|
||||
ID3D12DescriptorHeap_Release(cpu);
|
||||
ID3D12Resource_Release(resource);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_discard_resource_uav(void)
|
||||
{
|
||||
vkd3d_test_set_context("Test graphics");
|
||||
test_discard_resource_uav_type(false);
|
||||
vkd3d_test_set_context("Test compute");
|
||||
test_discard_resource_uav_type(true);
|
||||
}
|
||||
|
||||
void test_discard_resource(void)
|
||||
{
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
|
|
|
@ -26,8 +26,11 @@ void test_copy_texture(void)
|
|||
{
|
||||
D3D12_TEXTURE_COPY_LOCATION src_location, dst_location;
|
||||
ID3D12Resource *src_texture, *dst_texture;
|
||||
ID3D12PipelineState *pipeline_state_float;
|
||||
ID3D12PipelineState *pipeline_state_uint;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_SUBRESOURCE_DATA texture_data;
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv;
|
||||
struct depth_stencil_resource ds;
|
||||
struct test_context_desc desc;
|
||||
struct test_context context;
|
||||
|
@ -83,15 +86,40 @@ void test_copy_texture(void)
|
|||
};
|
||||
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
|
||||
|
||||
static const DWORD ps_code_uint[] =
|
||||
{
|
||||
#if 0
|
||||
Texture2D<uint> t;
|
||||
|
||||
float main(float4 position : SV_Position) : SV_Target
|
||||
{
|
||||
return float(t[int2(position.x, position.y)]);
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x9a3fe38f, 0x3c222734, 0x1abb807b, 0xeb4ccda3, 0x00000001, 0x0000014c, 0x00000003,
|
||||
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000030f, 0x505f5653, 0x7469736f, 0x006e6f69,
|
||||
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003,
|
||||
0x00000000, 0x00000e01, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x000000b0, 0x00000050,
|
||||
0x0000002c, 0x0100086a, 0x04001858, 0x00107000, 0x00000000, 0x00004444, 0x04002064, 0x00101032,
|
||||
0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000000, 0x02000068, 0x00000001, 0x0500001b,
|
||||
0x00100032, 0x00000000, 0x00101046, 0x00000000, 0x08000036, 0x001000c2, 0x00000000, 0x00004002,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x8900002d, 0x800000c2, 0x00111103, 0x00100012,
|
||||
0x00000000, 0x00100e46, 0x00000000, 0x00107e46, 0x00000000, 0x05000056, 0x00102012, 0x00000000,
|
||||
0x0010000a, 0x00000000, 0x0100003e,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE ps_uint = {ps_code_uint, sizeof(ps_code_uint)};
|
||||
|
||||
struct depth_copy_test
|
||||
{
|
||||
float depth_value;
|
||||
UINT stencil_value;
|
||||
DXGI_FORMAT ds_format;
|
||||
DXGI_FORMAT ds_view_format;
|
||||
DXGI_FORMAT color_format;
|
||||
DXGI_FORMAT readback_format;
|
||||
bool stencil;
|
||||
bool roundtrip;
|
||||
bool requires_stencil_export;
|
||||
};
|
||||
static const struct depth_copy_test depth_copy_tests[] = {
|
||||
{ 0.0f, 0, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
|
@ -104,6 +132,16 @@ void test_copy_texture(void)
|
|||
{ 0.4f, 20, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ 0.4f, 20, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT, false, true },
|
||||
{ 1.0f, 21, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R8_UINT, true, false },
|
||||
|
||||
/* Single aspect copies between depth-stencil images. Should hit plain vkCmdCopyImage paths. */
|
||||
{ 0.4f, 40, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS, false, false },
|
||||
{ 0.7f, 41, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS, true, false },
|
||||
{ 0.2f, 42, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS, false, true },
|
||||
{ 0.5f, 43, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS, true, true },
|
||||
|
||||
/* Test color <-> stencil copies. */
|
||||
{ 1.0f, 44, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R8_UINT, true, false },
|
||||
{ 1.0f, 45, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R8_UINT, true, true, true },
|
||||
};
|
||||
|
||||
static const D3D12_RESOURCE_STATES resource_states[] =
|
||||
|
@ -174,7 +212,7 @@ void test_copy_texture(void)
|
|||
|
||||
for (i = 0; i < ARRAY_SIZE(resource_states); ++i)
|
||||
{
|
||||
src_texture = create_default_texture(device, 16, 16, DXGI_FORMAT_R8G8B8A8_UNORM,
|
||||
src_texture = create_default_texture(device, 4, 4, DXGI_FORMAT_R8G8B8A8_UNORM,
|
||||
0, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
texture_data.pData = bitmap_data;
|
||||
texture_data.RowPitch = 4 * sizeof(*bitmap_data);
|
||||
|
@ -224,8 +262,10 @@ void test_copy_texture(void)
|
|||
|
||||
context.root_signature = create_texture_root_signature(device,
|
||||
D3D12_SHADER_VISIBILITY_PIXEL, 0, 0);
|
||||
context.pipeline_state = create_pipeline_state(device,
|
||||
pipeline_state_float = create_pipeline_state(device,
|
||||
context.root_signature, context.render_target_desc.Format, NULL, &ps, NULL);
|
||||
pipeline_state_uint = create_pipeline_state(device,
|
||||
context.root_signature, context.render_target_desc.Format, NULL, &ps_uint, NULL);
|
||||
|
||||
heap = create_gpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
|
||||
|
||||
|
@ -248,9 +288,19 @@ void test_copy_texture(void)
|
|||
transition_sub_resource_state(command_list, ds.texture, depth_copy_tests[i].stencil ? 1 : 0,
|
||||
D3D12_RESOURCE_STATE_DEPTH_WRITE, resource_states[i % ARRAY_SIZE(resource_states)]);
|
||||
|
||||
dst_texture = create_default_texture(device, 32, 32, depth_copy_tests[i].color_format,
|
||||
dst_texture = create_default_texture(device, 32, 32, depth_copy_tests[i].readback_format,
|
||||
0, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
ID3D12Device_CreateShaderResourceView(device, dst_texture, NULL,
|
||||
|
||||
memset(&srv, 0, sizeof(srv));
|
||||
srv.Format = depth_copy_tests[i].readback_format;
|
||||
srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
||||
srv.Texture2D.MipLevels = 1;
|
||||
srv.Texture2D.PlaneSlice = depth_copy_tests[i].stencil &&
|
||||
depth_copy_tests[i].readback_format != DXGI_FORMAT_R8_UINT ? 1 : 0;
|
||||
if (srv.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
|
||||
srv.Format = srv.Texture2D.PlaneSlice ? DXGI_FORMAT_X32_TYPELESS_G8X24_UINT : DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
|
||||
srv.Shader4ComponentMapping = srv.Texture2D.PlaneSlice ? D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(1, 1, 1, 1) : D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
ID3D12Device_CreateShaderResourceView(device, dst_texture, &srv,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap));
|
||||
|
||||
src_location.pResource = ds.texture;
|
||||
|
@ -258,30 +308,30 @@ void test_copy_texture(void)
|
|||
src_location.SubresourceIndex = depth_copy_tests[i].stencil ? 1 : 0;
|
||||
dst_location.pResource = dst_texture;
|
||||
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
||||
dst_location.SubresourceIndex = 0;
|
||||
dst_location.SubresourceIndex = srv.Texture2D.PlaneSlice;
|
||||
ID3D12GraphicsCommandList_CopyTextureRegion(command_list, &dst_location, 0, 0, 0,
|
||||
&src_location, NULL);
|
||||
|
||||
if (depth_copy_tests[i].roundtrip)
|
||||
{
|
||||
/* Test color to depth copy. */
|
||||
/* Test color to depth/stencil copy. */
|
||||
D3D12_TEXTURE_COPY_LOCATION tmp_src_location = dst_location;
|
||||
D3D12_TEXTURE_COPY_LOCATION tmp_dst_location = src_location;
|
||||
transition_sub_resource_state(command_list, dst_texture, 0,
|
||||
transition_sub_resource_state(command_list, dst_texture, srv.Texture2D.PlaneSlice,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
transition_sub_resource_state(command_list, ds.texture, 0,
|
||||
transition_sub_resource_state(command_list, ds.texture, depth_copy_tests[i].stencil ? 1 : 0,
|
||||
resource_states[i % ARRAY_SIZE(resource_states)], D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
ID3D12GraphicsCommandList_CopyTextureRegion(command_list, &tmp_dst_location, 0, 0, 0,
|
||||
&tmp_src_location, NULL);
|
||||
transition_sub_resource_state(command_list, dst_texture, 0,
|
||||
transition_sub_resource_state(command_list, dst_texture, srv.Texture2D.PlaneSlice,
|
||||
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
transition_sub_resource_state(command_list, ds.texture, 0,
|
||||
transition_sub_resource_state(command_list, ds.texture, depth_copy_tests[i].stencil ? 1 : 0,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
ID3D12GraphicsCommandList_CopyTextureRegion(command_list, &dst_location, 0, 0, 0,
|
||||
&src_location, NULL);
|
||||
}
|
||||
|
||||
transition_sub_resource_state(command_list, dst_texture, 0,
|
||||
transition_sub_resource_state(command_list, dst_texture, srv.Texture2D.PlaneSlice,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
@ -291,7 +341,7 @@ void test_copy_texture(void)
|
|||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, 1, &heap);
|
||||
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, depth_copy_tests[i].stencil ? pipeline_state_uint : pipeline_state_float);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(command_list, 0,
|
||||
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap));
|
||||
|
@ -303,7 +353,9 @@ void test_copy_texture(void)
|
|||
|
||||
if (depth_copy_tests[i].stencil)
|
||||
{
|
||||
check_sub_resource_uint(context.render_target, 0, queue, command_list, depth_copy_tests[i].stencil_value, 0);
|
||||
/* Supported on AMD, but not NV. Need buffer copy roundtrip workaround for that to work. */
|
||||
todo_if(depth_copy_tests[i].requires_stencil_export)
|
||||
check_sub_resource_float(context.render_target, 0, queue, command_list, (float)depth_copy_tests[i].stencil_value, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -318,6 +370,8 @@ void test_copy_texture(void)
|
|||
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
}
|
||||
|
||||
ID3D12PipelineState_Release(pipeline_state_float);
|
||||
ID3D12PipelineState_Release(pipeline_state_uint);
|
||||
ID3D12DescriptorHeap_Release(heap);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
@ -500,9 +554,9 @@ void test_copy_texture_buffer(void)
|
|||
|
||||
void test_copy_buffer_to_depth_stencil(void)
|
||||
{
|
||||
ID3D12Resource *src_buffer_stencil = NULL;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
struct resource_readback rb_stencil;
|
||||
ID3D12Resource *src_buffer_stencil;
|
||||
struct resource_readback rb_depth;
|
||||
ID3D12Resource *src_buffer_depth;
|
||||
struct test_context_desc desc;
|
||||
|
@ -510,7 +564,7 @@ void test_copy_buffer_to_depth_stencil(void)
|
|||
ID3D12Resource *dst_texture;
|
||||
ID3D12CommandQueue *queue;
|
||||
ID3D12Device *device;
|
||||
unsigned int i;
|
||||
unsigned int i, x, y;
|
||||
|
||||
struct test
|
||||
{
|
||||
|
@ -554,31 +608,40 @@ void test_copy_buffer_to_depth_stencil(void)
|
|||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
uint32_t depth_data[(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT / 4) * 4];
|
||||
uint8_t stencil_data[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 4];
|
||||
D3D12_TEXTURE_COPY_LOCATION dst, src;
|
||||
uint8_t stencil_data;
|
||||
uint32_t depth_data;
|
||||
D3D12_BOX src_box;
|
||||
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
dst_texture = create_default_texture2d(device, 1, 1, 1, 1,
|
||||
dst_texture = create_default_texture2d(device, 2, 2, 1, 1,
|
||||
tests[i].format, tests[i].flags, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
|
||||
depth_data = tests[i].input_depth;
|
||||
src_buffer_depth = create_upload_buffer(device, sizeof(depth_data), &depth_data);
|
||||
memset(depth_data, 0, sizeof(depth_data));
|
||||
depth_data[0] = tests[i].input_depth;
|
||||
depth_data[1] = tests[i].input_depth;
|
||||
depth_data[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT / 4] = tests[i].input_depth;
|
||||
depth_data[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT / 4 + 1] = tests[i].input_depth;
|
||||
|
||||
src_buffer_depth = create_upload_buffer(device, sizeof(depth_data), depth_data);
|
||||
|
||||
if (tests[i].stencil)
|
||||
{
|
||||
stencil_data = 0xaa;
|
||||
src_buffer_stencil = create_upload_buffer(device, sizeof(stencil_data), &stencil_data);
|
||||
memset(stencil_data, 0, sizeof(stencil_data));
|
||||
stencil_data[0] = 0xaa;
|
||||
stencil_data[1] = 0xab;
|
||||
stencil_data[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT + 0] = 0xac;
|
||||
stencil_data[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT + 1] = 0xad;
|
||||
src_buffer_stencil = create_upload_buffer(device, sizeof(stencil_data), stencil_data);
|
||||
}
|
||||
|
||||
set_box(&src_box, 0, 0, 0, 1, 1, 1);
|
||||
set_box(&src_box, 0, 0, 0, 2, 2, 1);
|
||||
dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
||||
src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
||||
dst.pResource = dst_texture;
|
||||
src.PlacedFootprint.Offset = 0;
|
||||
src.PlacedFootprint.Footprint.Width = 1;
|
||||
src.PlacedFootprint.Footprint.Height = 1;
|
||||
src.PlacedFootprint.Footprint.Width = 2;
|
||||
src.PlacedFootprint.Footprint.Height = 2;
|
||||
src.PlacedFootprint.Footprint.Depth = 1;
|
||||
src.PlacedFootprint.Footprint.RowPitch = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
|
||||
|
||||
|
@ -605,13 +668,25 @@ void test_copy_buffer_to_depth_stencil(void)
|
|||
reset_command_list(command_list, context.allocator);
|
||||
}
|
||||
|
||||
depth_data = get_readback_uint(&rb_depth, 0, 0, 0);
|
||||
ok(depth_data == tests[i].output_depth_24 || depth_data == tests[i].input_depth, "Depth is 0x%x\n", depth_data);
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
for (x = 0; x < 2; x++)
|
||||
{
|
||||
uint32_t v = get_readback_uint(&rb_depth, x, y, 0);
|
||||
ok((v & 0xffffffu) == tests[i].output_depth_24 || v == tests[i].input_depth, "Depth is 0x%x\n", v);
|
||||
}
|
||||
}
|
||||
|
||||
if (tests[i].stencil)
|
||||
{
|
||||
stencil_data = get_readback_uint8(&rb_stencil, 0, 0);
|
||||
ok(stencil_data == 0xaa, "Stencil is 0x%x\n", stencil_data);
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
for (x = 0; x < 2; x++)
|
||||
{
|
||||
uint8_t v = get_readback_uint8(&rb_stencil, x, y);
|
||||
ok(v == 0xaa + y * 2 + x, "Stencil is 0x%x\n", v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
release_resource_readback(&rb_depth);
|
||||
|
@ -1112,7 +1187,7 @@ void test_multisample_resolve(void)
|
|||
src_rect.bottom = 4;
|
||||
ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, ms_render_target, 0, 4, 0, ms_render_target, 0, &src_rect, DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_RESOLVE_MODE_DECOMPRESS);
|
||||
|
||||
/* DECOMPRESS to other resource MSAA <-> MSAA. vkCmdCopyImage path. */
|
||||
/* DECOMPRESS to other resource MSAA <-> MSAA. vkCmdCopyImage2KHR path. */
|
||||
ID3D12GraphicsCommandList1_ResolveSubresourceRegion(list1, ms_render_target_copy, 0, 0, 0, ms_render_target, 0, &src_rect, DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_RESOLVE_MODE_DECOMPRESS);
|
||||
transition_resource_state(context.list, ms_render_target_copy, D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
|
||||
|
||||
|
@ -1150,3 +1225,99 @@ void test_multisample_resolve(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_copy_buffer_overlap(void)
|
||||
{
|
||||
uint32_t reference_output[4][16 * 1024] = {{0}};
|
||||
ID3D12Resource *dst_buffer[4];
|
||||
uint32_t src_data[16 * 1024];
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12Resource *src_buffer;
|
||||
unsigned int i, j;
|
||||
|
||||
struct copy_command
|
||||
{
|
||||
unsigned int buf_index;
|
||||
unsigned int dst_index;
|
||||
unsigned int src_index;
|
||||
unsigned int count;
|
||||
};
|
||||
static const struct copy_command commands[] =
|
||||
{
|
||||
/* These should be able to run without any barriers. */
|
||||
{ 0, 0, 0, 8192 },
|
||||
{ 0, 8192, 8192, 8192 },
|
||||
{ 1, 0, 0, 8192 },
|
||||
{ 1, 8192, 8192, 8192 },
|
||||
{ 2, 0, 0, 8192 },
|
||||
{ 2, 8192, 8192, 8192 },
|
||||
{ 3, 0, 0, 8192 },
|
||||
{ 3, 8192, 8192, 8192 },
|
||||
/* Needs barrier. */
|
||||
{ 0, 1, 0, 8192 },
|
||||
/* Needs barrier. */
|
||||
{ 0, 8000, 4, 1 },
|
||||
{ 1, 1000, 5001, 3},
|
||||
/* Needs barrier. */
|
||||
{ 1, 1000, 5000, 8192 },
|
||||
{ 2, 0, 0, 8192 },
|
||||
/* Needs barrier. */
|
||||
{ 2, 1, 0, 8192 },
|
||||
/* Needs barrier. */
|
||||
{ 2, 2, 0, 8192 },
|
||||
/* Needs barrier. */
|
||||
{ 2, 3, 0, 8192 },
|
||||
};
|
||||
|
||||
/* Drivers are required to implicitly synchronize any overlapping copies to same destination.
|
||||
* There is no Transfer barrier after all, only UAV ...
|
||||
* For images we do this implicitly through image layout transitions on entry/exit,
|
||||
* but for buffers, we need to explicitly inject barriers as necessary.
|
||||
* Verify that reordering of copy commands does not happen. */
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(src_data); i++)
|
||||
src_data[i] = i;
|
||||
|
||||
src_buffer = create_upload_buffer(context.device, sizeof(src_data), src_data);
|
||||
for (i = 0; i < ARRAY_SIZE(dst_buffer); i++)
|
||||
{
|
||||
dst_buffer[i] = create_default_buffer(context.device, sizeof(src_data),
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(commands); i++)
|
||||
{
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list,
|
||||
dst_buffer[commands[i].buf_index], commands[i].dst_index * sizeof(uint32_t),
|
||||
src_buffer, commands[i].src_index * sizeof(uint32_t),
|
||||
commands[i].count * sizeof(uint32_t));
|
||||
|
||||
for (j = 0; j < commands[i].count; j++)
|
||||
reference_output[commands[i].buf_index][commands[i].dst_index + j] = commands[i].src_index + j;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(dst_buffer); i++)
|
||||
{
|
||||
transition_resource_state(context.list, dst_buffer[i],
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(dst_buffer[i], DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(reference_output[i]); j++)
|
||||
{
|
||||
ok(get_readback_uint(&rb, j, 0, 0) == reference_output[i][j], "%u, %u: Expected %u, got %u.\n",
|
||||
i, j, reference_output[i][j], get_readback_uint(&rb, j, 0, 0));
|
||||
}
|
||||
|
||||
release_resource_readback(&rb);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(dst_buffer); i++)
|
||||
ID3D12Resource_Release(dst_buffer[i]);
|
||||
ID3D12Resource_Release(src_buffer);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
|
|
|
@ -1533,3 +1533,461 @@ void test_stencil_export_dxil(void)
|
|||
test_stencil_export(true);
|
||||
}
|
||||
|
||||
void test_depth_stencil_layout_tracking(void)
|
||||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
struct depth_stencil_resource ds;
|
||||
struct test_context_desc desc;
|
||||
ID3D12PipelineState *psos[4];
|
||||
struct test_context context;
|
||||
D3D12_DISCARD_REGION region;
|
||||
ID3D12RootSignature *rs;
|
||||
unsigned int i, j;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD vs_code[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer C : register(b0)
|
||||
{
|
||||
float z;
|
||||
};
|
||||
|
||||
float4 main(uint vid : SV_VertexID) : SV_Position
|
||||
{
|
||||
if (vid == 0)
|
||||
return float4(-1.0, -1.0, z, 1.0);
|
||||
else if (vid == 1)
|
||||
return float4(-1.0, +3.0, z, 1.0);
|
||||
else
|
||||
return float4(+3.0, -1.0, z, 1.0);
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x31be9212, 0x8e44bbde, 0x8f0a87b5, 0xb8d5783b, 0x00000001, 0x000001dc, 0x00000003,
|
||||
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978,
|
||||
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003,
|
||||
0x00000000, 0x0000000f, 0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000140, 0x00010050,
|
||||
0x00000050, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x04000060, 0x00101012,
|
||||
0x00000000, 0x00000006, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001,
|
||||
0x0300001f, 0x0010100a, 0x00000000, 0x08000036, 0x001020b2, 0x00000000, 0x00004002, 0xbf800000,
|
||||
0xbf800000, 0x00000000, 0x3f800000, 0x06000036, 0x00102042, 0x00000000, 0x0020800a, 0x00000000,
|
||||
0x00000000, 0x0100003e, 0x01000012, 0x07000020, 0x00100012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x00004001, 0x00000001, 0x0304001f, 0x0010000a, 0x00000000, 0x08000036, 0x001020b2, 0x00000000,
|
||||
0x00004002, 0xbf800000, 0x40400000, 0x00000000, 0x3f800000, 0x06000036, 0x00102042, 0x00000000,
|
||||
0x0020800a, 0x00000000, 0x00000000, 0x0100003e, 0x01000012, 0x08000036, 0x001020b2, 0x00000000,
|
||||
0x00004002, 0x40400000, 0xbf800000, 0x00000000, 0x3f800000, 0x06000036, 0x00102042, 0x00000000,
|
||||
0x0020800a, 0x00000000, 0x00000000, 0x0100003e, 0x01000015, 0x01000015, 0x0100003e,
|
||||
};
|
||||
|
||||
static const DWORD ps_code[] =
|
||||
{
|
||||
#if 0
|
||||
void main() {}
|
||||
#endif
|
||||
0x43425844, 0x499d4ed5, 0xbbe2842c, 0x179313ee, 0xde5cd5d9, 0x00000001, 0x00000064, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000010, 0x00000050, 0x00000004, 0x0100086a,
|
||||
0x0100003e,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
|
||||
static const D3D12_SHADER_BYTECODE ps = SHADER_BYTECODE(ps_code);
|
||||
|
||||
enum draw_type
|
||||
{
|
||||
DRAW_TYPE_DRAW,
|
||||
DRAW_TYPE_TRANSITION,
|
||||
DRAW_TYPE_CLEAR,
|
||||
DRAW_TYPE_DISCARD,
|
||||
};
|
||||
|
||||
struct draw
|
||||
{
|
||||
bool depth_write;
|
||||
bool stencil_write;
|
||||
enum draw_type type;
|
||||
D3D12_RECT rect;
|
||||
float z;
|
||||
uint8_t stencil;
|
||||
};
|
||||
|
||||
static const struct draw test_full_promotion[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_promotion_no_read[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_promotion[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
/* Expect transition to WRITE/READ */
|
||||
{ true, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
/* Expect transition to WRITE/WRITE */
|
||||
{ false, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_implicit_transition[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_explicit_transition[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_TRANSITION },
|
||||
{ true, true, DRAW_TYPE_TRANSITION },
|
||||
/* We should already know the attachment is optimal. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_transition_depth[] =
|
||||
{
|
||||
{ false, true, DRAW_TYPE_TRANSITION },
|
||||
/* Mark depth as optimal. */
|
||||
{ true, true, DRAW_TYPE_TRANSITION },
|
||||
/* Promote stencil state here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_transition_stencil[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_TRANSITION },
|
||||
/* Mark stencil as optimal. */
|
||||
{ true, true, DRAW_TYPE_TRANSITION },
|
||||
/* Promote depth state here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_clear_transition[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* We should already know the attachment is optimal. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_discard_transition[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DISCARD },
|
||||
/* We should already know the attachment is optimal. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_clear_depth[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* Promote stencil here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_clear_stencil[] =
|
||||
{
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* Promote depth here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_discard_depth[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_DISCARD },
|
||||
/* Promote stencil here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_discard_stencil[] =
|
||||
{
|
||||
{ false, true, DRAW_TYPE_DISCARD },
|
||||
/* Promote depth here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_decay[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
/* This should decay the resource back to READ_ONLY. */
|
||||
{ false, false, DRAW_TYPE_TRANSITION },
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_decay_depth[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 128 },
|
||||
{ false, true, DRAW_TYPE_TRANSITION },
|
||||
{ false, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_decay_stencil[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 0 },
|
||||
{ true, false, DRAW_TYPE_TRANSITION },
|
||||
{ true, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_no_render_pass[] =
|
||||
{
|
||||
/* Both of these will be emitted as separate clear passes, but no UNDEFINED transition. */
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_separate_no_render_pass[] =
|
||||
{
|
||||
/* Same as above, but separate layouts. */
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_after_discard[] =
|
||||
{
|
||||
/* Both of these will be emitted as separate clear passes, but no UNDEFINED transition. */
|
||||
{ true, true, DRAW_TYPE_DISCARD },
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_separate_after_discard[] =
|
||||
{
|
||||
/* Same as above, but separate layouts. */
|
||||
{ true, false, DRAW_TYPE_DISCARD },
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_DISCARD },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_clear_in_render_pass[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* No need to split render pass here and promote layout. */
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_clear_in_render_pass_promote[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* Need to split render pass here and promote layout. */
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_clear_in_render_pass_promote[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 128 },
|
||||
/* Need to split render pass here and promote layout. */
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
struct test
|
||||
{
|
||||
const struct draw *draws;
|
||||
unsigned int draw_count;
|
||||
};
|
||||
|
||||
/* It's also useful to test this with validation layers on, since this is mostly a test to see if we handle
|
||||
* the layout transitions correctly. */
|
||||
static const struct test tests[] =
|
||||
{
|
||||
{ test_full_promotion, ARRAY_SIZE(test_full_promotion) },
|
||||
{ test_full_promotion_no_read, ARRAY_SIZE(test_full_promotion_no_read) },
|
||||
{ test_partial_promotion, ARRAY_SIZE(test_partial_promotion) },
|
||||
{ test_full_implicit_transition, ARRAY_SIZE(test_full_implicit_transition) },
|
||||
{ test_full_explicit_transition, ARRAY_SIZE(test_full_explicit_transition) },
|
||||
{ test_full_clear_transition, ARRAY_SIZE(test_full_clear_transition) },
|
||||
{ test_full_discard_transition, ARRAY_SIZE(test_full_discard_transition) },
|
||||
{ test_partial_transition_depth, ARRAY_SIZE(test_partial_transition_depth) },
|
||||
{ test_partial_transition_stencil, ARRAY_SIZE(test_partial_transition_stencil) },
|
||||
{ test_partial_clear_depth, ARRAY_SIZE(test_partial_clear_depth) },
|
||||
{ test_partial_clear_stencil, ARRAY_SIZE(test_partial_clear_stencil) },
|
||||
{ test_partial_discard_depth, ARRAY_SIZE(test_partial_discard_depth) },
|
||||
{ test_partial_discard_stencil, ARRAY_SIZE(test_partial_discard_stencil) },
|
||||
{ test_decay, ARRAY_SIZE(test_decay) },
|
||||
{ test_decay_depth, ARRAY_SIZE(test_decay_depth) },
|
||||
{ test_decay_stencil, ARRAY_SIZE(test_decay_stencil) },
|
||||
{ test_sub_clear_no_render_pass, ARRAY_SIZE(test_sub_clear_no_render_pass) },
|
||||
{ test_sub_clear_separate_no_render_pass, ARRAY_SIZE(test_sub_clear_separate_no_render_pass) },
|
||||
{ test_sub_clear_after_discard, ARRAY_SIZE(test_sub_clear_after_discard) },
|
||||
{ test_sub_clear_separate_after_discard, ARRAY_SIZE(test_sub_clear_separate_after_discard) },
|
||||
{ test_clear_in_render_pass, ARRAY_SIZE(test_clear_in_render_pass) },
|
||||
{ test_clear_in_render_pass_promote, ARRAY_SIZE(test_clear_in_render_pass_promote) },
|
||||
{ test_partial_clear_in_render_pass_promote, ARRAY_SIZE(test_partial_clear_in_render_pass_promote) },
|
||||
};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_render_target = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
init_depth_stencil(&ds, context.device, 1024, 1024, 1, 1,
|
||||
DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, NULL);
|
||||
rs = create_32bit_constants_root_signature(context.device, 0, 1, D3D12_SHADER_VISIBILITY_VERTEX);
|
||||
|
||||
init_pipeline_state_desc(&pso_desc, rs, 0, &vs, &ps, NULL);
|
||||
|
||||
pso_desc.NumRenderTargets = 0;
|
||||
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
|
||||
pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
{
|
||||
pso_desc.DepthStencilState.StencilEnable = TRUE;
|
||||
pso_desc.DepthStencilState.DepthEnable = TRUE;
|
||||
pso_desc.DepthStencilState.StencilReadMask = 0xFF;
|
||||
|
||||
if (i >= 2)
|
||||
{
|
||||
pso_desc.DepthStencilState.StencilWriteMask = 0xFF;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
|
||||
}
|
||||
else
|
||||
{
|
||||
pso_desc.DepthStencilState.StencilWriteMask = 0x00;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP;
|
||||
}
|
||||
|
||||
pso_desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
pso_desc.DepthStencilState.BackFace = pso_desc.DepthStencilState.FrontFace;
|
||||
|
||||
pso_desc.DepthStencilState.DepthWriteMask = (i & 1) ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
|
||||
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&psos[i]);
|
||||
ok(SUCCEEDED(hr), "Failed to create graphics pipeline state, hr %#x.\n", hr);
|
||||
}
|
||||
|
||||
/* In the tests, begin command lists from a clean slate.
|
||||
* Implementation must assume the depth-stencil image is in read-only state until proven otherwise. */
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
const D3D12_VIEWPORT vp = { 0, 0, 1024, 1024, 0, 1 };
|
||||
D3D12_RESOURCE_STATES stencil_state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
||||
D3D12_RESOURCE_STATES depth_state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
||||
D3D12_RESOURCE_STATES new_stencil_state;
|
||||
D3D12_RESOURCE_STATES new_depth_state;
|
||||
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
/* Initialize the DS image to a known state. */
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle,
|
||||
D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL,
|
||||
1.0f, 255, 0, NULL);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 1.0f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
check_sub_resource_uint8(ds.texture, 1, context.queue, context.list, 255, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
ID3D12GraphicsCommandList_Close(context.list);
|
||||
exec_command_list(context.queue, context.list);
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(context.list, 0, NULL, FALSE, &ds.dsv_handle);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(context.list, 1, &vp);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(context.list, rs);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(context.list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
for (j = 0; j < tests[i].draw_count; j++)
|
||||
{
|
||||
switch (tests[i].draws[j].type)
|
||||
{
|
||||
case DRAW_TYPE_DRAW:
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(context.list, 1, &tests[i].draws[j].rect);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, psos[tests[i].draws[j].depth_write + tests[i].draws[j].stencil_write * 2]);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(context.list, 0, 1, &tests[i].draws[j].z, 0);
|
||||
ID3D12GraphicsCommandList_OMSetStencilRef(context.list, tests[i].draws[j].stencil);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(context.list, 3, 1, 0, 0);
|
||||
break;
|
||||
|
||||
case DRAW_TYPE_TRANSITION:
|
||||
new_depth_state = tests[i].draws[j].depth_write ? D3D12_RESOURCE_STATE_DEPTH_WRITE :
|
||||
(D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
new_stencil_state = tests[i].draws[j].stencil_write ? D3D12_RESOURCE_STATE_DEPTH_WRITE :
|
||||
(D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
|
||||
if (new_depth_state != depth_state)
|
||||
{
|
||||
transition_sub_resource_state(context.list, ds.texture, 0, depth_state, new_depth_state);
|
||||
depth_state = new_depth_state;
|
||||
}
|
||||
|
||||
if (new_stencil_state != stencil_state)
|
||||
{
|
||||
transition_sub_resource_state(context.list, ds.texture, 1, stencil_state, new_stencil_state);
|
||||
stencil_state = new_stencil_state;
|
||||
}
|
||||
break;
|
||||
|
||||
case DRAW_TYPE_CLEAR:
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle,
|
||||
(tests[i].draws[j].depth_write ? D3D12_CLEAR_FLAG_DEPTH : 0) |
|
||||
(tests[i].draws[j].stencil_write ? D3D12_CLEAR_FLAG_STENCIL : 0),
|
||||
tests[i].draws[j].z, tests[i].draws[j].stencil, 1, &tests[i].draws[j].rect);
|
||||
break;
|
||||
|
||||
case DRAW_TYPE_DISCARD:
|
||||
region.NumRects = 0;
|
||||
region.pRects = NULL;
|
||||
|
||||
if (tests[i].draws[j].depth_write && tests[i].draws[j].stencil_write)
|
||||
{
|
||||
region.FirstSubresource = 0;
|
||||
region.NumSubresources = 2;
|
||||
}
|
||||
else if (tests[i].draws[j].depth_write)
|
||||
{
|
||||
region.FirstSubresource = 0;
|
||||
region.NumSubresources = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
region.FirstSubresource = 1;
|
||||
region.NumSubresources = 1;
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_DiscardResource(context.list, ds.texture, &region);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Normalize the resource state back to DEPTH_WRITE. */
|
||||
if (depth_state != D3D12_RESOURCE_STATE_DEPTH_WRITE)
|
||||
transition_sub_resource_state(context.list, ds.texture, 0, depth_state, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
if (stencil_state != D3D12_RESOURCE_STATE_DEPTH_WRITE)
|
||||
transition_sub_resource_state(context.list, ds.texture, 1, stencil_state, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.0f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
check_sub_resource_uint8(ds.texture, 1, context.queue, context.list, 0, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
ID3D12GraphicsCommandList_Close(context.list);
|
||||
exec_command_list(context.queue, context.list);
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.0f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
check_sub_resource_uint8(ds.texture, 1, context.queue, context.list, 0, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
|
||||
ID3D12RootSignature_Release(rs);
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
ID3D12PipelineState_Release(psos[i]);
|
||||
destroy_depth_stencil(&ds);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -327,18 +327,6 @@ void test_format_support(void)
|
|||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
static const struct
|
||||
{
|
||||
D3D12_FEATURE_DATA_FORMAT_SUPPORT f;
|
||||
bool broken;
|
||||
}
|
||||
unsupported_format_features[] =
|
||||
{
|
||||
/* A recent version of WARP supports B8G8R8A8 UAVs even on D3D_FEATURE_LEVEL_11_0. */
|
||||
{{DXGI_FORMAT_B8G8R8A8_TYPELESS, D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW,
|
||||
D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE}, true},
|
||||
};
|
||||
|
||||
if (!(device = create_device()))
|
||||
{
|
||||
skip("Failed to create device.\n");
|
||||
|
@ -354,23 +342,6 @@ void test_format_support(void)
|
|||
ok(!format_support.Support2 || format_support.Support2 == D3D12_FORMAT_SUPPORT2_TILED,
|
||||
"Got unexpected support2 %#x.\n", format_support.Support2);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(unsupported_format_features); ++i)
|
||||
{
|
||||
memset(&format_support, 0, sizeof(format_support));
|
||||
format_support.Format = unsupported_format_features[i].f.Format;
|
||||
hr = ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_FORMAT_SUPPORT,
|
||||
&format_support, sizeof(format_support));
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
ok(!(format_support.Support1 & unsupported_format_features[i].f.Support1)
|
||||
|| broken_on_warp(unsupported_format_features[i].broken),
|
||||
"Format %#x supports %#x.\n", unsupported_format_features[i].f.Format,
|
||||
format_support.Support1 & unsupported_format_features[i].f.Support1);
|
||||
ok(!(format_support.Support2 & unsupported_format_features[i].f.Support2)
|
||||
|| broken_on_warp(unsupported_format_features[i].broken),
|
||||
"Format %#x supports %#x.\n", unsupported_format_features[i].f.Format,
|
||||
format_support.Support2 & unsupported_format_features[i].f.Support2);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(depth_stencil_formats); ++i)
|
||||
{
|
||||
memset(&format_support, 0, sizeof(format_support));
|
||||
|
@ -1103,10 +1074,10 @@ void test_reset_command_allocator(void)
|
|||
command_allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&command_list2);
|
||||
ok(hr == S_OK, "Failed to create command list, hr %#x.\n", hr);
|
||||
|
||||
ID3D12CommandAllocator_Release(command_allocator);
|
||||
ID3D12CommandAllocator_Release(command_allocator2);
|
||||
ID3D12GraphicsCommandList_Release(command_list);
|
||||
ID3D12GraphicsCommandList_Release(command_list2);
|
||||
ID3D12CommandAllocator_Release(command_allocator);
|
||||
ID3D12CommandAllocator_Release(command_allocator2);
|
||||
}
|
||||
|
||||
refcount = ID3D12Device_Release(device);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -97,6 +97,101 @@ void test_create_compute_pipeline_state(void)
|
|||
ok(!refcount, "ID3D12Device has %u references left.\n", (unsigned int)refcount);
|
||||
}
|
||||
|
||||
void test_integer_blending_pipeline_state(void)
|
||||
{
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12RootSignature *root_signature;
|
||||
ID3D12PipelineState *pso;
|
||||
D3D12_BLEND_DESC *blend;
|
||||
ID3D12Device *device;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD ps_code[] =
|
||||
{
|
||||
#if 0
|
||||
uint main() : SV_Target
|
||||
{
|
||||
return 10;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x9f26b611, 0xc59570a7, 0x9b327871, 0xb1015fc6, 0x00000001, 0x000000a8, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
|
||||
0x00000e01, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x00000030, 0x00000050, 0x0000000c,
|
||||
0x0100086a, 0x03000065, 0x00102012, 0x00000000, 0x05000036, 0x00102012, 0x00000000, 0x00004001,
|
||||
0x0000000a, 0x0100003e,
|
||||
};
|
||||
|
||||
static const DWORD ps_code_no_rt[] =
|
||||
{
|
||||
#if 0
|
||||
void main()
|
||||
{
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x499d4ed5, 0xbbe2842c, 0x179313ee, 0xde5cd5d9, 0x00000001, 0x00000064, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000010, 0x00000050, 0x00000004, 0x0100086a,
|
||||
0x0100003e,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE ps = { ps_code, sizeof(ps_code) };
|
||||
static const D3D12_SHADER_BYTECODE ps_no_rt = { ps_code_no_rt, sizeof(ps_code_no_rt) };
|
||||
|
||||
struct test
|
||||
{
|
||||
HRESULT hr;
|
||||
const D3D12_SHADER_BYTECODE *ps;
|
||||
UINT8 write_mask;
|
||||
};
|
||||
static const struct test tests[] =
|
||||
{
|
||||
{ S_OK, &ps_no_rt, D3D12_COLOR_WRITE_ENABLE_ALL },
|
||||
{ E_INVALIDARG, &ps, 0 },
|
||||
{ E_INVALIDARG, &ps, D3D12_COLOR_WRITE_ENABLE_ALL },
|
||||
};
|
||||
|
||||
if (!(device = create_device()))
|
||||
{
|
||||
skip("Failed to create device.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
root_signature_desc.NumParameters = 0;
|
||||
root_signature_desc.pParameters = NULL;
|
||||
root_signature_desc.NumStaticSamplers = 0;
|
||||
root_signature_desc.pStaticSamplers = NULL;
|
||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||
hr = create_root_signature(device, &root_signature_desc, &root_signature);
|
||||
ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
init_pipeline_state_desc(&pso_desc, root_signature, DXGI_FORMAT_R32_UINT, NULL, tests[i].ps, NULL);
|
||||
blend = &pso_desc.BlendState;
|
||||
blend->IndependentBlendEnable = false;
|
||||
blend->RenderTarget[0].BlendEnable = true;
|
||||
blend->RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
|
||||
blend->RenderTarget[0].DestBlend = D3D12_BLEND_DEST_ALPHA;
|
||||
blend->RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
|
||||
blend->RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
|
||||
blend->RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO;
|
||||
blend->RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||
blend->RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&pso);
|
||||
ok(hr == tests[i].hr, "Unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(pso);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
ID3D12RootSignature_Release(root_signature);
|
||||
ID3D12Device_Release(device);
|
||||
}
|
||||
|
||||
void test_create_graphics_pipeline_state(void)
|
||||
{
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
|
@ -297,33 +392,15 @@ void test_create_pipeline_state(void)
|
|||
0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x0100003e,
|
||||
};
|
||||
|
||||
static const union d3d12_root_signature_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
ID3D12RootSignature *root_signature;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
root_signature_subobject =
|
||||
static const union d3d12_root_signature_subobject root_signature_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE,
|
||||
NULL, /* fill in dynamically */
|
||||
}};
|
||||
|
||||
static const union d3d12_shader_bytecode_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_SHADER_BYTECODE shader_bytecode;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
vs_subobject = {{ D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS, { vs_code, sizeof(vs_code) } }},
|
||||
ps_subobject = {{ D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS, { ps_code, sizeof(ps_code) } }},
|
||||
cs_subobject = {{ D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS, { cs_code, sizeof(cs_code) } }};
|
||||
static const union d3d12_shader_bytecode_subobject vs_subobject = {{ D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS, { vs_code, sizeof(vs_code) } }};
|
||||
static const union d3d12_shader_bytecode_subobject ps_subobject = {{ D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS, { ps_code, sizeof(ps_code) } }};
|
||||
static const union d3d12_shader_bytecode_subobject cs_subobject = {{ D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS, { cs_code, sizeof(cs_code) } }};
|
||||
|
||||
static const D3D12_SO_DECLARATION_ENTRY so_entries[] =
|
||||
{
|
||||
|
@ -332,16 +409,7 @@ void test_create_pipeline_state(void)
|
|||
|
||||
static const UINT so_strides[] = { 16u };
|
||||
|
||||
static const union d3d12_stream_output_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_STREAM_OUTPUT_DESC stream_output_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
stream_output_subobject =
|
||||
static const union d3d12_stream_output_subobject stream_output_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_STREAM_OUTPUT,
|
||||
{ so_entries, ARRAY_SIZE(so_entries),
|
||||
|
@ -349,16 +417,7 @@ void test_create_pipeline_state(void)
|
|||
D3D12_SO_NO_RASTERIZED_STREAM },
|
||||
}};
|
||||
|
||||
static const union d3d12_blend_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_BLEND_DESC blend_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
blend_subobject =
|
||||
static const union d3d12_blend_subobject blend_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND,
|
||||
{ FALSE, TRUE,
|
||||
|
@ -369,31 +428,13 @@ void test_create_pipeline_state(void)
|
|||
}
|
||||
}};
|
||||
|
||||
static const union d3d12_sample_mask_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
UINT sample_mask;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
sample_mask_subobject =
|
||||
static const union d3d12_sample_mask_subobject sample_mask_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK,
|
||||
0xFFFFFFFFu
|
||||
}};
|
||||
|
||||
static const union d3d12_rasterizer_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_RASTERIZER_DESC rasterizer_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
rasterizer_subobject =
|
||||
static const union d3d12_rasterizer_subobject rasterizer_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER,
|
||||
{ D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_BACK,
|
||||
|
@ -401,16 +442,7 @@ void test_create_pipeline_state(void)
|
|||
D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF },
|
||||
}};
|
||||
|
||||
static const union d3d12_depth_stencil_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
depth_stencil_subobject =
|
||||
static const union d3d12_depth_stencil_subobject depth_stencil_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL,
|
||||
{ TRUE, D3D12_DEPTH_WRITE_MASK_ALL, D3D12_COMPARISON_FUNC_LESS_EQUAL, TRUE, 0xFF, 0xFF,
|
||||
|
@ -423,151 +455,61 @@ void test_create_pipeline_state(void)
|
|||
{ "POS", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
|
||||
};
|
||||
|
||||
static const union d3d12_input_layout_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_INPUT_LAYOUT_DESC input_layout;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
input_layout_subobject =
|
||||
static const union d3d12_input_layout_subobject input_layout_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT,
|
||||
{ input_elements, ARRAY_SIZE(input_elements) },
|
||||
}};
|
||||
|
||||
static const union d3d12_ib_strip_cut_value_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE strip_cut_value;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
ib_strip_cut_value_subobject =
|
||||
static const union d3d12_ib_strip_cut_value_subobject ib_strip_cut_value_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE,
|
||||
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF,
|
||||
}};
|
||||
|
||||
static const union d3d12_primitive_topology_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE primitive_topology_type;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
primitive_topology_subobject =
|
||||
static const union d3d12_primitive_topology_subobject primitive_topology_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY,
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
|
||||
}};
|
||||
|
||||
static const union d3d12_render_target_formats_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_RT_FORMAT_ARRAY render_target_formats;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
render_target_formats_subobject =
|
||||
static const union d3d12_render_target_formats_subobject render_target_formats_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS,
|
||||
{ { DXGI_FORMAT_R8G8B8A8_UNORM }, 1 },
|
||||
}};
|
||||
|
||||
static const union d3d12_depth_stencil_format_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
DXGI_FORMAT depth_stencil_format;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
depth_stencil_format_subobject =
|
||||
static const union d3d12_depth_stencil_format_subobject depth_stencil_format_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT,
|
||||
DXGI_FORMAT_D32_FLOAT_S8X24_UINT,
|
||||
}};
|
||||
|
||||
static const union d3d12_sample_desc_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
DXGI_SAMPLE_DESC sample_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
sample_desc_subobject =
|
||||
static const union d3d12_sample_desc_subobject sample_desc_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC,
|
||||
{ 1, 0 },
|
||||
}};
|
||||
|
||||
static const union d3d12_node_mask_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
UINT node_mask;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
node_mask_subobject =
|
||||
static const union d3d12_node_mask_subobject node_mask_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK,
|
||||
0x0,
|
||||
}};
|
||||
|
||||
static const union d3d12_cached_pso_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_CACHED_PIPELINE_STATE cached_pso;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
cached_pso_subobject =
|
||||
static const union d3d12_cached_pso_subobject cached_pso_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO,
|
||||
{ NULL, 0 },
|
||||
}};
|
||||
|
||||
static const union d3d12_flags_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_PIPELINE_STATE_FLAGS flags;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
flags_subobject =
|
||||
static const union d3d12_flags_subobject flags_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS,
|
||||
D3D12_PIPELINE_STATE_FLAG_NONE,
|
||||
}};
|
||||
|
||||
static const union d3d12_depth_stencil1_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_DEPTH_STENCIL_DESC1 depth_stencil_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
depth_stencil1_subobject =
|
||||
static const union d3d12_depth_stencil1_subobject depth_stencil1_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1,
|
||||
{ TRUE, D3D12_DEPTH_WRITE_MASK_ALL, D3D12_COMPARISON_FUNC_LESS_EQUAL, TRUE, 0xFF, 0xFF,
|
||||
|
@ -575,16 +517,7 @@ void test_create_pipeline_state(void)
|
|||
{ D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_INCR, D3D12_COMPARISON_FUNC_EQUAL } },
|
||||
}};
|
||||
|
||||
static const union d3d12_view_instancing_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_VIEW_INSTANCING_DESC view_instancing_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
}
|
||||
view_instancing_subobject =
|
||||
static const union d3d12_view_instancing_subobject view_instancing_subobject =
|
||||
{{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING,
|
||||
{ 0, NULL, D3D12_VIEW_INSTANCING_FLAG_NONE },
|
||||
|
@ -1700,10 +1633,35 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x49, 0x19, 0x48, 0x60, 0xf0, 0x3d, 0xc2, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x24, 0x65, 0x20, 0x81, 0xc1, 0xd7, 0x04, 0x18, 0x0e, 0x04, 0x00, 0x05, 0x00,
|
||||
0x00, 0x00, 0xc5, 0x01, 0x91, 0x8e, 0xec, 0xb7, 0x38, 0xcc, 0x9e, 0x7f, 0xc7, 0xe2, 0xba, 0xd9, 0x5c, 0x96, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
static const DWORD ps_code_3rt_dxbc[] =
|
||||
{
|
||||
#if 0
|
||||
float4 c0;
|
||||
float4 c1;
|
||||
|
||||
void main(out float4 o0 : SV_Target0, out float4 o1 : SV_Target1, out float4 o2 : SV_Target2)
|
||||
{
|
||||
o0 = c0;
|
||||
o1 = c1;
|
||||
o2 = 1.0.xxxx;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xe1e2c26b, 0x10d9607c, 0x4a0f0786, 0xc368f603, 0x00000001, 0x0000013c, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x000000a0, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x0000005c, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000000, 0x00000003, 0x00000000,
|
||||
0x0000000f, 0x00000050, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, 0x00000050,
|
||||
0x00000002, 0x00000000, 0x00000003, 0x00000002, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074,
|
||||
0x58454853, 0x00000094, 0x00000050, 0x00000025, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000,
|
||||
0x00000002, 0x03000065, 0x001020f2, 0x00000000, 0x03000065, 0x001020f2, 0x00000001, 0x03000065,
|
||||
0x001020f2, 0x00000002, 0x06000036, 0x001020f2, 0x00000000, 0x00208e46, 0x00000000, 0x00000000,
|
||||
0x06000036, 0x001020f2, 0x00000001, 0x00208e46, 0x00000000, 0x00000001, 0x08000036, 0x001020f2,
|
||||
0x00000002, 0x00004002, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x0100003e,
|
||||
};
|
||||
const D3D12_SHADER_BYTECODE ps = {
|
||||
use_dxil ? (const void*)ps_code_dxil : (const void*)ps_code_dxbc,
|
||||
use_dxil ? sizeof(ps_code_dxil) : sizeof(ps_code_dxbc)
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE ps_3rt = SHADER_BYTECODE(ps_code_3rt_dxbc);
|
||||
static const struct
|
||||
{
|
||||
struct
|
||||
|
@ -1748,6 +1706,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
init_pipeline_state_desc(&pso_desc, context.root_signature,
|
||||
context.render_target_desc.Format, NULL, &ps, NULL);
|
||||
}
|
||||
|
||||
pso_desc.BlendState.RenderTarget[0].BlendEnable = true;
|
||||
pso_desc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_COLOR;
|
||||
pso_desc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_SRC1_COLOR;
|
||||
|
@ -1755,6 +1714,54 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
|
||||
pso_desc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_SRC1_ALPHA;
|
||||
pso_desc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||
|
||||
pso_desc.NumRenderTargets = 2;
|
||||
pso_desc.RTVFormats[1] = pso_desc.RTVFormats[0];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
|
||||
/* Write mask of 0 is not enough. */
|
||||
pso_desc.BlendState.IndependentBlendEnable = TRUE;
|
||||
pso_desc.BlendState.RenderTarget[1].RenderTargetWriteMask = 0;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
|
||||
/* This appears to be allowed however. */
|
||||
pso_desc.RTVFormats[1] = DXGI_FORMAT_UNKNOWN;
|
||||
pso_desc.BlendState.IndependentBlendEnable = FALSE;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
|
||||
/* >2 RTs is also allowed as long as we keep using NULL format. */
|
||||
pso_desc.NumRenderTargets = 3;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
|
||||
/* This is still allowed. We need to only consider RTs with IOSIG entry apparently ... */
|
||||
pso_desc.RTVFormats[2] = pso_desc.RTVFormats[0];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
|
||||
if (!use_dxil)
|
||||
{
|
||||
/* If we try to write to o2 however, this must fail. */
|
||||
pso_desc.PS = ps_3rt;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
pso_desc.PS = ps;
|
||||
}
|
||||
|
||||
pso_desc.NumRenderTargets = 1;
|
||||
pso_desc.RTVFormats[2] = DXGI_FORMAT_UNKNOWN;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
|
@ -1794,7 +1801,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[1] = pso_desc.BlendState.RenderTarget[0];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
todo ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
context.pipeline_state = NULL;
|
||||
|
@ -1803,7 +1810,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[1].DestBlend = D3D12_BLEND_SRC_COLOR;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
todo ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
context.pipeline_state = NULL;
|
||||
|
@ -1820,7 +1827,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
todo ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
context.pipeline_state = NULL;
|
||||
|
@ -2294,3 +2301,119 @@ void test_mismatching_pso_stages(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_pipeline_no_ps_nonzero_rts(void)
|
||||
{
|
||||
const FLOAT white[] = { 100.0f, 100.0f, 100.0f, 100.0f };
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
struct depth_stencil_resource ds;
|
||||
D3D12_INPUT_LAYOUT_DESC layout;
|
||||
D3D12_INPUT_ELEMENT_DESC elem;
|
||||
struct test_context_desc desc;
|
||||
D3D12_VERTEX_BUFFER_VIEW vbv;
|
||||
struct test_context context;
|
||||
ID3D12DescriptorHeap *rtv;
|
||||
ID3D12Resource *vbo;
|
||||
ID3D12Resource *rt;
|
||||
D3D12_VIEWPORT vp;
|
||||
D3D12_RECT sci;
|
||||
|
||||
static const FLOAT vbo_data[] =
|
||||
{
|
||||
-1.0f, -1.0f, 0.5f, 1.0f,
|
||||
+3.0f, -1.0f, 0.5f, 1.0f,
|
||||
-1.0f, +3.0f, 0.5f, 1.0f,
|
||||
};
|
||||
|
||||
static const DWORD vs_code[] =
|
||||
{
|
||||
#if 0
|
||||
float4 main(float4 a : A) : SV_Position
|
||||
{
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xecd820c8, 0x89ee4b40, 0xb73efa73, 0x4ed91573, 0x00000001, 0x000000d4, 0x00000003,
|
||||
0x0000002c, 0x00000058, 0x0000008c, 0x4e475349, 0x00000024, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0xabab0041, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000040, 0x00010050, 0x00000010, 0x0100086a,
|
||||
0x0300005f, 0x001010f2, 0x00000000, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x05000036,
|
||||
0x001020f2, 0x00000000, 0x00101e46, 0x00000000, 0x0100003e,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
|
||||
|
||||
layout.NumElements = 1;
|
||||
layout.pInputElementDescs = &elem;
|
||||
memset(&elem, 0, sizeof(elem));
|
||||
elem.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
elem.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
|
||||
elem.SemanticName = "A";
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_pipeline = true;
|
||||
desc.no_root_signature = true;
|
||||
desc.no_render_target = true;
|
||||
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
init_depth_stencil(&ds, context.device, 1, 1, 1, 1, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_D32_FLOAT, NULL);
|
||||
rt = create_default_texture2d(context.device, 1, 1, 1, 1, DXGI_FORMAT_R32_FLOAT,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
|
||||
rtv = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1);
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
rs_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
|
||||
init_pipeline_state_desc(&pso, context.root_signature, DXGI_FORMAT_R8G8B8A8_UNORM, &vs, NULL, &layout);
|
||||
pso.DSVFormat = DXGI_FORMAT_D32_FLOAT;
|
||||
pso.DepthStencilState.DepthEnable = TRUE;
|
||||
pso.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
|
||||
pso.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
|
||||
pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pso.PS.BytecodeLength = 0;
|
||||
pso.PS.pShaderBytecode = NULL;
|
||||
|
||||
rtv_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(rtv);
|
||||
|
||||
ID3D12Device_CreateGraphicsPipelineState(context.device, &pso, &IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ID3D12Device_CreateRenderTargetView(context.device, rt, NULL, rtv_handle);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(context.list, rtv_handle, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
|
||||
set_viewport(&vp, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(context.list, 1, &vp);
|
||||
set_rect(&sci, 0, 0, 1, 1);
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(context.list, 1, &sci);
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(context.list, 1, &rtv_handle, TRUE, &ds.dsv_handle);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(context.list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
|
||||
vbo = create_upload_buffer(context.device, sizeof(vbo_data), vbo_data);
|
||||
vbv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo);
|
||||
vbv.SizeInBytes = sizeof(vbo_data);
|
||||
vbv.StrideInBytes = 16;
|
||||
ID3D12GraphicsCommandList_IASetVertexBuffers(context.list, 0, 1, &vbv);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(context.list, 3, 1, 0, 0);
|
||||
|
||||
transition_resource_state(context.list, rt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
/* Verify depth buffer was written to. */
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.5f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
/* Verify that the invalid R32_FLOAT RTV was just ignored. */
|
||||
check_sub_resource_float(rt, 0, context.queue, context.list, 100.0f, 0);
|
||||
|
||||
ID3D12Resource_Release(rt);
|
||||
ID3D12Resource_Release(vbo);
|
||||
ID3D12DescriptorHeap_Release(rtv);
|
||||
destroy_depth_stencil(&ds);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ void test_get_cached_blob(void)
|
|||
{
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC compute_desc;
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
ID3D12RootSignature *root_signature_alt;
|
||||
ID3D12RootSignature *root_signature;
|
||||
struct test_context context;
|
||||
ID3D12PipelineState *state;
|
||||
|
@ -45,6 +46,18 @@ void test_get_cached_blob(void)
|
|||
0x0400009b, 0x00000001, 0x00000001, 0x00000001, 0x0100003e,
|
||||
};
|
||||
|
||||
#if 0
|
||||
[numthreads(2,1,1)]
|
||||
void main() { }
|
||||
#endif
|
||||
static const DWORD cs_dxbc_2[] =
|
||||
{
|
||||
0x43425844, 0xcdd3f1fb, 0x7e892d91, 0xe5a2ea15, 0xab4fc56d, 0x00000001, 0x00000074, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000020, 0x00050050, 0x00000008, 0x0100086a,
|
||||
0x0400009b, 0x00000002, 0x00000001, 0x00000001, 0x0100003e,
|
||||
};
|
||||
|
||||
if (!init_test_context(&context, NULL))
|
||||
return;
|
||||
|
||||
|
@ -54,6 +67,10 @@ void test_get_cached_blob(void)
|
|||
hr = create_root_signature(device, &root_signature_desc, &root_signature);
|
||||
ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr);
|
||||
|
||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
|
||||
hr = create_root_signature(device, &root_signature_desc, &root_signature_alt);
|
||||
ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr);
|
||||
|
||||
memset(&compute_desc, 0, sizeof(compute_desc));
|
||||
compute_desc.pRootSignature = root_signature;
|
||||
compute_desc.CS.pShaderBytecode = cs_dxbc;
|
||||
|
@ -77,7 +94,24 @@ void test_get_cached_blob(void)
|
|||
ok(hr == S_OK, "Failed to create compute pipeline, hr %#x.\n", hr);
|
||||
|
||||
ID3D12PipelineState_Release(state);
|
||||
|
||||
/* Using mismatched shader code must fail. */
|
||||
compute_desc.CS.pShaderBytecode = cs_dxbc_2;
|
||||
compute_desc.CS.BytecodeLength = sizeof(cs_dxbc_2);
|
||||
hr = ID3D12Device_CreateComputePipelineState(device,
|
||||
&compute_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
|
||||
|
||||
/* Using mismatched root signature must fail. */
|
||||
compute_desc.CS.pShaderBytecode = cs_dxbc;
|
||||
compute_desc.CS.BytecodeLength = sizeof(cs_dxbc);
|
||||
compute_desc.pRootSignature = root_signature_alt;
|
||||
hr = ID3D12Device_CreateComputePipelineState(device,
|
||||
&compute_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
|
||||
|
||||
ID3D12RootSignature_Release(root_signature);
|
||||
ID3D12RootSignature_Release(root_signature_alt);
|
||||
|
||||
ID3D10Blob_Release(blob);
|
||||
destroy_test_context(&context);
|
||||
|
@ -91,11 +125,15 @@ void test_pipeline_library(void)
|
|||
ID3D12PipelineLibrary *pipeline_library;
|
||||
ID3D12RootSignature *root_signature;
|
||||
struct test_context context;
|
||||
ID3D12PipelineState *state3;
|
||||
ID3D12PipelineState *state2;
|
||||
ID3D12PipelineState *state;
|
||||
ULONG reference_refcount;
|
||||
size_t serialized_size;
|
||||
ID3D12Device1 *device1;
|
||||
void *serialized_data;
|
||||
ID3D12Device *device;
|
||||
ID3D12Fence *fence;
|
||||
HRESULT hr;
|
||||
|
||||
#if 0
|
||||
|
@ -156,7 +194,11 @@ void test_pipeline_library(void)
|
|||
|
||||
/* Test adding pipelines to an empty pipeline library */
|
||||
hr = ID3D12Device1_CreatePipelineLibrary(device1, NULL, 0, &IID_ID3D12PipelineLibrary, (void**)&pipeline_library);
|
||||
ok(hr == S_OK, "Failed to create pipeline library, hr %#x.\n");
|
||||
ok(hr == S_OK, "Failed to create pipeline library, hr %#x.\n", hr);
|
||||
|
||||
/* ppData == NULL means a query */
|
||||
hr = ID3D12Device1_CreatePipelineLibrary(device1, NULL, 0, NULL, NULL);
|
||||
ok(hr == S_FALSE, "Failed to query pipeline library, hr %#x.\n", hr);
|
||||
|
||||
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
|
||||
hr = create_root_signature(device, &root_signature_desc, &root_signature);
|
||||
|
@ -204,11 +246,20 @@ void test_pipeline_library(void)
|
|||
ok(hr == S_OK, "Failed to create graphics pipeline, hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, graphics_name, state);
|
||||
ok(hr == S_OK, "Failed to store compute pipeline, hr %x.\n", hr);
|
||||
ok(hr == S_OK, "Failed to store graphics pipeline, hr %x.\n", hr);
|
||||
|
||||
/* Try to load PSO after a Store. Verify that we have a ref-count. */
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library, graphics_name, &graphics_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&state2);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline, hr %x.\n", hr);
|
||||
ok(state == state2, "Resulting PSOs must point to same object.\n");
|
||||
ok(get_refcount(state2) == 2, "Refcount %u != 2.\n", get_refcount(state2));
|
||||
|
||||
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, compute_name, state);
|
||||
ok(hr == E_INVALIDARG, "Storing pipeline with already existing name succeeded, hr %x.\n", hr);
|
||||
|
||||
ID3D12PipelineState_Release(state);
|
||||
ID3D12PipelineState_Release(state2);
|
||||
|
||||
/* Test looking up pipelines in a new pipeline library */
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
|
@ -221,6 +272,13 @@ void test_pipeline_library(void)
|
|||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(state);
|
||||
|
||||
/* Verify that modifying a PSO description must be invalidated by runtime. */
|
||||
graphics_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
graphics_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
|
||||
serialized_size = ID3D12PipelineLibrary_GetSerializedSize(pipeline_library);
|
||||
ok(serialized_size > 0, "Serialized size for pipeline library is 0.\n");
|
||||
|
||||
|
@ -238,24 +296,78 @@ void test_pipeline_library(void)
|
|||
serialized_size, &IID_ID3D12PipelineLibrary, (void**)&pipeline_library);
|
||||
ok(hr == S_OK, "Failed to create pipeline library, hr %#x.\n");
|
||||
|
||||
/* Verify that PSO library must internally ref-count a unique PSO. */
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(state);
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state2);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state3);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
|
||||
ok(state == state2 && state == state3, "Resulting PSOs must point to same object.\n");
|
||||
ok(get_refcount(state) == 3, "Refcount %u != 3.\n", get_refcount(state));
|
||||
ok(get_refcount(state2) == 3, "Refcount %u != 3.\n", get_refcount(state2));
|
||||
ok(get_refcount(state3) == 3, "Refcount %u != 3.\n", get_refcount(state3));
|
||||
ID3D12PipelineState_Release(state);
|
||||
ID3D12PipelineState_Release(state2);
|
||||
ID3D12PipelineState_Release(state3);
|
||||
|
||||
reference_refcount = get_refcount(context.device);
|
||||
|
||||
/* Verify that PSO library must internally ref-count a unique PSO. */
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state2);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state3);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
|
||||
ok(get_refcount(context.device) == reference_refcount + 1, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 1);
|
||||
ID3D12Device_CreateFence(context.device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&fence);
|
||||
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
|
||||
|
||||
ID3D12PipelineState_SetPrivateDataInterface(state, &IID_ID3D12Fence, (const IUnknown *)fence);
|
||||
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
|
||||
|
||||
ok(state == state2 && state == state3, "Resulting PSOs must point to same object.\n");
|
||||
ok(state && get_refcount(state) == 3, "Refcount %u != 3.\n", get_refcount(state));
|
||||
ok(state2 && get_refcount(state2) == 3, "Refcount %u != 3.\n", get_refcount(state2));
|
||||
ok(state3 && get_refcount(state3) == 3, "Refcount %u != 3.\n", get_refcount(state3));
|
||||
ID3D12PipelineState_Release(state);
|
||||
ID3D12PipelineState_Release(state2);
|
||||
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
|
||||
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
|
||||
ok(ID3D12PipelineState_Release(state3) == 0, "Refcount did not hit 0.\n");
|
||||
/* Releasing the last public reference does not release private data. */
|
||||
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
|
||||
/* Device ref count does release however ... */
|
||||
ok(get_refcount(context.device) == reference_refcount + 1, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 1);
|
||||
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state2);
|
||||
/* Device ref count increases here again. */
|
||||
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
ok(state == state2, "Reloading dead PSO must point to same object.\n");
|
||||
ID3D12PipelineState_Release(state2);
|
||||
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
graphics_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
todo ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
|
||||
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(state);
|
||||
|
||||
ID3D12PipelineLibrary_Release(pipeline_library);
|
||||
/* This should release the fence reference. */
|
||||
ok(get_refcount(fence) == 1, "Refcount %u != 1.\n", get_refcount(fence));
|
||||
ID3D12Fence_Release(fence);
|
||||
|
||||
free(serialized_data);
|
||||
ID3D12RootSignature_Release(root_signature);
|
||||
|
|
|
@ -77,7 +77,7 @@ void test_create_query_heap(void)
|
|||
|
||||
void test_query_timestamp(void)
|
||||
{
|
||||
uint64_t timestamps[4], timestamp_frequency, timestamp_diff, time_diff;
|
||||
UINT64 timestamps[4], timestamp_frequency, timestamp_diff, time_diff;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_QUERY_HEAP_DESC heap_desc;
|
||||
struct test_context_desc desc;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -22,9 +22,106 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "d3d12_crosstest.h"
|
||||
|
||||
void test_unbound_rtv_rendering(void)
|
||||
{
|
||||
static const struct vec4 red = { 1.0f, 0.0f, 0.0f, 1.0f };
|
||||
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rt_handle;
|
||||
struct test_context_desc desc;
|
||||
struct test_context context;
|
||||
ID3D12CommandQueue *queue;
|
||||
ID3D12Resource *fp32_rt;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD ps_code[] =
|
||||
{
|
||||
#if 0
|
||||
Outputs main()
|
||||
{
|
||||
Outputs o;
|
||||
o.col0 = float4(1.0, 0.0, 0.0, 1.0);
|
||||
o.col1 = 0.5;
|
||||
return o;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xbbb26641, 0x99a7dc17, 0xc556a4cd, 0x3aa2843e, 0x00000001, 0x000000ec, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x00000088, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000044, 0x00000002, 0x00000008, 0x00000038, 0x00000000, 0x00000000, 0x00000003, 0x00000000,
|
||||
0x0000000f, 0x00000038, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x545f5653,
|
||||
0x65677261, 0xabab0074, 0x58454853, 0x0000005c, 0x00000050, 0x00000017, 0x0100086a, 0x03000065,
|
||||
0x001020f2, 0x00000000, 0x03000065, 0x00102012, 0x00000001, 0x08000036, 0x001020f2, 0x00000000,
|
||||
0x00004002, 0x3f800000, 0x00000000, 0x00000000, 0x3f800000, 0x05000036, 0x00102012, 0x00000001,
|
||||
0x00004001, 0x3f000000, 0x0100003e,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.rt_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
desc.rt_width = 32;
|
||||
desc.rt_height = 32;
|
||||
desc.rt_descriptor_count = 2;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
fp32_rt = create_default_texture2d(context.device, 32, 32,
|
||||
1, 1, DXGI_FORMAT_R32_FLOAT, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
|
||||
rt_handle = context.rtv;
|
||||
rt_handle.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
||||
ID3D12Device_CreateRenderTargetView(context.device, fp32_rt, NULL, rt_handle);
|
||||
|
||||
/* Apparently, rendering to an NULL RTV is fine. D3D12 validation does not complain about this case at all. */
|
||||
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, NULL, &ps, NULL);
|
||||
pso_desc.NumRenderTargets = 2;
|
||||
pso_desc.RTVFormats[0] = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
pso_desc.RTVFormats[1] = DXGI_FORMAT_R32_FLOAT;
|
||||
pso_desc.DSVFormat = DXGI_FORMAT_UNKNOWN;
|
||||
pso_desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
|
||||
pso_desc.BlendState.RenderTarget[1].RenderTargetWriteMask = 0xf;
|
||||
pso_desc.DepthStencilState.DepthEnable = false;
|
||||
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
|
||||
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rt_handle, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.5f, 0.5f);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
|
||||
|
||||
/* First, render to both RTs, but then only render to 1 RT. */
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 2, &context.rtv, true, NULL);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
|
||||
|
||||
transition_resource_state(command_list, context.render_target,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
transition_resource_state(command_list, fp32_rt,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &red, 0);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
check_sub_resource_float(fp32_rt, 0, queue, command_list, 0.5f, 0);
|
||||
ID3D12Resource_Release(fp32_rt);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_unknown_rtv_format(void)
|
||||
{
|
||||
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const struct vec4 vec4_white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
struct vec4 expected_vec4 = {0.0f, 0.0f, 0.0f, 1.0f};
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
|
@ -89,7 +186,7 @@ void test_unknown_rtv_format(void)
|
|||
create_render_target(&context, &desc, &render_targets[1], &rtvs[2]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(rtvs); ++i)
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], white, 0, NULL);
|
||||
|
||||
/* NULL RTV */
|
||||
memset(&rtv_desc, 0, sizeof(rtv_desc));
|
||||
|
@ -116,7 +213,7 @@ void test_unknown_rtv_format(void)
|
|||
transition_resource_state(command_list, render_targets[1],
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &white, 0);
|
||||
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &vec4_white, 0);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
expected_vec4.x = 2.0f;
|
||||
check_sub_resource_vec4(render_targets[0], 0, queue, command_list, &expected_vec4, 0);
|
||||
|
@ -341,6 +438,7 @@ void test_depth_stencil_test_no_dsv(void)
|
|||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
struct depth_stencil_resource ds;
|
||||
struct test_context_desc desc;
|
||||
struct test_context context;
|
||||
ID3D12CommandQueue *queue;
|
||||
|
@ -380,6 +478,8 @@ void test_depth_stencil_test_no_dsv(void)
|
|||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
init_depth_stencil(&ds, context.device, 32, 32, 1, 1, DXGI_FORMAT_D32_FLOAT, 0, NULL);
|
||||
|
||||
context.root_signature = create_32bit_constants_root_signature(context.device,
|
||||
0, 4, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
|
||||
|
@ -387,14 +487,16 @@ void test_depth_stencil_test_no_dsv(void)
|
|||
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;
|
||||
pso_desc.DepthStencilState.DepthEnable = true;
|
||||
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
|
||||
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_EQUAL;
|
||||
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create graphics pipeline state, hr %#x.\n", hr);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle, D3D12_CLEAR_FLAG_DEPTH,
|
||||
1.0f, 0, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, &ds.dsv_handle);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
|
@ -410,6 +512,9 @@ void test_depth_stencil_test_no_dsv(void)
|
|||
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
|
||||
|
||||
/* Now, dynamically disable the depth attachment. */
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
|
||||
/* Native behavior seems to be that depth test is just disabled entirely here.
|
||||
* This last draw is the color we should get on NV at least.
|
||||
* D3D12 validation layers report errors here of course,
|
||||
|
@ -423,8 +528,9 @@ void test_depth_stencil_test_no_dsv(void)
|
|||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
/* vkd3d-proton just skips the draw call in this situation.
|
||||
* At least test that we don't crash. */
|
||||
todo check_sub_resource_vec4(context.render_target, 0, queue, command_list, &blue, 0);
|
||||
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &blue, 0);
|
||||
|
||||
destroy_depth_stencil(&ds);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
|
|
|
@ -122,6 +122,13 @@ void test_create_committed_resource(void)
|
|||
ID3D12Resource_Release(resource);
|
||||
resource_desc.MipLevels = 1;
|
||||
|
||||
resource_desc.SampleDesc.Count = 0;
|
||||
hr = ID3D12Device_CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
&resource_desc, D3D12_RESOURCE_STATE_RENDER_TARGET, &clear_value,
|
||||
&IID_ID3D12Resource, (void **)&resource);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
resource_desc.SampleDesc.Count = 1;
|
||||
|
||||
hr = ID3D12Device_CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
&resource_desc, D3D12_RESOURCE_STATE_RENDER_TARGET | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
|
||||
&clear_value, &IID_ID3D12Resource, (void **)&resource);
|
||||
|
@ -1349,8 +1356,8 @@ void test_get_copyable_footprints_planar(void)
|
|||
void test_get_copyable_footprints(void)
|
||||
{
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layouts[10];
|
||||
uint64_t row_sizes[10], total_size;
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
UINT64 row_sizes[10], total_size;
|
||||
unsigned int sub_resource_count;
|
||||
unsigned int i, j, k, l;
|
||||
ID3D12Device *device;
|
||||
|
@ -1860,6 +1867,358 @@ void test_suballocate_small_textures(void)
|
|||
ok(!refcount, "ID3D12Device has %u references left.\n", (unsigned int)refcount);
|
||||
}
|
||||
|
||||
void test_read_subresource_rt(void)
|
||||
{
|
||||
const FLOAT white[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE desc_handle;
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
ID3D12DescriptorHeap *desc_heap;
|
||||
struct test_context_desc desc;
|
||||
struct test_context context;
|
||||
ID3D12Resource *resource;
|
||||
uint32_t pixels[4 * 4];
|
||||
ID3D12Device *device;
|
||||
D3D12_RECT rect;
|
||||
uint32_t pixel;
|
||||
uint32_t x, y;
|
||||
D3D12_BOX box;
|
||||
HRESULT hr;
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_pipeline = true;
|
||||
desc.no_render_target = true;
|
||||
desc.no_render_target = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
device = context.device;
|
||||
|
||||
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
resource_desc.Alignment = 0;
|
||||
resource_desc.Width = 4;
|
||||
resource_desc.Height = 4;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
resource_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
resource_desc.SampleDesc.Count = 1;
|
||||
resource_desc.SampleDesc.Quality = 0;
|
||||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
|
||||
memset(&heap_properties, 0, sizeof(heap_properties));
|
||||
heap_properties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
|
||||
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
hr = ID3D12Device_CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
&resource_desc, D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&resource);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Cannot create CPU accessible render target. Skipping test.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
pixel = 0x80808080;
|
||||
ID3D12Resource_Map(resource, 0, NULL, NULL);
|
||||
for (y = 0; y < 4; y++)
|
||||
{
|
||||
for (x = 0; x < 4; x++)
|
||||
{
|
||||
set_box(&box, x, y, 0, x + 1, y + 1, 1);
|
||||
ID3D12Resource_WriteToSubresource(resource, 0, &box, &pixel,
|
||||
sizeof(uint32_t), sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
ID3D12Resource_Unmap(resource, 0, NULL);
|
||||
|
||||
desc_heap = create_cpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1);
|
||||
desc_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(desc_heap);
|
||||
ID3D12Device_CreateRenderTargetView(device, resource, NULL, desc_handle);
|
||||
transition_resource_state(context.list, resource,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
|
||||
for (x = 0; x < 4; x++)
|
||||
{
|
||||
set_rect(&rect, x, x, x + 1, x + 1);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(context.list, desc_handle, white, 1, &rect);
|
||||
}
|
||||
|
||||
transition_resource_state(context.list, resource,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COMMON);
|
||||
|
||||
ID3D12GraphicsCommandList_Close(context.list);
|
||||
exec_command_list(context.queue, context.list);
|
||||
wait_queue_idle(device, context.queue);
|
||||
|
||||
ID3D12Resource_Map(resource, 0, NULL, NULL);
|
||||
set_box(&box, 0, 0, 0, 4, 4, 1);
|
||||
ID3D12Resource_ReadFromSubresource(resource, pixels,
|
||||
4 * sizeof(uint32_t), 16 * sizeof(uint32_t), 0, &box);
|
||||
ID3D12Resource_Unmap(resource, 0, NULL);
|
||||
|
||||
for (y = 0; y < 4; y++)
|
||||
{
|
||||
for (x = 0; x < 4; x++)
|
||||
{
|
||||
uint32_t expected = x == y ? UINT32_MAX : pixel;
|
||||
ok(pixels[y * 4 + x] == expected, "Pixel %u, %u: %#x != %#x\n", x, y, pixels[y * 4 + x], expected);
|
||||
}
|
||||
}
|
||||
|
||||
ID3D12DescriptorHeap_Release(desc_heap);
|
||||
ID3D12Resource_Release(resource);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
/* Reduced test case which runs on more implementations. */
|
||||
void test_read_write_subresource_2d(void)
|
||||
{
|
||||
D3D12_TEXTURE_COPY_LOCATION src_location, dst_location;
|
||||
uint32_t *dst_buffer, *zero_buffer, *ptr;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_SUBRESOURCE_DATA texture_data;
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
struct test_context_desc desc;
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12Resource *src_texture;
|
||||
ID3D12Resource *dst_texture;
|
||||
ID3D12CommandQueue *queue;
|
||||
ID3D12Resource *rb_buffer;
|
||||
unsigned int buffer_size;
|
||||
unsigned int slice_pitch;
|
||||
unsigned int row_pitch;
|
||||
uint32_t got, expected;
|
||||
unsigned int x, y, i;
|
||||
ID3D12Device *device;
|
||||
D3D12_BOX box;
|
||||
HRESULT hr;
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_render_target = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
device = context.device;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
row_pitch = 128 * sizeof(unsigned int);
|
||||
slice_pitch = row_pitch * 100;
|
||||
buffer_size = slice_pitch * 1;
|
||||
|
||||
/* Buffers are not supported */
|
||||
rb_buffer = create_readback_buffer(device, buffer_size);
|
||||
dst_buffer = malloc(buffer_size);
|
||||
ok(dst_buffer, "Failed to allocate memory.\n");
|
||||
zero_buffer = malloc(buffer_size);
|
||||
ok(zero_buffer, "Failed to allocate memory.\n");
|
||||
memset(zero_buffer, 0, buffer_size);
|
||||
|
||||
set_box(&box, 0, 0, 0, 1, 1, 1);
|
||||
hr = ID3D12Resource_WriteToSubresource(rb_buffer, 0, &box, dst_buffer, row_pitch, slice_pitch);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12Resource_ReadFromSubresource(rb_buffer, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
ID3D12Resource_Release(rb_buffer);
|
||||
|
||||
/* Only texture on custom heaps is legal for ReadFromSubresource/WriteToSubresource */
|
||||
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
resource_desc.Alignment = 0;
|
||||
resource_desc.Width = 128;
|
||||
resource_desc.Height = 100;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
resource_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
resource_desc.SampleDesc.Count = 1;
|
||||
resource_desc.SampleDesc.Quality = 0;
|
||||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
resource_desc.Flags = 0;
|
||||
|
||||
memset(&heap_properties, 0, sizeof(heap_properties));
|
||||
heap_properties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
|
||||
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
hr = ID3D12Device_CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
&resource_desc, D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&src_texture);
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create texture on custom heap.\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Invalid box */
|
||||
set_box(&box, 0, 0, 0, 128, 100, 2);
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
set_box(&box, 0, 0, 2, 128, 100, 2);
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
set_box(&box, 128, 0, 0, 129, 100, 1);
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
/* NULL box */
|
||||
hr = ID3D12Resource_WriteToSubresource(src_texture, 0, NULL, dst_buffer, row_pitch, slice_pitch);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, NULL);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
/* Empty box */
|
||||
set_box(&box, 128, 100, 1, 128, 100, 1);
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
set_box(&box, 0, 0, 0, 0, 0, 0);
|
||||
hr = ID3D12Resource_WriteToSubresource(src_texture, 0, &box, dst_buffer, row_pitch, slice_pitch);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
for (y = 0; y < 100; ++y)
|
||||
{
|
||||
for (x = 0; x < 128; ++x)
|
||||
{
|
||||
ptr = &dst_buffer[y * 128 + x];
|
||||
if (x < 2 && y < 2) /* Region 1 */
|
||||
*ptr = (y + 1) << 8 | (x + 1);
|
||||
else if (2 <= x && x < 11 && 2 <= y && y < 13) /* Region 2 */
|
||||
*ptr = (y + 2) << 8 | (x + 2);
|
||||
else
|
||||
*ptr = 0xdeadbeef;
|
||||
}
|
||||
}
|
||||
|
||||
if (i)
|
||||
{
|
||||
hr = ID3D12Resource_WriteToSubresource(src_texture, 0, NULL, zero_buffer, row_pitch, slice_pitch);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
/* Write region 1 */
|
||||
set_box(&box, 0, 0, 0, 2, 2, 1);
|
||||
hr = ID3D12Resource_WriteToSubresource(src_texture, 0, &box, dst_buffer, row_pitch, slice_pitch);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
/* Write region 2 */
|
||||
set_box(&box, 2, 2, 0, 11, 13, 1);
|
||||
hr = ID3D12Resource_WriteToSubresource(src_texture, 0, &box, &dst_buffer[2 * 128 + 2],
|
||||
row_pitch, slice_pitch);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Upload the test data */
|
||||
transition_resource_state(command_list, src_texture,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
texture_data.pData = dst_buffer;
|
||||
texture_data.RowPitch = row_pitch;
|
||||
texture_data.SlicePitch = slice_pitch;
|
||||
upload_texture_data(src_texture, &texture_data, 1, queue, command_list);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
transition_resource_state(command_list, src_texture,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON);
|
||||
}
|
||||
|
||||
memset(dst_buffer, 0, buffer_size);
|
||||
|
||||
/* Read region 1 */
|
||||
set_box(&box, 0, 0, 0, 2, 2, 1);
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, dst_buffer, row_pitch, slice_pitch, 0, &box);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
/* Read region 2 */
|
||||
set_box(&box, 2, 2, 0, 11, 13, 1);
|
||||
hr = ID3D12Resource_ReadFromSubresource(src_texture, &dst_buffer[2 * 128 + 2], row_pitch,
|
||||
slice_pitch, 0, &box);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
||||
for (y = 0; y < 100; ++y)
|
||||
{
|
||||
for (x = 0; x < 128; ++x)
|
||||
{
|
||||
if (x < 2 && y < 2) /* Region 1 */
|
||||
expected = (y + 1) << 8 | (x + 1);
|
||||
else if (2 <= x && x < 11 && 2 <= y && y < 13) /* Region 2 */
|
||||
expected = (y + 2) << 8 | (x + 2);
|
||||
else /* Untouched */
|
||||
expected = 0;
|
||||
|
||||
got = dst_buffer[y * 128 + x];
|
||||
if (got != expected)
|
||||
break;
|
||||
}
|
||||
if (got != expected)
|
||||
break;
|
||||
}
|
||||
ok(got == expected, "Got unexpected value 0x%08x at (%u, %u), expected 0x%08x.\n", got, x, y, expected);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
/* Test layout is the same */
|
||||
dst_texture = create_default_texture2d(device, 128, 100, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
memset(dst_buffer, 0, buffer_size);
|
||||
texture_data.pData = dst_buffer;
|
||||
texture_data.RowPitch = row_pitch;
|
||||
texture_data.SlicePitch = slice_pitch;
|
||||
upload_texture_data(dst_texture, &texture_data, 1, queue, command_list);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
src_location.pResource = src_texture;
|
||||
src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
||||
src_location.SubresourceIndex = 0;
|
||||
dst_location.pResource = dst_texture;
|
||||
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
||||
dst_location.SubresourceIndex = 0;
|
||||
set_box(&box, 0, 0, 0, 128, 100, 1);
|
||||
ID3D12GraphicsCommandList_CopyTextureRegion(command_list, &dst_location, 0, 0, 0, &src_location, &box);
|
||||
|
||||
transition_resource_state(command_list, dst_texture,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_texture_readback_with_command_list(dst_texture, 0, &rb, queue, command_list);
|
||||
for (y = 0; y < 100; ++y)
|
||||
{
|
||||
for (x = 0; x < 128; ++x)
|
||||
{
|
||||
if (x < 2 && y < 2) /* Region 1 */
|
||||
expected = (y + 1) << 8 | (x + 1);
|
||||
else if (2 <= x && x < 11 && 2 <= y && y < 13) /* Region 2 */
|
||||
expected = (y + 2) << 8 | (x + 2);
|
||||
else /* Untouched */
|
||||
expected = 0;
|
||||
|
||||
got = get_readback_uint(&rb, x, y, 0);
|
||||
if (got != expected)
|
||||
break;
|
||||
}
|
||||
if (got != expected)
|
||||
break;
|
||||
}
|
||||
ok(got == expected, "Got unexpected value 0x%08x at (%u, %u), expected 0x%08x.\n", got, x, y, expected);
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Resource_Release(src_texture);
|
||||
ID3D12Resource_Release(dst_texture);
|
||||
|
||||
done:
|
||||
free(dst_buffer);
|
||||
free(zero_buffer);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_read_write_subresource(void)
|
||||
{
|
||||
D3D12_TEXTURE_COPY_LOCATION src_location, dst_location;
|
||||
|
@ -2279,7 +2638,8 @@ void test_stress_suballocation_thread(void *userdata)
|
|||
{
|
||||
/* Randomly allocate heaps and place a buffer on top of it. */
|
||||
alloc_heap = rand_r(&seed) % 2 == 0;
|
||||
alloc_size = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT * (1 + rand_r(&seed) % 20);
|
||||
/* Ensures we sometimes hit dedicated allocation paths. (2 MiB limit). */
|
||||
alloc_size = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT * (1 + rand_r(&seed) % 40);
|
||||
keep_alive = rand_r(&seed) % 2 == 0;
|
||||
|
||||
if (buffers[i] && keep_alive)
|
||||
|
@ -2425,6 +2785,101 @@ void test_stress_suballocation_thread(void *userdata)
|
|||
#undef rand_r
|
||||
}
|
||||
|
||||
void test_stress_fallback_render_target_allocation_device(void)
|
||||
{
|
||||
D3D12_RESOURCE_ALLOCATION_INFO alloc_info;
|
||||
struct test_context context;
|
||||
D3D12_HEAP_DESC heap_desc;
|
||||
D3D12_RESOURCE_DESC desc;
|
||||
ID3D12Resource *resource;
|
||||
ID3D12Heap *heaps[1024];
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
/* Spam allocate enough that we should exhaust VRAM and require fallbacks to system memory.
|
||||
* Verify that we don't collapse in such a situation.
|
||||
* Render targets hit some particular edge cases on NV that we should focus on testing. */
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.Width = 2048;
|
||||
desc.Height = 2048;
|
||||
desc.DepthOrArraySize = 1;
|
||||
desc.MipLevels = 1;
|
||||
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
desc.SampleDesc.Count = 1;
|
||||
|
||||
alloc_info = ID3D12Device_GetResourceAllocationInfo(context.device, 0, 1, &desc);
|
||||
|
||||
memset(&heap_desc, 0, sizeof(heap_desc));
|
||||
heap_desc.SizeInBytes = alloc_info.SizeInBytes;
|
||||
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
|
||||
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
memset(heaps, 0, sizeof(heaps));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(heaps); i++)
|
||||
{
|
||||
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heaps[i]);
|
||||
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
|
||||
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = ID3D12Device_CreatePlacedResource(context.device, heaps[i], 0, &desc,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, NULL, &IID_ID3D12Resource, (void**)&resource);
|
||||
ok(SUCCEEDED(hr), "Failed to place resource, hr #%x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12Resource_Release(resource);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(heaps); i++)
|
||||
if (heaps[i])
|
||||
ID3D12Heap_Release(heaps[i]);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_stress_suballocation_rebar(void)
|
||||
{
|
||||
ID3D12Resource *resources_suballocate[4096];
|
||||
ID3D12Resource *resources_direct[1024];
|
||||
struct test_context context;
|
||||
unsigned int i;
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
/* Spam allocate enough that we should either exhaust small BAR, or our budget.
|
||||
* Verify that we don't collapse in such a situation. */
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(resources_suballocate); i++)
|
||||
{
|
||||
resources_suballocate[i] = create_upload_buffer(context.device, 256 * 1024, NULL);
|
||||
ok(!!resources_suballocate[i], "Failed to create buffer.\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(resources_suballocate); i++)
|
||||
if (resources_suballocate[i])
|
||||
ID3D12Resource_Release(resources_suballocate[i]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(resources_direct); i++)
|
||||
{
|
||||
resources_direct[i] = create_upload_buffer(context.device, 2 * 1024 * 1024, NULL);
|
||||
ok(!!resources_direct[i], "Failed to create buffer.\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(resources_direct); i++)
|
||||
if (resources_direct[i])
|
||||
ID3D12Resource_Release(resources_direct[i]);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_stress_suballocation(void)
|
||||
{
|
||||
struct suballocation_thread_data data;
|
||||
|
@ -2616,3 +3071,322 @@ void test_placed_image_alignment(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_map_texture_validation(void)
|
||||
{
|
||||
D3D12_RESOURCE_ALLOCATION_INFO alloc_info;
|
||||
D3D12_HEAP_PROPERTIES heap_props;
|
||||
struct test_context context;
|
||||
D3D12_HEAP_DESC heap_desc;
|
||||
bool todo_host_visible_rt;
|
||||
D3D12_RESOURCE_DESC desc;
|
||||
ID3D12Resource *resource;
|
||||
ID3D12Device *device;
|
||||
ID3D12Heap *heap;
|
||||
void *mapped_ptr;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
struct test
|
||||
{
|
||||
D3D12_HEAP_FLAGS heap_flags;
|
||||
D3D12_TEXTURE_LAYOUT layout;
|
||||
D3D12_RESOURCE_DIMENSION dimension;
|
||||
UINT mip_levels;
|
||||
UINT depth_or_array_size;
|
||||
D3D12_RESOURCE_FLAGS flags;
|
||||
D3D12_CPU_PAGE_PROPERTY page_property;
|
||||
HRESULT heap_creation_hr;
|
||||
HRESULT creation_hr;
|
||||
HRESULT map_hr_with_ppdata;
|
||||
HRESULT map_hr_without_ppdata;
|
||||
bool custom_heap;
|
||||
bool is_todo;
|
||||
};
|
||||
|
||||
/* Various weird cases all come together to make mapping ROW_MAJOR textures impossible in D3D12. */
|
||||
static const struct test tests[] =
|
||||
{
|
||||
/* MipLevel 2 not allowed. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
2, 1, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
|
||||
E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, true },
|
||||
|
||||
/* LayerCount 2 not allowed. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
1, 2, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
|
||||
E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, true },
|
||||
|
||||
/* Need SHARED resource flag. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
|
||||
S_OK, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, true },
|
||||
|
||||
/* WRITE_BACK not allowed. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
|
||||
E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, true },
|
||||
|
||||
/* OK, but cannot map. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE,
|
||||
S_OK, S_OK, E_INVALIDARG, E_INVALIDARG, true, true },
|
||||
|
||||
/* 1D texture not allowed. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE1D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE,
|
||||
S_OK, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, true },
|
||||
|
||||
/* 3D texture not allowed. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE3D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE,
|
||||
S_OK, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, true },
|
||||
|
||||
/* UPLOAD heap not allowed. */
|
||||
{ D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER | D3D12_HEAP_FLAG_SHARED,
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER,
|
||||
D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, false },
|
||||
|
||||
/* UPLOAD heap not allowed. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_UNKNOWN,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
2, 2, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
S_OK, E_INVALIDARG, E_INVALIDARG, E_INVALIDARG, false },
|
||||
|
||||
/* Allowed, but cannot get concrete pointer.
|
||||
* TODO: 1D linear not supported in general. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_UNKNOWN,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE1D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
|
||||
S_OK, S_OK, E_INVALIDARG, S_OK, true, true },
|
||||
|
||||
/* Allowed, but cannot get concrete pointer. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_UNKNOWN,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
1, 1, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
|
||||
S_OK, S_OK, E_INVALIDARG, S_OK, true, true },
|
||||
|
||||
/* Allowed, but cannot get concrete pointer.
|
||||
* TODO: Mipmapped linear not supported in general. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_UNKNOWN,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE2D,
|
||||
2, 2, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
|
||||
S_OK, S_OK, E_INVALIDARG, S_OK, true, true },
|
||||
|
||||
/* Allowed, but cannot map 3D with mip levels > 1.
|
||||
* TODO: 3D linear not supported in general. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_UNKNOWN,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE3D,
|
||||
2, 2, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
|
||||
S_OK, S_OK, E_INVALIDARG, E_INVALIDARG, true, true },
|
||||
|
||||
/* Allowed.
|
||||
* TODO: 3D linear not supported in general. */
|
||||
{ D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_TEXTURE_LAYOUT_UNKNOWN,
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE3D,
|
||||
1, 2, D3D12_RESOURCE_FLAG_NONE,
|
||||
D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
|
||||
S_OK, S_OK, E_INVALIDARG, S_OK, true, true },
|
||||
};
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
device = context.device;
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
desc.Width = 64;
|
||||
desc.Height = 1;
|
||||
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
desc.SampleDesc.Count = 1;
|
||||
|
||||
memset(&heap_props, 0, sizeof(heap_props));
|
||||
heap_props.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
heap_props.CPUPageProperty = tests[i].page_property;
|
||||
desc.MipLevels = tests[i].mip_levels;
|
||||
desc.DepthOrArraySize = tests[i].depth_or_array_size;
|
||||
desc.Flags = tests[i].flags;
|
||||
desc.Layout = tests[i].layout;
|
||||
desc.Dimension = tests[i].dimension;
|
||||
|
||||
if (tests[i].custom_heap)
|
||||
{
|
||||
heap_props.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
}
|
||||
else
|
||||
{
|
||||
heap_props.Type = D3D12_HEAP_TYPE_UPLOAD;
|
||||
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
}
|
||||
|
||||
alloc_info = ID3D12Device_GetResourceAllocationInfo(device, 0, 1, &desc);
|
||||
|
||||
if (alloc_info.SizeInBytes != UINT64_MAX)
|
||||
{
|
||||
memset(&heap_desc, 0, sizeof(heap_desc));
|
||||
heap_desc.Properties = heap_props;
|
||||
heap_desc.Flags = tests[i].heap_flags;
|
||||
|
||||
/* According to docs (https://docs.microsoft.com/en-us/windows/win32/direct3d12/shared-heaps),
|
||||
* with SHARED_CROSS_ADAPTER, a heap must be created with ALLOW_ALL_BUFFERS_AND_TEXTURES.
|
||||
* Unsure if this particular case requires HEAP_TIER_2? */
|
||||
if (!(heap_desc.Flags & D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER))
|
||||
heap_desc.Flags |= D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES;
|
||||
heap_desc.SizeInBytes = alloc_info.SizeInBytes;
|
||||
hr = ID3D12Device_CreateHeap(device, &heap_desc, &IID_ID3D12Heap, (void**)&heap);
|
||||
|
||||
/* We cannot successfully create host visible linear RT on all implementations. */
|
||||
todo_host_visible_rt = !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) &&
|
||||
heap_desc.Properties.CPUPageProperty != D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE;
|
||||
todo_if(tests[i].is_todo || todo_host_visible_rt)
|
||||
ok(hr == tests[i].heap_creation_hr, "Unexpected hr %#x.\n", hr);
|
||||
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = ID3D12Device_CreatePlacedResource(device, heap, 0, &desc, D3D12_RESOURCE_STATE_GENERIC_READ,
|
||||
NULL, &IID_ID3D12Resource, (void**)&resource);
|
||||
todo_if(tests[i].is_todo) ok(hr == tests[i].creation_hr, "Unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12Resource_Release(resource);
|
||||
ID3D12Heap_Release(heap);
|
||||
}
|
||||
}
|
||||
|
||||
hr = ID3D12Device_CreateCommittedResource(device, &heap_props, tests[i].heap_flags,
|
||||
&desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource,
|
||||
(void**)&resource);
|
||||
todo_if(tests[i].is_todo) ok(hr == tests[i].creation_hr, "Unexpected hr %#x.\n", hr);
|
||||
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = ID3D12Resource_Map(resource, 0, NULL, &mapped_ptr);
|
||||
ok(hr == tests[i].map_hr_with_ppdata, "Unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12Resource_Unmap(resource, 0, NULL);
|
||||
|
||||
hr = ID3D12Resource_Map(resource, 0, NULL, NULL);
|
||||
ok(hr == tests[i].map_hr_without_ppdata, "Unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12Resource_Unmap(resource, 0, NULL);
|
||||
ID3D12Resource_Release(resource);
|
||||
}
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_aliasing_barrier_edge_cases(void)
|
||||
{
|
||||
const FLOAT color[4] = { 0.0f, 0.0f, 1.0f, 0.0f };
|
||||
D3D12_RESOURCE_ALLOCATION_INFO alloc_info;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rtv[3];
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
D3D12_RESOURCE_BARRIER barrier;
|
||||
struct test_context_desc desc;
|
||||
ID3D12Resource *resources[3];
|
||||
struct test_context context;
|
||||
ID3D12DescriptorHeap *rtvs;
|
||||
D3D12_HEAP_DESC heap_desc;
|
||||
ID3D12Heap *heap;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_render_target = true;
|
||||
desc.no_pipeline = true;
|
||||
desc.no_root_signature = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
resource_desc.Width = 2048;
|
||||
resource_desc.Height = 2048;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
resource_desc.SampleDesc.Count = 1;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
resource_desc.MipLevels = 1;
|
||||
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||
|
||||
alloc_info = ID3D12Device_GetResourceAllocationInfo(context.device, 0, 1, &resource_desc);
|
||||
|
||||
memset(&heap_desc, 0, sizeof(heap_desc));
|
||||
heap_desc.SizeInBytes = alloc_info.SizeInBytes;
|
||||
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
|
||||
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap);
|
||||
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
|
||||
|
||||
rtvs = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, ARRAY_SIZE(resources));
|
||||
for (i = 0; i < ARRAY_SIZE(resources); i++)
|
||||
{
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(rtvs);
|
||||
h.ptr += i * ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
||||
hr = ID3D12Device_CreatePlacedResource(context.device, heap, 0, &resource_desc, D3D12_RESOURCE_STATE_RENDER_TARGET,
|
||||
NULL, &IID_ID3D12Resource, (void**)&resources[i]);
|
||||
ok(SUCCEEDED(hr), "Failed to create resource, hr #%x.\n", hr);
|
||||
ID3D12Device_CreateRenderTargetView(context.device, resources[i], NULL, h);
|
||||
rtv[i] = h;
|
||||
}
|
||||
|
||||
/* D3D12 validation does not complain about any of this, and it works on native drivers, somehow ...
|
||||
* It's somewhat clear from this that aliasing barrier on its own should not modify any image layout,
|
||||
* we should only consider global memory barriers here. */
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(context.list, rtv[0], color, 0, NULL);
|
||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
|
||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
barrier.Aliasing.pResourceBefore = resources[2];
|
||||
barrier.Aliasing.pResourceAfter = resources[1];
|
||||
ID3D12GraphicsCommandList_ResourceBarrier(context.list, 1, &barrier);
|
||||
transition_resource_state(context.list, resources[0], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_uint(resources[0], 0, context.queue, context.list, 0x00ff0000, 0);
|
||||
|
||||
ID3D12DescriptorHeap_Release(rtvs);
|
||||
for (i = 0; i < ARRAY_SIZE(resources); i++)
|
||||
ID3D12Resource_Release(resources[i]);
|
||||
ID3D12Heap_Release(heap);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,582 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "d3d12_crosstest.h"
|
||||
|
||||
void test_buffers_oob_behavior_vectorized_byte_address(void)
|
||||
{
|
||||
/* Vectorized structured buffers are handled by other tests, but
|
||||
* vectorized byte address buffers are particularly
|
||||
* weird due to component based robustness.
|
||||
* Intended to trip vectorized load-store optimizations in dxil-spirv. */
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
D3D12_FEATURE_DATA_SHADER_MODEL shader_model;
|
||||
D3D12_DESCRIPTOR_RANGE descriptor_ranges[1];
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS4 options4;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_ROOT_PARAMETER root_parameters[1];
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle;
|
||||
ID3D12PipelineState *write_pso_32bit;
|
||||
ID3D12PipelineState *write_pso_16bit;
|
||||
ID3D12PipelineState *read_pso_32bit;
|
||||
ID3D12PipelineState *read_pso_16bit;
|
||||
unsigned int descriptor_size, i, j;
|
||||
ID3D12Resource *read_output_buffer;
|
||||
ID3D12DescriptorHeap *gpu_heap;
|
||||
ID3D12DescriptorHeap *cpu_heap;
|
||||
ID3D12Resource *output_buffer;
|
||||
struct resource_readback rb;
|
||||
struct test_context context;
|
||||
ID3D12CommandQueue *queue;
|
||||
HRESULT hr;
|
||||
|
||||
static const BYTE cs_code_write_32bit_dxil[] =
|
||||
{
|
||||
#if 0
|
||||
RWByteAddressBuffer WriteUint1 : register(u2);
|
||||
RWByteAddressBuffer WriteUint2 : register(u3);
|
||||
RWByteAddressBuffer WriteUint3 : register(u4);
|
||||
RWByteAddressBuffer WriteUint4 : register(u5);
|
||||
|
||||
[numthreads(64, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
WriteUint1.Store<uint>(4 * thr, thr);
|
||||
WriteUint2.Store<uint2>(8 * thr, 2 * thr + uint2(0, 1));
|
||||
WriteUint3.Store<uint3>(12 * thr, 3 * thr + uint3(0, 1, 2));
|
||||
WriteUint4.Store<uint4>(16 * thr, 4 * thr + uint4(0, 1, 2, 3));
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x43, 0x0a, 0xd2, 0xf1, 0x15, 0xa8, 0xa9, 0xb3, 0x2e, 0xfb, 0x1b, 0x25, 0xf9, 0xe5, 0xe7, 0x8a, 0x01, 0x00, 0x00, 0x00, 0xc0, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xc6, 0x67, 0xdd, 0x93, 0xf1, 0x52, 0x56, 0xbf, 0xa6, 0x3e, 0xf7, 0xfa, 0xa4, 0x05, 0x9f, 0x04, 0x44, 0x58, 0x49, 0x4c, 0x84, 0x05, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00,
|
||||
0x61, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x6c, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x58, 0x01, 0x00, 0x00,
|
||||
0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
|
||||
0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5,
|
||||
0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00,
|
||||
0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09,
|
||||
0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x48, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6,
|
||||
0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0xc1, 0x29, 0x0b, 0x18, 0x68, 0x8c, 0x31, 0xc6,
|
||||
0x30, 0x83, 0xd2, 0x1c, 0x41, 0x50, 0x0c, 0x34, 0xcc, 0x18, 0x8b, 0xd8, 0x40, 0xc0, 0x69, 0xd2, 0x14, 0x51, 0xc2, 0xe4, 0xaf, 0xf0, 0x86, 0x4d, 0x84, 0x36, 0x0c, 0x11, 0x21, 0x49, 0x1b, 0x55,
|
||||
0x14, 0x44, 0x84, 0x82, 0x41, 0x6f, 0x8e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50,
|
||||
0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
|
||||
0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07,
|
||||
0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0,
|
||||
0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x12, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x30,
|
||||
0x40, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x05, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
|
||||
0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x16, 0x65, 0x50, 0x08, 0xa5, 0x50, 0x80, 0x01, 0xb4, 0x46, 0x00, 0x28, 0x16, 0x08, 0xc1, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
|
||||
0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
|
||||
0x81, 0x98, 0x20, 0x0c, 0xc5, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x31, 0x41, 0x80, 0x1e, 0x02, 0x13, 0x84,
|
||||
0x01, 0x99, 0x20, 0x2c, 0xcd, 0x86, 0x45, 0x59, 0x18, 0x85, 0x18, 0x1a, 0xc7, 0x71, 0x80, 0x09, 0xc2, 0x90, 0x6c, 0x58, 0x86, 0x85, 0x51, 0xa0, 0xa1, 0x71, 0x1c, 0x07, 0x98, 0x20, 0x0c, 0xca,
|
||||
0x86, 0x85, 0x58, 0x18, 0x45, 0x1a, 0x1a, 0xc7, 0x71, 0x80, 0x09, 0xc2, 0xb0, 0x6c, 0x58, 0xa0, 0x85, 0x51, 0xa8, 0xa1, 0x71, 0x1c, 0x07, 0xd8, 0x40, 0x3c, 0xd1, 0x54, 0x6d, 0x20, 0x00, 0x0b,
|
||||
0x00, 0x26, 0x08, 0x02, 0x40, 0xa2, 0x2d, 0x2c, 0xcd, 0x6d, 0x82, 0x10, 0x39, 0x13, 0x84, 0x81, 0xd9, 0x30, 0x6c, 0xc3, 0xb0, 0x81, 0x50, 0x34, 0x89, 0xdb, 0x50, 0x60, 0x19, 0x70, 0x75, 0x55,
|
||||
0xd8, 0xd8, 0xec, 0xda, 0x5c, 0xd2, 0xc8, 0xca, 0xdc, 0xe8, 0xa6, 0x04, 0x41, 0x15, 0x32, 0x3c, 0x17, 0xbb, 0x32, 0xb9, 0xb9, 0xb4, 0x37, 0xb7, 0x29, 0x01, 0xd1, 0x84, 0x0c, 0xcf, 0xc5, 0x2e,
|
||||
0x8c, 0xcd, 0xae, 0x4c, 0x6e, 0x4a, 0x60, 0xd4, 0x21, 0xc3, 0x73, 0x99, 0x43, 0x0b, 0x23, 0x2b, 0x93, 0x6b, 0x7a, 0x23, 0x2b, 0x63, 0x9b, 0x12, 0x20, 0x65, 0xc8, 0xf0, 0x5c, 0xe4, 0xca, 0xe6,
|
||||
0xde, 0xea, 0xe4, 0xc6, 0xca, 0xe6, 0xa6, 0x04, 0x56, 0x1d, 0x32, 0x3c, 0x97, 0x32, 0x37, 0x3a, 0xb9, 0x3c, 0xa8, 0xb7, 0x34, 0x37, 0xba, 0xb9, 0x29, 0x41, 0x07, 0x00, 0x79, 0x18, 0x00, 0x00,
|
||||
0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
|
||||
0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
|
||||
0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
|
||||
0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
|
||||
0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
|
||||
0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
|
||||
0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
|
||||
0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
|
||||
0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
|
||||
0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x36, 0x20, 0x0d, 0x97,
|
||||
0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x09, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb,
|
||||
0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x01, 0x10, 0x0c, 0x80, 0x34, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00,
|
||||
0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a, 0xae, 0x30, 0x05, 0x4a, 0x37, 0xa0, 0x30, 0xc8, 0x94, 0x40, 0x19, 0x94, 0x43, 0x79, 0x00, 0x00,
|
||||
0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xc0, 0x58, 0x08, 0x01, 0x39, 0xcc, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x30, 0x57, 0x52, 0x54, 0x50, 0x33, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x0c, 0xa6,
|
||||
0x18, 0x97, 0xe4, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x01, 0x93, 0x2d, 0x47, 0x75, 0x3d, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0x70, 0x8a, 0x55, 0x41, 0x76, 0x23, 0x06, 0x0e, 0x00, 0x82,
|
||||
0x60, 0x90, 0x74, 0xcd, 0x10, 0x3c, 0xc2, 0xf3, 0x3c, 0x4a, 0x55, 0xc2, 0x76, 0x15, 0x70, 0x5b, 0x04, 0x76, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0x90, 0x80, 0x01, 0x74, 0x04, 0xd2, 0x20, 0x48,
|
||||
0x12, 0x83, 0x55, 0x91, 0x49, 0x05, 0x1f, 0x94, 0xf0, 0x41, 0x21, 0x91, 0x8c, 0x18, 0x38, 0x00, 0x08, 0x82, 0x41, 0x42, 0x06, 0x14, 0x13, 0x58, 0xc4, 0x20, 0x58, 0x0f, 0x57, 0x88, 0x18, 0x6c,
|
||||
0x25, 0x62, 0xb0, 0xa5, 0x7c, 0x5b, 0xcc, 0x77, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0x90, 0xa0, 0x01, 0x16, 0x05, 0x1a, 0x43, 0x0c, 0x82, 0x04, 0x06, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE cs_code_write_16bit_dxil[] =
|
||||
{
|
||||
#if 0
|
||||
RWByteAddressBuffer WriteShort1 : register(u6);
|
||||
RWByteAddressBuffer WriteShort2 : register(u7);
|
||||
RWByteAddressBuffer WriteShort3 : register(u8);
|
||||
RWByteAddressBuffer WriteShort4 : register(u9);
|
||||
|
||||
[numthreads(64, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
WriteShort1.Store<uint16_t>(2 * thr, uint16_t(thr));
|
||||
WriteShort2.Store<uint16_t2>(4 * thr, uint16_t(2 * thr) + uint16_t2(0, 1));
|
||||
WriteShort3.Store<uint16_t3>(6 * thr, uint16_t(3 * thr) + uint16_t3(0, 1, 2));
|
||||
WriteShort4.Store<uint16_t4>(8 * thr, uint16_t(4 * thr) + uint16_t4(0, 1, 2, 3));
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0xbc, 0xd0, 0x15, 0xdd, 0x79, 0x7c, 0x27, 0xda, 0x33, 0x83, 0x7c, 0x3f, 0x0d, 0xe3, 0x79, 0xd8, 0x01, 0x00, 0x00, 0x00, 0xe0, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x4f, 0xdb, 0x99, 0x0d, 0xe0, 0x00, 0x33, 0x9f, 0xa8, 0xf9, 0xe2, 0xb1, 0xe2, 0x5f, 0xc4, 0x44, 0x58, 0x49, 0x4c, 0xa4, 0x05, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00,
|
||||
0x69, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8c, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00,
|
||||
0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
|
||||
0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5,
|
||||
0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00,
|
||||
0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09,
|
||||
0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x4c, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6,
|
||||
0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0xc1, 0x99, 0x23, 0x80, 0xca, 0x02, 0x06, 0x1a,
|
||||
0x23, 0xa5, 0x94, 0xcc, 0x20, 0x35, 0x47, 0x10, 0x14, 0x03, 0x0d, 0x33, 0x06, 0xa3, 0x36, 0x10, 0x70, 0x9a, 0x34, 0x45, 0x94, 0x30, 0xf9, 0x2b, 0xbc, 0x61, 0x13, 0xa1, 0x0d, 0x43, 0x44, 0x48,
|
||||
0xd2, 0x46, 0x15, 0x05, 0x11, 0xa1, 0x60, 0x10, 0x9c, 0x23, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
|
||||
0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
|
||||
0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60,
|
||||
0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76,
|
||||
0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x47, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
|
||||
0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x20, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
|
||||
0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x16, 0xe5, 0x50, 0x10, 0x65, 0x50, 0x12, 0x85, 0x50, 0x80, 0x01, 0xc4, 0x46, 0x00, 0x48, 0x16, 0x38, 0x20, 0x20, 0x02, 0xc5,
|
||||
0x19, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
|
||||
0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
|
||||
0x10, 0x04, 0x13, 0x84, 0x81, 0x98, 0x20, 0x0c, 0xc5, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xc3, 0x31, 0x41, 0x88,
|
||||
0x22, 0x02, 0x13, 0x84, 0x01, 0x99, 0x20, 0x30, 0xcf, 0x86, 0x45, 0x59, 0x18, 0xc5, 0x18, 0x1a, 0xc7, 0x71, 0x80, 0x09, 0xc2, 0x90, 0x6c, 0x58, 0x86, 0x85, 0x51, 0xa0, 0xa1, 0x71, 0x1c, 0x07,
|
||||
0x98, 0x20, 0x0c, 0xca, 0x86, 0x85, 0x58, 0x18, 0x45, 0x1a, 0x1a, 0xc7, 0x71, 0x80, 0x09, 0xc2, 0xb0, 0x4c, 0x10, 0x06, 0x66, 0xc3, 0x42, 0x2d, 0x8c, 0x52, 0x0d, 0x8d, 0xe3, 0x38, 0xc0, 0x06,
|
||||
0xe2, 0x89, 0x26, 0x6b, 0x03, 0x01, 0x5c, 0x00, 0x30, 0x41, 0x10, 0x00, 0x12, 0x6d, 0x61, 0x69, 0x6e, 0x13, 0x04, 0x09, 0x9a, 0x20, 0x0c, 0xcd, 0x04, 0x61, 0x70, 0x36, 0x0c, 0xdd, 0x30, 0x6c,
|
||||
0x20, 0x94, 0x8d, 0xf3, 0x36, 0x14, 0x99, 0x06, 0x60, 0x5f, 0x15, 0x36, 0x36, 0xbb, 0x36, 0x97, 0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c, 0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e,
|
||||
0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b, 0x12, 0x18, 0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca,
|
||||
0xd8, 0xa6, 0x04, 0x48, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2, 0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2, 0xb9, 0x29, 0xc1, 0x55, 0x87, 0x0c, 0xcf, 0xa5, 0xcc, 0x8d, 0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd,
|
||||
0x8d, 0x6e, 0x6e, 0x4a, 0xf0, 0x01, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
|
||||
0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
|
||||
0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
|
||||
0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
|
||||
0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
|
||||
0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
|
||||
0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
|
||||
0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
|
||||
0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
|
||||
0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00,
|
||||
0x71, 0x20, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x09, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
|
||||
0xd6, 0x46, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x52, 0xeb, 0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x01, 0x10, 0x0c, 0x80,
|
||||
0x34, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x34, 0x4a, 0xae, 0x30, 0x05, 0x66, 0x00, 0x4a,
|
||||
0x37, 0x80, 0x4c, 0x09, 0x94, 0x41, 0x39, 0x94, 0x07, 0xa5, 0x19, 0x80, 0x12, 0x28, 0x82, 0x32, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xd0, 0x6c, 0x0c, 0x32, 0x49, 0xcf,
|
||||
0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x34, 0x5c, 0x93, 0x68, 0x15, 0x34, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x4d, 0xe7, 0x28, 0xdc, 0x15, 0x8d, 0x18, 0x24, 0x00, 0x08, 0x82, 0x41, 0xe3, 0x3d,
|
||||
0x8b, 0xb6, 0x49, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0x84, 0x41, 0xb3, 0x5d, 0x90, 0xa0, 0x04, 0x30, 0xb8, 0x11, 0x03, 0x07, 0x00, 0x41, 0x30, 0x50, 0xc6, 0x40, 0x22, 0x82, 0x48, 0x58,
|
||||
0x96, 0xe5, 0x01, 0x83, 0x0b, 0x12, 0x54, 0xb0, 0x6c, 0x15, 0x62, 0x70, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0xa0, 0x98, 0x41, 0x85, 0x04, 0xd4, 0x20, 0x38, 0x4e, 0x34, 0x06, 0x65, 0x74, 0x72,
|
||||
0x41, 0x82, 0x0a, 0x1e, 0x28, 0xe1, 0x81, 0x52, 0xcc, 0x40, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0x40, 0x59, 0x03, 0xcd, 0x09, 0x32, 0x62, 0x10, 0xa6, 0x0a, 0x0d, 0xce, 0x48, 0x50, 0xc1, 0xb4,
|
||||
0x25, 0x4c, 0x5b, 0xc3, 0xb4, 0xf5, 0x98, 0xc1, 0x8d, 0x18, 0x38, 0x00, 0x08, 0x82, 0x81, 0x02, 0x07, 0x1f, 0x15, 0x78, 0x05, 0x31, 0x08, 0x59, 0x1b, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE cs_code_read_32bit_dxil[] =
|
||||
{
|
||||
#if 0
|
||||
RWByteAddressBuffer Writeback : register(u1);
|
||||
|
||||
RWByteAddressBuffer WriteUint1 : register(u2);
|
||||
RWByteAddressBuffer WriteUint2 : register(u3);
|
||||
RWByteAddressBuffer WriteUint3 : register(u4);
|
||||
RWByteAddressBuffer WriteUint4 : register(u5);
|
||||
|
||||
[numthreads(64, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
if (4 * thr < 48)
|
||||
Writeback.Store<uint>(0 + 4 * thr, WriteUint1.Load<uint>(4 * thr));
|
||||
|
||||
if (8 * thr < 48)
|
||||
Writeback.Store<uint2>(1 * 64 + 8 * thr, WriteUint2.Load<uint2>(8 * thr));
|
||||
|
||||
if (12 * thr < 48)
|
||||
Writeback.Store<uint3>(2 * 64 + 12 * thr, WriteUint3.Load<uint3>(12 * thr));
|
||||
|
||||
if (16 * thr < 48)
|
||||
Writeback.Store<uint4>(3 * 64 + 16 * thr, WriteUint4.Load<uint4>(16 * thr));
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x62, 0x2a, 0x23, 0x51, 0xa0, 0x59, 0x3e, 0xe5, 0x3a, 0xe5, 0xa5, 0x07, 0x0a, 0xd8, 0x70, 0x5b, 0x01, 0x00, 0x00, 0x00, 0xd0, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x4c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0xc0, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x3d, 0x9e, 0xa8,
|
||||
0x3d, 0x43, 0x9f, 0x42, 0x07, 0xbb, 0x05, 0xf3, 0x56, 0x69, 0x52, 0x14, 0x44, 0x58, 0x49, 0x4c, 0x7c, 0x06, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x9f, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x64, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x96, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02,
|
||||
0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90,
|
||||
0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
|
||||
0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84,
|
||||
0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x58, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73,
|
||||
0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0xc1, 0x29, 0x0b, 0x18, 0x68, 0x8c, 0x31, 0xc6, 0x30, 0x83, 0xd2, 0x4d, 0xc3, 0xe5, 0x4f, 0xd8,
|
||||
0x43, 0x48, 0xfe, 0x4a, 0x48, 0x2b, 0x31, 0xf9, 0xc8, 0x6d, 0xa3, 0x62, 0x8c, 0x31, 0x46, 0x39, 0xd6, 0x40, 0x63, 0x98, 0x41, 0x6c, 0x8e, 0x20, 0x28, 0x06, 0x1a, 0x66, 0x0c, 0x47, 0x6f, 0x20,
|
||||
0xe0, 0x34, 0x69, 0x8a, 0x28, 0x61, 0xf2, 0x57, 0x78, 0xc3, 0x26, 0x42, 0x1b, 0x86, 0x88, 0x90, 0xa4, 0x8d, 0x2a, 0x0a, 0x22, 0x42, 0xc1, 0x20, 0x39, 0x47, 0x00, 0x0a, 0x53, 0x00, 0x00, 0x00,
|
||||
0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30,
|
||||
0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a,
|
||||
0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07,
|
||||
0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60,
|
||||
0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x12, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x30, 0x40, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4,
|
||||
0x79, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
|
||||
0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x16, 0x65, 0x50, 0x08, 0xa5, 0x50, 0x80, 0x01, 0xe4, 0x46, 0x00, 0x88, 0x16, 0x08, 0xcd, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
|
||||
0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
|
||||
0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x32, 0x41, 0x98, 0x20, 0x02, 0x13, 0x84,
|
||||
0x21, 0x99, 0x20, 0x38, 0xce, 0x86, 0x45, 0x59, 0x18, 0x65, 0x18, 0x1a, 0xc7, 0x71, 0x80, 0x0d, 0xcb, 0xb0, 0x30, 0x0a, 0x31, 0x34, 0x8e, 0xe3, 0x00, 0x13, 0x84, 0x41, 0xd9, 0xb0, 0x10, 0x0b,
|
||||
0xa3, 0x44, 0x43, 0xe3, 0x38, 0x0e, 0x30, 0x41, 0x18, 0x96, 0x0d, 0x4b, 0xb4, 0x30, 0xca, 0x34, 0x34, 0x8e, 0xe3, 0x00, 0x13, 0x84, 0x81, 0xd9, 0xb0, 0x4c, 0x0b, 0xa3, 0x54, 0x43, 0xe3, 0x38,
|
||||
0x0e, 0xb0, 0xa1, 0x78, 0x20, 0x89, 0xb2, 0x36, 0x10, 0xc0, 0x05, 0x00, 0x13, 0x04, 0x01, 0x20, 0xd1, 0x16, 0x96, 0xe6, 0x36, 0x41, 0xa0, 0x9e, 0x09, 0xc2, 0xd0, 0x6c, 0x18, 0xb8, 0x61, 0xd8,
|
||||
0x40, 0x28, 0xdb, 0xd4, 0x6d, 0x28, 0x32, 0x0d, 0xc0, 0xbc, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c,
|
||||
0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95,
|
||||
0xb1, 0x4d, 0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x82, 0xab, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a,
|
||||
0x1b, 0xdd, 0xdc, 0x94, 0xc0, 0x03, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
|
||||
0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
|
||||
0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
|
||||
0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
|
||||
0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
|
||||
0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
|
||||
0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
|
||||
0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
|
||||
0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
|
||||
0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81,
|
||||
0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x20, 0x0d, 0x97,
|
||||
0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x0d, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0x9b,
|
||||
0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38,
|
||||
0xf8, 0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x13, 0x04, 0x49, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x34, 0x66, 0x00, 0x4a, 0xae, 0xc0, 0xca, 0x52, 0xa0, 0x30, 0x05, 0x4a, 0x37, 0xa0, 0x30, 0x0a, 0x50, 0xa0, 0x00, 0x07, 0xc8, 0x94, 0x40, 0x19, 0x94, 0x43, 0x79, 0x00, 0x23, 0x06, 0x09, 0x00,
|
||||
0x82, 0x60, 0xf0, 0x68, 0x0c, 0x31, 0x49, 0xd0, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x5b, 0x53, 0x54, 0x54, 0x34, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x0f, 0xe7, 0x18, 0x9a, 0x25, 0x8d,
|
||||
0x18, 0x24, 0x00, 0x08, 0x82, 0xc1, 0xd3, 0x3d, 0x07, 0xb7, 0x4d, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xf0, 0x78, 0x10, 0xa2, 0x75, 0xd4, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x10, 0x62, 0xd0,
|
||||
0x6c, 0x15, 0x78, 0x37, 0xdc, 0x10, 0x48, 0x64, 0x30, 0xcb, 0x10, 0x08, 0xc1, 0x88, 0x81, 0x02, 0x80, 0x20, 0x18, 0x30, 0x63, 0x20, 0x15, 0x42, 0xc5, 0x6c, 0xa3, 0x09, 0x01, 0x30, 0x62, 0xe0,
|
||||
0x00, 0x20, 0x08, 0x06, 0x89, 0x19, 0x4c, 0x06, 0x71, 0x05, 0xd7, 0x75, 0x39, 0xdd, 0x2c, 0x81, 0x50, 0x85, 0x77, 0xc3, 0x0d, 0x81, 0x45, 0x06, 0xb3, 0x0c, 0x03, 0x11, 0x8c, 0x18, 0x28, 0x00,
|
||||
0x08, 0x82, 0x01, 0x73, 0x06, 0x96, 0x22, 0x64, 0xcf, 0x37, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x50, 0x05, 0x18, 0xc0, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x24, 0x6c, 0x90, 0x31, 0x41, 0x37,
|
||||
0x08, 0x5d, 0x37, 0x8d, 0xc1, 0x2c, 0x01, 0x51, 0xcb, 0x25, 0xc3, 0x0d, 0x01, 0x47, 0x06, 0xb3, 0x0c, 0x85, 0x11, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x01, 0xd3, 0x06, 0x5c, 0x24, 0x7c, 0x54,
|
||||
0x19, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0x42, 0x19, 0x1c, 0x8c, 0x18, 0x38, 0x00, 0x08, 0x82, 0x41, 0x32, 0x07, 0x60, 0x30, 0x05, 0x64, 0x40, 0x0c, 0x02, 0x19, 0x64,
|
||||
0x6a, 0x30, 0x4b, 0x60, 0x94, 0xa4, 0x06, 0x37, 0xdc, 0x10, 0x8c, 0x01, 0x19, 0xcc, 0x32, 0x1c, 0x48, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06, 0x0c, 0x1d, 0x8c, 0x41, 0x26, 0x98, 0x81, 0xc6,
|
||||
0x06, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x75, 0x8c, 0x01, 0x8c, 0x18, 0x38, 0x00, 0x08, 0x82, 0x41, 0xb2, 0x07, 0x68, 0xb0, 0x05, 0x6c, 0x50,
|
||||
0x10, 0x83, 0x00, 0x06, 0x72, 0x30, 0x4b, 0x80, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE cs_code_read_16bit_dxil[] =
|
||||
{
|
||||
#if 0
|
||||
RWByteAddressBuffer Writeback : register(u1);
|
||||
|
||||
RWByteAddressBuffer WriteShort1 : register(u6);
|
||||
RWByteAddressBuffer WriteShort2 : register(u7);
|
||||
RWByteAddressBuffer WriteShort3 : register(u8);
|
||||
RWByteAddressBuffer WriteShort4 : register(u9);
|
||||
|
||||
[numthreads(64, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
if (2 * thr < 48)
|
||||
Writeback.Store<uint16_t>(4 * 64 + 2 * thr, WriteShort1.Load<uint16_t>(2 * thr));
|
||||
|
||||
if (4 * thr < 48)
|
||||
Writeback.Store<uint16_t2>(5 * 64 + 4 * thr, WriteShort2.Load<uint16_t2>(4 * thr));
|
||||
|
||||
if (6 * thr < 48)
|
||||
Writeback.Store<uint16_t3>(6 * 64 + 6 * thr, WriteShort3.Load<uint16_t3>(6 * thr));
|
||||
|
||||
if (8 * thr < 48)
|
||||
Writeback.Store<uint16_t4>(7 * 64 + 8 * thr, WriteShort4.Load<uint16_t4>(8 * thr));
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0xbf, 0x17, 0x94, 0xb9, 0x47, 0xff, 0x95, 0x04, 0x21, 0xfa, 0x62, 0x3b, 0x15, 0x06, 0xbc, 0xb4, 0x01, 0x00, 0x00, 0x00, 0xec, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x4c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0xc0, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x37, 0x64, 0x27,
|
||||
0x3d, 0x42, 0x0c, 0x2b, 0x23, 0x22, 0xeb, 0xd9, 0x4c, 0x9e, 0x37, 0x92, 0x44, 0x58, 0x49, 0x4c, 0x98, 0x06, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xa6, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x80, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x9d, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02,
|
||||
0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90,
|
||||
0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
|
||||
0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84,
|
||||
0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x5c, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73,
|
||||
0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0xc1, 0x99, 0x23, 0x80, 0xca, 0x02, 0x06, 0x1a, 0x23, 0xa5, 0x94, 0xcc, 0x20, 0x75, 0xd3, 0x70,
|
||||
0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12, 0xd2, 0x4a, 0x4c, 0x3e, 0x52, 0xeb, 0xa8, 0x48, 0x29, 0xa5, 0x51, 0x0e, 0x36, 0xd0, 0x18, 0x66, 0x50, 0x9b, 0x23, 0x08, 0x8a, 0x81, 0x86, 0x19, 0xe3,
|
||||
0x11, 0x1c, 0x08, 0x38, 0x4d, 0x9a, 0x22, 0x4a, 0x98, 0xfc, 0x15, 0xde, 0xb0, 0x89, 0xd0, 0x86, 0x21, 0x22, 0x24, 0x69, 0xa3, 0x8a, 0x82, 0x88, 0x50, 0x30, 0x68, 0xce, 0x11, 0x80, 0xc2, 0x14,
|
||||
0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
|
||||
0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
|
||||
0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
|
||||
0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
|
||||
0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x47, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
|
||||
0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x16, 0xe5, 0x50, 0x06, 0x05, 0x51, 0x08, 0x25, 0x51, 0x80, 0x01, 0xf4, 0x46, 0x00, 0xa8, 0x16, 0x38, 0x20, 0x20, 0x02, 0xd1,
|
||||
0x19, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b,
|
||||
0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9,
|
||||
0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x32, 0x41, 0xa0,
|
||||
0x24, 0x02, 0x13, 0x84, 0x21, 0x99, 0x20, 0x3c, 0xd0, 0x86, 0x45, 0x59, 0x18, 0x65, 0x18, 0x1a, 0xc7, 0x71, 0x80, 0x0d, 0xcb, 0xb0, 0x30, 0x8a, 0x31, 0x34, 0x8e, 0xe3, 0x00, 0x13, 0x84, 0x41,
|
||||
0xd9, 0xb0, 0x10, 0x0b, 0xa3, 0x44, 0x43, 0xe3, 0x38, 0x0e, 0x30, 0x41, 0x18, 0x96, 0x09, 0xc2, 0xc0, 0x6c, 0x58, 0xa6, 0x85, 0x51, 0xa8, 0xa1, 0x71, 0x1c, 0x07, 0x98, 0x20, 0x0c, 0xcd, 0x04,
|
||||
0x61, 0x70, 0x36, 0x2c, 0xd6, 0xc2, 0x28, 0xd7, 0xd0, 0x38, 0x8e, 0x03, 0x6c, 0x28, 0x1e, 0x48, 0xaa, 0xb0, 0x0d, 0x04, 0x90, 0x01, 0xc0, 0x04, 0x41, 0x00, 0x48, 0xb4, 0x85, 0xa5, 0xb9, 0x4d,
|
||||
0x10, 0xaa, 0x68, 0x82, 0x30, 0x3c, 0x1b, 0x06, 0x6f, 0x18, 0x36, 0x10, 0x4a, 0x67, 0x7d, 0x1b, 0x8a, 0x8d, 0x03, 0x34, 0x30, 0xa8, 0xc2, 0xc6, 0x66, 0xd7, 0xe6, 0x92, 0x46, 0x56, 0xe6, 0x46,
|
||||
0x37, 0x25, 0x08, 0xaa, 0x90, 0xe1, 0xb9, 0xd8, 0x95, 0xc9, 0xcd, 0xa5, 0xbd, 0xb9, 0x4d, 0x09, 0x88, 0x26, 0x64, 0x78, 0x2e, 0x76, 0x61, 0x6c, 0x76, 0x65, 0x72, 0x53, 0x02, 0xa3, 0x0e, 0x19,
|
||||
0x9e, 0xcb, 0x1c, 0x5a, 0x18, 0x59, 0x99, 0x5c, 0xd3, 0x1b, 0x59, 0x19, 0xdb, 0x94, 0x00, 0x29, 0x43, 0x86, 0xe7, 0x22, 0x57, 0x36, 0xf7, 0x56, 0x27, 0x37, 0x56, 0x36, 0x37, 0x25, 0xc8, 0xea,
|
||||
0x90, 0xe1, 0xb9, 0x94, 0xb9, 0xd1, 0xc9, 0xe5, 0x41, 0xbd, 0xa5, 0xb9, 0xd1, 0xcd, 0x4d, 0x09, 0xc0, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c,
|
||||
0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
|
||||
0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
|
||||
0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e,
|
||||
0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
|
||||
0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
|
||||
0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5,
|
||||
0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
|
||||
0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
|
||||
0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2,
|
||||
0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00,
|
||||
0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x0d, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01,
|
||||
0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0x48, 0xad, 0x9b, 0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb5, 0x6e,
|
||||
0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00,
|
||||
0x13, 0x04, 0x49, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a, 0xae, 0xc0, 0xca, 0x52, 0xa0, 0x30, 0x05, 0x4a, 0x37, 0xa0, 0x00, 0x09, 0x0a, 0xb0, 0xa0, 0x00,
|
||||
0x0d, 0x0a, 0xf0, 0x80, 0x4c, 0x09, 0x94, 0x41, 0x39, 0x94, 0x07, 0xa5, 0x19, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x00, 0x79, 0x4e, 0x51, 0x51, 0xd2, 0x88, 0x41, 0x02, 0x80,
|
||||
0x20, 0x18, 0x40, 0xdf, 0x63, 0x60, 0xd7, 0x34, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x10, 0x18, 0x40, 0x87, 0xa7, 0x51, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x00, 0x85, 0x41, 0x84, 0x80, 0x81,
|
||||
0x57, 0x8d, 0x18, 0x24, 0x00, 0x08, 0x82, 0x01, 0x24, 0x06, 0x52, 0xe2, 0x85, 0x81, 0x35, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x84, 0x19, 0x3c, 0x5f, 0x05, 0x63, 0x70, 0xc3, 0x0d, 0x01, 0x45,
|
||||
0x06, 0xb3, 0x0c, 0x81, 0x10, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x41, 0x73, 0x06, 0x54, 0x21, 0x5c, 0x0d, 0x19, 0x8c, 0x26, 0x04, 0x40, 0x11, 0x13, 0x8c, 0x18, 0x38, 0x00, 0x08, 0x82, 0x81,
|
||||
0xb2, 0x06, 0xd6, 0x11, 0x68, 0x02, 0xc3, 0x30, 0xd0, 0x19, 0xcc, 0x12, 0x08, 0x65, 0x9c, 0xc1, 0x0d, 0x37, 0x04, 0x19, 0x19, 0xcc, 0x32, 0x0c, 0x44, 0x30, 0x62, 0xa0, 0x00, 0x20, 0x08, 0x06,
|
||||
0x0d, 0x1b, 0x64, 0x8b, 0xc0, 0x45, 0x69, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x50, 0x05, 0x06, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0xa0, 0xc4, 0x01, 0xd7, 0x04, 0x60, 0x30, 0x08, 0x92,
|
||||
0x54, 0xb5, 0xc1, 0x2c, 0x01, 0x51, 0x0c, 0x1b, 0xc8, 0x70, 0x43, 0xf0, 0x91, 0xc1, 0x2c, 0x43, 0x61, 0x04, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xd0, 0xc8, 0xc1, 0x27, 0x09, 0x62, 0x60, 0xbd,
|
||||
0xc1, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x94, 0xe1, 0xc1, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x28, 0x78, 0x30, 0x06, 0x54, 0x70, 0x06, 0xc4, 0x20, 0x64, 0x1b, 0x1d,
|
||||
0xcc, 0x12, 0x18, 0x35, 0xbd, 0xc1, 0x0d, 0x37, 0x04, 0x66, 0x40, 0x06, 0xb3, 0x0c, 0x07, 0x12, 0x8c, 0x18, 0x28, 0x00, 0x08, 0x82, 0x41, 0x93, 0x07, 0x66, 0xa0, 0x09, 0x69, 0xc0, 0xd9, 0xc1,
|
||||
0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0x43, 0x1d, 0x65, 0x00, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0xa0, 0x80, 0xc2, 0x1a, 0x70, 0xc1, 0x1b, 0x14, 0xc4,
|
||||
0x20, 0x88, 0x01, 0x1f, 0xcc, 0x12, 0x20, 0x08, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
shader_model.HighestShaderModel = D3D_SHADER_MODEL_6_2;
|
||||
hr = ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model));
|
||||
if (FAILED(hr) || shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_2)
|
||||
{
|
||||
skip("Shader model 6.2 not supported.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&options4, 0, sizeof(options4));
|
||||
hr = ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS4, &options4, sizeof(options4));
|
||||
if (FAILED(hr))
|
||||
options4.Native16BitShaderOpsSupported = FALSE;
|
||||
|
||||
if (!options4.Native16BitShaderOpsSupported)
|
||||
skip("Skipping 16-bit robustness tests.\n");
|
||||
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
root_signature_desc.NumParameters = 1;
|
||||
root_signature_desc.Flags = 0;
|
||||
root_signature_desc.NumStaticSamplers = 0;
|
||||
root_signature_desc.pStaticSamplers = NULL;
|
||||
root_signature_desc.pParameters = root_parameters;
|
||||
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].DescriptorTable.NumDescriptorRanges = 1;
|
||||
root_parameters[0].DescriptorTable.pDescriptorRanges = descriptor_ranges;
|
||||
|
||||
descriptor_ranges[0].RegisterSpace = 0;
|
||||
descriptor_ranges[0].BaseShaderRegister = 0;
|
||||
descriptor_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
||||
descriptor_ranges[0].NumDescriptors = UINT_MAX;
|
||||
descriptor_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &context.root_signature);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr %#x.\n", hr);
|
||||
output_buffer = create_default_buffer(context.device, 8 * 16 * sizeof(uint32_t),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
read_output_buffer = create_default_buffer(context.device, 8 * 16 * sizeof(uint32_t),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
write_pso_32bit = create_compute_pipeline_state(context.device,
|
||||
context.root_signature,
|
||||
shader_bytecode(cs_code_write_32bit_dxil, sizeof(cs_code_write_32bit_dxil)));
|
||||
|
||||
if (options4.Native16BitShaderOpsSupported)
|
||||
{
|
||||
write_pso_16bit = create_compute_pipeline_state(context.device,
|
||||
context.root_signature,
|
||||
shader_bytecode(cs_code_write_16bit_dxil, sizeof(cs_code_write_16bit_dxil)));
|
||||
}
|
||||
else
|
||||
write_pso_16bit = NULL;
|
||||
|
||||
read_pso_32bit = create_compute_pipeline_state(context.device,
|
||||
context.root_signature,
|
||||
shader_bytecode(cs_code_read_32bit_dxil, sizeof(cs_code_read_32bit_dxil)));
|
||||
|
||||
if (options4.Native16BitShaderOpsSupported)
|
||||
{
|
||||
read_pso_16bit = create_compute_pipeline_state(context.device,
|
||||
context.root_signature,
|
||||
shader_bytecode(cs_code_read_16bit_dxil, sizeof(cs_code_read_16bit_dxil)));
|
||||
}
|
||||
else
|
||||
read_pso_16bit = NULL;
|
||||
|
||||
gpu_heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 8 + 2);
|
||||
cpu_heap = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 8 + 2);
|
||||
cpu_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_heap);
|
||||
gpu_handle = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu_heap);
|
||||
descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(context.device,
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
|
||||
{
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC view;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE h = cpu_handle;
|
||||
view.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
view.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
view.Buffer.FirstElement = 0;
|
||||
view.Buffer.NumElements = 16 * 8;
|
||||
view.Buffer.StructureByteStride = 0;
|
||||
view.Buffer.CounterOffsetInBytes = 0;
|
||||
view.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, output_buffer, NULL, &view, h);
|
||||
h.ptr += descriptor_size;
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, read_output_buffer, NULL, &view, h);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC view;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE h = cpu_handle;
|
||||
view.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
view.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
view.Buffer.FirstElement = 16 * i;
|
||||
view.Buffer.NumElements = 8;
|
||||
view.Buffer.StructureByteStride = 0;
|
||||
view.Buffer.CounterOffsetInBytes = 0;
|
||||
view.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
h.ptr += (2 + i) * descriptor_size;
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, output_buffer, NULL, &view, h);
|
||||
}
|
||||
|
||||
ID3D12Device_CopyDescriptorsSimple(context.device, 8 + 2,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu_heap),
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_heap),
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, 1, &gpu_heap);
|
||||
{
|
||||
const UINT clear_value[4] = { 0xaaaaaaaau, 0xaaaaaaaau, 0xaaaaaaaau, 0xaaaaaaaau };
|
||||
ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(command_list,
|
||||
gpu_handle, cpu_handle, output_buffer,
|
||||
clear_value, 0, NULL);
|
||||
uav_barrier(command_list, output_buffer);
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list, 0, gpu_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, write_pso_32bit);
|
||||
ID3D12GraphicsCommandList_Dispatch(command_list, 1, 1, 1);
|
||||
if (write_pso_16bit)
|
||||
{
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, write_pso_16bit);
|
||||
ID3D12GraphicsCommandList_Dispatch(command_list, 1, 1, 1);
|
||||
}
|
||||
|
||||
uav_barrier(command_list, output_buffer);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, read_pso_32bit);
|
||||
ID3D12GraphicsCommandList_Dispatch(command_list, 1, 1, 1);
|
||||
if (read_pso_16bit)
|
||||
{
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, read_pso_16bit);
|
||||
ID3D12GraphicsCommandList_Dispatch(command_list, 1, 1, 1);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, output_buffer,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(output_buffer, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
uint32_t value, expected;
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
value = get_readback_uint(&rb, 16 * i + j, 0, 0);
|
||||
expected = j < 8 ? j : 0xaaaaaaaau;
|
||||
ok(value == expected, "32-bit value %u, %u: #%x != #%x.\n", i, j, value, expected);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 4; i < 8; i++)
|
||||
{
|
||||
uint16_t value, expected;
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
value = get_readback_uint16(&rb, 32 * i + j, 0);
|
||||
expected = options4.Native16BitShaderOpsSupported && j < 16 ? j : 0xaaaau;
|
||||
ok(value == expected, "16-bit value %u, %u: #%x != #%x.\n", i, j, value, expected);
|
||||
}
|
||||
}
|
||||
|
||||
release_resource_readback(&rb);
|
||||
|
||||
reset_command_list(command_list, context.allocator);
|
||||
transition_resource_state(command_list, read_output_buffer,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(read_output_buffer, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
uint32_t value, expected;
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
value = get_readback_uint(&rb, 16 * i + j, 0, 0);
|
||||
expected = j < 8 ? j : 0;
|
||||
ok(value == expected, "32-bit value %u, %u: #%x != #%x.\n", i, j, value, expected);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 4; i < 8; i++)
|
||||
{
|
||||
uint16_t value, expected;
|
||||
for (j = 0; j < 32; j++)
|
||||
{
|
||||
value = get_readback_uint16(&rb, 32 * i + j, 0);
|
||||
expected = options4.Native16BitShaderOpsSupported && j < 16 ? j : 0;
|
||||
ok(value == expected, "16-bit value %u, %u: #%x != #%x.\n", i, j, value, expected);
|
||||
}
|
||||
}
|
||||
|
||||
release_resource_readback(&rb);
|
||||
ID3D12DescriptorHeap_Release(gpu_heap);
|
||||
ID3D12DescriptorHeap_Release(cpu_heap);
|
||||
ID3D12Resource_Release(output_buffer);
|
||||
ID3D12Resource_Release(read_output_buffer);
|
||||
ID3D12PipelineState_Release(write_pso_32bit);
|
||||
if (write_pso_16bit)
|
||||
ID3D12PipelineState_Release(write_pso_16bit);
|
||||
ID3D12PipelineState_Release(read_pso_32bit);
|
||||
if (read_pso_16bit)
|
||||
ID3D12PipelineState_Release(read_pso_16bit);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
static void test_buffers_oob_behavior(bool use_dxil)
|
||||
{
|
||||
ID3D12DescriptorHeap *heap, *aux_cpu_heap, *aux_gpu_heap;
|
||||
|
|
|
@ -1467,3 +1467,36 @@ void test_missing_bindings_root_signature(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_root_signature_empty_blob(void)
|
||||
{
|
||||
ID3D12RootSignature *root_signature;
|
||||
struct test_context context;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD cs_code[] =
|
||||
{
|
||||
#if 0
|
||||
RWStructuredBuffer<uint> RWBuf;
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(int wg : SV_GroupID)
|
||||
{
|
||||
RWBuf[wg] = wg;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x81a88c98, 0x1ab24abd, 0xfdb8fb1f, 0x7e9cb035, 0x00000001, 0x000000a8, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000054, 0x00050050, 0x00000015, 0x0100086a,
|
||||
0x0400009e, 0x0011e000, 0x00000000, 0x00000004, 0x0200005f, 0x00021012, 0x0400009b, 0x00000001,
|
||||
0x00000001, 0x00000001, 0x070000a8, 0x0011e012, 0x00000000, 0x0002100a, 0x00004001, 0x00000000,
|
||||
0x0002100a, 0x0100003e,
|
||||
};
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
hr = ID3D12Device_CreateRootSignature(context.device, 0, cs_code, sizeof(cs_code), &IID_ID3D12RootSignature, (void **)&root_signature);
|
||||
/* Has to be E_FAIL, not E_INVALIDARG, oddly enough. */
|
||||
ok(hr == E_FAIL, "Unexpected hr #%x.\n", hr);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -190,6 +190,431 @@ void test_fragment_coords(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_shader_instructions_dxil(void)
|
||||
{
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS4 options4;
|
||||
D3D12_ROOT_PARAMETER root_parameters[2];
|
||||
ID3D12RootSignature *root_signature;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
ID3D12Resource *output_buffer;
|
||||
ID3D12Resource *input_buffer;
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12PipelineState *pso;
|
||||
unsigned int i, j;
|
||||
|
||||
struct test
|
||||
{
|
||||
const D3D12_SHADER_BYTECODE *cs;
|
||||
float input_data[4];
|
||||
uint32_t output_data[4];
|
||||
bool native_fp16;
|
||||
bool is_todo;
|
||||
uint32_t output_data_alt[4];
|
||||
bool allows_alternative_result;
|
||||
};
|
||||
|
||||
static const BYTE legacy_f32_to_f16_code[] =
|
||||
{
|
||||
#if 0
|
||||
StructuredBuffer<float> Buf : register(t0);
|
||||
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
RWBuf[thr] = f32tof16(Buf[thr]);
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x02, 0xfc, 0x26, 0xf2, 0xd6, 0xa4, 0x1f, 0xdd, 0xc5, 0xb6, 0x7b, 0x11, 0xd8, 0xcc, 0xdc, 0xe5, 0x01, 0x00, 0x00, 0x00, 0xd8, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x78, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0x01, 0x25, 0x60, 0x73, 0xdd, 0xac, 0xf7, 0xf7, 0xc8, 0xb6, 0x07,
|
||||
0x78, 0x1d, 0xab, 0x36, 0x44, 0x58, 0x49, 0x4c, 0xcc, 0x05, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x73, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0xb4, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x6a, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91,
|
||||
0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14,
|
||||
0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
|
||||
0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff,
|
||||
0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14,
|
||||
0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x33, 0x00, 0x65, 0x18, 0xc3, 0xd0, 0xb9, 0x69, 0xb8,
|
||||
0xfc, 0x09, 0x7b, 0x08, 0xc9, 0x5f, 0x09, 0x69, 0x25, 0x26, 0xbf, 0xb8, 0x6d, 0x54, 0x18, 0x63, 0xcc, 0x98, 0x23, 0x40, 0x48, 0xdd, 0x33, 0x5c, 0xfe, 0x84, 0x3d, 0x84, 0xe4, 0x87, 0x40, 0x33,
|
||||
0x2c, 0x04, 0x0a, 0x56, 0x39, 0xd2, 0x60, 0x63, 0xa8, 0x41, 0xad, 0x2c, 0x60, 0xb0, 0x31, 0xc6, 0x18, 0x43, 0x0d, 0x7a, 0x73, 0x04, 0x41, 0x31, 0xd8, 0x50, 0x63, 0x44, 0x92, 0x03, 0x01, 0x33,
|
||||
0x75, 0xe3, 0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f, 0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5,
|
||||
0x20, 0x0f, 0x7c, 0x60, 0x0e, 0xec, 0xf0, 0x0e, 0xe1, 0x40, 0x0f, 0x7e, 0x80, 0x02, 0x43, 0x75, 0x26, 0x33, 0x18, 0x07, 0x76, 0x08, 0x87, 0x79, 0x98, 0x07, 0x37, 0x90, 0x85, 0x5b, 0x98, 0x05,
|
||||
0x7a, 0x90, 0x87, 0x7a, 0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10, 0x85, 0x7a, 0x30, 0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x83, 0x7a, 0x70, 0x87, 0x79, 0x48, 0x87, 0x73, 0x70,
|
||||
0x87, 0x72, 0x20, 0x07, 0x30, 0x48, 0x07, 0x77, 0xa0, 0x07, 0x3f, 0x40, 0xc1, 0xa0, 0x3b, 0x47, 0x00, 0x0a, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
|
||||
0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
|
||||
0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73,
|
||||
0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07,
|
||||
0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x0e, 0x20, 0x00, 0x02, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x34, 0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x79, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
|
||||
0xc8, 0x23, 0x01, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x16, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
|
||||
0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x18, 0x85, 0x40, 0x71, 0x04, 0x80, 0x72, 0x81, 0x10, 0x9e, 0x01, 0x20, 0x3b, 0x03, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
|
||||
0x3f, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
|
||||
0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xc1, 0x98, 0x20, 0x0c,
|
||||
0xc7, 0x06, 0x61, 0x20, 0x26, 0x08, 0x03, 0xb2, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x43, 0x32, 0x41, 0xb0, 0x1e, 0x02, 0x13, 0x84, 0x41, 0x99, 0x20, 0x0c,
|
||||
0xcb, 0x06, 0x61, 0x70, 0x36, 0x24, 0xca, 0xc2, 0x28, 0xca, 0xd0, 0x28, 0xcf, 0x86, 0x00, 0x9a, 0x20, 0x60, 0xce, 0x04, 0x21, 0x62, 0x36, 0x2c, 0x8a, 0xc4, 0x28, 0xca, 0xd0, 0x4c, 0xd3, 0xf4,
|
||||
0x6c, 0x08, 0xa8, 0x0d, 0x44, 0x54, 0x01, 0xc0, 0x04, 0x41, 0x00, 0x48, 0xb4, 0x85, 0xa5, 0xb9, 0x4d, 0x10, 0xb2, 0x66, 0xc3, 0xe0, 0x0c, 0xc3, 0x06, 0x42, 0xc9, 0x1c, 0x6d, 0x43, 0x71, 0x61,
|
||||
0x80, 0xb5, 0x55, 0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32,
|
||||
0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73,
|
||||
0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x54, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1b, 0x00, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
|
||||
0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
|
||||
0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
|
||||
0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
|
||||
0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
|
||||
0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
|
||||
0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
|
||||
0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
|
||||
0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
|
||||
0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x56, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x09, 0x50, 0xc3, 0xe5, 0x3b, 0x8f, 0xbf, 0x10, 0x06, 0x20, 0x60, 0xdf, 0x6d, 0xb7, 0xce, 0x57,
|
||||
0xeb, 0x36, 0x70, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x04, 0x58, 0x1b, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0, 0x17, 0xb7, 0x6d, 0x04, 0xd8, 0x70, 0xf9, 0xce, 0xe3, 0x47, 0x80, 0xb5, 0x51, 0x45,
|
||||
0x41, 0x44, 0xec, 0xe4, 0x44, 0x84, 0x8f, 0xdc, 0xb6, 0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x00, 0x04, 0x03, 0x20, 0x0d, 0x00, 0x00,
|
||||
0x61, 0x20, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a, 0xae, 0x74, 0x03, 0xca, 0x52, 0xa0, 0x08, 0x05,
|
||||
0x0a, 0x53, 0x80, 0x54, 0x09, 0x8c, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x20, 0x4d, 0x87, 0xf0, 0x3c, 0xcc, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x48, 0x14, 0x22, 0x40, 0x50, 0x33,
|
||||
0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x44, 0x86, 0x44, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xd0, 0x60, 0x88, 0x10, 0x48, 0x05, 0x34, 0x9a, 0x10, 0x00, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x70,
|
||||
0x6c, 0x49, 0x30, 0x62, 0xe0, 0x00, 0x20, 0x08, 0x06, 0x8f, 0x96, 0x18, 0x44, 0x15, 0x38, 0x8e, 0x83, 0x4c, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE fptrunc_f32_to_f16_code[] =
|
||||
{
|
||||
#if 0
|
||||
StructuredBuffer<float> Buf : register(t0);
|
||||
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
RWBuf[thr] = asuint16(half(Buf[thr]));
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x0c, 0xe9, 0x3d, 0x5e, 0x3c, 0xb6, 0x70, 0xf2, 0x4b, 0x1d, 0x8a, 0x9a, 0x5b, 0x8e, 0x3c, 0x17, 0x01, 0x00, 0x00, 0x00, 0xbc, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x78, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xa9, 0xed, 0x8d, 0xa4, 0x34, 0x22, 0x78, 0xf6, 0x50, 0x75, 0xc7,
|
||||
0xe4, 0x48, 0xa0, 0x6b, 0x44, 0x58, 0x49, 0x4c, 0xb0, 0x05, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x6c, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x98, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x63, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91,
|
||||
0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14,
|
||||
0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
|
||||
0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff,
|
||||
0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14,
|
||||
0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x33, 0x00, 0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f, 0x21,
|
||||
0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x17, 0xb7, 0x8d, 0x0a, 0x63, 0x8c, 0x19, 0x73, 0x04, 0x08, 0xa1, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0x41, 0x2a,
|
||||
0xc7, 0x19, 0x6a, 0x0c, 0x34, 0x68, 0x95, 0x05, 0x0c, 0x35, 0xc6, 0x18, 0x63, 0xa0, 0x41, 0x6d, 0x8e, 0x20, 0x28, 0x86, 0x1a, 0x68, 0x8c, 0x47, 0x70, 0x20, 0x60, 0xa6, 0x6e, 0x1c, 0xd8, 0x21,
|
||||
0x1c, 0xe6, 0x61, 0x1e, 0xdc, 0x60, 0x16, 0xe8, 0x41, 0x1e, 0xea, 0x61, 0x1c, 0xe8, 0xa1, 0x1e, 0xe4, 0xa1, 0x1c, 0xc8, 0x41, 0x14, 0xea, 0xc1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0x81, 0x0f, 0xcc,
|
||||
0x81, 0x1d, 0xde, 0x21, 0x1c, 0xe8, 0xc1, 0x0f, 0x50, 0x60, 0x68, 0xce, 0x64, 0x06, 0xe3, 0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b, 0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f,
|
||||
0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f, 0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e, 0xe5, 0x20, 0x0f, 0x7c, 0x50, 0x0f, 0xee, 0x30, 0x0f, 0xe9, 0x70, 0x0e, 0xee, 0x50, 0x0e, 0xe4, 0x00,
|
||||
0x06, 0xe9, 0xe0, 0x0e, 0xf4, 0xe0, 0x07, 0x28, 0x18, 0x54, 0xe7, 0x08, 0x40, 0x61, 0x14, 0x60, 0x8e, 0x00, 0x02, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
|
||||
0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
|
||||
0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72,
|
||||
0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07,
|
||||
0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08, 0x08, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x40,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x18, 0x85, 0x40, 0x6f, 0x04,
|
||||
0x80, 0x6e, 0x81, 0x03, 0x02, 0x22, 0x90, 0x9d, 0x01, 0x20, 0x3a, 0x03, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44,
|
||||
0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a,
|
||||
0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a,
|
||||
0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x32, 0x41, 0xa0, 0x1c, 0x02, 0x13, 0x84, 0x21, 0x99, 0x20, 0x0c, 0xca, 0x06, 0x61, 0x70, 0x36, 0x24, 0xca, 0xc2, 0x28, 0xca, 0xd0, 0x28,
|
||||
0xcf, 0x86, 0x00, 0x9a, 0x20, 0x58, 0xcd, 0x04, 0xe1, 0x59, 0x36, 0x2c, 0x8a, 0xc4, 0x28, 0xca, 0xd0, 0x4c, 0xd3, 0xf4, 0x6c, 0x08, 0xa8, 0x0d, 0x44, 0x54, 0x01, 0xc0, 0x04, 0x41, 0x00, 0x48,
|
||||
0xb4, 0x85, 0xa5, 0xb9, 0x4d, 0x10, 0x2e, 0x66, 0xc3, 0xe0, 0x0c, 0xc3, 0x06, 0x42, 0xc9, 0x1c, 0x6d, 0x43, 0x71, 0x61, 0x80, 0xb5, 0x55, 0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73,
|
||||
0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51, 0x87,
|
||||
0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x54,
|
||||
0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1b, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c,
|
||||
0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e,
|
||||
0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c,
|
||||
0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e,
|
||||
0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4,
|
||||
0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07,
|
||||
0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5,
|
||||
0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90,
|
||||
0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b,
|
||||
0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2,
|
||||
0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84,
|
||||
0x40, 0x33, 0x2c, 0x84, 0x09, 0x5c, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc5, 0x6d, 0xdb, 0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60,
|
||||
0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48,
|
||||
0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a, 0xae, 0x74, 0x03, 0xca,
|
||||
0x52, 0xa0, 0x30, 0x05, 0x08, 0x95, 0xc0, 0x08, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x00, 0x49, 0x86, 0xe0, 0x38, 0xcb, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x40, 0xd3,
|
||||
0x21, 0x3c, 0x0f, 0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xc4, 0x75, 0x40, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x5c, 0x87, 0x10, 0x44, 0xc5, 0x33, 0x9a, 0x10, 0x00, 0x17, 0xe0, 0xbb,
|
||||
0x20, 0xef, 0x05, 0x23, 0x46, 0x0c, 0x1c, 0x00, 0x04, 0xc1, 0xa0, 0xd9, 0x16, 0xc4, 0xb0, 0x82, 0xe7, 0x79, 0x14, 0x0a, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE fp16_add_rounding_mode_code[] =
|
||||
{
|
||||
#if 0
|
||||
StructuredBuffer<float> Buf : register(t0);
|
||||
RWStructuredBuffer<half2> RWBuf : register(u0);
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
precise half result = half(Buf[thr]) + half(0.5);
|
||||
RWBuf[thr].x = result;
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x80, 0x0b, 0xfa, 0x38, 0x4c, 0x9f, 0x96, 0x0e, 0x2a, 0x53, 0xaa, 0x48, 0x14, 0xe4, 0xf1, 0xad, 0x01, 0x00, 0x00, 0x00, 0xd4, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x78, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc1, 0x42, 0x47, 0x10, 0xe6, 0x07, 0x25, 0x9f, 0x02, 0x19, 0x28, 0x76,
|
||||
0x39, 0x75, 0xa7, 0xd5, 0x44, 0x58, 0x49, 0x4c, 0xc8, 0x05, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x72, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0xb0, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x69, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91,
|
||||
0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14,
|
||||
0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
|
||||
0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff,
|
||||
0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14,
|
||||
0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x33, 0x00, 0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f, 0x21,
|
||||
0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x17, 0xb7, 0x8d, 0x0a, 0x63, 0x8c, 0x19, 0x73, 0x04, 0x08, 0xa1, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0x41, 0x2a,
|
||||
0xc7, 0x19, 0x6a, 0x0c, 0x34, 0x68, 0x8d, 0x02, 0x94, 0x05, 0x0c, 0x35, 0x46, 0x6b, 0xad, 0xa1, 0x41, 0x6e, 0x8e, 0x20, 0x28, 0x86, 0x1a, 0x68, 0x0c, 0x48, 0x71, 0x20, 0x60, 0xa6, 0x6e, 0x1c,
|
||||
0xd8, 0x21, 0x1c, 0xe6, 0x61, 0x1e, 0xdc, 0x60, 0x16, 0xe8, 0x41, 0x1e, 0xea, 0x61, 0x1c, 0xe8, 0xa1, 0x1e, 0xe4, 0xa1, 0x1c, 0xc8, 0x41, 0x14, 0xea, 0xc1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0x81,
|
||||
0x0f, 0xcc, 0x81, 0x1d, 0xde, 0x21, 0x1c, 0xe8, 0xc1, 0x0f, 0x50, 0x60, 0x88, 0x0e, 0x23, 0x08, 0xcd, 0x4c, 0x6a, 0x30, 0x0e, 0xec, 0x10, 0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30,
|
||||
0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2, 0xc0, 0x07, 0xf6, 0x50, 0x0e, 0xe3, 0x40, 0x0f, 0xef,
|
||||
0x20, 0x0f, 0x7c, 0x80, 0x0e, 0xe1, 0xc0, 0x0e, 0xe6, 0xc0, 0x06, 0x60, 0x20, 0x07, 0x7e, 0x00, 0x06, 0x7e, 0x80, 0x02, 0x4b, 0x77, 0x8e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
|
||||
0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
|
||||
0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
|
||||
0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
|
||||
0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
|
||||
0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
|
||||
0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x8f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x08, 0x08, 0x00, 0x01, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x40, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5,
|
||||
0x30, 0x02, 0x50, 0x18, 0x85, 0x40, 0x70, 0x04, 0x80, 0x72, 0x81, 0x03, 0x02, 0x22, 0x10, 0x9e, 0x01, 0xa0, 0x3a, 0x03, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
|
||||
0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20,
|
||||
0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x32, 0x41, 0xa8, 0x1c, 0x02, 0x13, 0x84, 0x21, 0x99, 0x20, 0x0c, 0xca, 0x06, 0x61, 0x70,
|
||||
0x36, 0x24, 0xca, 0xc2, 0x28, 0xca, 0xd0, 0x28, 0xcf, 0x86, 0x00, 0x9a, 0x20, 0x60, 0xcd, 0x04, 0x01, 0x5a, 0x36, 0x2c, 0x8a, 0xc4, 0x28, 0xca, 0xd0, 0x4c, 0xd3, 0xf4, 0x6c, 0x08, 0xa8, 0x0d,
|
||||
0x44, 0x54, 0x01, 0xc0, 0x04, 0x41, 0x00, 0x48, 0xb4, 0x85, 0xa5, 0xb9, 0x4d, 0x10, 0x32, 0x66, 0xc3, 0xe0, 0x0c, 0xc3, 0x06, 0x42, 0xc9, 0x1c, 0x6d, 0x43, 0x71, 0x61, 0x80, 0xb5, 0x55, 0x61,
|
||||
0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30,
|
||||
0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b, 0x9b, 0x7b,
|
||||
0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x54, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1b, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
|
||||
0x50, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
|
||||
0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
|
||||
0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
|
||||
0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
|
||||
0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
|
||||
0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
|
||||
0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
|
||||
0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
|
||||
0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xcb, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xf0, 0x20,
|
||||
0x0f, 0xe5, 0x30, 0x0e, 0xe9, 0x30, 0x0f, 0xe5, 0x30, 0x23, 0x82, 0xc8, 0x01, 0x1f, 0xdc, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0xdc, 0xc0, 0x1e, 0xc2, 0x41, 0x1e, 0xd8, 0x21, 0x1c, 0xf2, 0xe1, 0x1d,
|
||||
0xea, 0x81, 0x1e, 0x00, 0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x09, 0x5c, 0xc3, 0xe5,
|
||||
0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc5, 0x6d, 0xdb, 0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11,
|
||||
0xe1, 0x17, 0xb5, 0x6e, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x34, 0x4a, 0xae, 0x74, 0x03, 0xca, 0x52, 0xa0, 0x30, 0x05, 0x08, 0x95, 0xc0, 0x08, 0x00, 0xb5,
|
||||
0x19, 0x80, 0x31, 0x02, 0x10, 0x74, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x4d, 0x08, 0xf1, 0x3c, 0xcc, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x44, 0x54, 0x42, 0x40, 0x50, 0x33,
|
||||
0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0x04, 0x96, 0x44, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x60, 0x89, 0x10, 0x48, 0x07, 0x34, 0x9a, 0x10, 0x00, 0x17, 0xb4, 0xab, 0xe0, 0x80, 0x11, 0x03,
|
||||
0x07, 0x00, 0x41, 0x30, 0x70, 0x36, 0xe6, 0x28, 0xac, 0x20, 0x49, 0x92, 0x05, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE fp16_arith_denorm_code[] =
|
||||
{
|
||||
#if 0
|
||||
StructuredBuffer<float> Buf : register(t0);
|
||||
RWStructuredBuffer<half2> RWBuf : register(u0);
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
precise half result = half(Buf[thr]) - half(Buf[thr ^ 3]);
|
||||
RWBuf[thr].x = result;
|
||||
}
|
||||
#endif
|
||||
0x44, 0x58, 0x42, 0x43, 0x3f, 0x0c, 0x9c, 0xb8, 0xe1, 0xb4, 0x19, 0x4a, 0x0d, 0xa9, 0xd2, 0x4f, 0xdc, 0xa5, 0xee, 0xac, 0x01, 0x00, 0x00, 0x00, 0xe4, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x78, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0x01, 0xf8, 0xb0, 0x5f, 0xe7, 0x66, 0x86, 0xb3, 0xc7, 0xdf, 0xa4,
|
||||
0x05, 0x84, 0xe1, 0x4e, 0x44, 0x58, 0x49, 0x4c, 0xd8, 0x05, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0x76, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0xc0, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x6d, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91,
|
||||
0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14,
|
||||
0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
|
||||
0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff,
|
||||
0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14,
|
||||
0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x33, 0x00, 0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f, 0x21,
|
||||
0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x17, 0xb7, 0x8d, 0x0a, 0x63, 0x8c, 0x19, 0x73, 0x04, 0x08, 0xa1, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0x41, 0x2a,
|
||||
0xc7, 0x19, 0x6a, 0x0c, 0x34, 0x68, 0x8d, 0x02, 0x94, 0x05, 0x0c, 0x35, 0x46, 0x6b, 0xad, 0xa1, 0x41, 0x6e, 0x8e, 0x20, 0x28, 0x86, 0x1a, 0x68, 0x0c, 0x48, 0x71, 0x20, 0x60, 0xa6, 0x6e, 0x1c,
|
||||
0xd8, 0x21, 0x1c, 0xe6, 0x61, 0x1e, 0xdc, 0x60, 0x16, 0xe8, 0x41, 0x1e, 0xea, 0x61, 0x1c, 0xe8, 0xa1, 0x1e, 0xe4, 0xa1, 0x1c, 0xc8, 0x41, 0x14, 0xea, 0xc1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0x81,
|
||||
0x0f, 0xcc, 0x81, 0x1d, 0xde, 0x21, 0x1c, 0xe8, 0xc1, 0x0f, 0x50, 0x60, 0x88, 0x0e, 0x23, 0x08, 0xcd, 0x4c, 0x6a, 0x30, 0x0e, 0xec, 0x10, 0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30,
|
||||
0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2, 0xc0, 0x07, 0xf6, 0x50, 0x0e, 0xe3, 0x40, 0x0f, 0xef,
|
||||
0x20, 0x0f, 0x7c, 0x80, 0x0e, 0xe1, 0xc0, 0x0e, 0xe6, 0xc0, 0x06, 0x60, 0x20, 0x07, 0x7e, 0x00, 0x06, 0x7e, 0x80, 0x02, 0x4b, 0x77, 0x8e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
|
||||
0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
|
||||
0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
|
||||
0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
|
||||
0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
|
||||
0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
|
||||
0x67, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x8f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x9e, 0x08, 0x08, 0x00, 0x01, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x40, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5,
|
||||
0x30, 0x02, 0x50, 0x18, 0x85, 0x40, 0x70, 0x04, 0x80, 0x72, 0x81, 0x03, 0x02, 0x22, 0x10, 0x9e, 0x01, 0xa0, 0x3a, 0x03, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
|
||||
0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20,
|
||||
0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x32, 0x41, 0xa8, 0x1c, 0x02, 0x13, 0x84, 0x21, 0x99, 0x20, 0x0c, 0xca, 0x06, 0x61, 0x70,
|
||||
0x36, 0x24, 0xca, 0xc2, 0x28, 0xca, 0xd0, 0x28, 0xcf, 0x86, 0x00, 0x9a, 0x20, 0x60, 0xcd, 0x04, 0x01, 0x5a, 0x36, 0x2c, 0x8a, 0xc4, 0x28, 0xca, 0xd0, 0x4c, 0xd3, 0xf4, 0x6c, 0x08, 0xa8, 0x0d,
|
||||
0x44, 0x54, 0x01, 0xc0, 0x04, 0x41, 0x00, 0x48, 0xb4, 0x85, 0xa5, 0xb9, 0x4d, 0x10, 0x32, 0x66, 0xc3, 0xe0, 0x0c, 0xc3, 0x06, 0x42, 0xc9, 0x1c, 0x6d, 0x43, 0x71, 0x61, 0x80, 0xb5, 0x55, 0x61,
|
||||
0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30,
|
||||
0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b, 0x9b, 0x7b,
|
||||
0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x54, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1b, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
|
||||
0x50, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
|
||||
0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
|
||||
0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
|
||||
0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
|
||||
0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
|
||||
0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
|
||||
0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
|
||||
0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
|
||||
0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xcb, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xf0, 0x20,
|
||||
0x0f, 0xe5, 0x30, 0x0e, 0xe9, 0x30, 0x0f, 0xe5, 0x30, 0x23, 0x82, 0xc8, 0x01, 0x1f, 0xdc, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0xdc, 0xc0, 0x1e, 0xc2, 0x41, 0x1e, 0xd8, 0x21, 0x1c, 0xf2, 0xe1, 0x1d,
|
||||
0xea, 0x81, 0x1e, 0x00, 0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x09, 0x5c, 0xc3, 0xe5,
|
||||
0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc5, 0x6d, 0xdb, 0x00, 0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11,
|
||||
0xe1, 0x17, 0xb5, 0x6e, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8, 0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x4a, 0xae, 0x2c, 0x05, 0x4a, 0x37, 0xa0, 0x0c, 0x0a, 0x53, 0x80, 0x50, 0x09, 0x8c, 0x00,
|
||||
0x50, 0x9b, 0x01, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x10, 0x4d, 0xc8, 0xf0, 0x3c, 0xcc, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x44, 0x54, 0x32, 0x40, 0x50, 0x33, 0x62, 0x60, 0x00, 0x20,
|
||||
0x08, 0x06, 0x04, 0x86, 0x44, 0x23, 0x06, 0x0a, 0x00, 0x82, 0x60, 0xb0, 0x60, 0x8a, 0x10, 0x48, 0x06, 0x34, 0x9a, 0x10, 0x00, 0x17, 0xb4, 0x2b, 0x62, 0xe1, 0x11, 0x03, 0x05, 0x00, 0x41, 0x30,
|
||||
0x58, 0x38, 0xc7, 0x08, 0x2c, 0x85, 0x1a, 0x4d, 0x08, 0x80, 0x0b, 0xda, 0x55, 0x11, 0xc4, 0x88, 0x81, 0x03, 0x80, 0x20, 0x18, 0x38, 0xdf, 0xb3, 0x24, 0x5a, 0xc0, 0x30, 0x8c, 0xc3, 0x21, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE legacy_f32_to_f16 = SHADER_BYTECODE(legacy_f32_to_f16_code);
|
||||
static const D3D12_SHADER_BYTECODE fptrunc_f32_to_f16 = SHADER_BYTECODE(fptrunc_f32_to_f16_code);
|
||||
static const D3D12_SHADER_BYTECODE fp16_add_rounding_mode = SHADER_BYTECODE(fp16_add_rounding_mode_code);
|
||||
static const D3D12_SHADER_BYTECODE fp16_arith_denorm = SHADER_BYTECODE(fp16_arith_denorm_code);
|
||||
|
||||
union
|
||||
{
|
||||
uint32_t u32[4];
|
||||
float f32[4];
|
||||
} patterns;
|
||||
|
||||
struct test tests[] =
|
||||
{
|
||||
/* LegacyF32toF16 opcode is always RTZ. */
|
||||
{&legacy_f32_to_f16, { 1024.75f, 1025.75f, 1026.75f, 1027.75f }, { 0x6400, 0x6401, 0x6402, 0x6403 }, false, true},
|
||||
{&legacy_f32_to_f16, { -1024.75f, -1025.75f, -1026.75f, -1027.75f }, { 0xe400, 0xe401, 0xe402, 0xe403 }, false, true},
|
||||
{&legacy_f32_to_f16, { 0.75f / 0x1000000, -0.75f / 0x1000000, 1.75f / 0x1000000, -1.75f / 0x1000000 }, { 0x0000, 0x8000, 0x0001, 0x8001 }, false, true},
|
||||
{&legacy_f32_to_f16, { 0.0f /* To be filled in with weird values, +inf, -inf, nan(1), nan(all bits). */ }, {0x7c00, 0xfc00, UINT32_MAX /* NaN, any pattern allowed. */, UINT32_MAX /* NaN, any pattern allowed. */}, false},
|
||||
/* Drivers don't seem to agree if this should be RTZ or RTE ... Accept both results. Unclear if this is even well specified in D3D12. */
|
||||
{&fptrunc_f32_to_f16, { 1024.5f, 1025.5f, 1026.5f, 1027.5f }, { 0x6400, 0x6402, 0x6402, 0x6404 }, true, false, { 0x6400, 0x6401, 0x6402, 0x6403 }, true},
|
||||
{&fptrunc_f32_to_f16, { 0.0f /* To be filled in with weird values, +inf, -inf, nan(1), nan(all bits). */ }, {0x7c00, 0xfc00, UINT32_MAX /* NaN, any pattern allowed. */, UINT32_MAX /* NaN, any pattern allowed. */}, true},
|
||||
/* Arithmetic is RTE. */
|
||||
{&fp16_add_rounding_mode, { 1024.0f, 1025.0f, 1026.0f, 1027.0f }, { 0x6400, 0x6402, 0x6402, 0x6404 }, true},
|
||||
/* Denorm is preserved in arithmetic. */
|
||||
{&fp16_arith_denorm, { 1.0f / 0x1000000, 2.0f / 0x1000000, 3.0f / 0x1000000, 4.0f / 0x1000000 }, { 0x8003, 0x8001, 0x0001, 0x0003 }, true},
|
||||
};
|
||||
|
||||
/* RTZ tests are TODO since we have no direct way of implementing it. */
|
||||
|
||||
patterns.u32[0] = 0x7f800000;
|
||||
patterns.u32[1] = 0xff800000;
|
||||
patterns.u32[2] = 0x7f800001;
|
||||
patterns.u32[3] = 0x7fffffff;
|
||||
memcpy(tests[3].input_data, patterns.f32, sizeof(patterns.f32));
|
||||
memcpy(tests[5].input_data, patterns.f32, sizeof(patterns.f32));
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
if (!context_supports_dxil(&context))
|
||||
{
|
||||
skip("DXIL not supported.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&options4, 0, sizeof(options4));
|
||||
if (FAILED(ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS4, &options4, sizeof(options4))))
|
||||
options4.Native16BitShaderOpsSupported = FALSE;
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
rs_desc.pParameters = root_parameters;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
create_root_signature(context.device, &rs_desc, &root_signature);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
if (!options4.Native16BitShaderOpsSupported && tests[i].native_fp16)
|
||||
{
|
||||
skip("Native FP16 not supported, skipping.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
pso = create_compute_pipeline_state(context.device, root_signature, *tests[i].cs);
|
||||
input_buffer = create_upload_buffer(context.device, sizeof(tests[i].input_data), tests[i].input_data);
|
||||
output_buffer = create_default_buffer(context.device, sizeof(tests[i].output_data),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso);
|
||||
ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 0,
|
||||
ID3D12Resource_GetGPUVirtualAddress(input_buffer));
|
||||
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(output_buffer));
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
|
||||
transition_resource_state(context.list, output_buffer,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(output_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
|
||||
for (j = 0; j < ARRAY_SIZE(tests[i].output_data); j++)
|
||||
{
|
||||
uint32_t value = get_readback_uint(&rb, j, 0, 0);
|
||||
if (tests[i].allows_alternative_result && tests[i].output_data[j] != tests[i].output_data_alt[j])
|
||||
{
|
||||
todo_if(tests[i].is_todo)
|
||||
ok(value == tests[i].output_data[j] || value == tests[i].output_data_alt[j],
|
||||
"Value %u mismatch: %x != (%x or %x)\n",
|
||||
j, value, tests[i].output_data[j], tests[i].output_data_alt[j]);
|
||||
}
|
||||
else if (tests[i].output_data[j] == UINT32_MAX)
|
||||
{
|
||||
todo_if(tests[i].is_todo)
|
||||
ok((value & 0x7fff) > 0x7c00, "Value %u mismatch: Expected NaN, got %x.\n", value);
|
||||
}
|
||||
else
|
||||
{
|
||||
todo_if(tests[i].is_todo)
|
||||
ok(value == tests[i].output_data[j],
|
||||
"Value %u mismatch: %x != %x\n", j, value, tests[i].output_data[j]);
|
||||
}
|
||||
}
|
||||
ID3D12Resource_Release(input_buffer);
|
||||
ID3D12Resource_Release(output_buffer);
|
||||
ID3D12PipelineState_Release(pso);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
release_resource_readback(&rb);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
ID3D12RootSignature_Release(root_signature);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_shader_instructions(void)
|
||||
{
|
||||
struct named_shader
|
||||
|
@ -200,6 +625,7 @@ void test_shader_instructions(void)
|
|||
};
|
||||
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
const struct named_shader *current_ps;
|
||||
struct test_context_desc desc;
|
||||
|
@ -2179,16 +2605,18 @@ void test_shader_instructions(void)
|
|||
struct vec4 f;
|
||||
} output;
|
||||
bool skip_on_warp;
|
||||
bool requires_fp64;
|
||||
bool is_todo;
|
||||
}
|
||||
uint_tests[] =
|
||||
{
|
||||
{&ps_dadd, {.d = {{2.5, 0.0}}}, {{0x20a80000, 0x400c0000, 0x20500000, 0x40120000}}},
|
||||
{&ps_dmax, {.d = {{2.5, 0.0}}}, {{0x40200000, 0x40200000, 0x40200000, 0x40200000}}},
|
||||
{&ps_dmax, {.d = {{0.5, 0.0}}}, {{0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000}}},
|
||||
{&ps_dmovc, {.d = {{0.5, 0.0}}}, {{0x40900000, 0x40900000, 0x40900000, 0x40900000}}},
|
||||
{&ps_dmovc, {.d = {{1.5, 0.0}}}, {{0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000}}},
|
||||
{&ps_dmodifier, {.d = {{1.5, 0.0}}}, {{0xbfc00000, 0x3fc00000, 0x40200000, 0x00000000}}},
|
||||
{&ps_dmodifier, {.d = {{-1.5, 0.0}}}, {{0x3fc00000, 0x3fc00000, 0x3fc00000, 0x00000000}}},
|
||||
{&ps_dadd, {.d = {{2.5, 0.0}}}, {{0x20a80000, 0x400c0000, 0x20500000, 0x40120000}}, false, true},
|
||||
{&ps_dmax, {.d = {{2.5, 0.0}}}, {{0x40200000, 0x40200000, 0x40200000, 0x40200000}}, false, true},
|
||||
{&ps_dmax, {.d = {{0.5, 0.0}}}, {{0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000}}, false, true},
|
||||
{&ps_dmovc, {.d = {{0.5, 0.0}}}, {{0x40900000, 0x40900000, 0x40900000, 0x40900000}}, false, true},
|
||||
{&ps_dmovc, {.d = {{1.5, 0.0}}}, {{0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000}}, false, true},
|
||||
{&ps_dmodifier, {.d = {{1.5, 0.0}}}, {{0xbfc00000, 0x3fc00000, 0x40200000, 0x00000000}}, false, true},
|
||||
{&ps_dmodifier, {.d = {{-1.5, 0.0}}}, {{0x3fc00000, 0x3fc00000, 0x3fc00000, 0x00000000}}, false, true},
|
||||
|
||||
{&ps_bfi, {{{ 0, 0, 0, 0}}}, {{ 0, 0, 0, 0}}},
|
||||
{&ps_bfi, {{{ 0, 0, 0, 1}}}, {{ 1, 1, 1, 1}}},
|
||||
|
@ -2341,9 +2769,24 @@ void test_shader_instructions(void)
|
|||
{&ps_f16tof32_2, {{{0x000f0000, 0x000f3c00, 0x000f5640, 0x000f5bd0}}}, {{250, 100, 1, 0}}},
|
||||
{&ps_f16tof32_2, {{{0xffff0000, 0xffff3c00, 0xffff5640, 0xffff5bd0}}}, {{250, 100, 1, 0}}},
|
||||
|
||||
/* Verify subnormal behavior. D3D11 functional spec says FP16 denorms must be preserved. */
|
||||
{&ps_f32tof16, {.f = {{1.0f / 0x1000000, 2.0f / 0x1000000, 3.0f / 0x1000000, 4.0f / 0x1000000}}}, {{1, 2, 3, 4}}},
|
||||
{&ps_f32tof16, {.f = {{-1.0f / 0x1000000, -2.0f / 0x1000000, -3.0f / 0x1000000, -4.0f / 0x1000000}}}, {{0x8001, 0x8002, 0x8003, 0x8004}}},
|
||||
/* Verify RTZ behavior on fp32 -> fp16 rounding. D3D11 functional spec calls this out explicitly. */
|
||||
{&ps_f32tof16, {.f = {{1024.0f, 1025.0f, 1026.0f, 1027.0f}}}, {{0x6400, 0x6401, 0x6402, 0x6403}}},
|
||||
{&ps_f32tof16, {.f = {{2048.0f, 2050.0f, 2052.0f, 2054.0f}}}, {{0x6800, 0x6801, 0x6802, 0x6803}}},
|
||||
{&ps_f32tof16, {.f = {{-1024.0f, -1025.0f, -1026.0f, -1027.0f}}}, {{0xe400, 0xe401, 0xe402, 0xe403}}},
|
||||
{&ps_f32tof16, {.f = {{-2048.0f, -2050.0f, -2052.0f, -2054.0f}}}, {{0xe800, 0xe801, 0xe802, 0xe803}}},
|
||||
|
||||
/* We cannot efficiently implement this since we have no dedicated RTZ rounding opcode in SPIR-V. */
|
||||
{&ps_f32tof16, {.f = {{1024.75f, 1025.75f, 1026.75f, 1027.75f}}}, {{0x6400, 0x6401, 0x6402, 0x6403}}, false, false, true},
|
||||
{&ps_f32tof16, {.f = {{2049.75f, 2051.75f, 2053.75f, 2055.75f}}}, {{0x6800, 0x6801, 0x6802, 0x6803}}, false, false, true},
|
||||
{&ps_f32tof16, {.f = {{-1024.75f, -1025.75f, -1026.75f, -1027.75f}}}, {{0xe400, 0xe401, 0xe402, 0xe403}}, false, false, true},
|
||||
{&ps_f32tof16, {.f = {{-2049.75f, -2051.75f, -2053.75f, -2055.75f}}}, {{0xe800, 0xe801, 0xe802, 0xe803}}, false, false, true},
|
||||
{&ps_f32tof16, {.f = {{0.75f / 0x1000000, -0.75f / 0x1000000, 1.75f / 0x1000000, -1.75f / 0x1000000}}}, {{0x0000, 0x8000, 0x0001, 0x8001}}, false, false, true},
|
||||
|
||||
{&ps_f32tof16, {.f = {{0.0f, 1.0f, -1.0f, 666.0f}}}, {{0, 0x3c00, 0xbc00, 0x6134}}},
|
||||
{&ps_f32tof16, {.f = {{INFINITY, -INFINITY, 100000.0f, -100000.0}}}, {{0x7C00, 0xFC00, 0x7BFF, 0xFBFF}}},
|
||||
|
||||
{&ps_f32tof16_2, {.f = {{0.0f, 1.0f, -1.0f, 666.0f}}}, {{0x6134, 0xbc00, 0x3c00, 0}}},
|
||||
|
||||
{&ps_imad, {{{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}}, {{ 0, 0, 0, 0}}},
|
||||
|
@ -2709,6 +3152,9 @@ void test_shader_instructions(void)
|
|||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
memset(&options, 0, sizeof(options));
|
||||
ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
|
||||
|
||||
context.root_signature = create_cb_root_signature(context.device,
|
||||
0, D3D12_SHADER_VISIBILITY_PIXEL, D3D12_ROOT_SIGNATURE_FLAG_NONE);
|
||||
|
||||
|
@ -2777,6 +3223,12 @@ void test_shader_instructions(void)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (uint_tests[i].requires_fp64 && !options.DoublePrecisionFloatShaderOps)
|
||||
{
|
||||
skip("Skipping FP64 test due to lack of feature support.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current_ps != uint_tests[i].ps)
|
||||
{
|
||||
if (context.pipeline_state)
|
||||
|
@ -2804,6 +3256,8 @@ void test_shader_instructions(void)
|
|||
|
||||
transition_resource_state(command_list, context.render_target,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
todo_if(uint_tests[i].is_todo)
|
||||
check_sub_resource_uvec4(context.render_target, 0, queue, command_list, &uint_tests[i].output.u);
|
||||
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
@ -4680,7 +5134,7 @@ void test_gather(void)
|
|||
{0.3f, 1.3f, 1.2f, 0.2f}, {1.3f, 2.3f, 2.2f, 1.2f}, {2.3f, 3.3f, 3.2f, 2.2f}, {3.3f, 3.3f, 3.2f, 3.2f},
|
||||
{0.3f, 1.3f, 1.3f, 0.3f}, {1.3f, 2.3f, 2.3f, 1.3f}, {2.3f, 3.3f, 3.3f, 2.3f}, {3.3f, 3.3f, 3.3f, 3.3f},
|
||||
};
|
||||
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const D3D12_SUBRESOURCE_DATA resource_data = {&texture_data, sizeof(texture_data) / 4};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
|
@ -4717,7 +5171,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -4755,7 +5209,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_offset, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -4793,7 +5247,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_green, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -4831,7 +5285,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_po, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -4867,7 +5321,7 @@ void test_gather(void)
|
|||
constants.offset_x = 0;
|
||||
constants.offset_y = 0;
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5001,7 +5455,7 @@ void test_gather_c(void)
|
|||
{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 1.0f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 1.0f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f},
|
||||
};
|
||||
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const D3D12_SUBRESOURCE_DATA resource_data = {&texture_data, sizeof(texture_data) / 4};
|
||||
static const D3D12_STATIC_SAMPLER_DESC sampler_desc =
|
||||
{
|
||||
|
@ -5057,7 +5511,7 @@ void test_gather_c(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_c, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5095,7 +5549,7 @@ void test_gather_c(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_po_c, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5131,7 +5585,7 @@ void test_gather_c(void)
|
|||
constants.offset_x = 0;
|
||||
constants.offset_y = 0;
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5827,7 +6281,7 @@ void test_multisample_array_texture(void)
|
|||
};
|
||||
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const struct vec4 colors[] =
|
||||
static const float colors[][4] =
|
||||
{
|
||||
{1.0f, 0.0f, 0.0f, 1.0f},
|
||||
{0.0f, 1.0f, 0.0f, 1.0f},
|
||||
|
@ -5932,8 +6386,7 @@ void test_multisample_array_texture(void)
|
|||
rtv_desc.Texture2DMSArray.FirstArraySlice = i;
|
||||
rtv_desc.Texture2DMSArray.ArraySize = 1;
|
||||
ID3D12Device_CreateRenderTargetView(device, texture, &rtv_desc, cpu_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, cpu_handle, &colors[i].x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, cpu_handle, colors[i], 0, NULL);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, texture,
|
||||
|
@ -11410,11 +11863,11 @@ static void test_shader_get_render_target_sample_count(bool use_dxil)
|
|||
use_dxil ? (const void*)ps_code_dxil : (const void*)ps_code_dxbc,
|
||||
use_dxil ? sizeof(ps_code_dxil) : sizeof(ps_code_dxbc),
|
||||
};
|
||||
static const struct vec4 sample_count = {8.0f, 8.0f, 8.0f, 8.0f};
|
||||
static const struct vec4 sample_count = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.rt_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
desc.sample_desc.Count = 8;
|
||||
desc.sample_desc.Count = 4;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -31,6 +31,7 @@ static uint32_t compute_tile_count(uint32_t resource_size, uint32_t mip, uint32_
|
|||
void test_get_resource_tiling(void)
|
||||
{
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
||||
D3D12_SUBRESOURCE_TILING tilings_alt[16];
|
||||
D3D12_PACKED_MIP_INFO packed_mip_info;
|
||||
D3D12_SUBRESOURCE_TILING tilings[16];
|
||||
UINT num_resource_tiles, num_tilings;
|
||||
|
@ -64,6 +65,16 @@ void test_get_resource_tiling(void)
|
|||
/* Test buffers */
|
||||
{ D3D12_RESOURCE_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 1024, 1, 1, 1, 1, 1, 0, 65536, 1, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 16*65536, 1, 1, 1, 16, 1, 0, 65536, 1, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
/* Test small resource behavior */
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 1, 1, 1, 1, 1, 1, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 2, 2, 1, 2, 1, 2, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 4, 4, 1, 3, 1, 3, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 8, 8, 1, 4, 1, 4, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 16, 16, 1, 5, 1, 5, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 32, 32, 1, 6, 1, 6, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 64, 64, 1, 7, 1, 7, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 128, 128, 1, 8, 1, 8, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 256, 256, 1, 9, 2, 9, 1, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
/* Test various image formats */
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 512, 512, 1, 1, 4, 1, 1, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8_UNORM, 512, 512, 1, 1, 8, 1, 1, 256, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
|
@ -85,7 +96,7 @@ void test_get_resource_tiling(void)
|
|||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 128, 128, 1, 8, 1, 8, 1, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 512, 512, 1, 10, 21, 10, 3, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 512, 512, 4, 3, 84, 12, 3, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 1, 0, 1, 0, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 1, 1, 1, 0, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
/* Test 3D textures */
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE3D, DXGI_FORMAT_R8_UNORM, 64, 64, 64, 1, 4, 1, 1, 64, 32, 32, D3D12_TILED_RESOURCES_TIER_3 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE3D, DXGI_FORMAT_R8G8_UNORM, 64, 64, 64, 1, 8, 1, 1, 32, 32, 32, D3D12_TILED_RESOURCES_TIER_3 },
|
||||
|
@ -127,6 +138,15 @@ void test_get_resource_tiling(void)
|
|||
D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource, (void **)&resource);
|
||||
ok(hr == S_OK, "Failed to create reserved resource, hr %#x.\n", hr);
|
||||
|
||||
/* This is nonsense, but it doesn't crash or generate errors. */
|
||||
ID3D12Device_GetResourceTiling(context.device, resource, NULL, NULL, NULL, NULL, 0, NULL);
|
||||
|
||||
/* If num_tilings is NULL, tilings_alt is ignored. */
|
||||
memset(tilings, 0, sizeof(tilings));
|
||||
memset(tilings_alt, 0, sizeof(tilings_alt));
|
||||
ID3D12Device_GetResourceTiling(context.device, resource, NULL, NULL, NULL, NULL, 0, tilings_alt);
|
||||
ok(memcmp(tilings, tilings_alt, sizeof(tilings_alt)) == 0, "Mismatch.\n");
|
||||
|
||||
num_tilings = 0;
|
||||
ID3D12Device_GetResourceTiling(context.device, resource, NULL, NULL, NULL, &num_tilings, 0, NULL);
|
||||
ok(num_tilings == 0, "Unexpected tiling count %u.\n", num_tilings);
|
||||
|
@ -203,18 +223,10 @@ void test_get_resource_tiling(void)
|
|||
ok((packed_mip_info.NumTilesForPackedMips == 0) == (packed_mip_info.NumPackedMips == 0),
|
||||
"Unexpected packed tile count %u.\n", packed_mip_info.NumTilesForPackedMips);
|
||||
|
||||
if (packed_mip_info.NumStandardMips || !packed_mip_info.NumPackedMips)
|
||||
{
|
||||
ok(tile_shape.WidthInTexels == tests[i].tile_shape_w, "Unexpected tile width %u.\n", tile_shape.WidthInTexels);
|
||||
ok(tile_shape.HeightInTexels == tests[i].tile_shape_h, "Unexpected tile height %u.\n", tile_shape.HeightInTexels);
|
||||
ok(tile_shape.DepthInTexels == tests[i].tile_shape_d, "Unexpected tile depth %u.\n", tile_shape.DepthInTexels);
|
||||
}
|
||||
else
|
||||
{
|
||||
ok(!tile_shape.WidthInTexels && !tile_shape.HeightInTexels && !tile_shape.DepthInTexels,
|
||||
"Unexpected tile shape (%u,%u,%u) for packed resource.\n",
|
||||
tile_shape.WidthInTexels, tile_shape.HeightInTexels, tile_shape.DepthInTexels);
|
||||
}
|
||||
/* Docs say that tile shape should be cleared to zero if there is no standard mip, but drivers don't seem to care about this. */
|
||||
ok(tile_shape.WidthInTexels == tests[i].tile_shape_w, "Unexpected tile width %u.\n", tile_shape.WidthInTexels);
|
||||
ok(tile_shape.HeightInTexels == tests[i].tile_shape_h, "Unexpected tile height %u.\n", tile_shape.HeightInTexels);
|
||||
ok(tile_shape.DepthInTexels == tests[i].tile_shape_d, "Unexpected tile depth %u.\n", tile_shape.DepthInTexels);
|
||||
|
||||
for (j = 0; j < tests[i].expected_tiling_count; j++)
|
||||
{
|
||||
|
@ -3371,3 +3383,248 @@ void test_texture_feedback_instructions_dxil(void)
|
|||
test_texture_feedback_instructions(true);
|
||||
}
|
||||
|
||||
void test_sparse_buffer_memory_lifetime(void)
|
||||
{
|
||||
/* Attempt to bind sparse memory, then free the underlying heap, but keep the sparse resource
|
||||
* alive. This should confuse drivers that attempt to track BO lifetimes. */
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
||||
const UINT values[] = { 42, 42, 42, 42 };
|
||||
D3D12_ROOT_PARAMETER root_parameters[2];
|
||||
D3D12_TILE_REGION_SIZE region_size;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE h_gpu;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE h_cpu;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
D3D12_DESCRIPTOR_RANGE desc_range;
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12DescriptorHeap *cpu;
|
||||
ID3D12DescriptorHeap *gpu;
|
||||
D3D12_HEAP_DESC heap_desc;
|
||||
D3D12_RESOURCE_DESC desc;
|
||||
ID3D12Resource *sparse;
|
||||
ID3D12Resource *buffer;
|
||||
ID3D12Heap *heap_live;
|
||||
ID3D12Heap *heap;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD cs_sparse_query_dxbc[] =
|
||||
{
|
||||
#if 0
|
||||
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
Buffer<uint> Buf : register(t0);
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
uint code;
|
||||
|
||||
// Sample mapped, but freed memory. See what CheckAccessFullyMapped returns.
|
||||
uint data = Buf.Load(thr, code);
|
||||
uint value = CheckAccessFullyMapped(code) ? (1u << 16) : 0u;
|
||||
value |= data & 0xffffu;
|
||||
RWBuf[2 * thr + 0] = value;
|
||||
|
||||
// Sample not yet mapped memory. See what CheckAccessFullyMapped returns.
|
||||
data = Buf.Load(thr + 1024 * 1024, code);
|
||||
value = CheckAccessFullyMapped(code) ? (1u << 16) : 0u;
|
||||
value |= data & 0xffffu;
|
||||
|
||||
RWBuf[2 * thr + 1] = value;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x8c2a40af, 0x2a9b20a6, 0xa99f0977, 0x37daacf5, 0x00000001, 0x00000280, 0x00000004,
|
||||
0x00000030, 0x00000040, 0x00000050, 0x00000270, 0x4e475349, 0x00000008, 0x00000000, 0x00000008,
|
||||
0x4e47534f, 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000218, 0x00050050, 0x00000086,
|
||||
0x0100086a, 0x04000858, 0x00107000, 0x00000000, 0x00004444, 0x0400009e, 0x0011e000, 0x00000000,
|
||||
0x00000004, 0x0200005f, 0x00020012, 0x02000068, 0x00000002, 0x0400009b, 0x00000001, 0x00000001,
|
||||
0x00000001, 0x8a0000df, 0x80000042, 0x00111103, 0x00100012, 0x00000000, 0x00100012, 0x00000001,
|
||||
0x00020006, 0x00107e46, 0x00000000, 0x050000ea, 0x00100022, 0x00000000, 0x0010000a, 0x00000001,
|
||||
0x09000037, 0x00100022, 0x00000000, 0x0010001a, 0x00000000, 0x00004001, 0x00010000, 0x00004001,
|
||||
0x00000000, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000010, 0x00004001, 0x00000000,
|
||||
0x0010000a, 0x00000000, 0x0010001a, 0x00000000, 0x06000029, 0x00100022, 0x00000000, 0x0002000a,
|
||||
0x00004001, 0x00000001, 0x090000a8, 0x0011e012, 0x00000000, 0x0010001a, 0x00000000, 0x00004001,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x1300008c, 0x00100052, 0x00000000, 0x00004002, 0x00000014,
|
||||
0x00000000, 0x0000001f, 0x00000000, 0x00004002, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
|
||||
0x00020006, 0x00004002, 0x00100000, 0x00000000, 0x00000001, 0x00000000, 0x8b0000df, 0x80000042,
|
||||
0x00111103, 0x00100012, 0x00000000, 0x00100012, 0x00000001, 0x00100006, 0x00000000, 0x00107e46,
|
||||
0x00000000, 0x050000ea, 0x00100082, 0x00000000, 0x0010000a, 0x00000001, 0x09000037, 0x00100082,
|
||||
0x00000000, 0x0010003a, 0x00000000, 0x00004001, 0x00010000, 0x00004001, 0x00000000, 0x0b00008c,
|
||||
0x00100012, 0x00000000, 0x00004001, 0x00000010, 0x00004001, 0x00000000, 0x0010000a, 0x00000000,
|
||||
0x0010003a, 0x00000000, 0x090000a8, 0x0011e012, 0x00000000, 0x0010002a, 0x00000000, 0x00004001,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x30494653, 0x00000008, 0x00000100, 0x00000000,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE cs_sparse_query = SHADER_BYTECODE(cs_sparse_query_dxbc);
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
hr = ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
|
||||
ok(hr == S_OK, "Failed to check feature support, hr %#x.\n", hr);
|
||||
|
||||
if (options.TiledResourcesTier < D3D12_TILED_RESOURCES_TIER_2)
|
||||
{
|
||||
skip("Tiled resources Tier 2 not supported by device.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
memset(&desc_range, 0, sizeof(desc_range));
|
||||
rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
rs_desc.pParameters = root_parameters;
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
|
||||
root_parameters[1].DescriptorTable.pDescriptorRanges = &desc_range;
|
||||
desc_range.NumDescriptors = 1;
|
||||
desc_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature, cs_sparse_query);
|
||||
|
||||
memset(&heap_desc, 0, sizeof(heap_desc));
|
||||
heap_desc.SizeInBytes = 4 * 1024 * 1024;
|
||||
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap);
|
||||
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap_live);
|
||||
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.Width = 64 * 1024 * 1024;
|
||||
desc.Height = 1;
|
||||
desc.DepthOrArraySize = 1;
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
desc.MipLevels = 1;
|
||||
desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
hr = ID3D12Device_CreateReservedResource(context.device, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
NULL, &IID_ID3D12Resource, (void**)&sparse);
|
||||
ok(SUCCEEDED(hr), "Failed to create reserved resource, hr #%x.\n", hr);
|
||||
|
||||
{
|
||||
const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { 0 };
|
||||
const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NULL;
|
||||
const UINT offset = 0;
|
||||
const UINT count = desc.Width / (64 * 1024);
|
||||
region_size.UseBox = FALSE;
|
||||
region_size.NumTiles = desc.Width / (64 * 1024);
|
||||
ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, &region_start_coordinate, &region_size,
|
||||
NULL, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
|
||||
}
|
||||
|
||||
region_size.UseBox = FALSE;
|
||||
region_size.NumTiles = 1;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { i, 0, 0, 0 };
|
||||
const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NONE;
|
||||
const UINT offset = i;
|
||||
const UINT count = 1;
|
||||
|
||||
ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, &region_start_coordinate, &region_size,
|
||||
i == 0 ? heap : heap_live, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
|
||||
}
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
|
||||
buffer = create_default_buffer(context.device, 128 * 1024,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
cpu = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
|
||||
gpu = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
|
||||
memset(&uav_desc, 0, sizeof(uav_desc));
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
uav_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
uav_desc.Buffer.NumElements = 128 * 1024 / 4;
|
||||
uav_desc.Buffer.FirstElement = 0;
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu));
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu));
|
||||
|
||||
memset(&srv_desc, 0, sizeof(srv_desc));
|
||||
srv_desc.Buffer.FirstElement = 0;
|
||||
srv_desc.Buffer.NumElements = 2 * 1024 * 1024;
|
||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||
srv_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
|
||||
h_cpu = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu);
|
||||
h_cpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
ID3D12Device_CreateShaderResourceView(context.device, sparse, &srv_desc, h_cpu);
|
||||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
|
||||
ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list,
|
||||
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu),
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu), sparse, values, 0, NULL);
|
||||
transition_resource_state(context.list, sparse,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 0, 128 * 1024);
|
||||
transition_resource_state(context.list, buffer,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT,
|
||||
&rb, context.queue, context.list);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
ok(get_readback_uint(&rb, 0, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 0, 0, 0));
|
||||
ok(get_readback_uint(&rb, 64 * 1024 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 64 * 1024 / 4, 0, 0));
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Heap_Release(heap);
|
||||
|
||||
/* Access a resource where we can hypothetically access the freed heap memory. */
|
||||
/* On AMD Windows native at least, if we read the freed region, we read garbage, which proves it's not required to unbind explicitly.
|
||||
* We'd read 0 in that case. */
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 64 * 1024, 64 * 1024);
|
||||
|
||||
#define EXPLORE_UNDEFINED_BEHAVIOR 0
|
||||
|
||||
#if EXPLORE_UNDEFINED_BEHAVIOR
|
||||
/* This reads unmapped memory. */
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 1024, sparse, 1024, 1024);
|
||||
#endif
|
||||
|
||||
transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
h_gpu = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu);
|
||||
h_gpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(buffer));
|
||||
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 1, h_gpu);
|
||||
#if EXPLORE_UNDEFINED_BEHAVIOR
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
#endif
|
||||
|
||||
transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT,
|
||||
&rb, context.queue, context.list);
|
||||
|
||||
#if EXPLORE_UNDEFINED_BEHAVIOR
|
||||
skip("Reading undefined value #%x.\n", get_readback_uint(&rb, 0, 0, 0));
|
||||
skip("Reading value #%x (expect 0).\n", get_readback_uint(&rb, 1, 0, 0));
|
||||
skip("Reading undefined value #%x.\n", get_readback_uint(&rb, 1024 / 4, 0, 0));
|
||||
#endif
|
||||
ok(get_readback_uint(&rb, 2048 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 2048 / 4, 0, 0));
|
||||
ok(get_readback_uint(&rb, 64 * 1024 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 64 * 1024 / 4, 0, 0));
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Resource_Release(buffer);
|
||||
ID3D12Resource_Release(sparse);
|
||||
ID3D12DescriptorHeap_Release(cpu);
|
||||
ID3D12DescriptorHeap_Release(gpu);
|
||||
ID3D12Heap_Release(heap_live);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,159 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "d3d12_crosstest.h"
|
||||
|
||||
void test_primitive_restart_list_topology_stream_output(void)
|
||||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12Resource *counter_buffer, *so_buffer;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_STREAM_OUTPUT_BUFFER_VIEW sobv;
|
||||
struct test_context_desc desc;
|
||||
ID3D12Resource *index_buffer;
|
||||
struct resource_readback rb;
|
||||
struct test_context context;
|
||||
D3D12_INDEX_BUFFER_VIEW ibv;
|
||||
ID3D12CommandQueue *queue;
|
||||
const struct vec4 *data;
|
||||
ID3D12Device *device;
|
||||
uint32_t counter;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
static const D3D12_SO_DECLARATION_ENTRY so_declaration[] =
|
||||
{
|
||||
{0, "SV_Position", 0, 0, 4, 0},
|
||||
};
|
||||
static const struct vec4 expected_output[] =
|
||||
{
|
||||
/* Strip */
|
||||
{ 2000.0f, 2000.0f, 2000.0f, 2000.0f },
|
||||
{ 3000.0f, 3000.0f, 3000.0f, 3000.0f },
|
||||
{ 4000.0f, 4000.0f, 4000.0f, 4000.0f },
|
||||
|
||||
/* List */
|
||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
||||
{ 1.0f, 1.0f, 1.0f, 1.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, -1.0f },
|
||||
{ 9.0f, 9.0f, 9.0f, 9.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, -1.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, -1.0f },
|
||||
{ 2000.0f, 2000.0f, 2000.0f, 2000.0f },
|
||||
{ 3000.0f, 3000.0f, 3000.0f, 3000.0f },
|
||||
{ 4000.0f, 4000.0f, 4000.0f, 4000.0f },
|
||||
|
||||
/* Strip */
|
||||
{ 2000.0f, 2000.0f, 2000.0f, 2000.0f },
|
||||
{ 3000.0f, 3000.0f, 3000.0f, 3000.0f },
|
||||
{ 4000.0f, 4000.0f, 4000.0f, 4000.0f },
|
||||
};
|
||||
static const uint32_t index_data[] = { 0, 1, UINT32_MAX, 9, UINT32_MAX, UINT32_MAX, 2000, 3000, 4000 };
|
||||
static const UINT strides[] = { 16 };
|
||||
|
||||
static const DWORD vs_code[] =
|
||||
{
|
||||
#if 0
|
||||
float4 main(uint vid : SV_VertexID) : SV_Position
|
||||
{
|
||||
if (vid == ~0u)
|
||||
return float4(-1, -1, -1, -1);
|
||||
else
|
||||
return float4(vid, vid, vid, vid);
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x59eaaf80, 0xf7ab5160, 0xf0ce6da4, 0x82ce289b, 0x00000001, 0x00000140, 0x00000003,
|
||||
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978,
|
||||
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003,
|
||||
0x00000000, 0x0000000f, 0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x000000a4, 0x00010050,
|
||||
0x00000029, 0x0100086a, 0x04000060, 0x00101012, 0x00000000, 0x00000006, 0x04000067, 0x001020f2,
|
||||
0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x07000020, 0x00100012, 0x00000000, 0x0010100a,
|
||||
0x00000000, 0x00004001, 0xffffffff, 0x0304001f, 0x0010000a, 0x00000000, 0x08000036, 0x001020f2,
|
||||
0x00000000, 0x00004002, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0x0100003e, 0x01000012,
|
||||
0x05000056, 0x001020f2, 0x00000000, 0x00101006, 0x00000000, 0x0100003e, 0x01000015, 0x0100003e,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.root_signature_flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
device = context.device;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, &vs, NULL, NULL);
|
||||
pso_desc.StreamOutput.NumEntries = ARRAY_SIZE(so_declaration);
|
||||
pso_desc.StreamOutput.pSODeclaration = so_declaration;
|
||||
pso_desc.StreamOutput.pBufferStrides = strides;
|
||||
pso_desc.StreamOutput.NumStrides = ARRAY_SIZE(strides);
|
||||
pso_desc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;
|
||||
pso_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
|
||||
counter_buffer = create_default_buffer(device, 32,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
so_buffer = create_default_buffer(device, 4096,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
index_buffer = create_upload_buffer(device, sizeof(index_data), index_data);
|
||||
sobv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(so_buffer);
|
||||
sobv.SizeInBytes = 4096;
|
||||
sobv.BufferFilledSizeLocation = ID3D12Resource_GetGPUVirtualAddress(counter_buffer);
|
||||
|
||||
ibv.Format = DXGI_FORMAT_R32_UINT;
|
||||
ibv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buffer);
|
||||
ibv.SizeInBytes = sizeof(index_data);
|
||||
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_SOSetTargets(command_list, 0, 1, &sobv);
|
||||
ID3D12GraphicsCommandList_IASetIndexBuffer(command_list, &ibv);
|
||||
|
||||
/* Primitive restart state only applies to strip primitives. */
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
ID3D12GraphicsCommandList_DrawIndexedInstanced(command_list, ARRAY_SIZE(index_data), 1,
|
||||
0, 0, 0);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
ID3D12GraphicsCommandList_DrawIndexedInstanced(command_list, ARRAY_SIZE(index_data), 1,
|
||||
0, 0, 0);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
ID3D12GraphicsCommandList_DrawIndexedInstanced(command_list, ARRAY_SIZE(index_data), 1,
|
||||
0, 0, 0);
|
||||
|
||||
transition_resource_state(command_list, counter_buffer,
|
||||
D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
transition_resource_state(command_list, so_buffer,
|
||||
D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(counter_buffer, DXGI_FORMAT_R32_UINT, &rb, queue, command_list);
|
||||
counter = get_readback_uint(&rb, 0, 0, 0);
|
||||
ok(counter == sizeof(expected_output), "Got unexpected counter %u, expected %u.\n",
|
||||
counter, (unsigned int)sizeof(expected_output));
|
||||
release_resource_readback(&rb);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
get_buffer_readback_with_command_list(so_buffer, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
|
||||
for (i = 0; i < ARRAY_SIZE(expected_output); ++i)
|
||||
{
|
||||
const struct vec4 *expected = &expected_output[i];
|
||||
data = get_readback_vec4(&rb, i, 0);
|
||||
ok(compare_vec4(data, expected, 1),
|
||||
"Got {%.8e, %.8e, %.8e, %.8e}, expected {%.8e, %.8e, %.8e, %.8e}.\n",
|
||||
data->x, data->y, data->z, data->w, expected->x, expected->y, expected->z, expected->w);
|
||||
}
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Resource_Release(index_buffer);
|
||||
ID3D12Resource_Release(counter_buffer);
|
||||
ID3D12Resource_Release(so_buffer);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
static void test_vertex_shader_stream_output(bool use_dxil)
|
||||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
|
|
|
@ -137,12 +137,6 @@ void test_queue_wait(void)
|
|||
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
|
||||
release_resource_readback(&rb);
|
||||
|
||||
if (!vkd3d_test_platform_is_windows())
|
||||
{
|
||||
skip("Wait() is not implemented yet.\n"); /* FIXME */
|
||||
goto skip_tests;
|
||||
}
|
||||
|
||||
/* Wait() before CPU signal */
|
||||
update_buffer_data(cb, 0, sizeof(blue), &blue);
|
||||
queue_wait(queue, fence, 2);
|
||||
|
@ -218,7 +212,6 @@ void test_queue_wait(void)
|
|||
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
|
||||
release_resource_readback(&rb);
|
||||
|
||||
skip_tests:
|
||||
/* Signal() and Wait() in the same command queue */
|
||||
update_buffer_data(cb, 0, sizeof(blue), &blue);
|
||||
queue_signal(queue, fence, 7);
|
||||
|
@ -1194,3 +1187,247 @@ void test_create_fence(void)
|
|||
ok(!refcount, "ID3D12Device has %u references left.\n", (unsigned int)refcount);
|
||||
}
|
||||
|
||||
void test_fence_wait_robustness_inner(bool shared_handles)
|
||||
{
|
||||
VKD3D_UNUSED HANDLE shared_signal = NULL;
|
||||
VKD3D_UNUSED HANDLE shared_drain = NULL;
|
||||
VKD3D_UNUSED HANDLE shared_wait = NULL;
|
||||
ID3D12CommandAllocator *allocator[2];
|
||||
ID3D12Fence *signal_fence_dup = NULL;
|
||||
D3D12_COMMAND_QUEUE_DESC queue_desc;
|
||||
ID3D12Fence *drain_fence_dup = NULL;
|
||||
ID3D12Fence *wait_fence_dup = NULL;
|
||||
ID3D12GraphicsCommandList *list[2];
|
||||
ID3D12CommandQueue *compute_queue;
|
||||
struct test_context context;
|
||||
ID3D12Fence *signal_fence;
|
||||
ID3D12Fence *drain_fence;
|
||||
ID3D12Fence *wait_fence;
|
||||
ID3D12Resource *src;
|
||||
ID3D12Resource *dst;
|
||||
unsigned int i;
|
||||
HANDLE event;
|
||||
UINT value;
|
||||
HRESULT hr;
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
hr = ID3D12Device_CreateFence(context.device, 0,
|
||||
shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
|
||||
&IID_ID3D12Fence, (void**)&signal_fence);
|
||||
todo_if(shared_handles) ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create fence, skipping test ...\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
hr = ID3D12Device_CreateFence(context.device, 0,
|
||||
shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
|
||||
&IID_ID3D12Fence, (void**)&wait_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create fence, skipping test ...\n");
|
||||
ID3D12Fence_Release(signal_fence);
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
hr = ID3D12Device_CreateFence(context.device, 0,
|
||||
shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
|
||||
&IID_ID3D12Fence, (void**)&drain_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create fence, skipping test ...\n");
|
||||
ID3D12Fence_Release(signal_fence);
|
||||
ID3D12Fence_Release(wait_fence);
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
if (shared_handles)
|
||||
{
|
||||
hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)signal_fence,
|
||||
NULL, GENERIC_ALL, NULL, &shared_signal);
|
||||
ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)wait_fence,
|
||||
NULL, GENERIC_ALL, NULL, &shared_wait);
|
||||
ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)drain_fence,
|
||||
NULL, GENERIC_ALL, NULL, &shared_drain);
|
||||
ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
|
||||
|
||||
ID3D12Fence_Release(signal_fence);
|
||||
ID3D12Fence_Release(wait_fence);
|
||||
ID3D12Fence_Release(drain_fence);
|
||||
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_signal, &IID_ID3D12Fence, (void**)&signal_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_wait, &IID_ID3D12Fence, (void**)&wait_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_drain, &IID_ID3D12Fence, (void**)&drain_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
|
||||
/* OpenSharedHandle takes a kernel level reference on the HANDLE. */
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_signal, &IID_ID3D12Fence, (void**)&signal_fence_dup);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_wait, &IID_ID3D12Fence, (void**)&wait_fence_dup);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_drain, &IID_ID3D12Fence, (void**)&drain_fence_dup);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
|
||||
/* Observed behavior: Closing the last reference to the kernel HANDLE object unblocks all waiters.
|
||||
* This isn't really implementable in Wine as it stands since applications are free to share
|
||||
* the HANDLE and Dupe it arbitrarily.
|
||||
* For now, assume this is not a thing, we can report TDR-like situations if this comes up in practice. */
|
||||
if (shared_signal)
|
||||
CloseHandle(shared_signal);
|
||||
if (shared_wait)
|
||||
CloseHandle(shared_wait);
|
||||
if (shared_drain)
|
||||
CloseHandle(shared_drain);
|
||||
}
|
||||
#endif
|
||||
|
||||
memset(&queue_desc, 0, sizeof(queue_desc));
|
||||
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
||||
queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
|
||||
|
||||
src = create_default_buffer(context.device, 256 * 1024 * 1024, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
dst = create_default_buffer(context.device, 256 * 1024 * 1024, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
|
||||
ID3D12Device_CreateCommandQueue(context.device, &queue_desc, &IID_ID3D12CommandQueue, (void**)&compute_queue);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ID3D12Device_CreateCommandAllocator(context.device, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
&IID_ID3D12CommandAllocator, (void**)&allocator[i]);
|
||||
ID3D12Device_CreateCommandList(context.device, 0, D3D12_COMMAND_LIST_TYPE_COMPUTE, allocator[i], NULL,
|
||||
&IID_ID3D12GraphicsCommandList, (void**)&list[i]);
|
||||
}
|
||||
|
||||
/* Heavy copy action. */
|
||||
for (i = 0; i < 128; i++)
|
||||
{
|
||||
ID3D12GraphicsCommandList_CopyResource(list[0], dst, src);
|
||||
ID3D12GraphicsCommandList_CopyResource(list[1], src, dst);
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_Close(list[0]);
|
||||
ID3D12GraphicsCommandList_Close(list[1]);
|
||||
|
||||
/* Note on ref-count checks: The debug layers can take transient public ref-counts it seems. */
|
||||
|
||||
ID3D12CommandQueue_ExecuteCommandLists(context.queue, 1, (ID3D12CommandList * const *)&list[0]);
|
||||
ID3D12CommandQueue_Signal(context.queue, signal_fence, 1);
|
||||
/* Validate that signal/wait does not take public ref-counts. */
|
||||
value = get_refcount(signal_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
/* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time. */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
|
||||
/* Try waiting for a signal that never comes. We'll be able to unblock this wait
|
||||
* when we fully release the fence. */
|
||||
ID3D12CommandQueue_Wait(compute_queue, signal_fence, UINT64_MAX);
|
||||
value = get_refcount(signal_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
ID3D12CommandQueue_Signal(compute_queue, wait_fence, 1);
|
||||
value = get_refcount(wait_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
/* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time. */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(wait_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
|
||||
ID3D12CommandQueue_Wait(compute_queue, wait_fence, 1);
|
||||
value = get_refcount(wait_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
/* Check that we can queue up event completion.
|
||||
* Again, verify that releasing the fence unblocks all waiters ... */
|
||||
event = create_event();
|
||||
ID3D12Fence_SetEventOnCompletion(signal_fence, UINT64_MAX, event);
|
||||
|
||||
if (signal_fence_dup)
|
||||
ID3D12Fence_Release(signal_fence_dup);
|
||||
if (wait_fence_dup)
|
||||
ID3D12Fence_Release(wait_fence_dup);
|
||||
|
||||
/* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time.
|
||||
* Makes sure that the fences aren't signalled when we try to free them.
|
||||
* (Sure, there is a theoretical race condition if GPU completes between this check and the release, but seriously ...). */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(wait_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
|
||||
/* Test that it's valid to release fence while it's in flight.
|
||||
* If we don't cause device lost and drain_fence is waited on successfully we pass the test. */
|
||||
value = ID3D12Fence_Release(signal_fence);
|
||||
ok(value == 0, "Unexpected fence ref-count %u.\n", value);
|
||||
value = ID3D12Fence_Release(wait_fence);
|
||||
ok(value == 0, "Unexpected fence ref-count %u.\n", value);
|
||||
|
||||
ID3D12CommandQueue_ExecuteCommandLists(compute_queue, 1, (ID3D12CommandList * const *)&list[1]);
|
||||
ID3D12CommandQueue_Signal(compute_queue, drain_fence, 1);
|
||||
|
||||
wait_event(event, INFINITE);
|
||||
destroy_event(event);
|
||||
ID3D12Fence_SetEventOnCompletion(drain_fence, 1, NULL);
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(drain_fence);
|
||||
ok(value == 1, "Expected fence wait value 1, but got %u.\n", value);
|
||||
|
||||
if (drain_fence_dup)
|
||||
{
|
||||
/* Check we observe the counter in sibling fences as well. */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(drain_fence_dup);
|
||||
ok(value == 1, "Expected fence wait value 1, but got %u.\n", value);
|
||||
ID3D12Fence_Release(drain_fence_dup);
|
||||
}
|
||||
|
||||
value = ID3D12Fence_Release(drain_fence);
|
||||
ok(value == 0, "Unexpected fence ref-count %u.\n", value);
|
||||
|
||||
/* Early freeing of fences might signal the drain fence too early, causing GPU hang. */
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
wait_queue_idle(context.device, compute_queue);
|
||||
|
||||
ID3D12CommandQueue_Release(compute_queue);
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ID3D12CommandAllocator_Release(allocator[i]);
|
||||
ID3D12GraphicsCommandList_Release(list[i]);
|
||||
}
|
||||
ID3D12Resource_Release(dst);
|
||||
ID3D12Resource_Release(src);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_fence_wait_robustness(void)
|
||||
{
|
||||
test_fence_wait_robustness_inner(false);
|
||||
}
|
||||
|
||||
void test_fence_wait_robustness_shared(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
test_fence_wait_robustness_inner(true);
|
||||
#else
|
||||
skip("Shared fences not supported on native Linux build.\n");
|
||||
#endif
|
||||
}
|
|
@ -1784,16 +1784,16 @@ void test_hull_shader_vertex_input_patch_constant_phase(void)
|
|||
d.c = (155.0 / 255.0).xxxx;
|
||||
|
||||
if (vid == 0)
|
||||
d.position = float4(-1, -1, 0, 0);
|
||||
d.position = float4(-1, -1, 0, 1);
|
||||
else if (vid == 1)
|
||||
d.position = float4(-1, 1, 0, 0);
|
||||
d.position = float4(-1, 3, 0, 1);
|
||||
else
|
||||
d.position = float4(3, 1, 0, 0);
|
||||
d.position = float4(3, -1, 0, 1);
|
||||
|
||||
return d;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xae8bdf44, 0x9e5a4ce7, 0xf21eca02, 0x3e1dd4ac, 0x00000001, 0x00000234, 0x00000003,
|
||||
0x43425844, 0x64268a24, 0xcd8d91b3, 0x70514911, 0x95555eb8, 0x00000001, 0x00000234, 0x00000003,
|
||||
0x0000002c, 0x00000060, 0x000000e8, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978,
|
||||
0x4e47534f, 0x00000080, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000001, 0x00000003,
|
||||
|
@ -1805,9 +1805,9 @@ void test_hull_shader_vertex_input_patch_constant_phase(void)
|
|||
0x001020f2, 0x00000001, 0x03000065, 0x001020f2, 0x00000002, 0x03000065, 0x001020f2, 0x00000003,
|
||||
0x02000068, 0x00000001, 0x07000020, 0x00100012, 0x00000000, 0x0010100a, 0x00000000, 0x00004001,
|
||||
0x00000001, 0x0f000037, 0x001000f2, 0x00000000, 0x00100006, 0x00000000, 0x00004002, 0xbf800000,
|
||||
0x3f800000, 0x00000000, 0x00000000, 0x00004002, 0x40400000, 0x3f800000, 0x00000000, 0x00000000,
|
||||
0x40400000, 0x00000000, 0x3f800000, 0x00004002, 0x40400000, 0xbf800000, 0x00000000, 0x3f800000,
|
||||
0x0c000037, 0x001020f2, 0x00000000, 0x00101006, 0x00000000, 0x00100e46, 0x00000000, 0x00004002,
|
||||
0xbf800000, 0xbf800000, 0x00000000, 0x00000000, 0x08000036, 0x001020f2, 0x00000001, 0x00004002,
|
||||
0xbf800000, 0xbf800000, 0x00000000, 0x3f800000, 0x08000036, 0x001020f2, 0x00000001, 0x00004002,
|
||||
0x3ec8c8c9, 0x3ec8c8c9, 0x3ec8c8c9, 0x3ec8c8c9, 0x08000036, 0x001020f2, 0x00000002, 0x00004002,
|
||||
0x3f48c8c9, 0x3f48c8c9, 0x3f48c8c9, 0x3f48c8c9, 0x08000036, 0x001020f2, 0x00000003, 0x00004002,
|
||||
0x3f1b9b9c, 0x3f1b9b9c, 0x3f1b9b9c, 0x3f1b9b9c, 0x0100003e,
|
||||
|
@ -1987,7 +1987,6 @@ void test_hull_shader_vertex_input_patch_constant_phase(void)
|
|||
|
||||
transition_resource_state(command_list, context.render_target,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
bug_if(is_radv_device(context.device))
|
||||
check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff9bc864, 0);
|
||||
|
||||
destroy_test_context(&context);
|
||||
|
|
|
@ -27,6 +27,10 @@ PFN_D3D12_GET_DEBUG_INTERFACE pfn_D3D12GetDebugInterface;
|
|||
const char *vkd3d_test_platform = "other";
|
||||
struct vkd3d_test_state_context vkd3d_test_state;
|
||||
|
||||
#ifdef _WIN32
|
||||
RENDERDOC_API_1_0_0 *renderdoc_api;
|
||||
#endif
|
||||
|
||||
bool compare_float(float f, float g, int ulps)
|
||||
{
|
||||
int x, y;
|
||||
|
@ -209,8 +213,8 @@ void upload_texture_data_(unsigned int line, ID3D12Resource *texture,
|
|||
{
|
||||
D3D12_TEXTURE_COPY_LOCATION dst_location, src_location;
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts;
|
||||
uint64_t *row_sizes, required_size;
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
UINT64 *row_sizes, required_size;
|
||||
ID3D12Resource *upload_buffer;
|
||||
D3D12_MEMCPY_DEST dst_data;
|
||||
ID3D12Device *device;
|
||||
|
@ -842,6 +846,9 @@ ID3D12CommandSignature *create_command_signature_(unsigned int line,
|
|||
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
|
||||
signature_desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS);
|
||||
break;
|
||||
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS:
|
||||
signature_desc.ByteStride = sizeof(D3D12_DISPATCH_RAYS_DESC);
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
@ -858,6 +865,7 @@ ID3D12CommandSignature *create_command_signature_(unsigned int line,
|
|||
|
||||
bool init_compute_test_context_(unsigned int line, struct test_context *context)
|
||||
{
|
||||
D3D12_COMMAND_LIST_TYPE command_list_type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
|
||||
ID3D12Device *device;
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -870,14 +878,21 @@ bool init_compute_test_context_(unsigned int line, struct test_context *context)
|
|||
}
|
||||
device = context->device;
|
||||
|
||||
context->queue = create_command_queue_(line, device,
|
||||
D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
#ifdef _WIN32
|
||||
begin_renderdoc_capturing(device);
|
||||
/* Workaround RenderDoc bug. It expects a DIRECT command queue to exist. */
|
||||
if (renderdoc_api)
|
||||
command_list_type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
||||
#endif
|
||||
|
||||
hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
context->queue = create_command_queue_(line, device,
|
||||
command_list_type, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
|
||||
hr = ID3D12Device_CreateCommandAllocator(device, command_list_type,
|
||||
&IID_ID3D12CommandAllocator, (void **)&context->allocator);
|
||||
ok_(line)(hr == S_OK, "Failed to create command allocator, hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12Device_CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
hr = ID3D12Device_CreateCommandList(device, 0, command_list_type,
|
||||
context->allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&context->list);
|
||||
ok_(line)(hr == S_OK, "Failed to create command list, hr %#x.\n", hr);
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#ifndef __VKD3D_D3D12_TEST_UTILS_H
|
||||
#define __VKD3D_D3D12_TEST_UTILS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "renderdoc_app.h"
|
||||
#endif
|
||||
|
||||
#define SHADER_BYTECODE(code) {code,sizeof(code)}
|
||||
|
||||
#define wait_queue_idle(a, b) wait_queue_idle_(__LINE__, a, b)
|
||||
|
@ -78,7 +82,7 @@ static inline bool compare_color(DWORD c1, DWORD c2, BYTE max_diff)
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline D3D12_SHADER_BYTECODE shader_bytecode(const DWORD *code, size_t size)
|
||||
static inline D3D12_SHADER_BYTECODE shader_bytecode(const void *code, size_t size)
|
||||
{
|
||||
D3D12_SHADER_BYTECODE shader_bytecode = { code, size };
|
||||
return shader_bytecode;
|
||||
|
@ -302,6 +306,8 @@ static inline unsigned int format_size(DXGI_FORMAT format)
|
|||
case DXGI_FORMAT_R8G8_UNORM:
|
||||
return 16;
|
||||
case DXGI_FORMAT_R16G16B16A16_TYPELESS:
|
||||
case DXGI_FORMAT_R32G32_UINT:
|
||||
case DXGI_FORMAT_R32G32_TYPELESS:
|
||||
return 8;
|
||||
case DXGI_FORMAT_R32_TYPELESS:
|
||||
case DXGI_FORMAT_D32_FLOAT:
|
||||
|
@ -319,6 +325,8 @@ static inline unsigned int format_size(DXGI_FORMAT format)
|
|||
case DXGI_FORMAT_R8G8B8A8_UNORM:
|
||||
case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
|
||||
case DXGI_FORMAT_R8G8B8A8_UINT:
|
||||
case DXGI_FORMAT_R8G8B8A8_SINT:
|
||||
case DXGI_FORMAT_R8G8B8A8_SNORM:
|
||||
case DXGI_FORMAT_B8G8R8A8_UNORM:
|
||||
return 4;
|
||||
case DXGI_FORMAT_R16_FLOAT:
|
||||
|
@ -960,6 +968,16 @@ static inline ID3D12PipelineState *create_pipeline_state_dxil_(unsigned int line
|
|||
return pipeline_state;
|
||||
}
|
||||
|
||||
#define create_pipeline_state_from_stream(device, desc, state) create_pipeline_state_from_stream_(device, desc, sizeof(*desc), state)
|
||||
static inline HRESULT create_pipeline_state_from_stream_(ID3D12Device2 *device, void *stream, size_t size, ID3D12PipelineState **state)
|
||||
{
|
||||
D3D12_PIPELINE_STATE_STREAM_DESC pipeline_desc;
|
||||
pipeline_desc.SizeInBytes = size;
|
||||
pipeline_desc.pPipelineStateSubobjectStream = stream;
|
||||
|
||||
return ID3D12Device2_CreatePipelineState(device, &pipeline_desc, &IID_ID3D12PipelineState, (void **)state);
|
||||
}
|
||||
|
||||
struct test_context_desc
|
||||
{
|
||||
unsigned int rt_width, rt_height, rt_array_size;
|
||||
|
@ -1035,6 +1053,45 @@ static inline void create_render_target_(unsigned int line, struct test_context
|
|||
ID3D12Device_CreateRenderTargetView(context->device, *render_target, NULL, *rtv);
|
||||
}
|
||||
|
||||
/* Utility code for capturing native D3D12 tests, which is why this only covers Win32.
|
||||
* Launch the d3d12.exe test binary from RenderDoc UI.
|
||||
* For Vulkan capturing, use VKD3D_AUTO_CAPTURE_COUNTS and friends instead. */
|
||||
#ifdef _WIN32
|
||||
extern RENDERDOC_API_1_0_0 *renderdoc_api;
|
||||
|
||||
static inline void begin_renderdoc_capturing(ID3D12Device *device)
|
||||
{
|
||||
pRENDERDOC_GetAPI get_api;
|
||||
HANDLE renderdoc;
|
||||
FARPROC fn_ptr;
|
||||
|
||||
if (!renderdoc_api)
|
||||
{
|
||||
renderdoc = GetModuleHandleA("renderdoc.dll");
|
||||
if (renderdoc)
|
||||
{
|
||||
fn_ptr = GetProcAddress(renderdoc, "RENDERDOC_GetAPI");
|
||||
if (fn_ptr)
|
||||
{
|
||||
/* Workaround compiler warnings about casting to function pointer. */
|
||||
memcpy(&get_api, &fn_ptr, sizeof(fn_ptr));
|
||||
if (!get_api(eRENDERDOC_API_Version_1_0_0, (void **)&renderdoc_api))
|
||||
renderdoc_api = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (renderdoc_api)
|
||||
renderdoc_api->StartFrameCapture(device, NULL);
|
||||
}
|
||||
|
||||
static inline void end_renderdoc_capturing(ID3D12Device *device)
|
||||
{
|
||||
if (renderdoc_api)
|
||||
renderdoc_api->EndFrameCapture(device, NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define init_test_context(context, desc) init_test_context_(__LINE__, context, desc)
|
||||
static inline bool init_test_context_(unsigned int line, struct test_context *context,
|
||||
const struct test_context_desc *desc)
|
||||
|
@ -1052,6 +1109,10 @@ static inline bool init_test_context_(unsigned int line, struct test_context *co
|
|||
}
|
||||
device = context->device;
|
||||
|
||||
#ifdef _WIN32
|
||||
begin_renderdoc_capturing(device);
|
||||
#endif
|
||||
|
||||
context->queue = create_command_queue_(line, device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
|
||||
hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_DIRECT,
|
||||
|
@ -1103,6 +1164,10 @@ static inline void destroy_test_context_(unsigned int line, struct test_context
|
|||
{
|
||||
ULONG refcount;
|
||||
|
||||
#ifdef _WIN32
|
||||
end_renderdoc_capturing(context->device);
|
||||
#endif
|
||||
|
||||
if (context->pipeline_state)
|
||||
ID3D12PipelineState_Release(context->pipeline_state);
|
||||
if (context->root_signature)
|
||||
|
@ -1335,4 +1400,184 @@ create_32bit_constants_root_signature_(__LINE__, a, b, c, e, 0)
|
|||
#define init_depth_stencil(a, b, c, d, e, f, g, h, i) init_depth_stencil_(__LINE__, a, b, c, d, e, f, g, h, i)
|
||||
#define destroy_depth_stencil(depth_stencil) destroy_depth_stencil_(__LINE__, depth_stencil)
|
||||
|
||||
union d3d12_root_signature_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
ID3D12RootSignature *root_signature;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_shader_bytecode_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_SHADER_BYTECODE shader_bytecode;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_stream_output_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_STREAM_OUTPUT_DESC stream_output_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_blend_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_BLEND_DESC blend_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_sample_mask_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
UINT sample_mask;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_rasterizer_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_RASTERIZER_DESC rasterizer_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_depth_stencil_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_input_layout_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_INPUT_LAYOUT_DESC input_layout;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_ib_strip_cut_value_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE strip_cut_value;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_primitive_topology_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE primitive_topology_type;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_render_target_formats_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_RT_FORMAT_ARRAY render_target_formats;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_depth_stencil_format_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
DXGI_FORMAT depth_stencil_format;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_sample_desc_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
DXGI_SAMPLE_DESC sample_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_node_mask_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
UINT node_mask;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_cached_pso_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_CACHED_PIPELINE_STATE cached_pso;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_flags_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_PIPELINE_STATE_FLAGS flags;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_depth_stencil1_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_DEPTH_STENCIL_DESC1 depth_stencil_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
union d3d12_view_instancing_subobject
|
||||
{
|
||||
struct
|
||||
{
|
||||
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type;
|
||||
D3D12_VIEW_INSTANCING_DESC view_instancing_desc;
|
||||
};
|
||||
void *dummy_align;
|
||||
};
|
||||
|
||||
#endif /* __VKD3D_D3D12_TEST_UTILS_H */
|
||||
|
|
|
@ -74,6 +74,7 @@ decl_test(test_map_resource);
|
|||
decl_test(test_map_placed_resources);
|
||||
decl_test(test_bundle_state_inheritance);
|
||||
decl_test(test_shader_instructions);
|
||||
decl_test(test_shader_instructions_dxil);
|
||||
decl_test(test_compute_shader_instructions);
|
||||
decl_test(test_discard_instruction);
|
||||
decl_test(test_shader_interstage_interface);
|
||||
|
@ -122,6 +123,8 @@ decl_test(test_tgsm);
|
|||
decl_test(test_uav_load);
|
||||
decl_test(test_cs_uav_store);
|
||||
decl_test(test_uav_counters);
|
||||
decl_test(test_uav_counter_null_behavior_dxbc);
|
||||
decl_test(test_uav_counter_null_behavior_dxil);
|
||||
decl_test(test_decrement_uav_counter);
|
||||
decl_test(test_atomic_instructions_dxbc);
|
||||
decl_test(test_atomic_instructions_dxil);
|
||||
|
@ -134,6 +137,7 @@ decl_test(test_resolve_non_issued_query_data);
|
|||
decl_test(test_resolve_query_data_in_different_command_list);
|
||||
decl_test(test_resolve_query_data_in_reordered_command_list);
|
||||
decl_test(test_execute_indirect);
|
||||
decl_test(test_execute_indirect_state);
|
||||
decl_test(test_dispatch_zero_thread_groups);
|
||||
decl_test(test_unaligned_vertex_stride);
|
||||
decl_test(test_zero_vertex_stride);
|
||||
|
@ -145,6 +149,7 @@ decl_test(test_copy_texture);
|
|||
decl_test(test_copy_texture_buffer);
|
||||
decl_test(test_copy_buffer_texture);
|
||||
decl_test(test_copy_block_compressed_texture);
|
||||
decl_test(test_copy_buffer_overlap);
|
||||
decl_test(test_separate_bindings);
|
||||
decl_test(test_face_culling_dxbc);
|
||||
decl_test(test_face_culling_dxil);
|
||||
|
@ -197,6 +202,7 @@ decl_test(test_primitive_restart);
|
|||
decl_test(test_index_buffer_edge_case_stream_output);
|
||||
decl_test(test_vertex_shader_stream_output_dxbc);
|
||||
decl_test(test_vertex_shader_stream_output_dxil);
|
||||
decl_test(test_primitive_restart_list_topology_stream_output);
|
||||
decl_test(test_read_write_subresource);
|
||||
decl_test(test_queue_wait);
|
||||
decl_test(test_graphics_compute_queue_synchronization);
|
||||
|
@ -222,6 +228,7 @@ decl_test(test_bindless_uav_counter_sm51);
|
|||
decl_test(test_bindless_uav_counter_dxil);
|
||||
decl_test(test_bindless_bufinfo_sm51);
|
||||
decl_test(test_bindless_bufinfo_dxil);
|
||||
decl_test(test_bindless_heap_sm66);
|
||||
decl_test(test_stencil_export_dxbc);
|
||||
decl_test(test_stencil_export_dxil);
|
||||
decl_test(test_raytracing);
|
||||
|
@ -241,6 +248,7 @@ decl_test(test_get_cached_blob);
|
|||
decl_test(test_pipeline_library);
|
||||
decl_test(test_buffers_oob_behavior_dxbc);
|
||||
decl_test(test_buffers_oob_behavior_dxil);
|
||||
decl_test(test_buffers_oob_behavior_vectorized_byte_address);
|
||||
decl_test(test_typed_buffers_many_objects_dxbc);
|
||||
decl_test(test_typed_buffers_many_objects_dxil);
|
||||
decl_test(test_create_pipeline_with_null_root_signature);
|
||||
|
@ -252,6 +260,8 @@ decl_test(test_vrs_dxil);
|
|||
decl_test(test_vrs_image);
|
||||
decl_test(test_stress_suballocation);
|
||||
decl_test(test_stress_suballocation_multithread);
|
||||
decl_test(test_stress_suballocation_rebar);
|
||||
decl_test(test_stress_fallback_render_target_allocation_device);
|
||||
decl_test(test_placed_image_alignment);
|
||||
decl_test(test_root_parameter_preservation);
|
||||
decl_test(test_cbv_hoisting_sm51);
|
||||
|
@ -269,7 +279,45 @@ decl_test(test_sv_barycentric);
|
|||
decl_test(test_shader_fp16);
|
||||
decl_test(test_shader_sm62_denorm);
|
||||
decl_test(test_shader_sm64_packed);
|
||||
decl_test(test_shader_sm66_packed);
|
||||
decl_test(test_shader_sm65_wave_intrinsics);
|
||||
decl_test(test_shader_sm66_wave_size);
|
||||
decl_test(test_shader_sm66_quad_op_semantics);
|
||||
decl_test(test_shader_sm66_compute_derivatives);
|
||||
decl_test(test_shader_sm66_64bit_atomics);
|
||||
decl_test(test_shader_sm66_is_helper_lane);
|
||||
decl_test(test_get_copyable_footprints_planar);
|
||||
decl_test(test_depth_stencil_test_no_dsv);
|
||||
decl_test(test_depth_stencil_layout_tracking);
|
||||
decl_test(test_copy_buffer_to_depth_stencil);
|
||||
decl_test(test_map_texture_validation);
|
||||
decl_test(test_read_write_subresource_2d);
|
||||
decl_test(test_read_subresource_rt);
|
||||
decl_test(test_integer_blending_pipeline_state);
|
||||
decl_test(test_discard_resource_uav);
|
||||
decl_test(test_unbound_rtv_rendering);
|
||||
decl_test(test_raytracing_local_rs_static_sampler);
|
||||
decl_test(test_raytracing_local_rs_static_sampler_collection);
|
||||
decl_test(test_rayquery);
|
||||
decl_test(test_typed_srv_uav_cast);
|
||||
decl_test(test_typed_srv_cast_clear);
|
||||
decl_test(test_aliasing_barrier_edge_cases);
|
||||
decl_test(test_mesh_shader_create_pipeline);
|
||||
decl_test(test_mesh_shader_rendering);
|
||||
decl_test(test_mesh_shader_execute_indirect);
|
||||
decl_test(test_amplification_shader);
|
||||
decl_test(test_advanced_cbv_layout);
|
||||
decl_test(test_shader_waveop_maximal_convergence);
|
||||
decl_test(test_uav_3d_sliced_view);
|
||||
decl_test(test_pipeline_no_ps_nonzero_rts);
|
||||
decl_test(test_root_descriptor_offset_sign);
|
||||
decl_test(test_raytracing_no_global_root_signature);
|
||||
decl_test(test_raytracing_missing_required_objects);
|
||||
decl_test(test_raytracing_reject_duplicate_objects);
|
||||
decl_test(test_raytracing_embedded_subobjects);
|
||||
decl_test(test_raytracing_default_association_tiebreak);
|
||||
decl_test(test_raytracing_collection_identifiers);
|
||||
decl_test(test_fence_wait_robustness);
|
||||
decl_test(test_fence_wait_robustness_shared);
|
||||
decl_test(test_root_signature_empty_blob);
|
||||
decl_test(test_sparse_buffer_memory_lifetime);
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue