# mesa/src/gallium/drivers/virgl/ci/traces-virgl.yml
# (repository file-viewer header: 227 lines, 7.8 KiB, YAML)

%YAML 1.2
---
# Trace expectations for the `gl-virgl` device.
#
# Layout:
#   traces-db.download-url  base URL the trace files are fetched from
#   traces.<trace path>.<device>  expectation for one trace on one device:
#     checksum  expected checksum for the trace (presumably of the replayed
#               frame -- confirm against the CI replayer)
#     label     flow list of tags such as crash / skip / slow / no-perf
#     text      free-form note, here used to record the observed failure
#
# Trace paths are used verbatim as mapping keys; the embedded ':' characters
# are safe in plain scalars because none of them is followed by a space.
traces-db:
  download-url: "https://s3.freedesktop.org/mesa-tracie-public/"

traces:
  glmark2/desktop:windows=4:effect=blur:blur-radius=5:passes=1:separable=true-v2.trace:
    gl-virgl:
      checksum: 2fc8433c4a38b796173bda2bcfb924cc
  glmark2/jellyfish-v2.trace:
    gl-virgl:
      checksum: 2112a9a5519f39483735509f2ccc61af
  glxgears/glxgears-2-v2.trace:
    gl-virgl:
      checksum: f8eba0fec6e3e0af9cb09844bc73bdc8
  gputest/furmark-v2.trace:
    gl-virgl:
      checksum: 57ddd36b117adc9216c65c10d914a37e
  gputest/pixmark-piano-v2.trace:
    gl-virgl:
      checksum: 0d875bda7edc01698342b157c6f51500
  gputest/triangle-v2.trace:
    gl-virgl:
      checksum: 7812de00011a3a059892e36cea19c696
  humus/Portals-v2.trace:
    gl-virgl:
      checksum: b697edce7776f1afe294a7e80dfc013e
  0ad/0ad-v2.trace:
    gl-virgl:
      checksum: 350e0cf64d124ba98d90106f61775eb4
  glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=false-v2.trace:
    gl-virgl:
      checksum: f80431e56327354b4c88cc45c7e6633a
  glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=subdata:interleave=false-v2.trace:
    gl-virgl:
      checksum: 81e12bfa4ae3b7e63b01edbed71a5941
  glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=true-v2.trace:
    gl-virgl:
      checksum: 08e6d00fe3f4414ebfadc9e5f3c3bf0e
  glmark2/bump:bump-render=height-v2.trace:
    gl-virgl:
      checksum: 4d5211dfb0fd82a1a1dbb498dc2e5b8b
  glmark2/bump:bump-render=high-poly-v2.trace:
    gl-virgl:
      checksum: 4b4d4a4b7bb1341bbd0299c7eb3a6ac9
  glmark2/bump:bump-render=normals-v2.trace:
    gl-virgl:
      checksum: 832e5baf289b27dd84a665f1c85f57c2
  glmark2/conditionals:vertex-steps=0:fragment-steps=0-v2.trace:
    gl-virgl:
      checksum: b78f28d97b675fcc7649cced3930650a
  glmark2/conditionals:vertex-steps=0:fragment-steps=5-v2.trace:
    gl-virgl:
      checksum: d0782a516f06a6dddac4f1e1249f41e7
  glmark2/conditionals:vertex-steps=5:fragment-steps=0-v2.trace:
    gl-virgl:
      checksum: 1ae280a9c6cae495f2d272516a52167e
  glmark2/desktop:windows=4:effect=shadow-v2.trace:
    gl-virgl:
      checksum: d4b3e8338327859a029c7267c9916524
  glmark2/effect2d:kernel=0,1,0;1,-4,1;0,1,0;-v2.trace:
    gl-virgl:
      checksum: 35584880539813436d87bfcbe22cf59b
  glmark2/effect2d:kernel=1,1,1,1,1;1,1,1,1,1;1,1,1,1,1;-v2.trace:
    gl-virgl:
      checksum: b80963dae6ecf40c83bfb16943ef1011
  glmark2/function:fragment-steps=5:fragment-complexity=low-v2.trace:
    gl-virgl:
      checksum: da10cb29cab30c5c068e722b5da7c2e5
  glmark2/function:fragment-steps=5:fragment-complexity=medium-v2.trace:
    gl-virgl:
      checksum: 8e40504d9f2ead8c0d02604291bff1b6
  glmark2/build:use-vbo=false-v2.trace:
    gl-virgl:
      checksum: 024fc485e1f33461313c956ab1b73bdf
  glmark2/build:use-vbo=true-v2.trace:
    gl-virgl:
      checksum: 48c45d16cd410a71aea1a12a73e257d3
  glmark2/ideas:speed=10000-v2.trace:
    gl-virgl:
      checksum: db78cfb035213e31e1435b637b1a8f19
  glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-loop=false-v2.trace:
    gl-virgl:
      checksum: 7fee2e864e015353ace431d51d41bb22
  glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-uniform=false-v2.trace:
    gl-virgl:
      checksum: c87127a5c3256c1fe7c79f7931b8f9df
  glmark2/loop:vertex-steps=5:fragment-steps=5:fragment-uniform=true-v2.trace:
    gl-virgl:
      checksum: 5fec1f728bda86891db4243130546187
  glmark2/pulsar:quads=5:texture=false:light=false-v2.trace:
    gl-virgl:
      checksum: 3e0e6675fb65e00f9128138ff08c2634
  glmark2/refract-v2.trace:
    gl-virgl:
      checksum: cdadfee0518b964433d80c01329ec191
  glmark2/shading:shading=blinn-phong-inf-v2.trace:
    gl-virgl:
      checksum: 36b07dad759ca65e52f1abf1667e7ca8
  glmark2/shading:shading=cel-v2.trace:
    gl-virgl:
      checksum: cb41cf2531a06d65f6e4f442ab62ae8d
  glmark2/shading:shading=gouraud-v2.trace:
    gl-virgl:
      checksum: 3e5469d5038d7cc94ef3549ce9d8c385
  glmark2/shading:shading=phong-v2.trace:
    gl-virgl:
      checksum: e40abcbb4cfbbbfb499d4b0e6d668f41
  glmark2/shadow-v2.trace:
    gl-virgl:
      checksum: 2bb7290f8559ff93305c0e29f3d671e1
  glmark2/texture:texture-filter=linear-v2.trace:
    gl-virgl:
      checksum: 914fd8dddb23751d9a187a979d881abb
  glmark2/texture:texture-filter=mipmap-v2.trace:
    gl-virgl:
      checksum: ea1939f3c4e8dd9cdbc26d41f9dc891a
  glmark2/texture:texture-filter=nearest-v2.trace:
    gl-virgl:
      checksum: 1ae652bdebd1188ab912a800a4c37166
  gputest/gimark-v2.trace:
    gl-virgl:
      label: [crash]
  gputest/pixmark-julia-fp32-v2.trace:
    gl-virgl:
      # Blame annotation (2022-10-26): checksum line attributed to
      # "nir/opt_algebraic: Fuse c - a * b to FMA" (mesa MR !19312).
      checksum: 8b3584b1dd8f1d1bb63205564bd78e4e
  gputest/pixmark-julia-fp64-v2.trace:
    gl-virgl:
      checksum: 73ccaff82ea764057fb0f93f0024cf84
  gputest/pixmark-volplosion-v2.trace:
    gl-virgl:
      checksum: aef0b32ce99a3b25d35304ca08032833
  gputest/plot3d-v2.trace:
    gl-virgl:
      checksum: 817a36e53edccdf946061315596e9cdd
  gputest/tessmark-v2.trace:
    gl-virgl:
      label: [skip, slow]
  humus/AmbientAperture-v2.trace:
    gl-virgl:
      checksum: b33fb8ee73b0c50b14822e170f15ab8a
  humus/CelShading-v2.trace:
    gl-virgl:
      checksum: 3629cba72bde53e4275a8365175fde83
  humus/DynamicBranching3-v2.trace:
    gl-virgl:
      checksum: 0236b28aa8b26fa60172d71bb040f2e9
  humus/HDR-v2.trace:
    gl-virgl:
      checksum: eab0801aadeae87ce31aa0d4ff55e8f8
  humus/RaytracedShadows-v2.trace:
    gl-virgl:
      checksum: df074a376fd3e7abc4dffdd191db8f4b
  humus/VolumetricFogging2-v2.trace:
    gl-virgl:
      checksum: 2eb71553403ad8e0171abc9dc25e5bc1
  itoral-gl-terrain-demo/demo-v2.trace:
    gl-virgl:
      checksum: 37780a6eaa38a55700e8207e89009f56
  neverball/neverball-v2.trace:
    gl-virgl:
      # Blame annotation (2022-10-15): checksum updated by
      # "nir: Be smarter fusing ffma" (mesa MR !18814) because of
      # numerical differences on virgl.
      checksum: 0b8ae7dd4f7f26c3278ded8a5694b983
  pathfinder/canvas_moire-v2.trace:
    gl-virgl:
      checksum: 25ba8f18274126670311bd3ffe058f74
  pathfinder/canvas_text_v2-v2.trace:
    gl-virgl:
      checksum: a1446d0c42a78771240fca6f3b1e10d8
  pathfinder/demo-v2.trace:
    gl-virgl:
      checksum: 0702a66c415cfc13d5bae8bec08402cf
  paraview/pv-manyspheres-v2.trace:
    gl-virgl:
      label: [crash]
      # NOTE(review): the second line of this note looks truncated/garbled
      # (unterminated backquote, trailing "running these.") -- kept verbatim.
      text: |-
        src/mesa/main/arrayobj.c:800:_mesa_update_vao_derived_arrays: Assertion
        `attrib->_EffRelativeOffset < binding->Stride failed. running these.
  paraview/pv-waveletcontour-v2.trace:
    gl-virgl:
      label: [crash]
  paraview/pv-waveletvolume-v2.trace:
    gl-virgl:
      checksum: f4af4067b37c00861fa5911e4c0a6629
  supertuxkart/supertuxkart-mansion-egl-gles-v2.trace:
    gl-virgl:
      # Blame annotation (2022-10-26): checksum line attributed to
      # "nir/opt_algebraic: Fuse c - a * b to FMA" (mesa MR !19312).
      checksum: cc7092975dd6c9064aa54cd7f18053b6
  xonotic/xonotic-keybench-high-v2.trace:
    gl-virgl:
      checksum: f3b184bf8858a6ebccd09e7ca032197e
  valve/counterstrike-v2.trace:
    gl-virgl:
      checksum: 3bc0e0e39cb3c29f6d76ff07f1f02860
  valve/counterstrike-source-v2.trace:
    gl-virgl:
      checksum: f8e5b19142007be14ce6d18d25ef329d
  valve/half-life-2-v2.trace:
    gl-virgl:
      checksum: 6099a13f48bf090ee1d768f98208da70
  valve/portal-2-v2.trace:
    gl-virgl:
      checksum: 7489a8412ee2bca45431d208e0006a3e
  supertuxkart/supertuxkart-antediluvian-abyss.rdc:
    gl-virgl:
      label: [crash]
  supertuxkart/supertuxkart-menu.rdc:
    gl-virgl:
      label: [crash]
  supertuxkart/supertuxkart-ravenbridge-mansion.rdc:
    gl-virgl:
      label: [crash]
  godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc:
    gl-virgl:
      checksum: 232eb48d6689c0117e3cc1660af7f32d
  ror/ror-default.trace:
    gl-virgl:
      label: [crash]
      text: |-
        ../src/mesa/main/arrayobj.c:800:_mesa_update_vao_derived_arrays:
        Assertion `attrib->_EffRelativeOffset < binding->Stride' failed.
  nheko/nheko-colors.trace:
    gl-virgl:
      label: [no-perf]
      checksum: 3a12c08087e16cfae4729f4e9d6c9387
  warzone2100/warzone2100-default.trace:
    gl-virgl:
      checksum: 1fd3f9b5e5a711bdfac49dc03912e1de