rust/program: remove Program::kernels

This was a terrible method, as it cloned the entire list on each call.
Consumers should instead take the lock and operate on a slice, which
lowers CPU overhead.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28872>
This commit is contained in:
Karol Herbst 2024-04-23 02:47:28 +02:00 committed by Marge Bot
parent d8ed73b5f6
commit cc9141f044
4 changed files with 18 additions and 17 deletions

View File

@ -260,13 +260,14 @@ fn create_kernel(
return Err(CL_INVALID_VALUE);
}
let build = p.build_info();
// CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable for program.
if p.kernels().is_empty() {
if build.kernels().is_empty() {
return Err(CL_INVALID_PROGRAM_EXECUTABLE);
}
// CL_INVALID_KERNEL_NAME if kernel_name is not found in program.
if !p.kernels().contains(&name) {
if !build.kernels().contains(&name) {
return Err(CL_INVALID_KERNEL_NAME);
}
@ -277,7 +278,7 @@ fn create_kernel(
return Err(CL_INVALID_KERNEL_DEFINITION);
}
Ok(Kernel::new(name, p).into_cl())
Ok(Kernel::new(name, Arc::clone(&p), &build).into_cl())
}
#[cl_entrypoint]
@ -298,25 +299,26 @@ fn create_kernels_in_program(
num_kernels_ret: *mut cl_uint,
) -> CLResult<()> {
let p = Program::arc_from_raw(program)?;
let build = p.build_info();
// CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable for any device in
// program.
if p.kernels().is_empty() {
if build.kernels().is_empty() {
return Err(CL_INVALID_PROGRAM_EXECUTABLE);
}
// CL_INVALID_VALUE if kernels is not NULL and num_kernels is less than the number of kernels
// in program.
if !kernels.is_null() && p.kernels().len() > num_kernels as usize {
if !kernels.is_null() && build.kernels().len() > num_kernels as usize {
return Err(CL_INVALID_VALUE);
}
let mut num_kernels = 0;
for name in p.kernels() {
for name in build.kernels() {
// Kernel objects are not created for any __kernel functions in program that do not have the
// same function definition across all devices for which a program executable has been
// successfully built.
if !p.has_unique_kernel_signatures(&name) {
if !p.has_unique_kernel_signatures(name) {
continue;
}
@ -325,7 +327,7 @@ fn create_kernels_in_program(
unsafe {
kernels
.add(num_kernels as usize)
.write(Kernel::new(name, p.clone()).into_cl());
.write(Kernel::new(name.clone(), p.clone(), &build).into_cl());
}
}
num_kernels += 1;

View File

@ -44,9 +44,9 @@ impl CLInfo<cl_program_info> for cl_program {
ProgramSourceType::Il(il) => to_maybeuninit_vec(il.to_bin().to_vec()),
_ => Vec::new(),
},
CL_PROGRAM_KERNEL_NAMES => cl_prop::<&str>(&*prog.kernels().join(";")),
CL_PROGRAM_KERNEL_NAMES => cl_prop::<&str>(&*prog.build_info().kernels().join(";")),
CL_PROGRAM_NUM_DEVICES => cl_prop::<cl_uint>(prog.devs.len() as cl_uint),
CL_PROGRAM_NUM_KERNELS => cl_prop::<usize>(prog.kernels().len()),
CL_PROGRAM_NUM_KERNELS => cl_prop::<usize>(prog.build_info().kernels().len()),
CL_PROGRAM_REFERENCE_COUNT => cl_prop::<cl_uint>(Program::refcnt(*self)?),
CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT => cl_prop::<cl_bool>(CL_FALSE),
CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT => cl_prop::<cl_bool>(CL_FALSE),

View File

@ -811,8 +811,7 @@ fn extract<'a, const S: usize>(buf: &'a mut &[u8]) -> &'a [u8; S] {
}
impl Kernel {
pub fn new(name: String, prog: Arc<Program>) -> Arc<Kernel> {
let prog_build = prog.build_info();
pub fn new(name: String, prog: Arc<Program>, prog_build: &ProgramBuild) -> Arc<Kernel> {
let kernel_info = Arc::clone(prog_build.kernel_info.get(&name).unwrap());
let builds = prog_build
.builds
@ -823,7 +822,7 @@ impl Kernel {
let values = vec![None; kernel_info.args.len()];
Arc::new(Self {
base: CLObjectBase::new(RusticlTypes::Kernel),
prog: prog.clone(),
prog: prog,
name: name,
values: Mutex::new(values),
builds: builds,

View File

@ -248,6 +248,10 @@ impl ProgramBuild {
}
}
/// Returns the kernel names held by this build as a borrowed slice.
///
/// Nothing is cloned; the slice borrows directly from `self.kernels`.
pub fn kernels(&self) -> &[String] {
&self.kernels
}
pub fn to_nir(&self, kernel: &str, d: &Device) -> NirShader {
let mut spec_constants: Vec<_> = self
.spec_constants
@ -556,10 +560,6 @@ impl Program {
true
}
/// Returns an owned copy of the kernel list obtained through `build_info()`.
///
/// Every call clones the whole `Vec<String>`.
// NOTE(review): `build_info()` presumably acquires the build lock — confirm.
pub fn kernels(&self) -> Vec<String> {
self.build_info().kernels.clone()
}
pub fn active_kernels(&self) -> bool {
self.build_info()
.builds