From 95c700540c03f87a8f0f96435b41cb3d881fe847 Mon Sep 17 00:00:00 2001
From: gmeanti
Date: Thu, 21 Sep 2023 09:25:29 -0400
Subject: [PATCH] Fix kwargs to fmm/fmmv

---
 falkon/kernels/distance_kernel.py |  2 +-
 falkon/mmv_ops/fmm.py             | 14 +++++++-------
 falkon/mmv_ops/fmmv.py            | 20 ++++++++++----------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/falkon/kernels/distance_kernel.py b/falkon/kernels/distance_kernel.py
index a376630..6917041 100644
--- a/falkon/kernels/distance_kernel.py
+++ b/falkon/kernels/distance_kernel.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Tuple, Type, Union
+from typing import Dict, Optional, Type, Union
 
 import numpy as np
 import torch
diff --git a/falkon/mmv_ops/fmm.py b/falkon/mmv_ops/fmm.py
index 43e6c23..35264b0 100644
--- a/falkon/mmv_ops/fmm.py
+++ b/falkon/mmv_ops/fmm.py
@@ -241,7 +241,7 @@ def sparse_mm_run_thread(
 
         for j in range(0, M, m):
             lenj = min(m, M - j)
-            c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2}
+            c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2.items()}
             c_m2 = m2.narrow_rows(j, lenj).to(dtype=comp_dt)
 
             # On CUDA the second argument to apply (a Sparse*Sparse multiplication) must be
@@ -258,7 +258,7 @@
 
             for i in range(0, N, n):
                 leni = min(n, N - i)
-                c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+                c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
                 c_m1 = m1.narrow_rows(i, leni).to(dtype=comp_dt)
 
                 if dev.type == "cuda":
@@ -336,7 +336,7 @@ def mm_run_thread(
 
         for i in range(0, N, n):
             leni = min(n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
 
             if has_gpu_bufs:
                 c_dev_m1 = copy(
@@ -350,7 +350,7 @@
 
             for j in range(0, M, m):
                 lenj = min(m, M - j)
-                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2}
+                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2.items()}
 
                 if has_gpu_bufs:
                     c_dev_m2 = copy(
@@ -415,12 +415,12 @@ def mm_diff_run_thread(
 
         for i in range(0, N, n):
             leni = min(n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
             c_dev_m1 = m1[i : i + leni, :].to(device=dev, dtype=comp_dt, non_blocking=True, copy=False)
 
             for j in range(0, M, m):
                 lenj = min(m, M - j)
-                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2}
+                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2.items()}
                 c_dev_m2 = m2[j : j + lenj, :].to(device=dev, dtype=comp_dt, non_blocking=True, copy=False)
 
                 c_dev_out = kernel.compute_diff(
@@ -493,7 +493,7 @@ def run_cpu_gpu(
         X1_block = X1.narrow(0, block_sizes[i], bwidth)
         c_kwargs_m1 = {}
         if kwargs_m1 is not None:
-            c_kwargs_m1 = {k: v[block_sizes[i] : block_sizes[i] + bwidth] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[block_sizes[i] : block_sizes[i] + bwidth] for k, v in kwargs_m1.items()}
         args.append(
             (
                 ArgsFmm(
diff --git a/falkon/mmv_ops/fmmv.py b/falkon/mmv_ops/fmmv.py
index 6a540a6..96d2819 100644
--- a/falkon/mmv_ops/fmmv.py
+++ b/falkon/mmv_ops/fmmv.py
@@ -258,7 +258,7 @@ def sparse_mmv_run_thread(
         s1, s2 = _init_two_streams(stack, dev, tid)
         # enters stream 1
         for i in range(0, N, blk_n):
             leni = min(blk_n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
             c_m1 = m1.narrow_rows(i, leni)
             if incore:  # Note that CUDA-incore is not allowed to happen (so this is CPU->CPU)
@@ -271,7 +271,7 @@
 
             for j in range(0, M, blk_m):
                 lenj = min(blk_m, M - j)
-                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2}
+                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2.items()}
                 c_m2 = m2.narrow_rows(j, lenj)
 
                 if incore:  # CPU -> CPU
@@ -357,7 +357,7 @@ def mmv_run_thread(
         s1, s2 = _init_two_streams(stack, dev, tid)
         for i in range(0, N, blk_n):
             leni = min(blk_n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
             if m1_ic:
                 c_dev_m1 = m1[i : i + leni, :]
             else:
@@ -370,7 +370,7 @@
 
             for j in range(0, M, blk_m):
                 lenj = min(blk_m, M - j)
-                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2}
+                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2.items()}
                 if m2_ic:
                     c_dev_m2 = m2[j : j + lenj, :]
                 else:
@@ -433,14 +433,14 @@ def mmv_diff_run_thread(
         s1, s2 = _init_two_streams(stack, dev, tid)
         for i in range(0, N, blk_n):
             leni = min(blk_n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
             c_dev_m1 = m1[i : i + leni, :].to(dev, non_blocking=True, copy=False)
             c_dev_m1_g = None if grads[0] is None else grads[0][i : i + leni, :].to(dev, non_blocking=True, copy=False)
             c_dev_out = out[i : i + leni, :].to(dev, non_blocking=True, copy=False)
             for j in range(0, M, blk_m):
                 lenj = min(blk_m, M - j)
-                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2}
+                c_kwargs_m2 = {k: v[j:lenj] for k, v in kwargs_m2.items()}
                 c_dev_m2 = m2[j : j + lenj, :].to(dev, non_blocking=True, copy=False)
                 c_dev_m2_g = (
                     None if grads[1] is None else grads[1][j : j + lenj, :].to(dev, non_blocking=True, copy=False)
                 )
@@ -655,7 +655,7 @@ def sparse_dmmv_run_thread(
 
         for i in range(0, N, blk_n):
             leni = min(blk_n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
             c_m1 = m1.narrow_rows(i, leni)
 
             if incore:  # Note that CUDA-incore is not allowed to happen (so this is CPU->CPU)
@@ -739,7 +739,7 @@ def dmmv_run_thread(
             copy(v, dev_v, non_blocking=True)
         for i in range(0, N, blk_n):
             leni = min(blk_n, N - i)
-            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
             if m1_ic:
                 c_dev_m1 = m1[i : i + leni, :]
             else:
@@ -820,7 +820,7 @@ def run_cpu_gpu(
         X1_block = X1.narrow(0, block_sizes[i], bwidth)
         c_kwargs_m1 = {}
         if kwargs_m1 is not None:
-            c_kwargs_m1 = {k: v[block_sizes[i] : block_sizes[i] + bwidth] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[block_sizes[i] : block_sizes[i] + bwidth] for k, v in kwargs_m1.items()}
         args.append(
             (
                 ArgsFmmv(
@@ -1098,7 +1098,7 @@ def fdmmv(
         X1_block = X1.narrow(0, block_sizes[i], bwidth)
         c_kwargs_m1 = {}
         if kwargs_m1 is not None:
-            c_kwargs_m1 = {k: v[block_sizes[i] : block_sizes[i] + bwidth] for k, v in kwargs_m1}
+            c_kwargs_m1 = {k: v[block_sizes[i] : block_sizes[i] + bwidth] for k, v in kwargs_m1.items()}
         args.append(
             (
                 ArgsFmmv(
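
Note on the fix (illustration only, not part of the patch): iterating a Python dict directly yields its keys, so the old comprehensions of the form {k: v[i:leni] for k, v in kwargs_m1} try to unpack each key string into (k, v) and typically raise ValueError instead of slicing the per-row tensors. Adding .items() makes the comprehension iterate over (key, value) pairs. Below is a minimal sketch of the failure mode and the fix, using a hypothetical kwargs_m1 with one tensor-valued entry (the key name "weights" and the tensor shape are invented for the example).

    import torch

    # Hypothetical per-row keyword arguments of the kind fmm/fmmv slices per block
    # (the key "weights" and the tensor shape are assumptions for this sketch).
    kwargs_m1 = {"weights": torch.randn(100)}
    i, leni = 0, 32

    # Old behaviour: iterating the dict yields only keys (strings), so unpacking
    # each key into (k, v) fails with a ValueError.
    try:
        c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1}
    except ValueError as err:
        print("without .items():", err)

    # Fixed behaviour: .items() yields (key, value) pairs, so each value tensor
    # is sliced to the current block of rows.
    c_kwargs_m1 = {k: v[i:leni] for k, v in kwargs_m1.items()}
    print(c_kwargs_m1["weights"].shape)  # torch.Size([32])

Each per-block slice of the kwargs mirrors how m1 and m2 themselves are narrowed to the current block (narrow_rows or row slicing) before the kernel is applied.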