
Discussion: using nonlinearity and normalization in the operations #187

jakubMitura14 opened this issue Sep 29, 2024 · 0 comments

Hello, I am using tensor contraction as a layer in Lux.jl, and it works without problems. Here is the implementation, in case it is useful to somebody.


using Lux, Random, CUDA, cuTENSOR, TensorOperations, VectorInterface

# Custom Lux layer wrapping a single tensor contraction; the contraction is given
# as an index expression, and the contracted parameter tensor P is learnable.
struct TensorOpLayer_str <: Lux.AbstractExplicitLayer
    param_shape
    operation_expression::Expr
end

function Lux.initialparameters(rng::AbstractRNG, l::TensorOpLayer_str)
    # The only trainable parameter is the tensor P with the requested shape.
    P = rand(rng, Float32, l.param_shape...)
    return (P=P,)
end

# Pull out every "((...))" substring, i.e. the index tuples that appear in the
# stringified output of the TensorOperations parser.
function extract_double_brackets(s::String)
    pattern = r"\(\(.*?\)\)"
    matches = eachmatch(pattern, s)
    return [match.match for match in matches]
end

# Turn a string such as "((1, 2), (3,))" back into an actual tuple of tuples.
function parse_tuple_from_string(s)
    expr = Meta.parse(s)   # parse the string into an expression
    return eval(expr)      # evaluate it to get the tuple
end
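
For illustration, here is roughly how these two helpers behave; the string below is a made-up stand-in for the stringified parser output, not the real thing:

# Hypothetical stringified call containing double-bracketed index tuples.
s = "tensorcontract!(C, A, ((1, 3), (2,)), false, B, ((1,), (2,)), false, ((1, 2), (3,)))"
extract_double_brackets(s)                  # ["((1, 3), (2,))", "((1,), (2,))", "((1, 2), (3,))"]
map(parse_tuple_from_string, extract_double_brackets(s))   # the same three index tuples as nested Int tuples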


function Lux.initialstates(::AbstractRNG, l::TensorOpLayer_str)::NamedTuple
    # Run the TensorOperations parser on the index expression once, then recover
    # the three index tuples (for x, P and the result) from its stringified output.
    ex = l.operation_expression
    parser = TensorOperations.tensorparser(ex, :allocator => TensorOperations.CUDAAllocator(), :backend => TensorOperations.cuTENSORBackend())
    parsed_ex = parser(ex)
    arg_tuples = extract_double_brackets(string(parsed_ex.args[3]))
    arg_tuples = map(parse_tuple_from_string, arg_tuples)
    return (PA=arg_tuples[1], PB=arg_tuples[2], PC=arg_tuples[3])
end


function (l::TensorOpLayer_str)(x, ps, st::NamedTuple)
    # Allocate the output on the GPU, perform the contraction with the cuTENSOR
    # backend, and finish with an elementwise swish nonlinearity.
    res = TensorOperations.tensoralloc_contract(Float32, x, st.PA, false, ps.P, st.PB, false, st.PC, Val{false}(), TensorOperations.CUDAAllocator{CUDA.UnifiedMemory, CUDA.DeviceMemory, CUDA.DeviceMemory}())
    res = TensorOperations.tensorcontract!(res, x, st.PA, false, ps.P, st.PB, false, st.PC, VectorInterface.One(), VectorInterface.Zero(), TensorOperations.cuTENSORBackend(), TensorOperations.CUDAAllocator{CUDA.UnifiedMemory, CUDA.DeviceMemory, CUDA.DeviceMemory}())
    res = swish.(res)
    return res, st
end
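
As a minimal usage sketch of a single layer (assuming a working CUDA setup; all shapes and sizes below are placeholders I made up, not values from my network):

using Random

# Hypothetical parameter shape (d, e, c) and a matching input shape (b, c, f, n, d).
layer = TensorOpLayer_str((4, 8, 3), :(res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c]))

rng = Random.default_rng()
ps, st = Lux.setup(rng, layer)         # builds P and precomputes the index tuples
dev = gpu_device()
ps = ps |> dev                         # move P to the GPU so cuTENSOR can contract it
x = CUDA.rand(Float32, 2, 3, 5, 6, 4)  # (b, c, f, n, d)
y, st = layer(x, ps, st)               # contraction followed by swish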

It may not be perfect, as it does some rather ugly reverse engineering: I was unable to get a return statement out of the tensor macro when the expression is passed into the layer state. But it works.

Then I invoke these layers like this:

            , TensorOpLayer_str((num_directions, primary_sv_repr, num_channels), :(res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c]))
            , GroupNorm(num_params_exec, num_params_exec, affine=true)
            , TensorOpLayer_str((num_params_exec, primary_sv_repr, final_sv_repr), :(res[b, f, e] := x[b, f, n, d] * P[n, d, e]))
            , LayerNorm((batch_size, flat_sv_len))

This is only part of a network. I divided the operation into multiple smaller operations so that I can apply a nonlinearity (here swish) and normalization between them (the numbers grow fast and lead to instabilities in training without some kind of normalization).
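
For context, here is a hedged sketch of how these pieces can be assembled into a Lux.Chain; every size below is a made-up placeholder, not a value from my actual network:

# Hypothetical sizes, for illustration only.
num_directions, primary_sv_repr, num_channels = 4, 8, 3
num_params_exec, final_sv_repr = 6, 16
batch_size, flat_sv_len = 2, 16

model = Lux.Chain(
    TensorOpLayer_str((num_directions, primary_sv_repr, num_channels),
        :(res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c])),
    GroupNorm(num_params_exec, num_params_exec, affine=true),
    TensorOpLayer_str((num_params_exec, primary_sv_repr, final_sv_repr),
        :(res[b, f, e] := x[b, f, n, d] * P[n, d, e])),
    LayerNorm((batch_size, flat_sv_len)),
)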

However, and here is the problem: I know the operation would be far more performant if I combined operations like

res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c]
res[b, f, e] := x[b, f, n, d] * P[n, d, e]

etc. into a single contraction. However, in that case I would be unable to add nonlinearities and normalization between each step. Is there a better way to achieve this than what I did?
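
To make the trade-off concrete, here is a sketch of what the fused version would look like with the @tensor macro (the names P1, P2 and the intermediate index e1 are mine); note that there is no place left to apply swish or a normalization to the intermediate [b, f, n, e] result:

# Fusing both contractions into a single @tensor call; e1 is the index that the
# two-step version exposes as an intermediate tensor. No nonlinearity or
# normalization can be applied between the two contractions here.
@tensor res[b, f, e2] := x[b, c, f, n, d] * P1[d, e1, c] * P2[n, e1, e2]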

Thanks for the help!
