This module is designed to select arbitrary modules of a model in a unified form: candidate modules are chosen by matching regular expressions against their names, as in the sketch below.
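A minimal sketch of this kind of selection, assuming a Stable Diffusion UNet loaded through diffusers; the helper name select_modules and the model ID are illustrative, not part of any library API.

# Code:
import re
from diffusers import UNet2DConditionModel

def select_modules(model, pattern):
    """Return {name: module} for every submodule whose full name matches the regex."""
    regex = re.compile(pattern)
    return {name: m for name, m in model.named_modules() if regex.fullmatch(name)}

# Illustrative model ID; any diffusers UNet works the same way.
unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")

# Select the cross-attention key/value projections; these appear in the
# listing below as `...attn2.to_k` and `...attn2.to_v`.
kv_layers = select_modules(unet, r".*\.attn2\.to_[kv]")
for name in kv_layers:
    print(name)
# Outputs (abridged):
# down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k
# down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v
# ...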
For reference, the name and type of every module in the UNet can be listed as follows:
# Code:
for name, module in unet.named_modules():
    print(name, type(module))
# Outputs:
conv_in <class 'torch.nn.modules.conv.Conv2d'>
time_proj <class 'diffusers.models.embeddings.Timesteps'>
time_embedding <class 'diffusers.models.embeddings.TimestepEmbedding'>
time_embedding.linear_1 <class 'torch.nn.modules.linear.Linear'>
time_embedding.act <class 'torch.nn.modules.activation.SiLU'>
time_embedding.linear_2 <class 'torch.nn.modules.linear.Linear'>
down_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0 <class 'diffusers.models.unet_2d_blocks.CrossAttnDownBlock2D'>
down_blocks.0.attentions <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
down_blocks.0.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.0.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
down_blocks.0.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.0.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
down_blocks.0.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.attentions.1 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
down_blocks.0.attentions.1.norm <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.0.attentions.1.proj_in <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.attentions.1.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.1.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
down_blocks.0.attentions.1.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.attentions.1.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.attentions.1.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.0.attentions.1.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
down_blocks.0.attentions.1.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.attentions.1.proj_out <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.resnets <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.0.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.0.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.0.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.0.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.0.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.0.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.0.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.0.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.0.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.0.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.0.downsamplers <class 'torch.nn.modules.container.ModuleList'>
down_blocks.0.downsamplers.0 <class 'diffusers.models.resnet.Downsample2D'>
down_blocks.0.downsamplers.0.conv <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1 <class 'diffusers.models.unet_2d_blocks.CrossAttnDownBlock2D'>
down_blocks.1.attentions <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
down_blocks.1.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.1.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
down_blocks.1.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.1.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
down_blocks.1.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.attentions.1 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
down_blocks.1.attentions.1.norm <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.1.attentions.1.proj_in <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.attentions.1.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.1.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
down_blocks.1.attentions.1.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.attentions.1.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.attentions.1.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.1.attentions.1.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
down_blocks.1.attentions.1.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.attentions.1.proj_out <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.resnets <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.1.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.1.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.1.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.1.resnets.0.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.1.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.1.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.1.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.1.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.1.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.1.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.1.downsamplers <class 'torch.nn.modules.container.ModuleList'>
down_blocks.1.downsamplers.0 <class 'diffusers.models.resnet.Downsample2D'>
down_blocks.1.downsamplers.0.conv <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2 <class 'diffusers.models.unet_2d_blocks.CrossAttnDownBlock2D'>
down_blocks.2.attentions <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
down_blocks.2.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.2.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
down_blocks.2.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.2.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
down_blocks.2.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.attentions.1 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
down_blocks.2.attentions.1.norm <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.2.attentions.1.proj_in <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.attentions.1.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.1.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
down_blocks.2.attentions.1.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.attentions.1.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.attentions.1.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
down_blocks.2.attentions.1.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
down_blocks.2.attentions.1.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.attentions.1.proj_out <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.resnets <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.2.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.2.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.2.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.2.resnets.0.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.2.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.2.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.2.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.2.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.2.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.2.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.2.downsamplers <class 'torch.nn.modules.container.ModuleList'>
down_blocks.2.downsamplers.0 <class 'diffusers.models.resnet.Downsample2D'>
down_blocks.2.downsamplers.0.conv <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.3 <class 'diffusers.models.unet_2d_blocks.DownBlock2D'>
down_blocks.3.resnets <class 'torch.nn.modules.container.ModuleList'>
down_blocks.3.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.3.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.3.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.3.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.3.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.3.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.3.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.3.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
down_blocks.3.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
down_blocks.3.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.3.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.3.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
down_blocks.3.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
down_blocks.3.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
down_blocks.3.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
down_blocks.3.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.0 <class 'diffusers.models.unet_2d_blocks.UpBlock2D'>
up_blocks.0.resnets <class 'torch.nn.modules.container.ModuleList'>
up_blocks.0.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.0.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.0.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.0.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.0.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.0.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.0.resnets.0.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.0.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.0.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.0.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.0.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.0.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.0.resnets.1.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.2 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.0.resnets.2.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.0.resnets.2.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.2.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.0.resnets.2.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.0.resnets.2.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.0.resnets.2.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.resnets.2.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.0.resnets.2.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.0.upsamplers <class 'torch.nn.modules.container.ModuleList'>
up_blocks.0.upsamplers.0 <class 'diffusers.models.resnet.Upsample2D'>
up_blocks.0.upsamplers.0.conv <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1 <class 'diffusers.models.unet_2d_blocks.CrossAttnUpBlock2D'>
up_blocks.1.attentions <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.1.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.1.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.1.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.attentions.1 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.1.attentions.1.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.attentions.1.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.attentions.1.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.1.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.1.attentions.1.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.1.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.1.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.1.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.1.attentions.1.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.1.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.attentions.2 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.1.attentions.2.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.attentions.2.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.attentions.2.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.2.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.1.attentions.2.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.2.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.2.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.1.attentions.2.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.1.attentions.2.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.attentions.2.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.1.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.1.resnets.0.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.1.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.1.resnets.1.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.2 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.1.resnets.2.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.resnets.2.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.2.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.1.resnets.2.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.1.resnets.2.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.1.resnets.2.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.resnets.2.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.1.resnets.2.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.1.upsamplers <class 'torch.nn.modules.container.ModuleList'>
up_blocks.1.upsamplers.0 <class 'diffusers.models.resnet.Upsample2D'>
up_blocks.1.upsamplers.0.conv <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2 <class 'diffusers.models.unet_2d_blocks.CrossAttnUpBlock2D'>
up_blocks.2.attentions <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.2.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.2.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.2.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.attentions.1 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.2.attentions.1.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.attentions.1.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.attentions.1.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.1.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.2.attentions.1.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.1.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.1.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.1.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.2.attentions.1.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.1.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.attentions.2 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.2.attentions.2.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.attentions.2.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.attentions.2.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.2.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.2.attentions.2.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.2.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.2.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.2.attentions.2.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.2.attentions.2.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.attentions.2.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.2.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.2.resnets.0.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.2.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.2.resnets.1.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.2 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.2.resnets.2.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.resnets.2.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.2.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.2.resnets.2.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.2.resnets.2.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.2.resnets.2.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.resnets.2.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.2.resnets.2.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.2.upsamplers <class 'torch.nn.modules.container.ModuleList'>
up_blocks.2.upsamplers.0 <class 'diffusers.models.resnet.Upsample2D'>
up_blocks.2.upsamplers.0.conv <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3 <class 'diffusers.models.unet_2d_blocks.CrossAttnUpBlock2D'>
up_blocks.3.attentions <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.3.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.3.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.3.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.attentions.1 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.3.attentions.1.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.attentions.1.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.attentions.1.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.1.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.3.attentions.1.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.1.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.1.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.1.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.3.attentions.1.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.1.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.attentions.2 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
up_blocks.3.attentions.2.norm <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.attentions.2.proj_in <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.attentions.2.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.2.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
up_blocks.3.attentions.2.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.2.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.2.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
up_blocks.3.attentions.2.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
up_blocks.3.attentions.2.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.attentions.2.proj_out <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets <class 'torch.nn.modules.container.ModuleList'>
up_blocks.3.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.3.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.3.resnets.0.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.3.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.3.resnets.1.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.2 <class 'diffusers.models.resnet.ResnetBlock2D'>
up_blocks.3.resnets.2.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.resnets.2.conv1 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.2.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
up_blocks.3.resnets.2.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
up_blocks.3.resnets.2.dropout <class 'torch.nn.modules.dropout.Dropout'>
up_blocks.3.resnets.2.conv2 <class 'torch.nn.modules.conv.Conv2d'>
up_blocks.3.resnets.2.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
up_blocks.3.resnets.2.conv_shortcut <class 'torch.nn.modules.conv.Conv2d'>
mid_block <class 'diffusers.models.unet_2d_blocks.UNetMidBlock2DCrossAttn'>
mid_block.attentions <class 'torch.nn.modules.container.ModuleList'>
mid_block.attentions.0 <class 'diffusers.models.transformer_2d.Transformer2DModel'>
mid_block.attentions.0.norm <class 'torch.nn.modules.normalization.GroupNorm'>
mid_block.attentions.0.proj_in <class 'torch.nn.modules.conv.Conv2d'>
mid_block.attentions.0.transformer_blocks <class 'torch.nn.modules.container.ModuleList'>
mid_block.attentions.0.transformer_blocks.0 <class 'diffusers.models.attention.BasicTransformerBlock'>
mid_block.attentions.0.transformer_blocks.0.norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
mid_block.attentions.0.transformer_blocks.0.attn1 <class 'diffusers.models.attention_processor.Attention'>
mid_block.attentions.0.transformer_blocks.0.attn1.to_q <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn1.to_k <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn1.to_v <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn1.to_out <class 'torch.nn.modules.container.ModuleList'>
mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0 <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn1.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
mid_block.attentions.0.transformer_blocks.0.norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
mid_block.attentions.0.transformer_blocks.0.attn2 <class 'diffusers.models.attention_processor.Attention'>
mid_block.attentions.0.transformer_blocks.0.attn2.to_q <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn2.to_k <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn2.to_v <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn2.to_out <class 'torch.nn.modules.container.ModuleList'>
mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0 <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.attn2.to_out.1 <class 'torch.nn.modules.dropout.Dropout'>
mid_block.attentions.0.transformer_blocks.0.norm3 <class 'torch.nn.modules.normalization.LayerNorm'>
mid_block.attentions.0.transformer_blocks.0.ff <class 'diffusers.models.attention.FeedForward'>
mid_block.attentions.0.transformer_blocks.0.ff.net <class 'torch.nn.modules.container.ModuleList'>
mid_block.attentions.0.transformer_blocks.0.ff.net.0 <class 'diffusers.models.attention.GEGLU'>
mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.transformer_blocks.0.ff.net.1 <class 'torch.nn.modules.dropout.Dropout'>
mid_block.attentions.0.transformer_blocks.0.ff.net.2 <class 'torch.nn.modules.linear.Linear'>
mid_block.attentions.0.proj_out <class 'torch.nn.modules.conv.Conv2d'>
mid_block.resnets <class 'torch.nn.modules.container.ModuleList'>
mid_block.resnets.0 <class 'diffusers.models.resnet.ResnetBlock2D'>
mid_block.resnets.0.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
mid_block.resnets.0.conv1 <class 'torch.nn.modules.conv.Conv2d'>
mid_block.resnets.0.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
mid_block.resnets.0.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
mid_block.resnets.0.dropout <class 'torch.nn.modules.dropout.Dropout'>
mid_block.resnets.0.conv2 <class 'torch.nn.modules.conv.Conv2d'>
mid_block.resnets.0.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
mid_block.resnets.1 <class 'diffusers.models.resnet.ResnetBlock2D'>
mid_block.resnets.1.norm1 <class 'torch.nn.modules.normalization.GroupNorm'>
mid_block.resnets.1.conv1 <class 'torch.nn.modules.conv.Conv2d'>
mid_block.resnets.1.time_emb_proj <class 'torch.nn.modules.linear.Linear'>
mid_block.resnets.1.norm2 <class 'torch.nn.modules.normalization.GroupNorm'>
mid_block.resnets.1.dropout <class 'torch.nn.modules.dropout.Dropout'>
mid_block.resnets.1.conv2 <class 'torch.nn.modules.conv.Conv2d'>
mid_block.resnets.1.nonlinearity <class 'torch.nn.modules.activation.SiLU'>
conv_norm_out <class 'torch.nn.modules.normalization.GroupNorm'>
conv_act <class 'torch.nn.modules.activation.SiLU'>
conv_out <class 'torch.nn.modules.conv.Conv2d'>
We can then select modules by regular-expression matching. For example, several predefined patterns for the UNet are defined in unidiffusion/utils/module_regular_search.py:
PREDEFINED_PATTERN_UNET = {
    'attention': r'attn(1|2)',          # attn1 (self-) and attn2 (cross-attention); there is no attn0
    'cross_attention': r'attn2',
    'cross_attention.q': r'attn2\.to_q',
    'cross_attention.k': r'attn2\.to_k',
    'cross_attention.v': r'attn2\.to_v',
    'cross_attention.qkv': r'attn2\.(to_q|to_k|to_v)',
    'feedforward': r'ff',
    'resnets': r'resnet',
    'resnets.conv': r'resnets\.\d\.conv',
}
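Internally, this kind of selection boils down to running a regex search over the names returned by named_modules(). Below is a minimal sketch of that logic; the helper name select_modules is illustrative only, not the exact implementation in unidiffusion/utils/module_regular_search.py:
import re

def select_modules(model, pattern):
    # Collect every (name, module) pair whose qualified name matches
    # the regular expression anywhere in the string (re.search).
    regex = re.compile(pattern)
    return [(name, module)
            for name, module in model.named_modules()
            if regex.search(name)]

# Example: every cross-attention key projection in the UNet, e.g.
# down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k, ...
# selected = select_modules(unet, r'attn2\.to_k')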
You can use these predefined patterns in the config (the following two have the same effect):
# regular pattern
unet.training_args = {
    r'attn2\.to_k': {
        ...
    },
}
# predefined pattern
unet.training_args = {
    'cross_attention.k': {
        ...
    },
}
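A predefined name can simply be resolved to its regular expression before matching, which is why the two configs above are equivalent. A hedged sketch of that lookup (the helper resolve_pattern is assumed for illustration, not the library's actual API):
def resolve_pattern(pattern):
    # Substitute a predefined name with its regex if one exists;
    # otherwise treat the string as a raw regular expression.
    return PREDEFINED_PATTERN_UNET.get(pattern, pattern)

# resolve_pattern('cross_attention.k')  -> r'attn2\.to_k'
# resolve_pattern(r'ff\.net\.2')        -> r'ff\.net\.2' (unchanged)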
You can also write your own regular patterns:
# all convolution layers (note: this also matches conv_norm_out and
# conv_act, whose names contain "conv" but which are not Conv2d)
conv
# the second fully-connected layer in each feedforward block
ff\.net\.2
# all attention layers in the upsample blocks
up_blocks\.\d\.attentions
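Before training, it is worth sanity-checking that a custom pattern selects exactly the modules you intend. A small sketch using the illustrative select_modules helper from above:
# All attention layers in the upsample blocks.
matches = select_modules(unet, r'up_blocks\.\d\.attentions')
print(len(matches))           # number of selected modules
for name, _ in matches[:3]:
    print(name)               # spot-check the first few names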
Name and type of each module in the text encoder
# Code (text_encoder assumed to be the CLIP text model):
for name, module in text_encoder.named_modules():
    print(name, type(module))
# Outputs:
text_model <class 'transformers.models.clip.modeling_clip.CLIPTextTransformer'>
text_model.embeddings <class 'transformers.models.clip.modeling_clip.CLIPTextEmbeddings'>
text_model.embeddings.token_embedding <class 'torch.nn.modules.sparse.Embedding'>
text_model.embeddings.position_embedding <class 'torch.nn.modules.sparse.Embedding'>
text_model.encoder <class 'transformers.models.clip.modeling_clip.CLIPEncoder'>
text_model.encoder.layers <class 'torch.nn.modules.container.ModuleList'>
text_model.encoder.layers.0 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.0.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.0.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.0.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.0.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.0.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.0.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.0.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.0.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.0.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.0.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.0.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.1 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.1.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.1.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.1.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.1.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.1.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.1.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.1.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.1.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.1.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.1.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.1.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.2 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.2.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.2.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.2.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.2.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.2.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.2.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.2.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.2.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.2.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.2.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.2.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.3 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.3.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.3.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.3.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.3.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.3.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.3.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.3.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.3.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.3.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.3.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.3.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.4 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.4.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.4.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.4.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.4.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.4.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.4.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.4.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.4.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.4.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.4.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.4.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.5 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.5.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.5.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.5.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.5.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.5.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.5.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.5.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.5.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.5.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.5.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.5.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.6 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.6.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.6.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.6.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.6.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.6.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.6.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.6.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.6.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.6.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.6.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.6.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.7 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.7.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.7.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.7.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.7.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.7.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.7.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.7.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.7.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.7.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.7.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.7.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.8 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.8.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.8.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.8.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.8.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.8.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.8.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.8.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.8.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.8.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.8.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.8.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.9 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.9.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.9.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.9.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.9.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.9.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.9.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.9.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.9.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.9.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.9.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.9.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.10 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.10.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.10.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.10.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.10.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.10.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.10.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.10.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.10.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.10.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.10.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.10.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.11 <class 'transformers.models.clip.modeling_clip.CLIPEncoderLayer'>
text_model.encoder.layers.11.self_attn <class 'transformers.models.clip.modeling_clip.CLIPAttention'>
text_model.encoder.layers.11.self_attn.k_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.11.self_attn.v_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.11.self_attn.q_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.11.self_attn.out_proj <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.11.layer_norm1 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.encoder.layers.11.mlp <class 'transformers.models.clip.modeling_clip.CLIPMLP'>
text_model.encoder.layers.11.mlp.activation_fn <class 'transformers.activations.QuickGELUActivation'>
text_model.encoder.layers.11.mlp.fc1 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.11.mlp.fc2 <class 'torch.nn.modules.linear.Linear'>
text_model.encoder.layers.11.layer_norm2 <class 'torch.nn.modules.normalization.LayerNorm'>
text_model.final_layer_norm <class 'torch.nn.modules.normalization.LayerNorm'>