Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

准确率不变 #75

Open
shiyao1999 opened this issue Dec 28, 2023 · 4 comments
Open

准确率不变 #75

shiyao1999 opened this issue Dec 28, 2023 · 4 comments

Comments

@shiyao1999
Copy link

shiyao1999 commented Dec 28, 2023

我使用ALBERT和孪生网络来训练一个主观问题评分模型,训练策略参考的你的代码,孪生网络由双向LSTM和全连接层组成。在训练中,我发现准确率没有提高,一直保持不变。我感觉像是权重没有更新,可能是因为梯度太小导致了权重变化不大。或者,训练策略可能存在问题,但我不确定具体原因。下面是我训练期时的准确率:
training
`

class MetaTask(nn.Module):
def init(self, args):
super(MetaTask, self).init()
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.loss_fn = nn.CrossEntropyLoss()
self.update_lr = args.update_lr
self.meta_lr = args.meta_lr
self.finetunning_lr = args.finetunning_lr
self.n_way = args.n_way
self.k_spt = args.k_spt
self.k_qry = args.k_qry
self.task_num = args.task_num
self.update_step = args.update_step
self.update_step_test = args.update_step_test
self.net = SubjectiveGradingModel().to(self.device)
self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)

def forward(self, support_x, support_y, query_x, query_y):
    task_num = len(support_x)
    querysz = len(query_x[0])
    losses_q = [0 for _ in range(self.update_step + 1)]
    corrects = [0 for _ in range(self.update_step + 1)]
    for i in range(task_num):
        self.net.train()
        # 1. run the i-th task and compute loss for k=0
        logits = self.net(support_x[i])
        loss = self.loss_fn(logits, torch.cat(support_y[i], dim=0).long())
        fast_weights = OrderedDict(self.net.named_parameters())
        grad = torch.autograd.grad(loss, fast_weights.values(), retain_graph=True)
        # 输出梯度为None的参数
        # for (name, param), gra in zip(self.net.named_parameters(), grad):
        #     if gra is None:
        #         print("梯度为None的参数:", name)
        fast_weights = OrderedDict(
            (name, param - self.update_lr * grad)
            for ((name, param), grad) in zip(fast_weights.items(), grad)
        )
        # this is the loss and accuracy before first update
        with torch.no_grad():
            self.net.eval()
            logits_q = self.net(query_x[i])
            loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
            losses_q[0] += loss_q
            pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
            correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
            corrects[0] = corrects[0] + correct

        # this is the loss and accuracy after the first update
        with torch.no_grad():
            self.net.eval()
            self.net.load_state_dict(fast_weights, strict=False)
            logits_q = self.net(query_x[i])
            loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
            losses_q[1] += loss_q

            pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
            correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
            corrects[1] = corrects[1] + correct
        self.net.train()
        for k in range(1, self.update_step):
            # 1. run the i-th task and compute loss for k=1~K-1
            self.net.load_state_dict(fast_weights, strict=False)
            logits = self.net(support_x[i])
            loss = self.loss_fn(logits, torch.cat(support_y[i], dim=0).long())
            # 2. compute grad on theta_pi
            fast_weights = OrderedDict(self.net.named_parameters())
            grad = torch.autograd.grad(loss, fast_weights.values(), retain_graph=True)
            # 3. theta_pi = theta_pi - train_lr * grad
            fast_weights = OrderedDict(
                (name, param - self.update_lr * grad)
                for ((name, param), grad) in zip(fast_weights.items(), grad)
            )
            self.net.load_state_dict(fast_weights, strict=False)
            logits_q = self.net(query_x[i])
            # loss_q will be overwritten and just keep the loss_q on last update step.
            loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
            losses_q[k + 1] += loss_q

            with torch.no_grad():
                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()  # convert to numpy
                corrects[k + 1] = corrects[k + 1] + correct

    loss_q = losses_q[-1] / task_num
    # optimize theta parameters
    self.meta_optim.zero_grad()
    loss_q.backward(retain_graph=True)
    # print('meta update')
    self.meta_optim.step()
    accs = np.array(corrects) / (querysz * task_num)
    return accs

class SubjectiveGradingModel(nn.Module):
def init(self, hidden_size=384):
super(SubjectiveGradingModel, self).init()

    # 加载预训练的BERT模型和分词器
    self.bert = AlbertModel.from_pretrained('src/datamoudle/model/albert_chinese_small')
    # 孪生网络
    self.siamese_network = Siamese(max_length=378, embedding_size=hidden_size)


def forward(self, input_data ,weights=None):
    # 将每个字典中的数据拆分成单独的列表
    input_ids_list = [item['input_ids'].squeeze(0).squeeze(0) for item in input_data]
    token_type_ids_list = [item['token_type_ids'].squeeze(0).squeeze(0) for item in input_data]
    attention_mask_list = [item['attention_mask'].squeeze(0).squeeze(0) for item in input_data]
    answer_input_ids_list = [item['answer_input_ids'].squeeze(0).squeeze(0) for item in input_data]
    answer_token_type_ids_list = [item['answer_token_type_ids'].squeeze(0).squeeze(0) for item in input_data]
    answer_attention_mask_list = [item['answer_attention_mask'].squeeze(0).squeeze(0) for item in input_data]

    # 转换成 PyTorch 张量
    input_ids = torch.stack(input_ids_list)
    token_type_ids = torch.stack(token_type_ids_list)
    attention_mask = torch.stack(attention_mask_list)
    answer_input_ids = torch.stack(answer_input_ids_list)
    answer_token_type_ids = torch.stack(answer_token_type_ids_list)
    answer_attention_mask = torch.stack(answer_attention_mask_list)


    outputs = self.bert(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
    pooled_output = outputs.last_hidden_state
    cls_output = outputs.pooler_output
    outputs_answer = self.bert(input_ids=answer_input_ids, token_type_ids=answer_token_type_ids, attention_mask=answer_attention_mask)
    pooled_output_answer = outputs_answer.last_hidden_state
    cls_output_answer = outputs_answer.pooler_output

    siamese_output = self.siamese_network(pooled_output, pooled_output_answer, cls_output, cls_output_answer)

    return siamese_output

`
这会是什么原因?

@jay152forcreate
Copy link

兄弟,问题解决了吗,我也碰到这个问题,用这个策略训练的时候准确率没什么变化

2 similar comments
@jay152forcreate
Copy link

兄弟,问题解决了吗,我也碰到这个问题,用这个策略训练的时候准确率没什么变化

@jay152forcreate
Copy link

兄弟,问题解决了吗,我也碰到这个问题,用这个策略训练的时候准确率没什么变化

@shiyao1999
Copy link
Author

shiyao1999 commented Jun 16, 2024 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants