Trying to Modify the BERT Model to Use Relative Position Embeddings

Published on Aug. 22, 2023, 12:10 p.m.

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Modifying the default BERT model

transformers already implements several position-embedding schemes by default; the BERT model just uses the classic absolute position embedding. Here we change the default position-embedding type in the config, so that a model trained afterwards uses relative position encoding. A rough sketch of what the relative scheme does to the attention scores follows the list below.

  1. Switch the position embeddings to relative
  2. Expand token_type_embeddings to more token types
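
Before touching the model, here is a rough, self-contained sketch of how the "relative_key_query" scheme changes the attention scores. This is a paraphrase of Shaw-style relative attention as used by transformers, not the library's exact code; the function relative_scores, the variable dist_emb and the toy shapes are made up for illustration.

import torch
import torch.nn as nn

def relative_scores(query, key, distance_embedding, max_position_embeddings):
    # query, key: (batch, heads, seq_len, head_dim)
    seq_len = query.size(2)
    pos = torch.arange(seq_len)
    distance = pos.view(-1, 1) - pos.view(1, -1)                      # (seq_len, seq_len), i - j
    rel = distance_embedding(distance + max_position_embeddings - 1)  # shift indices to be >= 0
    content = torch.matmul(query, key.transpose(-1, -2))              # the usual content-content term
    score_q = torch.einsum("bhld,lrd->bhlr", query, rel)              # query-to-position term ("relative_key")
    score_k = torch.einsum("bhrd,lrd->bhlr", key, rel)                # extra key-to-position term ("relative_key_query")
    return content + score_q + score_k                                # scaling by sqrt(head_dim) omitted here

# toy usage
head_dim, max_pos = 64, 512
dist_emb = nn.Embedding(2 * max_pos - 1, head_dim)  # one learned vector per relative distance
q = torch.randn(1, 8, 10, head_dim)
k = torch.randn(1, 8, 10, head_dim)
print(relative_scores(q, k, dist_emb, max_pos).shape)  # torch.Size([1, 8, 10, 10])
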
!pip install transformers
MODEL_NAME = "uer/chinese_roberta_L-2_H-512"
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel, BertConfig

Config = BertConfig.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertModel.from_pretrained(MODEL_NAME)

Some weights of BertModel were not initialized from the model checkpoint at uer/chinese_roberta_L-2_H-512 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Config
BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 512,
  "initializer_range": 0.02,
  "intermediate_size": 2048,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 8,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.5.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}
Config.position_embedding_type = "relative_key_query"  # use both query- and key-side relative position terms

Config.type_vocab_size = 100  # allow up to 100 token type (segment) ids instead of the default 2
# tokenizer
# model.embeddings.word_embeddings=nn.Embedding(tokenizer.vocab_size, 128, padding_idx=tokenizer.pad_token_id)
# Change the token type embedding table
# help(model.embeddings.token_type_embeddings)

# Replace the pretrained 2-row token type table with a freshly initialized 100-row one.
# Note: padding_idx=tokenizer.pad_token_id (0) keeps the embedding for token type 0 fixed at zero.
model.embeddings.token_type_embeddings = nn.Embedding(
    100, model.embeddings.token_type_embeddings.embedding_dim, padding_idx=tokenizer.pad_token_id
)

print("new",Config)

Config.save_pretrained("model")
tokenizer.save_pretrained("model")
PATH="model/pytorch_model.bin"
torch.save(model.state_dict(), PATH)
new BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 512,
  "initializer_range": 0.02,
  "intermediate_size": 2048,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 8,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "relative_key_query",
  "transformers_version": "4.5.1",
  "type_vocab_size": 100,
  "use_cache": true,
  "vocab_size": 21128
}
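
As a sanity check (a sketch that is not part of the original notebook, continuing in the same session; the variable names reloaded and inputs are made up), the saved folder can be reloaded with the new config. Because the checkpoint was exported from an absolute-position model, the relative distance_embedding weights inside each attention layer, and the enlarged token type table, come back newly initialized and still need to be trained on a downstream task.

reloaded = BertModel.from_pretrained("model")     # reads model/config.json + model/pytorch_model.bin
print(reloaded.config.position_embedding_type)    # expected: relative_key_query
print(reloaded.embeddings.token_type_embeddings)  # expected: a 100-row embedding table (hidden size 512)

inputs = tokenizer("测试相对位置编码", return_tensors="pt")
inputs["token_type_ids"] = torch.full_like(inputs["input_ids"], 5)  # any segment id below 100 is now valid
out = reloaded(**inputs)
print(out.last_hidden_state.shape)                # (1, seq_len, 512)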
