Trying to Modify the BERT Model to Use Relative Position Embeddings
Published on Aug. 22, 2023, 12:10 p.m.
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
Modify the default BERT model
transformers already implements several position-embedding schemes out of the box; the stock BERT model simply uses the classic absolute position embeddings. Here we change the default position-embedding setting in the config and then train the model, which gives us the effect of relative position embeddings.
- Switch to relative position embeddings (see the sketch after this list)
- Enlarge token_type_embeddings to support more token types
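For reference, here is a minimal sketch of how "relative_key_query" attention scores are computed, roughly following the logic of Hugging Face's BertSelfAttention; the sizes and names (query_layer, key_layer, max_pos) are illustrative assumptions, not code from this notebook.
import torch
import torch.nn as nn
# Illustrative sizes: batch=1, heads=8, seq_len=4, head_dim=64 (assumptions)
b, h, L, d = 1, 8, 4, 64
max_pos = 512
query_layer = torch.randn(b, h, L, d)
key_layer = torch.randn(b, h, L, d)
# One embedding row per possible relative distance in [-(max_pos-1), max_pos-1]
distance_embedding = nn.Embedding(2 * max_pos - 1, d)
pos = torch.arange(L)
distance = pos.view(-1, 1) - pos.view(1, -1)          # (L, L) relative distances
rel_emb = distance_embedding(distance + max_pos - 1)  # (L, L, d)
# Content-content term plus the two extra terms used by "relative_key_query"
scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
scores = scores + torch.einsum("bhld,lrd->bhlr", query_layer, rel_emb)
scores = scores + torch.einsum("bhrd,lrd->bhlr", key_layer, rel_emb)
print(scores.shape)  # torch.Size([1, 8, 4, 4])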
!pip install transformers
MODEL_NAME="uer/chinese_roberta_L-2_H-512"
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel, BertConfig
Config = BertConfig.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertModel.from_pretrained(MODEL_NAME)
Some weights of BertModel were not initialized from the model checkpoint at uer/chinese_roberta_L-2_H-512 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Config
BertConfig {
"architectures": [
"BertForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 512,
"initializer_range": 0.02,
"intermediate_size": 2048,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 8,
"num_hidden_layers": 2,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"transformers_version": "4.5.1",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 21128
}
# Switch the config to relative position embeddings and allow up to 100 token types
Config.position_embedding_type = "relative_key_query"
Config.type_vocab_size = 100
# tokenizer
# model.embeddings.word_embeddings = nn.Embedding(tokenizer.vocab_size, 128, padding_idx=tokenizer.pad_token_id)
# Replace the token type embedding table so it matches the new type_vocab_size
# help(model.embeddings.token_type_embeddings)
model.embeddings.token_type_embeddings = nn.Embedding(100, model.embeddings.token_type_embeddings.embedding_dim, padding_idx=tokenizer.pad_token_id)
print("new", Config)
# Save the modified config, tokenizer and weights so the model can be reloaded later
Config.save_pretrained("model")
tokenizer.save_pretrained("model")
PATH = "model/pytorch_model.bin"
torch.save(model.state_dict(), PATH)
new BertConfig {
"architectures": [
"BertForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 512,
"initializer_range": 0.02,
"intermediate_size": 2048,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 8,
"num_hidden_layers": 2,
"pad_token_id": 0,
"position_embedding_type": "relative_key_query",
"transformers_version": "4.5.1",
"type_vocab_size": 100,
"use_cache": true,
"vocab_size": 21128
}
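To sanity-check the result, the saved model/ directory can be loaded back with from_pretrained. This is a minimal sketch assuming the transformers 4.x BertSelfAttention layout; the relative-position distance_embedding tables are not in the saved weights, so transformers will initialize them randomly and print the usual "newly initialized" warning, and the model still needs fine-tuning on a downstream task.
from transformers import BertModel, BertConfig
reloaded_config = BertConfig.from_pretrained("model")
print(reloaded_config.position_embedding_type)  # relative_key_query
print(reloaded_config.type_vocab_size)          # 100
reloaded = BertModel.from_pretrained("model")
# distance_embedding only exists when a relative position_embedding_type is set
print(reloaded.encoder.layer[0].attention.self.distance_embedding)
print(reloaded.embeddings.token_type_embeddings)  # now has 100 token type rows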