词元id转嵌入向量

约 366 字，阅读时间大约 1 分钟

词元id转嵌入向量.py
import torch  

# Sample input: a 1-D tensor holding four token ids.
input_ids = torch.tensor([2, 3, 5, 1])

# Model hyper-parameters.
vocab_size = 6    # number of distinct token ids in the vocabulary
output_dim = 3    # width of every embedding vector
max_seq_len = 4   # number of positions that get their own embedding row

# Seed the RNG before the layers are built so their initial weights are
# the same on every run.
torch.manual_seed(123)

# Token embedding table: one row of size output_dim per token id.
# It is created first, so it draws from the seeded random stream before
# the position table does.
embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)

# Position embedding table: one row of size output_dim per position index.
position_emb_layer = torch.nn.Embedding(max_seq_len, output_dim)

# Position indices 0 .. max_seq_len - 1.
positions = torch.arange(0, max_seq_len)

# 组合双重嵌入
def embed_with_position(input_ids):
    """Return the sum of token embeddings and position embeddings.

    Position indices are built from the length of the last dimension of
    ``input_ids`` instead of the fixed module-level ``positions`` tensor,
    so sequences shorter than the position table are handled as well as
    full-length ones.

    Args:
        input_ids: Integer tensor of token ids with at least one
            dimension; the last dimension is the sequence dimension.

    Returns:
        A tensor with one embedding vector per input token: the lookup of
        each token id in ``embedding_layer`` plus the lookup of its
        position index in ``position_emb_layer``.

    Raises:
        ValueError: If the sequence is longer than the number of rows in
            ``position_emb_layer``.
    """
    seq_len = input_ids.shape[-1]
    max_positions = position_emb_layer.num_embeddings
    if seq_len > max_positions:
        raise ValueError(
            f"sequence length {seq_len} exceeds the number of position "
            f"embeddings ({max_positions})"
        )

    # Token lookup: one vector per token id.
    token_emb = embedding_layer(input_ids)

    # Position lookup: indices 0 .. seq_len - 1, created on the same device
    # as the input so both lookups receive indices from the same place.
    position_ids = torch.arange(seq_len, device=input_ids.device)
    pos_emb = position_emb_layer(position_ids)

    # pos_emb broadcasts over any leading (batch) dimensions of token_emb.
    return token_emb + pos_emb

# ==== Print every intermediate result ====
# Raw token-embedding weight matrix; the bare string below records the
# output of a sample run.
print("词嵌入矩阵：", embedding_layer.weight, sep="\n")
"""
词嵌入矩阵：
tensor([[ 0.3374, -0.1778, -0.1690],
        [ 0.9178,  1.5810,  1.3010],
        [ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-1.1589,  0.3255, -0.6315],
        [-2.8400, -0.7849, -1.4096]], requires_grad=True)
"""

# Token embeddings on their own (no position information added)
token_vectors = embedding_layer(input_ids)
print("\n词元嵌入结果（原始语义向量）：", token_vectors, sep="\n")
"""
词元嵌入结果：
tensor([[ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-2.8400, -0.7849, -1.4096],
        [ 0.9178,  1.5810,  1.3010]], grad_fn=<EmbeddingBackward0>)
"""

# Position-embedding weight matrix
print("\n位置嵌入矩阵：", position_emb_layer.weight, sep="\n")
"""
位置嵌入矩阵：
tensor([[-0.6307,  1.2340,  0.3127],
        [ 0.6972, -0.9950, -1.1476],
        [-0.9178,  0.9045, -2.0975],
        [ 1.1558, -1.2157,  0.1295]], requires_grad=True)
"""

# Combined result: token embedding + position embedding
print("\n组合嵌入（词嵌入 + 位置嵌入）：", embed_with_position(input_ids), sep="\n")
"""
组合嵌入结果：
tensor([[ 0.6446,  1.0331,  0.1521],  # 词嵌入[2] + 位置0
        [ 0.2957, -0.0285, -2.2958],  # 词嵌入[3] + 位置1
        [-3.7578,  0.1197, -3.5071],  # 词嵌入[5] + 位置2
        [ 2.0735,  0.3653,  1.4306]], # 词嵌入[1] + 位置3
        grad_fn=<AddBackward0>)
"""

更新日志

2025/6/26 11:30

查看所有更新日志

dfb81-update于 2025/6/26
dc6b2-update于 2025/5/23

版权所有

版权归属：NateHHX

许可证：署名 4.0 国际 (CC-BY-4.0)

1.八皇后问题

2.手写数字识别

3.MCP协议

4.RAG系统

5.从零构建大模型

数据准备与采样

自注意力机制

大模型架构

词元id转嵌入向量

更新日志

版权所有