I have an RX 6600 XT and a Ryzen 7 5700G, and I am trying to run the DeepSeek R1 Qwen 7B distill model from Hugging Face. I downloaded the model weights, the tokenizer, the config, and the weights index, and I am now trying to get the code below to run. Please explain why my code stops right after model = Model(config):

import torch
from safetensors.torch import load_file
import torch.nn.functional as nnF
import torch.nn as nn
import json
from tokenizers import Tokenizer
import torch_directml

shard1 = load_file("model-00001-of-000002.safetensors")
shard2 = load_file("model-00002-of-000002.safetensors")

state_dict = {**shard1, **shard2}

with open("config.json", "r") as f:
    config = json.load(f)

with open("tokenizer.json", "r", encoding = "utf-8") as f:
    tokenizer_config = json.load(f)

class TransformerBlock(nn.Module):
    
    def __init__(self, embed_dim, num_heads, dropout = 0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout = dropout)
        self.ln1 = nn.LayerNorm(embed_dim)
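        # note: ffn_dim is read from the global config dict loaded above, not from an argument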
        ffn_dim = config.get("ffn_dim", 4 * embed_dim)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ffn_dim),
            nn.GELU(),
            nn.Linear(ffn_dim, embed_dim)
        )
        self.ln2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x, attn_mask = None):
        attn_output, _ = self.attn(x, x, x, attn_mask = attn_mask)
        x = self.ln1(x + self.dropout(attn_output))
        ff_output = self.ffn(x)
        x = self.ln2(x + self.dropout(ff_output))
        return x
    
class Model(nn.Module):

    def __init__(self, config):
        super().__init__()
        self.vocab_size = config.get("vocab_size", 50257)
        self.embed_dim = config.get("hidden_size", 4096)
        self.num_layers = config.get("num_hidden_layers", 32)
        self.num_heads = config.get("num_attention_heads", 16)
        self.max_seq_length = config.get("max_position_embeddings", 2048)
        dropout = config.get("dropout", 0.1)
        self.token_embed = nn.Embedding(self.vocab_size, self.embed_dim)
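        # learned absolute position embeddings, one vector per position up to max_seq_length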
        self.pos_embed = nn.Parameter(torch.zeros(1, self.max_seq_length, self.embed_dim))
        self.blocks = nn.ModuleList([
            TransformerBlock(self.embed_dim, self.num_heads, dropout)
            for _ in range(self.num_layers)
        ])
        self.ln_final = nn.LayerNorm(self.embed_dim)
        self.head = nn.Linear(self.embed_dim, self.vocab_size, bias = False)
    
    def forward(self, input_ids, attn_mask = None):
        x = self.token_embed(input_ids)
        seq_length = input_ids.size(1)
        x = x + self.pos_embed[:, :seq_length, :]
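        # nn.MultiheadAttention defaults to (seq, batch, embed), hence the transpose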
        x = x.transpose(0, 1)
        for block in self.blocks:
            x = block(x, attn_mask)
        x = x.transpose(0, 1)  # back to (batch, seq, embed)
        x = self.ln_final(x)
        logits = self.head(x)
        return logits
print("1")
model = Model(config)
print("2")
model.load_state_dict(state_dict)
print("3")
device = torch_directml.device()
print("4")
model.to(device)
print("5")
tokenizer = Tokenizer.from_file("tokenizer.json")
tokenizer.extra_config = tokenizer_config

def generate_text(prompt, max_length=50):
    encoding = tokenizer.encode(prompt)
    input_ids = torch.tensor([encoding.ids])
    print("Initial input_ids:", input_ids)
    with torch.no_grad():
        for i in range(max_length):
            outputs = model(input_ids)
            next_token_logits = outputs[0, -1, :]
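            # greedy decoding: always pick the single most likely next token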
            next_token_id = torch.argmax(next_token_logits).item()
            print(f"Iteration {i}: next_token_id: {next_token_id}")
            decoded_token = tokenizer.decode([next_token_id])
            print(f"Iteration {i}: decoded token: {decoded_token}")
            input_ids = torch.cat([input_ids, torch.tensor([[next_token_id]])], dim=1)
            if next_token_id == tokenizer.token_to_id("<eos>"):
                print("Encountered <eos> token, stopping generation.")
                break
    generated_text = tokenizer.decode(input_ids[0].tolist())
    print("Final generated text:", generated_text)
    return generated_text

print(generate_text("Once upon a time"))

I tried swapping from CPU to GPU first and it still didn't work; I haven't been able to get it to run at all. (It prints "1" and then just ends.)
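My current guess is memory: a 7B model in float32 needs about 28 GB just for the parameters (7e9 parameters × 4 bytes), which is likely more RAM than I have free, so the process may be getting killed mid-allocation. Here is a minimal check I sketched to test that guess; it reuses the Model class and config from above, and it assumes PyTorch 2.x (for the meta device) and psutil, neither of which is part of my original script:

import torch
import psutil

# how much RAM is actually free before building the model?
print("available RAM (GB):", psutil.virtual_memory().available / 1e9)

# building on the "meta" device allocates no real storage, so this
# should return instantly even for a huge model
with torch.device("meta"):
    meta_model = Model(config)

n_params = sum(p.numel() for p in meta_model.parameters())
print(f"{n_params / 1e9:.2f}B parameters, ~{n_params * 4 / 1e9:.0f} GB in float32")

If the meta construction finishes instantly but Model(config) on the CPU never returns, the architecture builds fine and the allocation itself is the bottleneck.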
