I have an RX 6600 XT and a Ryzen 7 5700G, and I am trying to run the DeepSeek R1 Qwen 7B distill model from Hugging Face after downloading the model weights, the tokenizer, the config, and the weight index. I am trying to get the code below to run. Please explain why my code stops after model = Model(config):
import torch
from safetensors.torch import load_file
import torch.nn.functional as nnF
import torch.nn as nn
import json
from tokenizers import Tokenizer
import torch_directml
shard1 = load_file("model-00001-of-000002.safetensors")
shard2 = load_file("model-00002-of-000002.safetensors")
state_dict = {**shard1, **shard2}
with open("config.json", "r") as f:
    config = json.load(f)
with open("tokenizer.json", "r", encoding="utf-8") as f:
    tokenizer_config = json.load(f)
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.ln1 = nn.LayerNorm(embed_dim)
        ffn_dim = config.get("ffn_dim", 4 * embed_dim)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ffn_dim),
            nn.GELU(),
            nn.Linear(ffn_dim, embed_dim)
        )
        self.ln2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, attn_mask=None):
        attn_output, _ = self.attn(x, x, x, attn_mask=attn_mask)
        x = self.ln1(x + self.dropout(attn_output))
        ff_output = self.ffn(x)
        x = self.ln2(x + self.dropout(ff_output))
        return x
class Model(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.vocab_size = config.get("vocab_size", 50257)
        self.embed_dim = config.get("hidden_size", 4096)
        self.num_layers = config.get("num_hidden_layers", 32)
        self.num_heads = config.get("num_attention_heads", 16)
        self.max_seq_length = config.get("max_position_embeddings", 2048)
        dropout = config.get("dropout", 0.1)
        self.token_embed = nn.Embedding(self.vocab_size, self.embed_dim)
        self.pos_embed = nn.Parameter(torch.zeros(1, self.max_seq_length, self.embed_dim))
        self.blocks = nn.ModuleList([
            TransformerBlock(self.embed_dim, self.num_heads, dropout)
            for _ in range(self.num_layers)
        ])
        self.ln_final = nn.LayerNorm(self.embed_dim)
        self.head = nn.Linear(self.embed_dim, self.vocab_size, bias=False)

    def forward(self, input_ids, attn_mask=None):
        x = self.token_embed(input_ids)
        seq_length = input_ids.size(1)
        x = x + self.pos_embed[:, :seq_length, :]
        x = x.transpose(0, 1)  # (batch, seq, embed) -> (seq, batch, embed) for nn.MultiheadAttention
        for block in self.blocks:
            x = block(x, attn_mask)
        x = self.ln_final(x.transpose(0, 1))  # transpose back to (batch, seq, embed) before the final LayerNorm
        logits = self.head(x)
        return logits
print("1")
model = Model(config)
print("2")
model.load_state_dict(state_dict)
print("3")
device = torch_directml.device()
print("4")
model.to(device)
print("5")
tokenizer = Tokenizer.from_file("tokenizer.json")
tokenizer.extra_config = tokenizer_config
def generate_text(prompt, max_length=50):
    encoding = tokenizer.encode(prompt)
    # keep the prompt tensor on the same device as the model
    input_ids = torch.tensor([encoding.ids]).to(device)
    print("Initial input_ids:", input_ids)
    with torch.no_grad():
        for i in range(max_length):
            outputs = model(input_ids)
            next_token_logits = outputs[0, -1, :]
            next_token_id = torch.argmax(next_token_logits).item()
            print(f"Iteration {i}: next_token_id: {next_token_id}")
            decoded_token = tokenizer.decode([next_token_id])
            print(f"Iteration {i}: decoded token: {decoded_token}")
            input_ids = torch.cat(
                [input_ids, torch.tensor([[next_token_id]]).to(input_ids.device)], dim=1
            )
            if next_token_id == tokenizer.token_to_id("<eos>"):
                print("Encountered <eos> token, stopping generation.")
                break
    generated_text = tokenizer.decode(input_ids[0].tolist())
    print("Final generated text:", generated_text)
    return generated_text
print(generate_text("Once upon a time"))
I first tried swapping from CPU to GPU and it still didn't work; I haven't been able to get it running either way. (It prints out "1" and then just ends.)
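In case it helps, this is the kind of minimal check I could wrap around that line to get more information (just a sketch, not part of my original script; faulthandler and traceback are from the standard library). It enables faulthandler so a hard native crash still dumps a traceback, catches any Python exception from the constructor, and prints the parameter count, since a model with billions of float32 parameters needs tens of gigabytes of RAM just to be instantiated:

import faulthandler
import traceback

faulthandler.enable()  # dump a native traceback if the process dies without a Python error

try:
    print("building model...")
    model = Model(config)
    n_params = sum(p.numel() for p in model.parameters())
    # rough size in float32: 4 bytes per parameter
    print(f"model built: {n_params / 1e9:.2f}B parameters, ~{n_params * 4 / 1e9:.1f} GB in float32")
except Exception:
    traceback.print_exc()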