
TheBloke/Llama-2-7b does not appear to have a file named pytorch_model.bin, tf_model.h5, model.ckpt or flax_model.msgpack

As you can guess from the title, this is the error I get. I only changed the model passed to AutoModelForCausalLM. The older version was:


model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)

However, since my GPU is an NVIDIA GeForce RTX 2080 Ti, it takes about 20 minutes to answer a simple question. So I changed it to:


model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7b-Chat-GGUF",
                                             model_file="llama-2-7b-chat.q4_K_M.gguf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)

However, this does not work and throws the error in the title. The full code is below, in case it is needed.

One more note before the full code: I also have the file "llama-2-7b.Q5_K_m.gguf", downloaded from HF, in my local environment (but not in the virtual environment). I am not using this local file in the code, but I mention it in case it helps.

from langchain.document_loaders import JSONLoader
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
import os
import sys
import huggingface_hub
from huggingface_hub import notebook_login
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from torch import cuda, bfloat16
import chromadb
from pathlib import Path
from pprint import pprint
import json
from loader import JSONLoader
from langchain.prompts.chat import PromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate
from langchain.docstore.document import Document



def parse_json(json_data):
    """Parse JSON data into a Python dictionary."""
    return json.loads(json_data)


def create_doc(json_data):
    """Create a Document object from JSON data."""
    data = parse_json(json_data)
    content_value = ""

    # Collect values of keys that contain "item" in their name
    for key, value in data.items():
        if "item" in key.lower():
            content_value += value + "\n"

    return Document(page_content=content_value, metadata={"company": data["company"]})
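
## For reference: create_doc above expects JSON shaped roughly like this
## hypothetical example, with a "company" key plus one or more keys whose
## names contain "item":
##
## {
##     "company": "Acme Corp",
##     "item_1": "Revenue grew 12% in 2022.",
##     "item_2": "Two new offices opened."
## }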





##embed_model_id = 'BAAI/bge-base-en' ## CHANGE
embed_model_id = 'sentence-transformers/all-mpnet-base-v2'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu' ## NVIDIA GeForce RTX 2080 TI

embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 32}
)



docs = []

for file in os.listdir("lessdata"):
    if file.endswith(".json"):
        file_path = "./lessdata/" + file
        with open(file_path) as f:  ## renamed so the loop variable "file" isn't shadowed
            json_data = f.read()
        document = create_doc(json_data)
        docs.append(document)





document_splitter = RecursiveCharacterTextSplitter(separators=['\n'], chunk_size=500, chunk_overlap=100)
document_chunks = document_splitter.split_documents(docs)

vectordb = Chroma.from_documents(document_chunks, embedding=embed_model, persist_directory='./database')

##vectordb.persist()
'''
vectordb = Chroma.from_documents(document_chunks, embedding=embed_model, persist_directory='./database')
vectordb.persist('./database')
'''







### PLEASE DO NOT TOUCH THE VSCODE

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", use_auth_token=True)

model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7b-Chat-GGUF",
                                             model_file="llama-2-7b-chat.q4_K_M.gguf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)









'''
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)
'''







pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                device_map='auto',
                max_new_tokens=512,
                min_new_tokens=1,
                top_k=5)

## In the vectorstore, take the top 5 closest vectors/inputs/contexts, whatever you want to call them.

llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0.7})



memory = ConversationBufferMemory(memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)

system_template = r"""
Given a context, use your knowledge and answer the question. Be flexible, and try everything to answer in the format asked by query.
----
{context}
----
"""

user_template = "Question:```{question}```"



messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(user_template)
]

qa_prompt = ChatPromptTemplate.from_messages(messages)

jsonExpert = ConversationalRetrievalChain.from_llm(llm=llm,
                                                   retriever=vectordb.as_retriever(search_kwargs={'k': 1}),
                                                   verbose=True, memory=memory,
                                                   combine_docs_chain_kwargs={'prompt': qa_prompt},
                                                   return_source_documents=True)

## The retriever returns 1 output object.



chat_history = []
query = "Consider the financials and progress of companies that are in the tech business."
result = jsonExpert({"question": query, "chat_history": chat_history})  ## inputs go in a single dict
#result = jsonExpert({"question": query})

sources = result["source_documents"][0]
print(result['answer'])
pprint(sources)
pprint(memory)
asked Oct 20 '25 by rraven-v2


1 Answer

Basically you have to convert your downloaded weights to the Hugging Face Transformers format, using the conversion script that ships with transformers:

python src/transformers/models/llama/convert_llama_weights_to_hf.py \
    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path

Then you will get the config.json (and the other files that from_pretrained looks for).

Source: https://huggingface.co/docs/transformers/main/en/model_doc/llama
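
Once converted, a minimal sketch of loading the checkpoint (assuming the files landed in the /output/path directory used in the command above):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

## "/output/path" is the --output_dir from the conversion command above
tokenizer = AutoTokenizer.from_pretrained("/output/path")
model = AutoModelForCausalLM.from_pretrained("/output/path",
                                             device_map='auto',
                                             torch_dtype=torch.float16)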

answered Oct 24 '25 by Siraparapu Dilip


