As you can guess from the title, this is the error I get. I only changed the model passed to AutoModelForCausalLM. The older version was:
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)
However, since my GPU is an NVIDIA GeForce RTX 2080 Ti, it takes about 20 minutes to answer a simple question. So I changed it to:
model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7b-Chat-GGUF",
                                             model_file="llama-2-7b-chat.q4_K_M.gguf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)
However, this does not work and raises the error. Below is the full code, in case it is needed to diagnose the problem.
Before the full code: I also have the file "llama-2-7b.Q5_K_m.gguf" downloaded from HF in my local environment, but not in the virtual environment. I am not using this local file in the code, but I mention it in case it helps.
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
import os
import sys
import huggingface_hub
from huggingface_hub import notebook_login
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from torch import cuda, bfloat16
import chromadb
from pathlib import Path
from pprint import pprint
import json
from loader import JSONLoader
from langchain.prompts.chat import PromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate
import json
from langchain.docstore.document import Document
def parse_json(json_data):
    """Parse JSON data into a Python dictionary."""
    return json.loads(json_data)

def create_doc(json_data):
    """Create a Document object from JSON data."""
    data = parse_json(json_data)
    content_value = ""
    # Collect values of keys that contain "item" in their name
    for key, value in data.items():
        if "item" in key.lower():
            content_value += value + "\n"
    return Document(page_content=content_value, metadata={"company": data["company"]})
##embed_model_id = 'BAAI/bge-base-en' ## CHANGE
embed_model_id = 'sentence-transformers/all-mpnet-base-v2'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu' ## NVIDIA GeForce RTX 2080 TI
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 32}
)
docs = []
for file in os.listdir("lessdata"):
    if file.endswith(".json"):
        file_path = "./lessdata/" + file
        with open(file_path) as file:
            json_data = file.read()
        document = create_doc(json_data)
        docs.append(document)
document_splitter = RecursiveCharacterTextSplitter(separators=['\n'], chunk_size = 500, chunk_overlap = 100)
document_chunks = document_splitter.split_documents(docs)
vectordb = Chroma.from_documents(document_chunks,embedding=embed_model, persist_directory='./database')
##vectordb.persist()
'''
vectordb = Chroma.from_documents(document_chunks,embedding=embed_model, persist_directory='./database')
vectordb.persist('./database')
'''
### PLEASE DO NOT TOUCH THE VSCODE
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", use_auth_token = True,)
model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7b-Chat-GGUF",
                                             model_file="llama-2-7b-chat.q4_K_M.gguf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)
'''
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True)
'''
pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                device_map='auto',
                max_new_tokens=512,
                min_new_tokens=1,
                top_k=5)  ## see it
## In vectorstore, take top 5 closest vectors-inputs-contexts, whatever you wanna call.
llm = HuggingFacePipeline(pipeline=pipe, model_kwargs= {'temperature':0.7})
memory = ConversationBufferMemory(memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)
system_template = r"""
Given a context, use your knowledge and answer the question. Be flexible, and try everything to answer in the format asked by query.
----
{context}
----
"""
user_template = "Question:```{question}```"
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template(user_template)
]
qa_prompt = ChatPromptTemplate.from_messages(messages)
jsonExpert = ConversationalRetrievalChain.from_llm(llm=llm,
                                                   retriever=vectordb.as_retriever(search_kwargs={'k': 1}),  ## whats it
                                                   verbose=True,
                                                   memory=memory,
                                                   combine_docs_chain_kwargs={'prompt': qa_prompt},
                                                   return_source_documents=True)
##retriever returns 1 output object.
chat_history = []
query = "Consider the financials and progress of companies who is in the tech business."
result = jsonExpert({"question": query}, {"chat_history": chat_history})
#result = jsonExpert({"question": query})
sources = result["source_documents"][0]
print(result['answer'])
pprint(sources)
pprint(memory)
Basically, you have to convert your downloaded weights to the Hugging Face Transformers format using this script:
python src/transformers/models/llama/convert_llama_weights_to_hf.py \
    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path
Then you would be able to get the config.json.
Source: https://huggingface.co/docs/transformers/main/en/model_doc/llama
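Once the conversion finishes, the output directory contains config.json plus the weights, so the original from_pretrained call can simply point at that local path. A minimal sketch, assuming the converted checkpoint was written to /output/path (the same --output_dir used above) and that accelerate is installed so device_map='auto' works:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hypothetical local path: the --output_dir used in the conversion command
converted_path = "/output/path"

tokenizer = AutoTokenizer.from_pretrained(converted_path)
model = AutoModelForCausalLM.from_pretrained(converted_path,
                                             device_map='auto',
                                             torch_dtype=torch.float16)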