Exploring AI-ML-NLP: 2024

Saturday, July 13, 2024

Finetune Large Language Models with DoRA (Weight-Decomposed Low-Rank Adaptation)

Introduction.

The DoRA (Weight-Decomposed Low-Rank Adaptation) algorithm offers an advanced approach to fine-tuning Large Language Models (LLMs) by decomposing each pre-trained weight matrix into a magnitude component and a direction component. Traditional methods like Low-Rank Adaptation (LoRA) improve parameter efficiency but often trade away some accuracy and training stability compared with full fine-tuning. DoRA addresses these issues by using the column-wise norm of the weight matrix to split it into a trainable magnitude vector and a normalized direction matrix; the direction is then updated with a LoRA-style low-rank adapter. This decomposition keeps learning efficient while maintaining model expressiveness and stability. Key advantages of DoRA include enhanced parameter efficiency, improved generalization, faster adaptation to new tasks, and minimal inference overhead.

Key Points:

Decomposes weights into magnitude and direction.
Enhances parameter efficiency without compromising performance.
Improves training stability and generalization.
Facilitates faster adaptation to new tasks.
Maintains efficient inference with minimal overhead.

Video Tutorial.

Code: Finetune Large Language Models with DoRA (Train).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import transformers
# pip install peft
# Sample QA Data
data = {
    'question': [
        "What is the capital of France?",
        "Who painted the Mona Lisa?",
        "What is the tallest mountain in the world?",
        "When did World War II end?",
        "Who wrote the play 'Romeo and Juliet'?",
        "What is the chemical symbol for gold?"
    ],
    'context': [
        "Paris is the capital and most populous city of France.",
        "The Mona Lisa is a half-length portrait painting by Italian Renaissance artist Leonardo da Vinci.",
        "Mount Everest is Earth's highest mountain above sea level, located in the Mahalangur Himal sub-range of the Himalayas.",
        "World War II (WWII or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945.",
        "Romeo and Juliet is a tragedy written by William Shakespeare early in his career about two young star-crossed lovers whose deaths ultimately reconcile their feuding families.",
        "Gold is a chemical element with the symbol Au and atomic number 79. In its purest form, it is a bright, slightly reddish yellow, dense, soft, malleable, and ductile metal."
    ],
    'answer': [
        "Paris",
        "Leonardo da Vinci",
        "Mount Everest",
        "1945",
        "William Shakespeare",
        "Au"
    ]
}
dataset = Dataset.from_dict(data)

# Load Llama Model and Tokenizer (both come from the same local checkpoint)
base_model_path = "D:\\OLLAMA_MODELS\\meta-llama\\Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
model = AutoModelForCausalLM.from_pretrained(base_model_path)
# Ensure padding token is set.
# Reuse the EOS token instead of registering a new '[PAD]' token: a new token
# would receive an id outside the model's embedding table, and this script
# never resizes the embeddings. The companion test script also pads with EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Adapter configuration: rank-8 low-rank adapters on the attention query and
# value projections, with the DoRA variant switched on through use_dora.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    use_dora=True,
)

# Wrap the base model with the adapter configuration
model = get_peft_model(model, peft_config)

# Preprocess Data
def generate_prompt(data_point):
    """Format one QA record as a single training string.

    The record must provide "question", "context" and "answer"; the result is
    the question wrapped in [INST] ... [/INST], followed by the context, the
    answer and a closing [/INST] marker.
    """
    question = data_point["question"]
    context = data_point["context"]
    answer = data_point["answer"]
    return f"[INST] {question} [/INST] {context} {answer} [/INST]"

# Attach the formatted prompt to every record under the "text" column
dataset = dataset.map(lambda record: {"text": generate_prompt(record)})


# Tokenize Data
def tokenize(prompt):
    """Tokenize the "text" field of a batch, keeping only ids and attention mask."""
    encoded = tokenizer(prompt["text"])
    return {key: encoded[key] for key in ("input_ids", "attention_mask")}


# Tokenize in batches and drop every original column so only model inputs remain
tokenized_dataset = dataset.map(
    tokenize,
    batched=True,
    remove_columns=dataset.column_names,
)

# Training Arguments (Optimized for CPU)
# One directory is used for the Trainer output and for the final saved model.
output_dir = "./llama-3-finetuned-qa-cpu"
training_args = TrainingArguments(
    per_device_train_batch_size=1,  # Very small batch size for CPU
    gradient_accumulation_steps=8,  # Accumulate gradients over multiple steps
    num_train_epochs=3,
    learning_rate=1e-4,
    logging_steps=10,
    output_dir=output_dir,
)

# Create Trainer
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    # mlm=False selects the causal-LM collator mode
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Fine-tune!
model.config.use_cache = False
trainer.train()

# Save the Fine-tuned Model
model.save_pretrained(output_dir)
# The test script loads the tokenizer from this same directory, so it has to
# be written out next to the model files.
tokenizer.save_pretrained(output_dir)

Code: Finetune Large Language Models with DoRA (Test).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Location of the fine-tuned model produced by the training script
model_path = "E:\\Niraj_Work\\DL_Projects\\llm_projects\\llm_advance_1\\llama-3-finetuned-qa-cpu"  # Path to your saved model
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Everything runs on the CPU: load the model and move it there in one step
device = torch.device("cpu")
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

# Fall back to the end-of-sequence token when the tokenizer has no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Load Your Question-Answering Dataset (Replace with your dataset)
# Assuming you have a list of dictionaries, each with 'question', 'context', and 'answer' keys
eval_data = [
    {"question": "What is the capital of France?", "context": "Paris is the capital and most populous city of France.", "answer": "Paris"},
    {"question": "Who painted the Mona Lisa?", "context": "The Mona Lisa is a half-length portrait painting by Italian Renaissance artist Leonardo da Vinci.", "answer": "Leonardo da Vinci"},
]

# Function to generate the inference prompt
def generate_prompt(data_point):
    """Build the inference prompt for one QA record.

    Only the question is included. The training prompt continues with the
    context and answer after the first [/INST]; putting them in the test
    prompt as well would hand the model the expected answer.
    """
    return f"""[INST] {data_point["question"]} [/INST]"""


# Test the Model
for data_point in eval_data:
    input_text = generate_prompt(data_point)
    encoded = tokenizer(input_text, return_tensors="pt").to(device)  # Move input to CPU
    prompt_length = encoded["input_ids"].shape[1]

    # Generate Answer
    # (early_stopping only applies to beam search, so it is not passed with num_beams=1)
    generation_output = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=50,  # Adjust as needed
        num_beams=1,  # You can try increasing num_beams if you have enough memory
        pad_token_id=tokenizer.pad_token_id,
    )

    # Extract and Print Answer
    # Decode only the tokens produced after the prompt, then keep the text up
    # to the first [/INST] marker, which closes each training example.
    new_tokens = generation_output[0][prompt_length:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    generated_answer = generated_text.split('[/INST]')[0].strip()
    print(f"Question: {data_point['question']}")
    print(f"Generated Answer: {generated_answer}")
    print(f"Actual Answer: {data_point['answer']}")

Reference.

Liu, Shih-Yang, Chien-Yi Wang, Hongxu Yin, Pavlo Molchanov, Yu-Chiang Frank Wang, Kwang-Ting Cheng, and Min-Hung Chen. "Dora: Weight-decomposed low-rank adaptation." arXiv preprint arXiv:2402.09353 (2024).
https://huggingface.co/papers/2402.09353
https://www.nirajai.com/home/llm

Saturday, June 29, 2024

LoRA, QLoRA and Fine-tuning large language models (LLMs)

Introduction.

Fine-tuning large language models (LLMs) is a common practice to adapt them for specific tasks, but it can be computationally expensive.

LoRA (Low-Rank Adaptation) is a technique that makes this process more efficient by introducing small adapter modules to the model. These adapters capture task-specific knowledge without modifying the original model's parameters, significantly reducing the number of trainable parameters.

QLoRA (Quantized LoRA) takes this further by combining LoRA with quantization, a process that reduces the precision of the model's weights. This decreases the model's memory footprint, making it possible to fine-tune LLMs on consumer-grade hardware. Both LoRA and QLoRA offer a powerful way to customize large language models for specific use cases while minimizing computational requirements and ensuring efficient model deployment.

Video Tutorial on LoRA.

Video Tutorial on QLoRA.

Video Tutorial on Fine-tuning Large Language Model (LLMs).

Fine-tuning Large Language Model (LLMs) - Without LORA/QLORA

Webpage Link: https://www.quantacosmos.com/2024/06/fine-tune-pretrained-large-language.html

Code: Fine-tuning Large Language Model (LLMs)- with LoRA (Training).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import transformers
# pip install peft
# Sample QA Data
data = {
    'question': [
        "What is the capital of France?",
        "Who painted the Mona Lisa?",
        "What is the tallest mountain in the world?",
        "When did World War II end?",
        "Who wrote the play 'Romeo and Juliet'?",
        "What is the chemical symbol for gold?"
    ],
    'context': [
        "Paris is the capital and most populous city of France.",
        "The Mona Lisa is a half-length portrait painting by Italian Renaissance artist Leonardo da Vinci.",
        "Mount Everest is Earth's highest mountain above sea level, located in the Mahalangur Himal sub-range of the Himalayas.",
        "World War II (WWII or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945.",
        "Romeo and Juliet is a tragedy written by William Shakespeare early in his career about two young star-crossed lovers whose deaths ultimately reconcile their feuding families.",
        "Gold is a chemical element with the symbol Au and atomic number 79. In its purest form, it is a bright, slightly reddish yellow, dense, soft, malleable, and ductile metal."
    ],
    'answer': [
        "Paris",
        "Leonardo da Vinci",
        "Mount Everest",
        "1945",
        "William Shakespeare",
        "Au"
    ]
}
dataset = Dataset.from_dict(data)

# Load Llama Model and Tokenizer (both come from the same local checkpoint)
base_model_path = "D:\\OLLAMA_MODELS\\meta-llama\\Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
model = AutoModelForCausalLM.from_pretrained(base_model_path)
# Ensure padding token is set.
# Reuse the EOS token instead of registering a new '[PAD]' token: a new token
# would receive an id outside the model's embedding table, and this script
# never resizes the embeddings. The companion test script also pads with EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Adapter configuration: rank-8 LoRA adapters on the attention query and
# value projections of a causal language model.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapter configuration
model = get_peft_model(model, peft_config)

# Preprocess Data
def generate_prompt(data_point):
    """Format one QA record as a single training string.

    The record must provide "question", "context" and "answer"; the result is
    the question wrapped in [INST] ... [/INST], followed by the context, the
    answer and a closing [/INST] marker.
    """
    question = data_point["question"]
    context = data_point["context"]
    answer = data_point["answer"]
    return f"[INST] {question} [/INST] {context} {answer} [/INST]"

# Attach the formatted prompt to every record under the "text" column
dataset = dataset.map(lambda record: {"text": generate_prompt(record)})


# Tokenize Data
def tokenize(prompt):
    """Tokenize the "text" field of a batch, keeping only ids and attention mask."""
    encoded = tokenizer(prompt["text"])
    return {key: encoded[key] for key in ("input_ids", "attention_mask")}


# Tokenize in batches and drop every original column so only model inputs remain
tokenized_dataset = dataset.map(
    tokenize,
    batched=True,
    remove_columns=dataset.column_names,
)

# Training Arguments (Optimized for CPU)
# One directory is used for the Trainer output and for the final saved model.
output_dir = "./llama-3-finetuned-qa-cpu"
training_args = TrainingArguments(
    per_device_train_batch_size=1,  # Very small batch size for CPU
    gradient_accumulation_steps=8,  # Accumulate gradients over multiple steps
    num_train_epochs=3,
    learning_rate=1e-4,
    logging_steps=10,
    output_dir=output_dir,
)

# Create Trainer
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    # mlm=False selects the causal-LM collator mode
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Fine-tune!
model.config.use_cache = False
trainer.train()

# Save the Fine-tuned Model
model.save_pretrained(output_dir)
# The test script loads the tokenizer from this same directory, so it has to
# be written out next to the model files.
tokenizer.save_pretrained(output_dir)

Code: Fine-tuning Large Language Model (LLMs)- with LoRA (Test)

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Location of the fine-tuned model produced by the training script
model_path = "E:\\Niraj_Work\\DL_Projects\\llm_projects\\llm_advance_1\\llama-3-finetuned-qa-cpu"  # Path to your saved model
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Everything runs on the CPU: load the model and move it there in one step
device = torch.device("cpu")
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

# Fall back to the end-of-sequence token when the tokenizer has no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Load Your Question-Answering Dataset (Replace with your dataset)
# Assuming you have a list of dictionaries, each with 'question', 'context', and 'answer' keys
eval_data = [
    {"question": "What is the capital of France?", "context": "Paris is the capital and most populous city of France.", "answer": "Paris"},
    {"question": "Who painted the Mona Lisa?", "context": "The Mona Lisa is a half-length portrait painting by Italian Renaissance artist Leonardo da Vinci.", "answer": "Leonardo da Vinci"},
]

# Function to generate the inference prompt
def generate_prompt(data_point):
    """Build the inference prompt for one QA record.

    Only the question is included. The training prompt continues with the
    context and answer after the first [/INST]; putting them in the test
    prompt as well would hand the model the expected answer.
    """
    return f"""[INST] {data_point["question"]} [/INST]"""


# Test the Model
for data_point in eval_data:
    input_text = generate_prompt(data_point)
    encoded = tokenizer(input_text, return_tensors="pt").to(device)  # Move input to CPU
    prompt_length = encoded["input_ids"].shape[1]

    # Generate Answer
    # (early_stopping only applies to beam search, so it is not passed with num_beams=1)
    generation_output = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=50,  # Adjust as needed
        num_beams=1,  # You can try increasing num_beams if you have enough memory
        pad_token_id=tokenizer.pad_token_id,
    )

    # Extract and Print Answer
    # Decode only the tokens produced after the prompt, then keep the text up
    # to the first [/INST] marker, which closes each training example.
    new_tokens = generation_output[0][prompt_length:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    generated_answer = generated_text.split('[/INST]')[0].strip()
    print(f"Question: {data_point['question']}")
    print(f"Generated Answer: {generated_answer}")
    print(f"Actual Answer: {data_point['answer']}")

Reference.

Hu, Edward J., Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. "Lora: Low-rank adaptation of large language models." arXiv preprint arXiv:2106.09685 (2021).
Dettmers, Tim, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. "Qlora: Efficient finetuning of quantized llms." Advances in Neural Information Processing Systems 36 (2024).

Sunday, June 23, 2024

Fine Tune Pretrained Large Language Models for Local use

Fine-Tuning Pretrained LLMs

Fine-tuning is a technique used to improve the performance of pre-trained Large Language Models (LLMs) on specific tasks or domains. During fine-tuning, the LLM learns the vocabulary, conventions, and nuances of the target domain, which improves its performance on that particular task.

Benefits of Fine-Tuning:

Increased Accuracy and Relevance: The LLM becomes more accurate and relevant to your specific needs by specializing in a particular domain.
Efficiency: Fine-tuning a pre-trained model is often faster and requires less computational power compared to training a new LLM from scratch.
Leveraging Existing Knowledge: The LLM retains its general language understanding from pre-training, which serves as a foundation for domain-specific learning.

Video Tutorial on Fine Tuning Pretrained Large Language Models.

Training: Fine Tuning Pretrained Large Language Models

import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
import torch

# Sample data creation
data = {
    "text": [
        "I loved this movie! The acting was great and the story was gripping.",
        "This was a terrible movie. The plot was predictable and the characters were uninteresting.",
        "The film was fantastic! I highly recommend it.",
        "I did not enjoy this movie at all. It was too slow and boring.",
        "The special effects were amazing, but the storyline was weak.",
        "An excellent film with a touching story.",
        "The movie was a masterpiece with brilliant performances.",
        "I found the movie to be dull and uninspiring.",
        "The direction and cinematography were outstanding.",
        "The movie was overly long and felt drawn out.",
        "An absolutely thrilling and engaging film.",
        "The characters were flat and the dialogue was poor.",
        "A wonderful film experience with a powerful message.",
        "The plot was confusing and hard to follow.",
        "One of the best movies I have seen this year!",
        "The acting was subpar and the story lacked depth.",
        "A heartwarming tale with excellent performances.",
        "The movie was full of clichés and very predictable.",
        "An emotional rollercoaster with a satisfying ending.",
        "I couldn't connect with the characters or the story.",
    ],
    "sentiment": [
        "positive", "negative", "positive", "negative", "negative",
        "positive", "positive", "negative", "positive", "negative",
        "positive", "negative", "positive", "negative", "positive",
        "negative", "positive", "negative", "positive", "negative"
    ]
}

# Build a DataFrame and split it 80/20, in order, into training and validation parts
df = pd.DataFrame(data)
split_at = int(len(df) * 0.8)
train_df, val_df = df[:split_at], df[split_at:]

# Convert both parts into Hugging Face datasets
train_dataset, val_dataset = (Dataset.from_pandas(part) for part in (train_df, val_df))

# Local checkpoint used for both the tokenizer and the model
model_name = "E:\\Niraj_Work\\LLM_Models\\Meta-Llama-3-8B-Instruct"  # Replace with the actual LLaMA3 model if available
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Register a dedicated '[PAD]' token when the tokenizer ships without one
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens(dict(pad_token='[PAD]'))

def tokenize_function(examples):
    """Tokenize the 'text' field, truncating and padding every entry to 128 tokens."""
    options = {"truncation": True, "padding": 'max_length', "max_length": 128}
    return tokenizer(examples['text'], **options)


# Tokenize both splits in batches
tokenized_train, tokenized_val = (
    split.map(tokenize_function, batched=True) for split in (train_dataset, val_dataset)
)

# Mapping sentiment to numerical labels
def map_labels(example):
    """Add an integer 'label' to the record: 1 for 'positive', 0 for anything else.

    The record is modified in place and also returned.
    """
    is_positive = example['sentiment'] == 'positive'
    example['label'] = int(is_positive)
    return example

# Attach the numeric label to every record, then drop the original
# 'sentiment' column from each split
tokenized_train = tokenized_train.map(map_labels).remove_columns(['sentiment'])
tokenized_val = tokenized_val.map(map_labels).remove_columns(['sentiment'])

# Load the classification model and align it with the tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# A '[PAD]' token may have been added to the tokenizer, so grow the embeddings to match
model.resize_token_embeddings(len(tokenizer))
# The sequence-classification head needs the pad token id to locate the last
# real token of each sequence; without it, batches larger than 1 are rejected.
model.config.pad_token_id = tokenizer.pad_token_id

# Define Data Collator with Padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Directory for the Trainer output and the final model; the test script
# loads the model and tokenizer from this same path.
output_dir = 'E:\\Niraj_Work\\LLM_Models\\Meta-Llama-3-8B-Instruct_updated'

# Training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy='epoch',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Define the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Fine-tune the model
trainer.train()

# Save the model and tokenizer.
# Without this step the trained classification head is never written to
# output_dir, and loading from that directory re-initialises 'score.weight'.
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

Test: Fine Tuning Pretrained Large Language Models

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd

# Load the fine-tuned model and tokenizer
# Both are read from the same directory, which is the output_dir used by the
# training script.
model_name = 'E:\\Niraj_Work\\LLM_Models\\Meta-Llama-3-8B-Instruct_updated'  # Directory where the model and tokenizer are saved
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Sample test data
test_data = {
    "text": [
        "This movie was fantastic! I really enjoyed it.",
        "I didn't like this movie at all. It was very boring.",
        # Add more test examples as needed
    ]
}
# Create a DataFrame for test data
test_df = pd.DataFrame(test_data)
# Ensure padding token is set
if tokenizer.pad_token is None:
    # tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    tokenizer.pad_token = tokenizer.eos_token
# Inputs below are padded to a fixed length, so the classifier must know the
# pad token id to read the last real token instead of a padding position.
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id
# Tokenize the test data
def tokenize_function(examples):
    """Tokenize the 'text' field into PyTorch tensors, truncated/padded to 128 tokens."""
    options = {
        "truncation": True,
        "padding": 'max_length',
        "max_length": 128,
        "return_tensors": 'pt',
    }
    return tokenizer(examples['text'], **options)


# One tokenized encoding per row of the test DataFrame
tokenized_test = test_df['text'].apply(lambda row_text: tokenize_function({'text': [row_text]}))

# Predict function
def predict(text_list):
    """Return the predicted class id for each text, classifying one text at a time."""
    model.eval()
    class_ids = []
    with torch.no_grad():
        for sample in text_list:
            encoded = tokenize_function({'text': [sample]})
            ids = encoded['input_ids'].to(model.device)
            mask = encoded['attention_mask'].to(model.device)
            scores = model(ids, attention_mask=mask).logits
            # Highest-scoring class along the label dimension
            class_ids.append(torch.argmax(scores, dim=1).item())
    return class_ids

# Class ids produced by the model and their human-readable names
sentiment_map = {0: 'negative', 1: 'positive'}
predicted_labels = predict(test_data['text'])

# Translate each numeric label into its sentiment name
predicted_sentiments = list(map(sentiment_map.__getitem__, predicted_labels))

# Show every input text next to its predicted sentiment
for index, text in enumerate(test_data['text'][:len(predicted_sentiments)]):
    sentiment = predicted_sentiments[index]
    print(f'Text: {text}\nPredicted Sentiment: {sentiment}\n')

Test Output:

C:\Users\admin\AppData\Local\Programs\Python\Python310\python.exe E:/Niraj_Work/DL_Projects/llm_projects/llm_advance_1/test_abcd.py
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [18:59<00:00, 284.85s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at E:\Niraj_Work\LLM_Models\Meta-Llama-3-8B-Instruct_updated and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Text: This movie was fantastic! I really enjoyed it.
Predicted Sentiment: positive

Text: I didn't like this movie at all. It was very boring.
Predicted Sentiment: negative


Process finished with exit code 0

Sunday, June 16, 2024

One-Shot LLM + RAG with Knowledge Graph

Background.

One-Shot.

One-shot learning refers to the model's ability to perform a task by seeing only one example. The model uses this single example as a reference to understand the task better and generate the appropriate response.

Application in LLM RAG.

Single Example Provided: The prompt includes a specific example that demonstrates how the task should be performed.
Learning from Example: The model uses this single example to learn how to apply the task's requirements to new inputs.

Video Tutorial.

Code.

import ollama
import chromadb

documents = [
  "Bears are carnivoran mammals of the family Ursidae.",
  "They are classified as caniforms, or doglike carnivorans.",
  "Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere.",
  "Bears are found on the continents of North America, South America, and Eurasia.",
  "Common characteristics of modern bears include large bodies with stocky legs, long snouts, small rounded ears, shaggy hair, plantigrade paws with five nonretractile claws, and short tails.",
  "With the exception of courting individuals and mothers with their young, bears are typically solitary animals.",
  "They may be diurnal or nocturnal and have an excellent sense of smell.",
  "Despite their heavy build and awkward gait, they are adept runners, climbers, and swimmers.",
  "Bears use shelters, such as caves and logs, as their dens; most species occupy their dens during the winter for a long period of hibernation, up to 100 days.",
]
Kg_triplets = [["Bears", "are", "carnivoran mammals"],
["Bears", "belong to", "family Ursidae"],
["Bears", "are classified as", "caniforms"],
["Caniforms", "are", "doglike carnivorans"],
["Bears", "have", "eight species"],
["Bears", "are", "widespread"],
["Bears", "appear in", "a wide variety of habitats"],
["Bears", "are found in", "Northern Hemisphere"],
["Bears", "are partially found in", "Southern Hemisphere"],
["Bears", "are found on", "North America"],
["Bears", "are found on", "South America"],
["Bears", "are found on", "Eurasia"],
["Modern bears", "have", "large bodies"],
["Modern bears", "have", "stocky legs"],
["Modern bears", "have", "long snouts"],
["Modern bears", "have", "small rounded ears"],
["Modern bears", "have", "shaggy hair"],
["Modern bears", "have", "plantigrade paws with five nonretractile claws"],
["Modern bears", "have", "short tails"],
["Bears", "are typically", "solitary animals"],
["Bears", "can be", "diurnal"],
["Bears", "can be", "nocturnal"],
["Bears", "have", "an excellent sense of smell"],
["Bears", "are", "adept runners"],
["Bears", "are", "adept climbers"],
["Bears", "are", "adept swimmers"],
["Bears", "use", "shelters"],
["Shelters", "include", "caves"],
["Shelters", "include", "logs"],
["Bears", "use", "shelters as dens"],
["Most species", "occupy", "dens during winter"],
["Bears", "hibernate for", "up to 100 days"],]
# Convert the triplets to text
def triplet_to_text(triplet):
  """Join the first three items of a triplet into one space-separated string."""
  subject, relation, target = triplet[0], triplet[1], triplet[2]
  return f"{str(subject)} {str(relation)} {str(target)}"
# triplet_texts = [triplet_to_text(triplet) for triplet in Kg_triplets]
# Create database
client = chromadb.PersistentClient(path="E:\\Niraj_Work\\DL_Projects\\llm_projects\\database_tmp")
# The distance metric has to be supplied when the collection is created;
# assigning this dict afterwards has no effect on the collection.
metadata = {"hnsw:space":"cosine"}
# get_or_create lets the script be re-run against the same persistent store
# instead of failing because the collection already exists.
collection = client.get_or_create_collection(name="bear_kg_one_shot", metadata=metadata)

# store each triplet in the vector embedding database
for d, triplet in enumerate(Kg_triplets):
  triplet_txt = triplet_to_text(triplet)
  response = ollama.embeddings(model="mxbai-embed-large", prompt=triplet_txt)
  embedding = response["embedding"]
  # upsert keeps a re-run from clashing with ids stored by an earlier run
  collection.upsert(
    ids=[str(d)],
    embeddings=[embedding],
    documents=[triplet_txt]
  )

# The user question, preceded by one sentence of context (the single "shot")
prompt = "How Polar bear's body looks?"
one_shot_context = "The polar bear is a large bear native to the Arctic and nearby areas."
augmented_text = f"{one_shot_context}\n{prompt}"

# Embed the augmented prompt and ask the collection for 4 results
response = ollama.embeddings(model="mxbai-embed-large", prompt=augmented_text)
query_embedding = response["embedding"]
results = collection.query(query_embeddings=[query_embedding], n_results=4)
print("result = ",results)
# print(collection.get(include=['embeddings','documents','metadatas']))

# Build the context string from every retrieved document.
# results['documents'] holds one inner list per query embedding, so with a
# single query its length is always 1; taking only [0][0] would drop all but
# the first of the n_results matches.
supported_docs = results['documents']
data = " ".join(str(doc) for docs in supported_docs for doc in docs)
# Combine the retrieved data with the augmented prompt and ask the model for an answer
final_prompt = f"Using this data: {data}. Respond to this prompt: {augmented_text}"
output = ollama.generate(model="llama3", prompt=final_prompt)

print(output['response'])

Zero-Shot LLM-RAG With Knowledge Graph

Background.

Zero-Shot.

Zero-shot learning refers to the model's ability to perform a task without having seen any explicit examples of that task during training or in the prompt. Instead, the model relies on its pre-existing knowledge and understanding of language to generate the desired output.

Application in LLM RAG.

No Examples Provided: When a task is given to the model, it receives only the task description or query without any specific examples to guide its response.
Generalization: The model generalizes from the task description to understand what is required and generates a response based on its training data.

Video Tutorial.

Working Code:

import ollama
import chromadb

documents = [
  "Bears are carnivoran mammals of the family Ursidae.",
  "They are classified as caniforms, or doglike carnivorans.",
  "Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere.",
  "Bears are found on the continents of North America, South America, and Eurasia.",
  "Common characteristics of modern bears include large bodies with stocky legs, long snouts, small rounded ears, shaggy hair, plantigrade paws with five nonretractile claws, and short tails.",
  "With the exception of courting individuals and mothers with their young, bears are typically solitary animals.",
  "They may be diurnal or nocturnal and have an excellent sense of smell.",
  "Despite their heavy build and awkward gait, they are adept runners, climbers, and swimmers.",
  "Bears use shelters, such as caves and logs, as their dens; most species occupy their dens during the winter for a long period of hibernation, up to 100 days.",
]
Kg_triplets = [["Bears", "are", "carnivoran mammals"],
["Bears", "belong to", "family Ursidae"],
["Bears", "are classified as", "caniforms"],
["Caniforms", "are", "doglike carnivorans"],
["Bears", "have", "eight species"],
["Bears", "are", "widespread"],
["Bears", "appear in", "a wide variety of habitats"],
["Bears", "are found in", "Northern Hemisphere"],
["Bears", "are partially found in", "Southern Hemisphere"],
["Bears", "are found on", "North America"],
["Bears", "are found on", "South America"],
["Bears", "are found on", "Eurasia"],
["Modern bears", "have", "large bodies"],
["Modern bears", "have", "stocky legs"],
["Modern bears", "have", "long snouts"],
["Modern bears", "have", "small rounded ears"],
["Modern bears", "have", "shaggy hair"],
["Modern bears", "have", "plantigrade paws with five nonretractile claws"],
["Modern bears", "have", "short tails"],
["Bears", "are typically", "solitary animals"],
["Bears", "can be", "diurnal"],
["Bears", "can be", "nocturnal"],
["Bears", "have", "an excellent sense of smell"],
["Bears", "are", "adept runners"],
["Bears", "are", "adept climbers"],
["Bears", "are", "adept swimmers"],
["Bears", "use", "shelters"],
["Shelters", "include", "caves"],
["Shelters", "include", "logs"],
["Bears", "use", "shelters as dens"],
["Most species", "occupy", "dens during winter"],
["Bears", "hibernate for", "up to 100 days"],]
# Convert the triplets to text
def triplet_to_text(triplet):
  """Join the first three items of a triplet into one space-separated string."""
  subject, relation, target = triplet[0], triplet[1], triplet[2]
  return f"{str(subject)} {str(relation)} {str(target)}"
# triplet_texts = [triplet_to_text(triplet) for triplet in Kg_triplets]
# Create database
client = chromadb.PersistentClient(path="E:\\Niraj_Work\\DL_Projects\\llm_projects\\database_tmp")
# The distance metric has to be supplied when the collection is created;
# assigning this dict afterwards has no effect on the collection.
metadata = {"hnsw:space":"cosine"}
# get_or_create lets the script be re-run against the same persistent store
# instead of failing because the collection already exists.
collection = client.get_or_create_collection(name="bear_kg", metadata=metadata)

# store each triplet in the vector embedding database
for d, triplet in enumerate(Kg_triplets):
  triplet_txt = triplet_to_text(triplet)
  response = ollama.embeddings(model="mxbai-embed-large", prompt=triplet_txt)
  embedding = response["embedding"]
  # upsert keeps a re-run from clashing with ids stored by an earlier run
  collection.upsert(
    ids=[str(d)],
    embeddings=[embedding],
    documents=[triplet_txt]
  )

# The user question; no example or extra context is added (zero-shot)
prompt = "How does the bear's body looks?"

# Embed the prompt and ask the collection for 3 results
response = ollama.embeddings(model="mxbai-embed-large", prompt=prompt)
query_embedding = response["embedding"]
results = collection.query(query_embeddings=[query_embedding], n_results=3)
print("result = ",results)
# Dump the full contents of the collection for inspection
stored_records = collection.get(include=['embeddings','documents','metadatas'])
print(stored_records)

# Build the context string from every retrieved document.
# results['documents'] holds one inner list per query embedding, so with a
# single query its length is always 1; taking only [0][0] would drop all but
# the first of the n_results matches.
supported_docs = results['documents']
data = " ".join(str(doc) for docs in supported_docs for doc in docs)
# Combine the retrieved data with the prompt and ask the model for an answer
final_prompt = f"Using this data: {data}. Respond to this prompt: {prompt}"
output = ollama.generate(model="llama3", prompt=final_prompt)

print(output['response'])

Knowledge Hyper Graph with LLM-RAG

Background.

Knowledge Hyper Graph.

A knowledge hypergraph generalizes the concept of a knowledge graph. Instead of triplets, relationships can involve multiple entities (not limited to three). This allows for more complex relationships.

Knowledge Hypergraph Tuples.

In a hypergraph, a single relationship can involve more than three entities.

(Bears, are, Carnivoran mammals, Family Ursidae)

(Bears, classified as, Caniforms, Doglike carnivorans)

(Eight species of bears, are, Extant, Widespread, Northern Hemisphere, Southern Hemisphere)

(Bears, found on, Continents, North America, South America, Eurasia)

(Modern bears, have characteristics, Large bodies, Stocky legs, Long snouts, Small rounded ears, Shaggy hair, Plantigrade paws, Five nonretractile claws, Short tails)

Video Tutorial.

Main Code: Knowledge Hyper Graph with LLM-RAG.

import ollama
import chromadb

# Source sentences about bears. The variable is kept for reference only: the
# rest of this script indexes Kg_triplets and never reads `documents`.
documents = [
  "Bears are carnivoran mammals of the family Ursidae.",
  "They are classified as caniforms, or doglike carnivorans.",
  "Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere.",
  "Bears are found on the continents of North America, South America, and Eurasia.",
  "Common characteristics of modern bears include large bodies with stocky legs, long snouts, small rounded ears, shaggy hair, plantigrade paws with five nonretractile claws, and short tails.",
  "With the exception of courting individuals and mothers with their young, bears are typically solitary animals.",
  "They may be diurnal or nocturnal and have an excellent sense of smell.",
  "Despite their heavy build and awkward gait, they are adept runners, climbers, and swimmers.",
  "Bears use shelters, such as caves and logs, as their dens; most species occupy their dens during the winter for a long period of hibernation, up to 100 days.",
]
# Relations describing the sentences above. Every entry here is a list of
# three strings: [subject, relation, object].
Kg_triplets = [["Bears", "belong to", "Family Ursidae"],
["Bears", "classified as", "Caniforms"],
["Bears", "number of species", "Eight"],
["Bears", "habitat", "Northern Hemisphere"],
["Bears", "habitat", "Southern Hemisphere"],
["Bears", "found in", "North America"],
["Bears", "found in", "South America"],
["Bears", "found in", "Eurasia"],
["Modern bears", "characteristic", "Large bodies"],
["Modern bears", "characteristic", "Stocky legs"],
["Modern bears", "characteristic", "Long snouts"],
["Modern bears", "characteristic", "Small rounded ears"],
["Modern bears", "characteristic", "Shaggy hair"],
["Modern bears", "characteristic", "Plantigrade paws with five nonretractile claws"],
["Modern bears", "characteristic", "Short tails"],
["Bears", "social behavior", "Solitary except courting and mothers with young"],
["Bears", "activity pattern", "Diurnal"],
["Bears", "activity pattern", "Nocturnal"],
["Bears", "sense", "Excellent smell"],
["Bears", "capability", "Adept runners"],
["Bears", "capability", "Adept climbers"],
["Bears", "capability", "Adept swimmers"],
["Bears", "use", "Shelters such as caves and logs"],
["Bears", "denning behavior", "Winter hibernation for up to 100 days"],]
# Convert a relation (triplet or longer hypergraph tuple) to text
def triplet_to_text(triplet):
  """Return the elements of a relation joined into one space-separated string.

  Args:
    triplet: Sequence of relation elements. Any length is accepted, so a
      hypergraph relation with more than three elements keeps all of its
      entities instead of being cut off after the third. Each element is
      converted with ``str``.

  Returns:
    The elements, in order, joined by single spaces.
  """
  return " ".join(str(part) for part in triplet)
# Text form of every relation; these strings are what gets embedded and stored.
triplet_texts = [triplet_to_text(triplet) for triplet in Kg_triplets]
# Create database
client = chromadb.PersistentClient(path="E:\\Niraj_Work\\DL_Projects\\llm_projects\\database_tmp")
# Pass the distance setting to create_collection. As a bare variable assigned
# after the call it was never applied to the collection.
metadata = {"hnsw:space":"cosine"}
collection = client.create_collection(name="bear_hkg", metadata=metadata)

# Store each relation text with its embedding, using the list position as id.
for d, triplet_txt in enumerate(triplet_texts):
  response = ollama.embeddings(model="mxbai-embed-large", prompt=triplet_txt)
  embedding = response["embedding"]
  collection.add(
    ids=[str(d)],
    embeddings=[embedding],
    documents=[triplet_txt]
  )

# an example prompt
prompt = "How does the bear's body looks?"

# Embed the prompt with the same model used for the stored texts, then fetch
# the three nearest entries from the collection.
response = ollama.embeddings(
  prompt=prompt,
  model="mxbai-embed-large"
)
results = collection.query(
  query_embeddings=[response["embedding"]],
  n_results=3
)

print(collection.get(include=['embeddings','documents','metadatas']))
print("result = ",results)

# results['documents'] is nested: one inner list of documents per query
# embedding. Flatten it so that every retrieved document reaches the LLM.
# The previous single-query branch kept only the first hit even though three
# were requested, and the multi-query branch stringified whole lists.
supported_docs = results['documents'] or []
data = " ".join(str(doc) for docs in supported_docs for doc in docs)

# Generate the answer from the prompt plus the retrieved context.
output = ollama.generate(
  model="llama3",
  prompt=f"Using this data: {data}. Respond to this prompt: {prompt}"
)

print(output['response'])

Additional Code: Constructing Hypergraph Triplets using LLM.

import ollama
import chromadb

# Source sentences about bears; they are joined into a single passage below
# and sent to the model as the text to extract relations from.
documents = [
  "Bears are carnivoran mammals of the family Ursidae.",
  "They are classified as caniforms, or doglike carnivorans.",
  "Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere.",
  "Bears are found on the continents of North America, South America, and Eurasia.",
  "Common characteristics of modern bears include large bodies with stocky legs, long snouts, small rounded ears, shaggy hair, plantigrade paws with five nonretractile claws, and short tails.",
  "With the exception of courting individuals and mothers with their young, bears are typically solitary animals.",
  "They may be diurnal or nocturnal and have an excellent sense of smell.",
  "Despite their heavy build and awkward gait, they are adept runners, climbers, and swimmers.",
  "Bears use shelters, such as caves and logs, as their dens; most species occupy their dens during the winter for a long period of hibernation, up to 100 days.",
]
# Merge the sentences into one passage so the model receives the whole text.
single_doc = " ".join(documents)
# Instruction line first, then the passage on the following line.
prompt = f"Give the list of all Knowledge Hypergraph triplets for the following text\n{single_doc}"

# Ask the model for the relations directly; no retrieval step is involved here.
output = ollama.generate(prompt=prompt, model="llama3")

print(output["response"])

Using Knowledge Graph with LLM-RAG

Background.

Knowledge Graph.

A knowledge graph represents information using a graph structure where entities are nodes and relationships between entities are edges. Each piece of information is typically represented as a triplet (subject, predicate, object).

Knowledge Graph Triplets.

Each triplet involves a single subject, predicate, and object. For example:

(Bears, are, Carnivoran mammals)
(Bears, belong to, Family Ursidae)
(Bears, classified as, Caniforms)
(Caniforms, also known as, Doglike carnivorans)
(Bears, number of species, Eight)

Video Tutorial.

Code.

1. Main code for using Knowledge Graph with LLM-RAG

import ollama
import chromadb

# Source sentences about bears. The variable is kept for reference only: the
# rest of this script indexes Kg_triplets and never reads `documents`.
documents = [
  "Bears are carnivoran mammals of the family Ursidae.",
  "They are classified as caniforms, or doglike carnivorans.",
  "Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere.",
  "Bears are found on the continents of North America, South America, and Eurasia.",
  "Common characteristics of modern bears include large bodies with stocky legs, long snouts, small rounded ears, shaggy hair, plantigrade paws with five nonretractile claws, and short tails.",
  "With the exception of courting individuals and mothers with their young, bears are typically solitary animals.",
  "They may be diurnal or nocturnal and have an excellent sense of smell.",
  "Despite their heavy build and awkward gait, they are adept runners, climbers, and swimmers.",
  "Bears use shelters, such as caves and logs, as their dens; most species occupy their dens during the winter for a long period of hibernation, up to 100 days.",
]
# Knowledge-graph triplets describing the sentences above. Each entry is a
# list of three strings: [subject, predicate, object].
Kg_triplets = [["Bears", "are", "carnivoran mammals"],
["Bears", "belong to", "family Ursidae"],
["Bears", "are classified as", "caniforms"],
["Caniforms", "are", "doglike carnivorans"],
["Bears", "have", "eight species"],
["Bears", "are", "widespread"],
["Bears", "appear in", "a wide variety of habitats"],
["Bears", "are found in", "Northern Hemisphere"],
["Bears", "are partially found in", "Southern Hemisphere"],
["Bears", "are found on", "North America"],
["Bears", "are found on", "South America"],
["Bears", "are found on", "Eurasia"],
["Modern bears", "have", "large bodies"],
["Modern bears", "have", "stocky legs"],
["Modern bears", "have", "long snouts"],
["Modern bears", "have", "small rounded ears"],
["Modern bears", "have", "shaggy hair"],
["Modern bears", "have", "plantigrade paws with five nonretractile claws"],
["Modern bears", "have", "short tails"],
["Bears", "are typically", "solitary animals"],
["Bears", "can be", "diurnal"],
["Bears", "can be", "nocturnal"],
["Bears", "have", "an excellent sense of smell"],
["Bears", "are", "adept runners"],
["Bears", "are", "adept climbers"],
["Bears", "are", "adept swimmers"],
["Bears", "use", "shelters"],
["Shelters", "include", "caves"],
["Shelters", "include", "logs"],
["Bears", "use", "shelters as dens"],
["Most species", "occupy", "dens during winter"],
["Bears", "hibernate for", "up to 100 days"],]
# Render one knowledge-graph triplet as plain text
def triplet_to_text(triplet):
  """Return "<subject> <predicate> <object>" for a knowledge-graph triplet.

  Args:
    triplet: Indexable sequence; positions 0, 1 and 2 are read and each is
      converted with ``str``. Any further elements are ignored.

  Returns:
    The three elements joined by single spaces.
  """
  subject, predicate, obj = triplet[0], triplet[1], triplet[2]
  return " ".join((str(subject), str(predicate), str(obj)))
# triplet_texts = [triplet_to_text(triplet) for triplet in Kg_triplets]
# Create database
client = chromadb.PersistentClient(path="E:\\Niraj_Work\\DL_Projects\\llm_projects\\database_tmp")
# Pass the distance setting to create_collection. As a bare variable assigned
# after the call it was never applied to the collection.
metadata = {"hnsw:space":"cosine"}
collection = client.create_collection(name="bear_kg", metadata=metadata)

# Store each triplet's text with its embedding, using the list position as id.
for d, triplet in enumerate(Kg_triplets):
  triplet_txt = triplet_to_text(triplet)
  response = ollama.embeddings(model="mxbai-embed-large", prompt=triplet_txt)
  embedding = response["embedding"]
  collection.add(
    ids=[str(d)],
    embeddings=[embedding],
    documents=[triplet_txt]
  )

# an example prompt
prompt = "How does the bear's body looks?"

# Embed the prompt with the same model used for the stored texts, then fetch
# the three nearest entries from the collection.
response = ollama.embeddings(
  prompt=prompt,
  model="mxbai-embed-large"
)
results = collection.query(
  query_embeddings=[response["embedding"]],
  n_results=3
)
print("result = ",results)
print(collection.get(include=['embeddings','documents','metadatas']))

# results['documents'] is nested: one inner list of documents per query
# embedding. Flatten it so that every retrieved document reaches the LLM.
# The previous single-query branch kept only the first hit even though three
# were requested, and the multi-query branch stringified whole lists.
supported_docs = results['documents'] or []
data = " ".join(str(doc) for docs in supported_docs for doc in docs)

# Generate the answer from the prompt plus the retrieved context.
output = ollama.generate(
  model="llama3",
  prompt=f"Using this data: {data}. Respond to this prompt: {prompt}"
)

print(output['response'])

2. Code to Construct Knowledge Graph.

import ollama
import chromadb

# Source sentences about bears; they are joined into a single passage below
# and sent to the model as the text to extract triplets from.
documents = [
  "Bears are carnivoran mammals of the family Ursidae.",
  "They are classified as caniforms, or doglike carnivorans.",
  "Although only eight species of bears are extant, they are widespread, appearing in a wide variety of habitats throughout most of the Northern Hemisphere and partially in the Southern Hemisphere.",
  "Bears are found on the continents of North America, South America, and Eurasia.",
  "Common characteristics of modern bears include large bodies with stocky legs, long snouts, small rounded ears, shaggy hair, plantigrade paws with five nonretractile claws, and short tails.",
  "With the exception of courting individuals and mothers with their young, bears are typically solitary animals.",
  "They may be diurnal or nocturnal and have an excellent sense of smell.",
  "Despite their heavy build and awkward gait, they are adept runners, climbers, and swimmers.",
  "Bears use shelters, such as caves and logs, as their dens; most species occupy their dens during the winter for a long period of hibernation, up to 100 days.",
]
# Merge the sentences into one passage so the model receives the whole text.
single_doc = " ".join(documents)
# Instruction line first, then the passage on the following line.
prompt = f"Give the list of all Knowledge Graph triplets for the following text\n{single_doc}"

# Ask the model for the triplets directly; no retrieval step is involved here.
output = ollama.generate(prompt=prompt, model="llama3")

print(output["response"])