A Retrieval Augmented Generation (RAG) chain is a system that combines the strengths of large language models (LLMs) and information retrieval. It allows LLMs to access and process information from external sources, making their responses more comprehensive and informative.

In this example, the external source is Wikipedia.

import requests
from bs4 import BeautifulSoup
# tqdm is imported for progress bars when running in a notebook
from tqdm.autonotebook import tqdm, trange

# Send a request to the Wikipedia page for Data Science
response = requests.get(
    url="https://en.wikipedia.org/wiki/Data_science",
)
# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')
# Get textual content inside the main body of the article
content = soup.find(id="bodyContent")
#print(content.text)
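
To confirm the scrape worked, you can print a short preview of the extracted text instead of dumping the whole article (the slice length below is arbitrary):

# Preview the first 500 characters of the scraped article text
print(content.text[:500])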

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,   # Set chunk size to 512 characters
    length_function=len
)
chunked_text = text_splitter.split_text(content.text)
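
As a quick sanity check (not needed for the pipeline itself), you can look at how many chunks were produced and preview one of them:

# Inspect the result of the splitting step
print("Number of chunks:", len(chunked_text))
print(chunked_text[0][:200])  # first 200 characters of the first chunk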

from langchain_community.embeddings import SentenceTransformerEmbeddings

# Load the model for generating embeddings
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# Create an embedding for a single chunk of text (here, chunked_text[3])
chunk_embedding = embeddings.embed_documents([chunked_text[3]])


print("\n\n\n chunked_text[3]",chunked_text[3])

print("\n\n\n chunk_embedding", chunk_embedding)
from langchain_community.vectorstores import DocArrayInMemorySearch

# Alternative: store the embeddings in Milvus instead of an in-memory index
# from langchain_community.vectorstores import Milvus
# vector_db = Milvus.from_texts(texts=chunked_text, embedding=embeddings, collection_name="rag_milvus")
# retriever = vector_db.as_retriever()

# Store the chunks in an in-memory vector store (requires the docarray package),
# embedding each chunk with the model loaded above
vectorstore = DocArrayInMemorySearch.from_texts(
    chunked_text,
    embedding=embeddings,
)
# Set up the retriever
retriever = vectorstore.as_retriever()
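
To see what the retriever returns before wiring up the full chain, you can query it directly (retriever.invoke works on recent LangChain versions; older releases use get_relevant_documents instead):

# Fetch the chunks most similar to a sample query
docs = retriever.invoke("What does a data scientist do?")
for doc in docs:
    print(doc.page_content[:100])
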
# Initialize the LLM (the phi3 model must already be pulled, e.g. with `ollama pull phi3`)
from langchain_community.llms import Ollama

llm = Ollama(model="phi3")
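
Before building the chain, it is worth confirming that Ollama is reachable and the model responds; a one-off call outside the chain (the prompt below is arbitrary) does the job:

# Quick standalone call to verify the model is available
print(llm.invoke("Reply with one short sentence: are you ready?"))
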
# Define a custom prompt
from langchain_core.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)
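
To see exactly what the model will receive, you can render the template with placeholder values (the context and question strings below are just examples):

# Render the prompt with dummy values to inspect the text sent to the LLM
print(custom_rag_prompt.format(
    context="Data science combines statistics, programming, and domain knowledge.",
    question="What is data science?",
))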

# Build the RAG chain: retrieve the most relevant chunks, insert them into the prompt,
# pass the prompt to the LLM, and parse the generated response into a string.
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Join the retrieved documents into a single context string for the prompt
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Stream the answer token by token as the model generates it
for chunk in rag_chain.stream("What is a Data Scientist?"):
    print(chunk, end="", flush=True)
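
If you do not need streaming, the same chain can be called with invoke to get the full answer as a single string:

# Non-streaming variant: returns the complete answer at once
answer = rag_chain.invoke("What is a Data Scientist?")
print(answer)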
