A Retrieval-Augmented Generation (RAG) chain is a system that combines the strengths of large language models (LLMs) with information retrieval. It lets an LLM access and process information from external sources, making its responses more comprehensive and better grounded.
In this example the external source is Wikipedia.
import requests
from bs4 import BeautifulSoup
# Progress-bar support when running in a notebook
from tqdm.autonotebook import tqdm, trange
# Send a request to the Wikipedia page for Data Science
response = requests.get(
    url="https://en.wikipedia.org/wiki/Data_science",
)
# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')
# Get textual content inside the main body of the article
content = soup.find(id="bodyContent")
#print(content.text)
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,  # Set chunk size to 512 characters
    length_function=len,
)
chunked_text = text_splitter.split_text(content.text)
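As a quick, optional sanity check (not part of the pipeline itself), you can see how many chunks the splitter produced and preview one of them:
# Optional: inspect the chunking result
print("Number of chunks:", len(chunked_text))
print(chunked_text[0][:200])  # first 200 characters of the first chunk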
from langchain_community.embeddings import SentenceTransformerEmbeddings
# Load the model for generating embeddings
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# Create an embedding for one chunk of text (index 3)
chunk_embedding = embeddings.embed_documents([chunked_text[3]])
print("\nchunked_text[3]:", chunked_text[3])
print("\nchunk_embedding:", chunk_embedding)
from langchain_community.vectorstores import DocArrayInMemorySearch
# Alternative: store the embeddings in Milvus instead of in memory
# from langchain.vectorstores.milvus import Milvus
# vector_db = Milvus.from_texts(texts=chunked_text, embedding=embeddings, collection_name="rag_milvus")
# retriever = vector_db.as_retriever()
# Build an in-memory vector store from the chunks, embedding them with the model loaded above
vectorstore = DocArrayInMemorySearch.from_texts(
    chunked_text,
    embedding=embeddings,
)
# Set up the retriever
retriever = vectorstore.as_retriever()
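Before wiring up the full chain, it can help to check what the retriever returns for a sample query. The query string below is just an illustration; in recent LangChain versions the retriever is a Runnable and supports invoke:
# Optional: preview the chunks the retriever considers most relevant
docs = retriever.invoke("What is a Data Scientist?")
for doc in docs:
    print(doc.page_content[:100], "...")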
#Initialize the LLM
# Use the LangChain Ollama wrapper so the model can be piped into the chain below
from langchain_community.llms import Ollama
llm = Ollama(model="phi3")
#Define a Custom Prompt
from langchain_core.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)
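To see exactly what text the LLM will receive, you can render the template with sample values (the context string below is a made-up placeholder, not retrieved content):
# Optional: render the prompt with placeholder values
print(custom_rag_prompt.format(
    context="Data science is an interdisciplinary field ...",
    question="What is a Data Scientist?",
))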
#Build the RAG Chain
#Create the RAG chain that will retrieve the most relevant chunks, pass them to the LLM, and output the generated response.
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)
for chunk in rag_chain.stream("What is a Data Scientist?"):
    print(chunk, end="", flush=True)
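If you prefer a single complete answer instead of streaming tokens, the same chain can be called with invoke:
# Alternative to streaming: get the full answer in one call
answer = rag_chain.invoke("What is a Data Scientist?")
print(answer)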