Semantic Representation: Embeddings capture the semantic meaning of text, allowing for more nuanced understanding and processing of language.
Efficient Retrieval: Embeddings enable efficient retrieval and similarity search over large volumes of text data, as the short sketch after this list illustrates.
Enhanced LLM Interactions: By supplying relevant context through semantic search, embeddings improve the quality and relevance of LLM responses.
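To make the retrieval point concrete, here is a minimal sketch of computing embeddings and a cosine similarity score directly with the sentence-transformers library; the model name all-MiniLM-L6-v2 matches the one used in the LangChain example below, and the two example sentences are illustrative only.

from sentence_transformers import SentenceTransformer, util

# encode two sentences into dense vectors (384 dimensions for this model)
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(["The president addressed Congress.",
                           "A speech was given to lawmakers."])

# a cosine similarity close to 1.0 means the sentences are semantically similar
print(util.cos_sim(embeddings[0], embeddings[1]).item())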
# for notebook progress bars
from tqdm.autonotebook import tqdm, trange

from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

# load the document and split it into chunks
loader = TextLoader("state_of_the_union.txt", encoding="utf-8")
documents = loader.load()
# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# hfemb = HuggingFaceEmbeddings()
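# each chunk is mapped to a 384-dimensional vector by all-MiniLM-L6-v2;
# HuggingFaceEmbeddings (commented out above) is a drop-in alternative backend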
# load it into Chroma
db = Chroma.from_documents(docs, embedding_function)
# query it
query = "What did the president say about Ketanji Brown Jackson"
matching_docs = db.similarity_search(query)

# print the most relevant chunk
print(matching_docs[0].page_content)
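If you also want to see how close each match is, the Chroma wrapper exposes a scored variant of the same lookup. The following is a minimal sketch, reusing the db and query objects created above; with Chroma's default distance metric, a lower score indicates a closer match.

# retrieve matches together with their distance scores
results = db.similarity_search_with_score(query)
for doc, score in results:
    print(f"{score:.4f}  {doc.page_content[:80]}")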