Skip to content

Commit

Permalink
Update TigerRag Langchain Folder to Make it Match Newest Movie_Recs L…
Browse files Browse the repository at this point in the history
  • Loading branch information
gjyotin305 committed Dec 2, 2023
1 parent 7c1c161 commit 027f1d8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 82 deletions.
81 changes: 8 additions & 73 deletions TigerRag/demos/langchain/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from tigerrag.base.loaders import TigerRAGDataFrameLoader
# from langchain.embeddings import GPT4AllEmbeddings
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from tigerrag.base.models import EmbeddingModel, TigerRAGEmbeddingModel
from langchain.embeddings import GPT4AllEmbeddings
from tigerrag.rag.retrieval_augmenters import OpenAIRetrievalAugmenter
from tigerrag.gar.query_augmenters import OpenAIQueryAugmenter
import openai
import os
import sys

Expand All @@ -19,73 +15,13 @@
# python demo.py
# python demo.py -number_of_run 4

# Initialize BERT tokenizer and model
# Module-level TigerRAG embedding wrapper around BERT; used as the
# `embedding` argument when building the Chroma vector store in
# get_documents_embeddings below.
trag_bert_model = TigerRAGEmbeddingModel(EmbeddingModel.BERT)

# LangChain-native SentenceTransformer embedding over the same base model.
# NOTE(review): appears unused in the visible code — confirm before removing.
embedding_func = SentenceTransformerEmbeddings(model_name="bert-base-uncased")


def get_documents_embeddings(documents):
    """Load web documents, chunk them, and index the chunks in Chroma.

    Args:
        documents: URL(s) understood by ``WebBaseLoader``.

    Returns:
        A ``Chroma`` vector store built from 500-character, non-overlapping
        chunks embedded with the module-level ``trag_bert_model``.
    """
    raw_docs = WebBaseLoader(documents).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    chunks = splitter.split_documents(raw_docs)

    # Embed and persist the chunks in an in-memory Chroma store.
    return Chroma.from_documents(documents=chunks, embedding=trag_bert_model)


# EBR
def ebr(question, vectorstore):
    """Embedding-based retrieval: return the closest document to *question*.

    Runs a similarity search against *vectorstore* and returns only the
    top-ranked hit. Raises IndexError if the search returns no documents,
    matching the original behavior.
    """
    return vectorstore.similarity_search(question)[0]


# RAG
def generate_answer_with_rag_gpt3(question, context, openai_text_model):
    """Retrieval-Augmented Generation: answer *question* grounded in *context*.

    Args:
        question: The user question to answer.
        context: Retrieved text to ground the generation in.
        openai_text_model: Name of the OpenAI completion model to use.

    Returns:
        The generated answer string produced by the OpenAI-backed
        retrieval augmenter.
    """
    # Retrieval-Augmented Generation: fold the retrieved context into the prompt.
    prompt = f"Context: {context} Question: {question}. Provide a summary or answer:"

    # Generation is delegated to the TigerRAG OpenAI retrieval augmenter
    # (replaces the legacy direct openai.Completion.create call).
    openai_generative_retrieval_augmenter = OpenAIRetrievalAugmenter(
        openai_text_model)
    answer = openai_generative_retrieval_augmenter.get_augmented_retrieval(
        prompt)

    return answer


# GAR
def generate_answer_with_gar_gpt3(question, context, openai_text_model, vectorstore):
    """Generation-Augmented Retrieval: expand the query, then retrieve.

    Args:
        question: The user question to expand and retrieve against.
        context: Unused; kept for signature parity with the RAG helper.
        openai_text_model: Name of the OpenAI completion model to use.
        vectorstore: Vector store searched with the expanded query.

    Returns:
        The top document retrieved for the GPT-expanded query.
    """
    # Generation-Augmented Retrieval: ask the model to expand the raw query.
    prompt = f"Expand on the query: {question}"

    # Query expansion is delegated to the TigerRAG OpenAI query augmenter
    # (replaces the legacy direct openai.Completion.create call).
    openai_generative_query_augmenter = OpenAIQueryAugmenter(openai_text_model)
    augmented_query = openai_generative_query_augmenter.get_augmented_query(
        prompt)

    # Retrieval with the expanded query.
    answer = ebr(augmented_query, vectorstore)

    return answer


def is_intstring(s):
try:
int(s)
Expand Down Expand Up @@ -122,30 +58,29 @@ def main():
chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(loader.load())

vectorstore = get_documents_embeddings(documents)
vectorstore = Chroma.from_documents(
documents=splits, embedding=GPT4AllEmbeddings())
print("The following is EBR output for question: "+question)
retrieved_context = ebr(question, vectorstore)
print(retrieved_context)

# Example usage of RAG
print("The following is RAG output for question: "+question)
# print(generate_answer_with_rag_gpt3(
# question, retrieved_context, 'text-davinci-003'))

print("The following is RAG output for question: "+question)
# Retrieval Augmented Generation
prompt_rag = f"Context: {retrieved_context} Question: {question}. Provide a summary or answer:"

prompt_rag = f"""Context: {retrieved_context} Question: {question}.
Provide a summary or answer:"""
openai_generative_retrieval_augmenter = OpenAIRetrievalAugmenter(
"text-davinci-003")
answer_rag = openai_generative_retrieval_augmenter.get_augmented_retrieval(
prompt_rag)
print(answer_rag)

# Example usage of GAR

print("The following is GAR output for question: "+question)
# print(generate_answer_with_gar_gpt3(
# question, retrieved_context, 'text-davinci-003', vectorstore))
prompt_gar = prompt = f"Expand on the query: {question}"
prompt_gar = f"Expand on the query: {question}"
openai_generative_query_augmenter = OpenAIQueryAugmenter(
"text-davinci-003")
augmented_query = openai_generative_query_augmenter.get_augmented_query(
Expand Down
10 changes: 1 addition & 9 deletions TigerRag/tigerrag/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class EmbeddingModel(Enum):
XLNET = 3


class TigerRAGEmbeddingModels:
class TigerRAGEmbeddingModel:
def __init__(self, model_id: EmbeddingModel):
if model_id is EmbeddingModel.BERT:
self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
Expand All @@ -53,11 +53,3 @@ def get_embedding_from_text(self, text: str) -> npt.NDArray:
def get_embedding_from_series(self, pd_series: pd.Series) -> npt.NDArray:
    """Embed every text in *pd_series* and stack the results row-wise."""
    per_text_embeddings = pd_series.apply(self.get_embedding_from_text)
    return np.vstack(per_text_embeddings)

def embed_documents(self, texts: List[str]) -> List[List[float]]:
    """Embed each document text; one embedding per input, in order."""
    return [self.get_embedding_from_text(doc) for doc in texts]

def embed_query(self, query: str) -> List[float]:
    """Embed a single query string."""
    return self.get_embedding_from_text(query)

0 comments on commit 027f1d8

Please sign in to comment.