Skip to content

Commit

Permalink
Merge branch 'main' into yoom/dspy
Browse files Browse the repository at this point in the history
  • Loading branch information
yoomlam authored Apr 9, 2024
2 parents 5bb49e6 + 817addc commit 9b48cb9
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 15 deletions.
17 changes: 11 additions & 6 deletions 02-household-queries/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@

# split text into chunks
def get_text_chunks_langchain(text, source):
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
texts = text_splitter.split_text(text)
docs = [Document(page_content=t, metadata={"source": source}) for t in texts]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=100)
texts = text_splitter.split_text(source + "\n\n" + text)
print(" Split into", len(texts))
docs = [
Document(page_content=t, metadata={"source": source.strip()}) for t in texts
]
return docs


Expand All @@ -31,18 +34,20 @@ def add_json_html_data_to_vector_db(vectordb, file_path, content_key, index_key)
json_data = json.load(data_file)

for content in json_data:
if not content[index_key].strip().endswith("?"):
continue
soup = BeautifulSoup(content[content_key], "html.parser")
text = soup.get_text(separator="\n", strip=True)
print("Processing document:", content[index_key])
chunks = get_text_chunks_langchain(text, content[index_key])
print(f"Loading Document {content[index_key]} chunk into vector db")
vectordb.add_documents(documents=chunks)


def ingest_call(vectordb):
# Load the PDF and create chunks
# download from https://drive.google.com/file/d/1--qDjraIk1WGxwuCGBP-nfxzOr9IHvcZ/view?usp=drive_link
pdf_path = "./tanf.pdf"
add_pdf_to_vector_db(vectordb=vectordb, file_path=pdf_path)
# pdf_path = "./tanf.pdf"
# add_pdf_to_vector_db(vectordb=vectordb, file_path=pdf_path)

# download from https://drive.google.com/drive/folders/1DkAQ03bBVIPoO1d8gcHVnilQ-9VXfhJ8?usp=drive_link
guru_file_path = "./guru_cards_for_nava.json"
Expand Down
14 changes: 7 additions & 7 deletions 02-household-queries/question_answer_citations.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
{
"id": 2,
"orig_question": "The client's son is 20 and is still living with them, but has his own job and buys his own food. The client doesn't want to list him on the application, do they have to? If so, do we need to include the son's income?",
"question": "The client's son is 20, is still living with them, but has his own job and buys his own food. Does the client have to list him on the application? If so, do we need to include the son's income?",
"question": "The client's son is 20, is still living with them, but has his own job and buys his own food. Does the client have to list him on the application? If so, do we need to include the dependent's income for SNAP?",
"orig_answer": "Yes, since the son is under 22, he is a mandatory household member and needs to be on the application. His income would also need to be listed since he is no longer a minor.",
"answer": "Yes",
"guru_cards": [
Expand All @@ -37,7 +37,7 @@
"orig_answer": "Not necessarily. If the client wants to apply by himself, he can, he just needs to only use the benefit for himself and no longer share meals with the rest of his household. If he wants to continue to share food with his roommates, they would need to be included in the application.",
"answer": "Not necessarily",
"guru_cards": [
"What if my client shares meals but whats to apply to food stamps on their own?"
"What if my client shares meals but wants to apply for food stamps on their own?"
]
},
{
Expand All @@ -47,7 +47,7 @@
"orig_answer": "No. She would need to include her husband if they are still married and living at the same address.",
"answer": "No",
"guru_cards": [
"How is spouse defined for food stamp applications?"
"How is \"spouse\" defined for food stamp applications?"
]
},
{
Expand All @@ -73,7 +73,7 @@
{
"id": 7,
"orig_question": "The client and her husband live with her parents. She is 20, but her husband is 23. They buy their own groceries. Can they apply by themselves?",
"question": "The client is 20 years old. She and her husband live with her parents, but they buy their own groceries. Can the client and her husband apply by themselves?",
"question": "The client is 20 years old. She and her husband live with her parents, but they buy their own groceries. Can the client and her husband apply for food stamps by themselves?",
"orig_answer": "The client would need to include their parents if they live in the same address since she is under 22.",
"answer": "No",
"guru_cards": [
Expand All @@ -83,7 +83,7 @@
{
"id": 8,
"orig_question": "This client and her husband are ineligible (LPR but only for 2 years), they have three kids. One 16, one 14 and one is 20. I just apply the kids right?",
"question": "The client and her husband have been landed permanent residents for 2 years. They have children ages 14, 16, and 20. Which family members should be listed on the application?",
"question": "The client and her husband have been lawful permanent residents for 2 years. They have three non-citizen children: two under 18 (ages 14 and 16) and one who is 20. Which family members should be listed on the SNAP application?",
"orig_answer": "The two younger kids would be eligible, but the parents would still need to be listed on the application. They are considered countable non-applicants. The 20 year old may need to be listed if they meet citizenship eligibility and share meals with the rest of the household.",
"answer": "The parents, the two younger children, and potentially the 20 year old child",
"guru_cards": [
Expand All @@ -95,7 +95,7 @@
{
"id": 9,
"orig_question": "The client lives with her boyfriend and they have a kid together. With the boyfriend's income, they are ineligible. Can I just apply the client with her kid since they aren't married?",
"question": "The client and her boyfriend have a child. Can the client and the child apply without listing the boyfriend?",
"question": "The client and her boyfriend have a child. Can the client and the child apply for food stamps without listing the boyfriend?",
"orig_answer": "No, boyfriend is still a mandatory household member because of the child. So he would need to be listed on the application.",
"answer": "No",
"guru_cards": [
Expand All @@ -115,7 +115,7 @@
{
"id": 11,
"orig_question": "This client is 21 so I'm including his mother on the application. We managed to get by the income and resources on best estimations but now we're on the expenses step and the client is unsure if his mother pays mortgage and is really at loss on how much anything in the household costs. Are we able to continue without this information or should the client call back when he does have it? The household is likely eligible based on what we have so far.",
"question": "Can the client apply without knowing their household expenses?",
"question": "This client is 21 so I'm including his mother on the food stamps application. The client is unsure about household costs. The household is likely eligible based on what we have so far. Are we able to continue without this information or should the client call back when he does have it?",
"orig_answer": "Yes, we can continue with the application. Expenses do not determine eligibility, but can add to benefit amount. They should add the expenses later on to potentially increase the benefit amount.",
"answer": "Yes",
"guru_cards": [
Expand Down
8 changes: 6 additions & 2 deletions 02-household-queries/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def count_extra_cards(retrieved_cards, guru_cards):
return len(extra_cards)


def evaluate_retrieval():
def evaluate_retrieval(vectordb):
qa = load_training_json()
results = []
retriever = create_retriever(vectordb)
Expand Down Expand Up @@ -126,6 +126,7 @@ def evaluate_retrieval():
2. Ingest and retrieve
3. Ingest only
4. Evaluate retrieval
5. Ingest and Evaluate retrieval
""")
run_option = input()
if run_option == "2":
Expand All @@ -134,6 +135,9 @@ def evaluate_retrieval():
elif run_option == "3":
ingest_call(vectordb=vectordb)
elif run_option == "4":
evaluate_retrieval()
evaluate_retrieval(vectordb)
elif run_option == "5":
ingest_call(vectordb=vectordb)
evaluate_retrieval(vectordb)
else:
retrieval_call(llm=llm, vectordb=vectordb)

0 comments on commit 9b48cb9

Please sign in to comment.