Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add document structure into GraphRAG #2033

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dbgpt/app/knowledge/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,12 +650,12 @@ def query_graph(self, space_name, limit):
{
"id": node.vid,
"communityId": node.get_prop("_community_id"),
"name": node.vid,
"type": "",
"name": node.name,
"type": node.get_prop("type") or ""
}
)
for edge in graph.edges():
res["edges"].append(
{"source": edge.sid, "target": edge.tid, "name": edge.name, "type": ""}
{"source": edge.sid, "target": edge.tid, "name": edge.name, "type": edge.get_prop("type") or ""}
)
return res
4 changes: 2 additions & 2 deletions dbgpt/rag/transformer/graph_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _parse_response(self, text: str, limit: Optional[int] = None) -> List[Graph]
match = re.match(r"\((.*?)#(.*?)\)", line)
if match:
name, summary = [part.strip() for part in match.groups()]
graph.upsert_vertex(Vertex(name, description=summary))
graph.upsert_vertex(Vertex(name, description=summary, vertex_type='entity'))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is incorrect. The vertex_type and edge_type should be determined by TuGraphStore itself when inserting.

elif current_section == "Relationships":
match = re.match(r"\((.*?)#(.*?)#(.*?)#(.*?)\)", line)
if match:
Expand All @@ -74,7 +74,7 @@ def _parse_response(self, text: str, limit: Optional[int] = None) -> List[Graph]
]
edge_count += 1
graph.append_edge(
Edge(source, target, name, description=summary)
Edge(source, target, name, description=summary, edge_type='relation')
)

if limit and edge_count >= limit:
Expand Down
18 changes: 17 additions & 1 deletion dbgpt/storage/graph_store/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dbgpt._private.pydantic import BaseModel, ConfigDict, Field
from dbgpt.core import Embeddings
from dbgpt.storage.graph_store.graph import Direction, Graph
from dbgpt.storage.graph_store.graph import Direction, Graph, Vertex

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -44,6 +44,10 @@ def get_vertex_type(self) -> str:
def get_edge_type(self) -> str:
"""Get the edge type."""

@abstractmethod
def get_document_vertex(self, doc_name:str) -> Vertex:
"""Get the document vertex by document name."""

@abstractmethod
def insert_triplet(self, sub: str, rel: str, obj: str):
"""Add triplet."""
Expand All @@ -60,6 +64,10 @@ def get_triplets(self, sub: str) -> List[Tuple[str, str]]:
def delete_triplet(self, sub: str, rel: str, obj: str):
"""Delete triplet."""

@abstractmethod
def delete_document(self, doc_name: str):
"""Delete document."""

@abstractmethod
def truncate(self):
"""Truncate Graph."""
Expand Down Expand Up @@ -87,6 +95,14 @@ def explore(
) -> Graph:
"""Explore on graph."""

def explore_text_link(
self,
subs: List[str],
depth: Optional[int] = None,
limit: Optional[int] = None,
) -> Graph:
"""Explore text link on graph."""

@abstractmethod
def query(self, query: str, **args) -> Graph:
"""Execute a query."""
Expand Down
20 changes: 14 additions & 6 deletions dbgpt/storage/graph_store/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,11 @@ def get_neighbor_edges(
"""Get neighbor edges."""

@abstractmethod
def vertices(self) -> Iterator[Vertex]:
def vertices(self, vertex_prop:Optional[str] = None) -> Iterator[Vertex]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

过滤器使用lambda表达式实现:
def vertices(self, filter_fn: Optional[Callable[[Vertex], bool]]=None) -> Iterator[Vertex]:

"""Get vertex iterator."""

@abstractmethod
def edges(self) -> Iterator[Edge]:
def edges(self, edge_prop:Optional[str] = None) -> Iterator[Edge]:
"""Get edge iterator."""

@abstractmethod
Expand Down Expand Up @@ -335,13 +335,21 @@ def unique_elements(elements):

return itertools.islice(es, limit) if limit else es

def vertices(self) -> Iterator[Vertex]:
def vertices(self, vertex_type: Optional[str] = None) -> Iterator[Vertex]:
"""Return vertices."""
return iter(self._vs.values())
return (
item for item in self._vs.values()
if vertex_type is None or item.get_prop('vertex_type') == vertex_type
)

def edges(self) -> Iterator[Edge]:
def edges(self, edge_type: Optional[str] = None) -> Iterator[Edge]:
"""Return edges."""
return iter(e for nbs in self._oes.values() for es in nbs.values() for e in es)
return (
e for nbs in self._oes.values()
for es in nbs.values()
for e in es
if edge_type is None or e.get_prop('edge_type') == edge_type
)

def del_vertices(self, *vids: str):
"""Delete specified vertices."""
Expand Down
Loading
Loading