Skip to content

Commit

Permalink
Add funcion specific README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
slimslenderslacks committed Oct 9, 2024
1 parent 6fd4da1 commit f6ed8a3
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 53 deletions.
72 changes: 72 additions & 0 deletions functions/tree-sitter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Usage

Use this tool when an agent needs to extract code ranges using tree-sitter queries. Tree-sitter queries are already very common, and current foundation LLMs can already generate them.
Internally, this tool uses nixpkgs to pull pre-compiled and verified parsers. `markdown`, `python`, `java`, `html`, `dockerfile`, and `bash` have all been tested, but hundreds of other
languages are supported.

For example, if we prompt an LLM to "extract top-level function definitions from a Python module", it will generate the following query.

```lisp
(module (function_definition) @top-level)
```

The agent interface is shown here. This is the interface that the agent will use to interact with the tool.
As always, the tool itself is a docker container.

```yaml
name: tree-sitter
description: Extract code ranges using tree-sitter queries
parameters:
  type: object
  properties:
    lang:
      type: string
      description: language to parse
    query:
      type: string
      description: tree-sitter query
    file:
      type: string
      description: the file to parse
container:
  image: vonwig/tree-sitter:latest
  command:
    - "-lang"
    - "{{lang}}"
    - "-query"
    - "{{query}}"
  stdin:
    file: "{{file}}"
```
The tool streams back a series of JSON code ranges.
```json
{
"capture_name": "top-level",
"node_text": "def hello():\\n\\tprint(\"hello\")",
"start_byte": 0,
"end_byte": 30,
"start_point": {
"row": 0,
"column": 0
},
"end_point": {
"row": 0,
"column": 30
}
}
```

## Aside on tool creation

This tool itself was generated by an LLM.

## Using the container

The tool can also be called directly using `docker run`.

```sh
docker run --rm -i vonwig/tree-sitter -lang python -query "(module (function_definition) @top-level)" < <(echo "def hello():\n\tprint(\"hello\")")
```

72 changes: 64 additions & 8 deletions functions/tree-sitter/cmd/ts/main.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package main

import (
"flag"
"fmt"
"io/ioutil"
"os"
"encoding/json"

sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/python"
Expand All @@ -15,13 +17,22 @@ import (
)

func main() {
// Check if both language and query string are provided as arguments
if len(os.Args) < 3 {
fmt.Println("Usage: ./program <language> <query_string>")
// Define flags
languagePtr := flag.String("lang", "", "The programming language to parse (required)")
queryPtr := flag.String("query", "", "The query string to execute (required)")

// Parse flags
flag.Parse()

// Check if required flags are provided
if *languagePtr == "" {
fmt.Println("Usage: ./program -lang <language> -query <query_string>")
flag.PrintDefaults()
return
}
language := os.Args[1]
queryString := os.Args[2]

// language is mandatory
language := *languagePtr

// Create a parser
parser := sitter.NewParser()
Expand Down Expand Up @@ -58,8 +69,12 @@ func main() {
tree := parser.Parse(nil, sourceCode)
defer tree.Close()

// Write the S-expression of the tree to stdout
fmt.Println(tree.RootNode().String())
queryString := *queryPtr
if queryString == "" {
// Write the S-expression of the tree to stdout
fmt.Println(tree.RootNode().String())
return
}

// Create a query
query, err := sitter.NewQuery([]byte(queryString), lang)
Expand All @@ -85,7 +100,48 @@ func main() {
for _, capture := range match.Captures {
captureName := query.CaptureNameForId(capture.Index)
nodeText := capture.Node.Content(sourceCode)
fmt.Printf("Capture: %s, Node: %s\n", captureName, nodeText)

captureInfo := struct {
CaptureName string `json:"capture_name"`
NodeText string `json:"node_text"`
StartByte uint32 `json:"start_byte"`
EndByte uint32 `json:"end_byte"`
StartPoint struct {
Row uint32 `json:"row"`
Column uint32 `json:"column"`
} `json:"start_point"`
EndPoint struct {
Row uint32 `json:"row"`
Column uint32 `json:"column"`
} `json:"end_point"`
}{
CaptureName: captureName,
NodeText: nodeText,
StartByte: capture.Node.StartByte(),
EndByte: capture.Node.EndByte(),
StartPoint: struct {
Row uint32 `json:"row"`
Column uint32 `json:"column"`
}{
Row: capture.Node.StartPoint().Row,
Column: capture.Node.StartPoint().Column,
},
EndPoint: struct {
Row uint32 `json:"row"`
Column uint32 `json:"column"`
}{
Row: capture.Node.EndPoint().Row,
Column: capture.Node.EndPoint().Column,
},
}

jsonData, err := json.MarshalIndent(captureInfo, "", " ")
if err != nil {
fmt.Println("Error marshaling JSON:", err)
continue
}

fmt.Println(string(jsonData))
}
}
}
42 changes: 0 additions & 42 deletions functions/tree-sitter/flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -25,47 +25,6 @@

in rec {
packages = rec {
# darwin versus linux
dylibExt = if nixpkgs.lib.hasInfix "darwin" system then "dylib" else "so";

lib = pkgs.stdenv.mkDerivation {
name = "lib";
src = ./.;
installPhase = ''
mkdir -p $out/lib;
cp ${pkgs.tree-sitter}/lib/libtree-sitter.${dylibExt} $out/lib/;
cp ${pkgs.tree-sitter-grammars.tree-sitter-markdown}/parser $out/lib/libtree-sitter-markdown.${dylibExt};
cp ${pkgs.tree-sitter-grammars.tree-sitter-python}/parser $out/lib/libtree-sitter-python.${dylibExt};
'';
};

# derive the parser
parser = pkgs.stdenv.mkDerivation {
name = "parser";
src = ./.;
nativeBuildInputs = [
pkgs.gcc
pkgs.findutils
pkgs.patchelf
];
buildPhase = ''
${pkgs.gcc}/bin/gcc -o parser \
main.c \
-I${pkgs.tree-sitter}/include \
${pkgs.tree-sitter-grammars.tree-sitter-markdown}/parser \
${pkgs.tree-sitter-grammars.tree-sitter-python}/parser \
${pkgs.tree-sitter}/lib/libtree-sitter.${dylibExt}
'';

installPhase = ''
mkdir -p $out/bin;
cp parser $out/bin/parser;
'';

fixupPhase = ''
find $out -type f -exec patchelf --shrink-rpath '{}' \; -exec strip '{}' \; 2>/dev/null
'';
};

goBinary = pkgs.buildGoModule {
pname = "tree-sitter-query";
Expand All @@ -90,7 +49,6 @@
subPackages = [ "cmd/ts" ];
};

# the script must have gh in the PATH
default = pkgs.writeShellScriptBin "entrypoint" ''
export PATH=${pkgs.lib.makeBinPath [goBinary]}
ts "$@"
Expand Down
21 changes: 18 additions & 3 deletions functions/tree-sitter/runbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,26 @@ docker build -t vonwig/tree-sitter .
```

```sh
./result/bin/ts python "(module (function_definition) @top-level)" < test/resources/hello.py
./result/bin/ts markdown "(document (section (atx_heading (atx_h1_marker))) @h1)" < test/resources/hello.md
# docker:command=release-build

docker buildx build \
--builder hydrobuild \
--platform linux/amd64,linux/arm64 \
--tag vonwig/tree-sitter:latest \
--file Dockerfile \
--push .
```

```sh
./result/bin/ts -lang python -query "(module (function_definition) @top-level)" < test/resources/hello.py
./result/bin/ts -lang markdown -query "(document (section (atx_heading (atx_h1_marker))) @h1)" < test/resources/hello.md
```

```sh
./result/bin/ts -lang markdown < test/resources/hello.md
```

```sh
docker run --rm -i vonwig/tree-sitter python "(module (function_definition) @top-level)" < <(echo "def hello():\n\tprint(\"hello\")")
docker run --rm -i vonwig/tree-sitter -lang python -query "(module (function_definition) @top-level)" < <(echo "def hello():\n\tprint(\"hello\")")
```

0 comments on commit f6ed8a3

Please sign in to comment.