Skip to content

Commit

Permalink
Merge pull request #715 from github/always-be-treeing
Browse files Browse the repository at this point in the history
(Quasi)ABTs for syntax types
  • Loading branch information
robrix committed Mar 20, 2024
2 parents fe4a775 + 1d70d13 commit 3baf0b4
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 253 deletions.
4 changes: 2 additions & 2 deletions semantic-analysis/semantic-analysis.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,15 @@ library
Analysis.Project
Analysis.Reference
Analysis.Syntax
Analysis.Syntax.Python
Analysis.VM
build-depends:
, aeson >= 1.4 && < 3
, base >= 4.13 && < 5
, bytestring >= 0.10.8.2 && < 0.13
, containers ^>= 0.6
, filepath
, fused-effects ^>= 1.1
, hashable
, semantic-source ^>= 0.2
, text ^>= 1.2.3.1
, transformers ^>= 0.5
, vector ^>= 0.12.3
1 change: 0 additions & 1 deletion semantic-analysis/src/Analysis/Name.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Analysis.Name
( Name
-- * Constructors
Expand Down
283 changes: 33 additions & 250 deletions semantic-analysis/src/Analysis/Syntax.hs
Original file line number Diff line number Diff line change
@@ -1,270 +1,53 @@
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
module Analysis.Syntax
( -- * Terms
( -- * Syntax
Term(..)
, subterms
-- * Abstract interpretation
, eval0
, eval
-- * Macro-expressible syntax
, let'
, letrec
-- * Parsing
, parseFile
, parseGraph
, parseNode
-- * Debugging
, analyzeFile
, parseToTerm
, foldTerm
, paraTerm
, mendlerTerm
, mendlerParaTerm
) where

import qualified Analysis.Carrier.Statement.State as S
import Analysis.Effect.Domain
import Analysis.Effect.Env (Env, bind, lookupEnv)
import Analysis.Effect.Store
import Analysis.File
import Analysis.Name (Name, name)
import Analysis.Reference as Ref
import Control.Applicative (Alternative (..), liftA2, liftA3)
import Control.Carrier.Throw.Either (runThrow)
import Control.Effect.Labelled
import Control.Effect.Reader
import Control.Effect.Throw (Throw, throwError)
import Control.Exception
import Control.Monad (guard)
import Control.Monad.IO.Class
import qualified Data.Aeson as A
import qualified Data.Aeson.Parser as A
import qualified Data.Aeson.Types as A
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold, foldl')
import Data.Function (fix)
import qualified Data.IntMap as IntMap
import Data.List (sortOn)
import Data.List.NonEmpty (NonEmpty, fromList)
import Data.Maybe (listToMaybe)
import Data.Monoid (First (..))
import qualified Data.Set as Set
import Data.String (IsString (..))
import Data.Text (Text)
import qualified Data.Vector as V
import qualified Source.Source as Source
import Source.Span
import System.FilePath

data Term
= Var Name
| Noop
| Iff Term Term Term
| Bool Bool
| String Text
| Throw Term
| Let Name Term Term
| Term :>> Term
| Import (NonEmpty Text)
| Function Name [Name] Term
| Call Term [Term]
| Locate Span Term
deriving (Eq, Ord, Show)
-- Syntax

infixl 1 :>>
-- | (Currently) untyped term representations.
data Term sig v
= Var v
| Term (sig (Term sig v))

subterms :: Term -> Set.Set Term
subterms t = Set.singleton t <> case t of
Var _ -> mempty
Noop -> mempty
Iff c t e -> subterms c <> subterms t <> subterms e
Bool _ -> mempty
String _ -> mempty
Throw t -> subterms t
Let _ v b -> subterms v <> subterms b
a :>> b -> subterms a <> subterms b
Import _ -> mempty
Function _ _ b -> subterms b
Call f as -> subterms f <> foldMap subterms as
Locate _ b -> subterms b
instance (Eq (sig (Term sig v)), Eq v) => Eq (Term sig v) where
Var v1 == Var v2 = v1 == v2
Term s1 == Term s2 = s1 == s2
_ == _ = False

instance (Ord (sig (Term sig v)), Ord v) => Ord (Term sig v) where
compare (Var v1) (Var v2) = compare v1 v2
compare (Var _) _ = LT
compare (Term s1) (Term s2) = compare s1 s2
compare _ _ = GT

-- Abstract interpretation

eval0 :: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m, Has (Dom val) sig m, Has (Reader Reference) sig m, Has S.Statement sig m) => Term -> m val
eval0 = fix eval
subterms :: (Ord (sig (Term sig v)), Ord v, Foldable sig) => Term sig v -> Set.Set (Term sig v)
subterms = mendlerParaTerm (Set.singleton . Var) (\ k -> foldMap (uncurry Set.insert . k))

eval
:: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m, Has (Dom val) sig m, Has (Reader Reference) sig m, Has S.Statement sig m)
=> (Term -> m val)
-> (Term -> m val)
eval eval = \case
Var n -> lookupEnv n >>= maybe (dvar n) fetch
Noop -> dunit
Iff c t e -> do
c' <- eval c
dif c' (eval t) (eval e)
Bool b -> dbool b
String s -> dstring s
Throw e -> eval e >>= ddie
Let n v b -> do
v' <- eval v
let' n v' (eval b)
t :>> u -> do
t' <- eval t
u' <- eval u
t' >>> u'
Import ns -> S.simport ns >> dunit
Function n ps b -> letrec n (dabs ps (foldr (\ (p, a) m -> let' p a m) (eval b) . zip ps))
Call f as -> do
f' <- eval f
as' <- traverse eval as
dapp f' as'
Locate s t -> local (setSpan s) (eval t)
where
setSpan s r = r{ refSpan = s }


-- Macro-expressible syntax

let' :: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m) => Name -> val -> m a -> m a
let' n v m = do
addr <- alloc n
addr .= v
bind n addr m

letrec :: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m) => Name -> m val -> m val
letrec n m = do
addr <- alloc n
v <- bind n addr m
addr .= v
pure v


-- Parsing

parseFile :: (Has (Throw String) sig m, MonadIO m) => FilePath -> FilePath -> m (Source.Source, File Term)
parseFile srcPath jsonPath = do
contents <- liftIO (B.readFile jsonPath)
-- FIXME: get this from the JSON itself (cf https://github.com/tree-sitter/tree-sitter-graph/issues/69)
let sourcePath = replaceExtensions jsonPath "py"
sourceContents <- Source.fromUTF8 . B.toStrict <$> liftIO (B.readFile srcPath)
let span = decrSpan (Source.totalSpan sourceContents)
case A.eitherDecodeWith A.json' (A.iparse parseGraph) contents of
Left (_, err) -> throwError err
Right (_, Nothing) -> throwError "no root node found"
Right (_, Just root) -> pure (sourceContents, File (Reference sourcePath span) root)
where
decrSpan (Span (Pos sl sc) (Pos el ec)) = Span (Pos (sl - 1) (sc - 1)) (Pos (el - 1) (ec - 1))

newtype Graph = Graph { terms :: IntMap.IntMap Term }

-- | Parse a @Value@ into an entire graph of terms, as well as a root node, if any exists.
parseGraph :: A.Value -> A.Parser (Graph, Maybe Term)
parseGraph = A.withArray "nodes" $ \ nodes -> do
(untied, First root) <- fold <$> traverse parseNode (V.toList nodes)
-- @untied@ is an intmap, where the keys are graph node IDs and the values are functions from the final graph to the representations of said graph nodes. Likewise, @root@ is a function of the same variety, wrapped in a @Maybe@.
--
-- We define @tied@ as the fixpoint of the former to yield the former as a graph of type @Graph@, and apply the latter to said graph to yield the entry point, if any, from which to evaluate.
let tied = fix (\ tied -> ($ Graph tied) <$> untied)
pure (Graph tied, ($ Graph tied) <$> root)
foldTerm :: Functor sig => (v -> r) -> (sig r -> r) -> (Term sig v -> r)
foldTerm var sig = mendlerTerm var (\ k -> sig . fmap k)

-- | Parse a node from a JSON @Value@ into a pair of a partial graph of unfixed terms and optionally an unfixed term representing the root node.
--
-- The partial graph is represented as an adjacency map relating node IDs to unfixed terms—terms which may make reference to a completed graph to find edges, and which therefore can't be inspected until the full graph is known.
parseNode :: A.Value -> A.Parser (IntMap.IntMap (Graph -> Term), First (Graph -> Term))
parseNode = A.withObject "node" $ \ o -> do
edges <- o A..: fromString "edges"
index <- o A..: fromString "id"
o A..: fromString "attrs" >>= A.withObject "attrs" (\ attrs -> do
ty <- attrs A..: fromString "type"
node <- parseTerm attrs edges ty
pure (IntMap.singleton index node, node <$ First (guard (ty == "module"))))
paraTerm :: Functor sig => (v -> r) -> (sig (Term sig v, r) -> r) -> (Term sig v -> r)
paraTerm var sig = mendlerParaTerm var (\ k -> sig . fmap k)

parseTerm :: A.Object -> [A.Value] -> String -> A.Parser (Graph -> Term)
parseTerm attrs edges = locate attrs . \case
"string" -> const . String <$> attrs A..: fromString "text"
"true" -> pure (const (Bool True))
"false" -> pure (const (Bool False))
"throw" -> fmap Throw <$> maybe empty resolve (listToMaybe edges)
"if" -> liftA3 Iff <$> findEdgeNamed edges "condition" <*> findEdgeNamed edges "consequence" <*> findEdgeNamed edges "alternative" <|> pure (const Noop)
"block" -> children edges
"module" -> children edges
"identifier" -> const . Var . name <$> attrs A..: fromString "text"
"import" -> const . Import . fromList . map snd . sortOn fst <$> traverse (resolveWith (const moduleNameComponent)) edges
"function" -> liftA3 Function . pure . name <$> attrs A..: fromString "name" <*> pure (pure []) <*> findEdgeNamed edges "body"
"call" -> liftA2 Call . const . Var . name <$> attrs A..: fromString "function" <*> (sequenceA <$> traverse resolve edges)
"noop" -> pure (pure Noop)
t -> A.parseFail ("unrecognized type: " <> t <> " attrs: " <> show attrs <> " edges: " <> show edges)

findEdgeNamed :: (Foldable t, A.FromJSON a, Eq a) => t A.Value -> a -> A.Parser (Graph -> Term)
findEdgeNamed edges name = foldMap (resolveWith (\ rep attrs -> attrs A..: fromString "type" >>= (rep <$) . guard . (== name))) edges

-- | Map a list of edges to a list of child nodes.
children :: [A.Value] -> A.Parser (Graph -> Term)
children edges = fmap chain . traverse snd . sortOn fst <$> traverse (resolveWith child) edges
mendlerTerm :: (v -> r) -> (forall r' . (r' -> r) -> sig r'-> r) -> (Term sig v -> r)
mendlerTerm var sig = go
where
child :: (Graph -> Term) -> A.Object -> A.Parser (Int, Graph -> Term)
child term attrs = (,) <$> attrs A..: fromString "index" <*> pure term

chain :: [Term] -> Term
chain [] = Noop
chain (t:ts) = foldl' (:>>) t ts

moduleNameComponent :: A.Object -> A.Parser (Int, Text)
moduleNameComponent attrs = (,) <$> attrs A..: fromString "index" <*> attrs A..: fromString "text"

resolve :: A.Value -> A.Parser (Graph -> Term)
resolve = resolveWith (const . pure)

resolveWith :: ((Graph -> Term) -> A.Object -> A.Parser a) -> A.Value -> A.Parser a
resolveWith f = resolveWith' (f . flip ((IntMap.!) . terms))
go (Var v) = var v
go (Term s) = sig go s

resolveWith' :: (IntMap.Key -> A.Object -> A.Parser a) -> A.Value -> A.Parser a
resolveWith' f = A.withObject "edge" (\ edge -> do
sink <- edge A..: fromString "sink"
attrs <- edge A..: fromString "attrs"
f sink attrs)

locate :: A.Object -> A.Parser (Graph -> Term) -> A.Parser (Graph -> Term)
locate attrs p = do
span <- span
<$> attrs A..:? fromString "start-line"
<*> attrs A..:? fromString "start-col"
<*> attrs A..:? fromString "end-line"
<*> attrs A..:? fromString "end-col"
t <- p
case span of
Nothing -> pure t
Just s -> pure (Locate s <$> t)
mendlerParaTerm :: (v -> r) -> (forall r' . (r' -> (Term sig v, r)) -> sig r'-> r) -> (Term sig v -> r)
mendlerParaTerm var sig = go
where
span sl sc el ec = Span <$> (Pos <$> sl <*> sc) <*> (Pos <$> el <*> ec)


-- Debugging

analyzeFile
:: (Algebra sig m, MonadIO m)
=> FilePath
-> FilePath
-> ( forall term
. Ord term
=> ( forall sig m
. (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m, Has (Dom val) sig m, Has (Reader Reference) sig m, Has S.Statement sig m)
=> (term -> m val)
-> (term -> m val) )
-> Source.Source
-> File term
-> m b )
-> m b
analyzeFile srcPath jsonPath analyze = do
(src, file) <- parseToTerm srcPath jsonPath
analyze eval src file

parseToTerm :: (Algebra sig m, MonadIO m) => FilePath -> FilePath -> m (Source.Source, File Term)
parseToTerm srcPath jsonPath = do
parsed <- runThrow @String (parseFile srcPath jsonPath)
either (liftIO . throwIO . ErrorCall) pure parsed
go (Var v) = var v
go (Term s) = sig ((,) <*> go) s
Loading

0 comments on commit 3baf0b4

Please sign in to comment.