Hide code cell source
from IPython.display import YouTubeVideo

YouTubeVideo("I2ICNT56Rdc")

EGRAPHS Community Call Talk#

Egglog as a tool for building an optimizing composable type safe DSLs in Python

… to help drive theoretical development of e-graphs in conjunction with impacting (large) real world communities.

Now that I have this great e-graph library in Python, what extra mechanisms do I need to make it useful in existing Python code?

This talk will go thorugh a few techniques developed and also point to how by bringing in use cases from scientific Python can help drive further theoretic research

Optimizing Scikit-learn with Numba#

We are going to work through the different pieces needed to optimize a Scikit-learn pipeline using Numba and egglog.

from __future__ import annotations

import sklearn
from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Tell sklearn to treat arrays as following array API
sklearn.set_config(array_api_dispatch=True)

X_np, y_np = make_classification(random_state=0, n_samples=1000000)


# Assumption: I want to optimize calling this many times on data similar to that above
def run_lda(x, y):
    lda = LinearDiscriminantAnalysis()
    return lda.fit(x, y).transform(x)

We can do this using egglog to generate Python code and Numba to JIT compile it to LLVM, resulting in a speedup:

# The first thing we need to do is create our symbolic arrays and get back our symbolic output

from egglog.exp.array_api import *

X_arr = NDArray.var("X")
assume_dtype(X_arr, X_np.dtype)
assume_shape(X_arr, X_np.shape)
assume_isfinite(X_arr)

y_arr = NDArray.var("y")
assume_dtype(y_arr, y_np.dtype)
assume_shape(y_arr, y_np.shape)
assume_value_one_of(y_arr, tuple(map(int, np.unique(y_np))))  # type: ignore[arg-type]

with EGraph():
    res = run_lda(X_arr, y_arr)
res
_NDArray_1 = NDArray.var("X")
assume_dtype(_NDArray_1, DType.float64)
assume_shape(_NDArray_1, TupleInt(Int(1000000)) + TupleInt(Int(20)))
assume_isfinite(_NDArray_1)
_NDArray_2 = NDArray.var("y")
assume_dtype(_NDArray_2, DType.int64)
assume_shape(_NDArray_2, TupleInt(Int(1000000)))
assume_value_one_of(_NDArray_2, TupleValue(Value.int(Int(0))) + TupleValue(Value.int(Int(1))))
_NDArray_3 = asarray(reshape(asarray(_NDArray_2), TupleInt(Int(-1))))
_NDArray_4 = astype(unique_counts(_NDArray_3)[Int(1)], asarray(_NDArray_1).dtype) / NDArray.scalar(Value.float(Float(1000000.0)))
_NDArray_5 = zeros(
    TupleInt(unique_inverse(_NDArray_3)[Int(0)].shape[Int(0)]) + TupleInt(asarray(_NDArray_1).shape[Int(1)]),
    OptionalDType.some(asarray(_NDArray_1).dtype),
    OptionalDevice.some(asarray(_NDArray_1).device),
)
_MultiAxisIndexKey_1 = MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice()))
_IndexKey_1 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(0))) + _MultiAxisIndexKey_1)
_OptionalIntOrTuple_1 = OptionalIntOrTuple.some(IntOrTuple.int(Int(0)))
_NDArray_5[_IndexKey_1] = mean(asarray(_NDArray_1)[ndarray_index(unique_inverse(_NDArray_3)[Int(1)] == NDArray.scalar(Value.int(Int(0))))], _OptionalIntOrTuple_1)
_IndexKey_2 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(1))) + _MultiAxisIndexKey_1)
_NDArray_5[_IndexKey_2] = mean(asarray(_NDArray_1)[ndarray_index(unique_inverse(_NDArray_3)[Int(1)] == NDArray.scalar(Value.int(Int(1))))], _OptionalIntOrTuple_1)
_NDArray_6 = unique_values(concat(TupleNDArray(unique_values(asarray(_NDArray_3)))))
_NDArray_7 = concat(
    TupleNDArray(asarray(_NDArray_1)[ndarray_index(_NDArray_3 == _NDArray_6[IndexKey.int(Int(0))])] - _NDArray_5[_IndexKey_1])
    + TupleNDArray(asarray(_NDArray_1)[ndarray_index(_NDArray_3 == _NDArray_6[IndexKey.int(Int(1))])] - _NDArray_5[_IndexKey_2]),
    OptionalInt.some(Int(0)),
)
_NDArray_8 = std(_NDArray_7, _OptionalIntOrTuple_1)
_NDArray_8[ndarray_index(std(_NDArray_7, _OptionalIntOrTuple_1) == NDArray.scalar(Value.int(Int(0))))] = NDArray.scalar(Value.float(Float(1.0)))
_TupleNDArray_1 = svd(
    sqrt(asarray(NDArray.scalar(Value.float(Float(1.0) / Float.from_int(asarray(_NDArray_1).shape[Int(0)] - _NDArray_6.shape[Int(0)]))))) * (_NDArray_7 / _NDArray_8), FALSE
)
_Slice_1 = Slice(OptionalInt.none, OptionalInt.some(sum(astype(_TupleNDArray_1[Int(1)] > NDArray.scalar(Value.float(Float(0.0001))), DType.int32)).to_value().to_int))
_NDArray_9 = (_TupleNDArray_1[Int(2)][IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(_Slice_1)) + _MultiAxisIndexKey_1)] / _NDArray_8).T / _TupleNDArray_1[
    Int(1)
][IndexKey.slice(_Slice_1)]
_TupleNDArray_2 = svd(
    (
        sqrt(
            (NDArray.scalar(Value.int(asarray(_NDArray_1).shape[Int(0)])) * _NDArray_4)
            * NDArray.scalar(Value.float(Float(1.0) / Float.from_int(_NDArray_6.shape[Int(0)] - Int(1))))
        )
        * (_NDArray_5 - (_NDArray_4 @ _NDArray_5)).T
    ).T
    @ _NDArray_9,
    FALSE,
)
(
    (asarray(_NDArray_1) - (_NDArray_4 @ _NDArray_5))
    @ (
        _NDArray_9
        @ _TupleNDArray_2[Int(2)].T[
            IndexKey.multi_axis(
                _MultiAxisIndexKey_1
                + MultiAxisIndexKey(
                    MultiAxisIndexKeyItem.slice(
                        Slice(
                            OptionalInt.none,
                            OptionalInt.some(
                                sum(astype(_TupleNDArray_2[Int(1)] > (NDArray.scalar(Value.float(Float(0.0001))) * _TupleNDArray_2[Int(1)][IndexKey.int(Int(0))]), DType.int32))
                                .to_value()
                                .to_int
                            ),
                        )
                    )
                )
            )
        ]
    )
)[IndexKey.multi_axis(_MultiAxisIndexKey_1 + MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice(OptionalInt.none, OptionalInt.some(_NDArray_6.shape[Int(0)] - Int(1))))))]

In order to run this, scikit-learn treated these objects as “array like”, meaning they conformed to the Array API.

Conversions: From Python to egglog#

Use conversions if you want your egglog API to be called with existing Python objects, without manually upcasting them

We will see this in our example:

class LinearDiscriminantAnalysis:
    ...
    def fit(self, X, y):
        ...
        _, cnts = xp.unique_counts(y)  # non-negative ints
        self.priors_ = xp.astype(cnts, X.dtype) / float(y.shape[0])

Ends up resulting in this expression:

astype(unique_counts(_NDArray_3)[Int(1)], asarray(_NDArray_1).dtype) / NDArray.scalar(Value.float(Float(1000000.0)))

How?

We have exposed a global conversion logic, where if you pass an arg to egglog and it isn’t the correct type, it will try to upcast the arg to the required egglog type.

There is a graph of all conversions and it will find the shortest path from the input to the desired type and automatically upcast to that.

Example#

For example, in indexing, if we do a slice (i.e. 1:10:2), we convert this to our custom egglog Slice expressions:

class Slice(Expr):
    def __init__(
        self,
        start: OptionalInt = OptionalInt.none,
        stop: OptionalInt = OptionalInt.none,
        step: OptionalInt = OptionalInt.none,
    ) -> None: ...


converter(
    slice,
    Slice,
    lambda x: Slice(
        convert(x.start, OptionalInt),
        convert(x.stop, OptionalInt),
        convert(x.step, OptionalInt),
    ),
)
class A(Expr):
    def __init__(self) -> None: ...
    def __getitem__(self, s: Slice) -> Int: ...
A()[:1:2]  # Pytohn desugars this to A()[slice(None, 1, 2)]
A()[Slice(OptionalInt.none, OptionalInt.some(Int(1)), OptionalInt.some(Int(2)))]

Preserved Methods#

If you need your egglog objects to interact with Python control flow, you can use preserved methods to stop, compile, and return an eager result to Python

In the fit function in sklearn, there are complicated analysis that must be done eagerly, like this one, which depends on knowing the priors, which are based on the counts of the classes in the training data, which we provided:

class LinearDiscriminantAnalysis:
    def fit(self, x, y):
        ...
        if xp.abs(xp.sum(self.priors_) - 1.0) > 1e-5:
            warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning)
            self.priors_ = self.priors_ / self.priors_.sum()

That is why we have to provide the metadata about the arrays, so we can reduce this expression to a boolean, using some interval analysis:

_NDArray_1 = NDArray.var("y")
assume_dtype(_NDArray_1, DType.int64)
assume_shape(_NDArray_1, TupleInt(Int(1000000)))
assume_value_one_of(_NDArray_1, TupleValue(Value.int(Int(0))) + TupleValue(Value.int(Int(1))))
_NDArray_2 = NDArray.var("X")
assume_dtype(_NDArray_2, DType.float64)
assume_shape(_NDArray_2, TupleInt(Int(1000000)) + TupleInt(Int(20)))
assume_isfinite(_NDArray_2)
(
    abs(
        sum(astype(unique_counts(asarray(reshape(asarray(_NDArray_1), TupleInt(Int(-1)))))[Int(1)], asarray(_NDArray_2).dtype) / NDArray.scalar(Value.float(Float(1000000.0))))
        - NDArray.scalar(Value.float(Float(1.0)))
    )
    > NDArray.scalar(Value.float(Float(1e-05)))
).to_value().to_bool.bool

But how does it move through the control flow if the expression is lazy? We can implement “preserved methods” which can evaluate an expression eagerly by adding it to the EGraph and evalutating it:

Example#

class Boolean(Expr):
    # Can be constructed from and convert to a primitive egglog bool:
    def __init__(self, b: BoolLike) -> None: ...
    @property
    def bool(self) -> Bool: ...

    # support boolean ops
    def __and__(self, other: Boolean) -> Boolean: ...

    # Can be treated like a Python bool
    @method(preserve=True)
    def __bool__(self) -> bool:
        egraph = EGraph()
        egraph.register(self)
        egraph.run(bool_rewrites.saturate())
        return egraph.eval(self.bool)


x = var("x", Boolean)
y = var("y", Bool)
bool_rewrites = ruleset(
    rule(eq(x).to(Boolean(y))).then(set_(x.bool).to(y)),
    rewrite(Boolean(True) & Boolean(True)).to(Boolean(True)),
)

expr = Boolean(True) & Boolean(True)
expr
Boolean(True) & Boolean(True)
if expr:
    print("yep it's true")
yep it's true

Mutations#

Mark a function or method as mutating the first arg, to translate it to pure function, but which acts imperative.

Another pattern that comes up a lot in Python is methods that mutate their arguments. But egglog is a pure functional language, so how do we support that?

Well we can convert functions that mutate an arg into one that returns a modified value of that argument. That way, we can keep using it with existing imperative methods and things work as they should.

For example, arrays support __setitem__, and this is used by scikit-learn:

class LinearDiscriminantAnalysis:
    ...
    def _solve_svd(self, X, y):
        ...
        # 1) within (univariate) scaling by with classes std-dev
        std = xp.std(Xc, axis=0)
        # avoid division by zero in normalization
        std[std == 0] = 1.0

This will be translated to the following expressions, where there will be a new array created in the graph for the modified version:

_NDArray_8 = std(_NDArray_7, _OptionalIntOrTuple_1)
_NDArray_8[ndarray_index(std(_NDArray_7, _OptionalIntOrTuple_1) == NDArray.scalar(Value.int(Int(0))))] = NDArray.scalar(Value.float(Float(1.0)))

Example#

We can see a simpler example of this below:

class ListOfInts(Expr):
    def __init__(self) -> None: ...
    def __getitem__(self, i: i64Like) -> Int: ...

    def __setitem__(self, i: i64Like, v: Int) -> None: ...


xs = ListOfInts()
xs[0] = Int(1)

egraph = EGraph()
egraph.register(xs[0])
egraph.display()
../_images/f6d42df9b6fd59c0fc987e9572961150ece01dcc9be19632bf02d4fbe679ab69.svg

Subsumption#

mark a rewrite as subsumed to replace a smaller expression with a larger one

Now that we have a program, what do we do with it? Well first, we can optimize it, running rewrites, including those to translate from Numba forms to others.

We have added “subsumption” to egglog, to support directional rewrites, so that the left hand side is not extractable and not matchable. This is handy when we want to extract a value with more expressions or a higher cost, in a particular instance:

@array_api_numba_ruleset.register
def _mean(y: NDArray, x: NDArray, i: Int):
    axis = OptionalIntOrTuple.some(IntOrTuple.int(i))
    res = sum(x, axis) / NDArray.scalar(Value.int(x.shape[i]))

    yield rewrite(mean(x, axis, FALSE), subsume=True).to(res)
    yield rewrite(mean(x, axis, TRUE), subsume=True).to(expand_dims(res, i))

We can optimize this with the numba rules and we can see this rule take place in the _NDArray_9 line:

from egglog.exp.array_api_numba import array_api_numba_schedule

simplified_res = EGraph().simplify(res, array_api_numba_schedule)
simplified_res
_NDArray_1 = NDArray.var("X")
assume_dtype(_NDArray_1, DType.float64)
assume_shape(_NDArray_1, TupleInt(Int(1000000)) + TupleInt(Int(20)))
assume_isfinite(_NDArray_1)
_NDArray_2 = NDArray.var("y")
assume_dtype(_NDArray_2, DType.int64)
assume_shape(_NDArray_2, TupleInt(Int(1000000)))
assume_value_one_of(_NDArray_2, TupleValue(Value.int(Int(0))) + TupleValue(Value.int(Int(1))))
_NDArray_3 = astype(
    NDArray.vector(TupleValue(sum(_NDArray_2 == NDArray.scalar(Value.int(Int(0)))).to_value()) + TupleValue(sum(_NDArray_2 == NDArray.scalar(Value.int(Int(1)))).to_value())),
    DType.float64,
) / NDArray.scalar(Value.float(Float.rational(Rational(1000000, 1))))
_NDArray_4 = zeros(TupleInt(Int(2)) + TupleInt(Int(20)), OptionalDType.some(DType.float64), OptionalDevice.some(_NDArray_1.device))
_MultiAxisIndexKey_1 = MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice()))
_IndexKey_1 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(0))) + _MultiAxisIndexKey_1)
_NDArray_5 = _NDArray_1[ndarray_index(_NDArray_2 == NDArray.scalar(Value.int(Int(0))))]
_OptionalIntOrTuple_1 = OptionalIntOrTuple.some(IntOrTuple.int(Int(0)))
_NDArray_4[_IndexKey_1] = sum(_NDArray_5, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_5.shape[Int(0)]))
_IndexKey_2 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(1))) + _MultiAxisIndexKey_1)
_NDArray_6 = _NDArray_1[ndarray_index(_NDArray_2 == NDArray.scalar(Value.int(Int(1))))]
_NDArray_4[_IndexKey_2] = sum(_NDArray_6, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_6.shape[Int(0)]))
_NDArray_7 = concat(TupleNDArray(_NDArray_5 - _NDArray_4[_IndexKey_1]) + TupleNDArray(_NDArray_6 - _NDArray_4[_IndexKey_2]), OptionalInt.some(Int(0)))
_NDArray_8 = square(_NDArray_7 - expand_dims(sum(_NDArray_7, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_7.shape[Int(0)]))))
_NDArray_9 = sqrt(sum(_NDArray_8, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_8.shape[Int(0)])))
_NDArray_10 = copy(_NDArray_9)
_NDArray_10[ndarray_index(_NDArray_9 == NDArray.scalar(Value.int(Int(0))))] = NDArray.scalar(Value.float(Float.rational(Rational(1, 1))))
_TupleNDArray_1 = svd(sqrt(NDArray.scalar(Value.float(Float.rational(Rational(1, 999998))))) * (_NDArray_7 / _NDArray_10), FALSE)
_Slice_1 = Slice(OptionalInt.none, OptionalInt.some(sum(astype(_TupleNDArray_1[Int(1)] > NDArray.scalar(Value.float(Float(0.0001))), DType.int32)).to_value().to_int))
_NDArray_11 = (_TupleNDArray_1[Int(2)][IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(_Slice_1)) + _MultiAxisIndexKey_1)] / _NDArray_10).T / _TupleNDArray_1[
    Int(1)
][IndexKey.slice(_Slice_1)]
_TupleNDArray_2 = svd(
    (sqrt((NDArray.scalar(Value.int(Int(1000000))) * _NDArray_3) * NDArray.scalar(Value.float(Float.rational(Rational(1, 1))))) * (_NDArray_4 - (_NDArray_3 @ _NDArray_4)).T).T
    @ _NDArray_11,
    FALSE,
)
(
    (_NDArray_1 - (_NDArray_3 @ _NDArray_4))
    @ (
        _NDArray_11
        @ _TupleNDArray_2[Int(2)].T[
            IndexKey.multi_axis(
                _MultiAxisIndexKey_1
                + MultiAxisIndexKey(
                    MultiAxisIndexKeyItem.slice(
                        Slice(
                            OptionalInt.none,
                            OptionalInt.some(
                                sum(astype(_TupleNDArray_2[Int(1)] > (NDArray.scalar(Value.float(Float(0.0001))) * _TupleNDArray_2[Int(1)][IndexKey.int(Int(0))]), DType.int32))
                                .to_value()
                                .to_int
                            ),
                        )
                    )
                )
            )
        ]
    )
)[IndexKey.multi_axis(_MultiAxisIndexKey_1 + MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice(OptionalInt.none, OptionalInt.some(Int(1))))))]

Program Gen#

Generate an imperative program from your e-graph with replacement rules that walk the graph in a fixed order

Now that we have a program, what do we do with it?

Well we showed how we can use eager evaluation to get a result, but what if we don’t want to do the computation in egglog, but instead export a program so we can execute that back in Python or in this case feed it to Python?

Well in this case we have designed a Program object which we can use to convert a funtional egglog expression back to imperative Python code:

from egglog.exp.array_api_program_gen import *

egraph = EGraph()
fn_program = egraph.let(
    "fn_program",
    ndarray_function_two(simplified_res, NDArray.var("X"), NDArray.var("y")),
)
egraph.run(array_api_program_gen_schedule)
fn = egraph.eval(fn_program.py_object)

fn
<function __fn(X, y)>
import inspect

print(inspect.getsource(fn))
def __fn(X, y):
    assert X.dtype == np.dtype(np.float64)
    assert X.shape == (1000000, 20,)
    assert np.all(np.isfinite(X))
    assert y.dtype == np.dtype(np.int64)
    assert y.shape == (1000000,)
    assert set(np.unique(y)) == set((0, 1,))
    _0 = y == np.array(0)
    _1 = np.sum(_0)
    _2 = y == np.array(1)
    _3 = np.sum(_2)
    _4 = np.array((_1, _3,)).astype(np.dtype(np.float64))
    _5 = _4 / np.array(float(1000000))
    _6 = np.zeros((2, 20,), dtype=np.dtype(np.float64))
    _7 = np.sum(X[_0], axis=0)
    _8 = _7 / np.array(X[_0].shape[0])
    _6[0, :] = _8
    _9 = np.sum(X[_2], axis=0)
    _10 = _9 / np.array(X[_2].shape[0])
    _6[1, :] = _10
    _11 = _5 @ _6
    _12 = X - _11
    _13 = np.sqrt(np.array(float(1 / 999998)))
    _14 = X[_0] - _6[0, :]
    _15 = X[_2] - _6[1, :]
    _16 = np.concatenate((_14, _15,), axis=0)
    _17 = np.sum(_16, axis=0)
    _18 = _17 / np.array(_16.shape[0])
    _19 = np.expand_dims(_18, 0)
    _20 = _16 - _19
    _21 = np.square(_20)
    _22 = np.sum(_21, axis=0)
    _23 = _22 / np.array(_21.shape[0])
    _24 = np.sqrt(_23)
    _25 = _24 == np.array(0)
    _24[_25] = np.array(float(1))
    _26 = _16 / _24
    _27 = _13 * _26
    _28 = np.linalg.svd(_27, full_matrices=False)
    _29 = _28[1] > np.array(0.0001)
    _30 = _29.astype(np.dtype(np.int32))
    _31 = np.sum(_30)
    _32 = _28[2][:_31, :] / _24
    _33 = _32.T / _28[1][:_31]
    _34 = np.array(1000000) * _5
    _35 = _34 * np.array(float(1))
    _36 = np.sqrt(_35)
    _37 = _6 - _11
    _38 = _36 * _37.T
    _39 = _38.T @ _33
    _40 = np.linalg.svd(_39, full_matrices=False)
    _41 = np.array(0.0001) * _40[1][0]
    _42 = _40[1] > _41
    _43 = _42.astype(np.dtype(np.int32))
    _44 = np.sum(_43)
    _45 = _33 @ _40[2].T[:, :_44]
    _46 = _12 @ _45
    return _46[:, :1]

From there we can complete our work, by optimizing with numba and we can call with our original values:

from numba import njit

njit(fn)(X_np, y_np)
/tmp/egglog-826e83db-ad16-4a78-84a8-5b3f5644b50a.py:56: NumbaPerformanceWarning: '@' is faster on contiguous arrays, called on (Array(float64, 2, 'C', False, aligned=True), Array(float64, 2, 'A', False, aligned=True))
  _45 = _33 @ _40[2].T[:, :_44]
array([[ 0.64233002],
       [ 0.63661245],
       [-1.603293  ],
       ...,
       [-1.1506433 ],
       [ 0.71687176],
       [-1.51119579]])

These program rewrites work by first translating the code into an intermediate IR of just program assignments and statements, and then turning this into source. It does this by walking the graph first top to bottom, recording the first parent that saw every child. Then it goes from bottom to top, building up a larger set of statements as well as an expression representing each node. If a node has already been emitted somewhere else, we record that, and we always wait for all the children to complete before moving forward. That way, we can enforce some ordering and we know that everything will be emitted only once.

Example#

Here is a small example, we might want to compile, let’s say we want a function like this:

def __fn(x, y)
    x[1] = 10
    z = x + x
    return sum(z) + y

We can build up a functional program for this and then compile it to Python source:

from egglog.exp.program_gen import *

x = Program("x", is_identifier=True)
y = Program("y", is_identifier=True)
# To reference x, we need to first emit the statement
x_modified = Program("x").statement(x + "[1] = 10")

z = (x_modified + " + " + x_modified).assign()
res = Program("sum(") + z + ") + " + y
fn = res.function_two(x, y)

egraph = EGraph()
egraph.register(fn.compile())
egraph.run(program_gen_ruleset.saturate())
print(egraph.eval(fn.statements))
def __fn(x, y):
    x[1] = 10
    _0 = x + x
    return sum(_0) + y

This happens, by first going top to bottom with the compile, which will put a total ordering on all nodes, by defining one, and only one, parent expression for each expression.

Then, from the bottom up, for each node we compute an expression string and a list of statements string.

egraph
%3 outer_cluster_5 cluster_5 outer_cluster_6 cluster_6 outer_cluster_15 cluster_15 outer_cluster_3 cluster_3 outer_cluster_2 cluster_2 outer_cluster_10 cluster_10 outer_cluster_12 cluster_12 outer_cluster_16 cluster_16 outer_cluster_9 cluster_9 outer_cluster_8 cluster_8 outer_cluster_4 cluster_4 outer_cluster_7 cluster_7 outer_cluster_0 cluster_0 outer_cluster_1 cluster_1 outer_cluster_11 cluster_11 outer_cluster_13 cluster_13 outer_cluster_14 cluster_14 outer_cluster_Program_expr-11743562013128004906-value cluster_Program_expr-11743562013128004906-value outer_cluster_Program_statements-10912160959110460649-value cluster_Program_statements-10912160959110460649-value outer_cluster_Program_expr-15121139857639374588-value cluster_Program_expr-15121139857639374588-value outer_cluster_Program_expr-0-value cluster_Program_expr-0-value outer_cluster_Program_expr-1714775736476281168-value cluster_Program_expr-1714775736476281168-value outer_cluster_Program_expr-17615343019692007359-value cluster_Program_expr-17615343019692007359-value outer_cluster_Program_statements-11743562013128004906-value cluster_Program_statements-11743562013128004906-value outer_cluster_Program_expr-5040379952546458196-value cluster_Program_expr-5040379952546458196-value outer_cluster_String-718359051288455306 cluster_String-718359051288455306 outer_cluster_Program_statements-8417957797057827878-value cluster_Program_statements-8417957797057827878-value outer_cluster_Program_expr-10912160959110460649-value cluster_Program_expr-10912160959110460649-value outer_cluster_Program_statements-5871781006564002453-value cluster_Program_statements-5871781006564002453-value outer_cluster_String-6617174651184748400 cluster_String-6617174651184748400 outer_cluster_Program_expr-3377577844511369682-value cluster_Program_expr-3377577844511369682-value outer_cluster_Program_expr-5871781006564002453-value cluster_Program_expr-5871781006564002453-value outer_cluster_Program_statements-17615343019692007359-value cluster_Program_statements-17615343019692007359-value outer_cluster_Program_expr-2546176790493825425-value cluster_Program_expr-2546176790493825425-value outer_cluster_Program_expr-14289738803621830331-value cluster_Program_expr-14289738803621830331-value outer_cluster_Program_expr-15952540911656918845-value cluster_Program_expr-15952540911656918845-value outer_cluster_Program_expr-4208978898528913939-value cluster_Program_expr-4208978898528913939-value outer_cluster_String-313937328870544552 cluster_String-313937328870544552 outer_cluster_Program_statements-16783941965674463102-value cluster_Program_statements-16783941965674463102-value outer_cluster_Program_statements-4208978898528913939-value cluster_Program_statements-4208978898528913939-value outer_cluster_Program_expr-16783941965674463102-value cluster_Program_expr-16783941965674463102-value outer_cluster_Program_statements-0-value cluster_Program_statements-0-value outer_cluster_Program_statements-9249358851075372135-value cluster_Program_statements-9249358851075372135-value outer_cluster_String-17954539539390402795 cluster_String-17954539539390402795 outer_cluster_Program_statements-15121139857639374588-value cluster_Program_statements-15121139857639374588-value outer_cluster_Program_statements-1714775736476281168-value cluster_Program_statements-1714775736476281168-value outer_cluster_Program_statements-14289738803621830331-value cluster_Program_statements-14289738803621830331-value outer_cluster_String-10787254494509254142 cluster_String-10787254494509254142 outer_cluster_Program_statements-15952540911656918845-value cluster_Program_statements-15952540911656918845-value outer_cluster_String-5935442095835774456 cluster_String-5935442095835774456 outer_cluster_Program_statements-10080759905092916392-value cluster_Program_statements-10080759905092916392-value outer_cluster_Program_expr-10080759905092916392-value cluster_Program_expr-10080759905092916392-value outer_cluster_Program_statements-5040379952546458196-value cluster_Program_statements-5040379952546458196-value outer_cluster_Program_statements-3377577844511369682-value cluster_Program_statements-3377577844511369682-value outer_cluster_String-14586553597486223834 cluster_String-14586553597486223834 outer_cluster_Program_statements-2546176790493825425-value cluster_Program_statements-2546176790493825425-value outer_cluster_Program_expr-9249358851075372135-value cluster_Program_expr-9249358851075372135-value outer_cluster_Program_expr-8417957797057827878-value cluster_Program_expr-8417957797057827878-value outer_cluster_Program_compile-1351883367118893594-value cluster_Program_compile-1351883367118893594-value outer_cluster_Program_compile-395596399104602408-value cluster_Program_compile-395596399104602408-value outer_cluster_Program_compile-2868860904042873558-value cluster_Program_compile-2868860904042873558-value outer_cluster_Program_compile-0-value cluster_Program_compile-0-value outer_cluster_Program_compile-5145996267849227305-value cluster_Program_compile-5145996267849227305-value outer_cluster_Program_compile-10457086705503540980-value cluster_Program_compile-10457086705503540980-value outer_cluster_Program_compile-5706686836758916083-value cluster_Program_compile-5706686836758916083-value outer_cluster_Program_compile-12930351210441812130-value cluster_Program_compile-12930351210441812130-value outer_cluster_Program_compile-9896396136593852202-value cluster_Program_compile-9896396136593852202-value outer_cluster_Program_compile-8179951341697187233-value cluster_Program_compile-8179951341697187233-value outer_cluster_Program_compile-11413373673517832166-value cluster_Program_compile-11413373673517832166-value outer_cluster_Program_compile-12369660641532123352-value cluster_Program_compile-12369660641532123352-value outer_cluster_Program_compile-1912573936028582372-value cluster_Program_compile-1912573936028582372-value outer_cluster_Program_compile-6662973804773207269-value cluster_Program_compile-6662973804773207269-value outer_cluster_Program_compile-956286968014291186-value cluster_Program_compile-956286968014291186-value outer_cluster_Program_compile-17845845248991523397-value cluster_Program_compile-17845845248991523397-value outer_cluster_Program_compile-7619260772787498455-value cluster_Program_compile-7619260772787498455-value outer_cluster_Program_is_identifer-3377577844511369682-value cluster_Program_is_identifer-3377577844511369682-value outer_cluster_Program_is_identifer-8417957797057827878-value cluster_Program_is_identifer-8417957797057827878-value outer_cluster_Program_is_identifer-4208978898528913939-value cluster_Program_is_identifer-4208978898528913939-value outer_cluster_Program_is_identifer-9249358851075372135-value cluster_Program_is_identifer-9249358851075372135-value outer_cluster_Program_is_identifer-14289738803621830331-value cluster_Program_is_identifer-14289738803621830331-value outer_cluster_Program_is_identifer-1714775736476281168-value cluster_Program_is_identifer-1714775736476281168-value outer_cluster_Program_is_identifer-15121139857639374588-value cluster_Program_is_identifer-15121139857639374588-value outer_cluster_Program_is_identifer-16783941965674463102-value cluster_Program_is_identifer-16783941965674463102-value outer_cluster_Program_is_identifer-15952540911656918845-value cluster_Program_is_identifer-15952540911656918845-value outer_cluster_Program_is_identifer-0-value cluster_Program_is_identifer-0-value outer_cluster_bool-0 cluster_bool-0 outer_cluster_Program_is_identifer-2546176790493825425-value cluster_Program_is_identifer-2546176790493825425-value outer_cluster_Program_is_identifer-10912160959110460649-value cluster_Program_is_identifer-10912160959110460649-value outer_cluster_Program_is_identifer-10080759905092916392-value cluster_Program_is_identifer-10080759905092916392-value outer_cluster_Program_is_identifer-5040379952546458196-value cluster_Program_is_identifer-5040379952546458196-value outer_cluster_Program_is_identifer-11743562013128004906-value cluster_Program_is_identifer-11743562013128004906-value outer_cluster_Program_is_identifer-5871781006564002453-value cluster_Program_is_identifer-5871781006564002453-value outer_cluster_bool-5871781006564002453 cluster_bool-5871781006564002453 outer_cluster_Program_is_identifer-17615343019692007359-value cluster_Program_is_identifer-17615343019692007359-value outer_cluster_i64-5871781006564002453 cluster_i64-5871781006564002453 outer_cluster_Program_next_sym-5040379952546458196-value cluster_Program_next_sym-5040379952546458196-value outer_cluster_Program_next_sym-16783941965674463102-value cluster_Program_next_sym-16783941965674463102-value outer_cluster_Program_next_sym-8417957797057827878-value cluster_Program_next_sym-8417957797057827878-value outer_cluster_i64-0 cluster_i64-0 outer_cluster_Program_next_sym-10912160959110460649-value cluster_Program_next_sym-10912160959110460649-value outer_cluster_Program_next_sym-15121139857639374588-value cluster_Program_next_sym-15121139857639374588-value outer_cluster_Program_next_sym-10080759905092916392-value cluster_Program_next_sym-10080759905092916392-value outer_cluster_Program_next_sym-3377577844511369682-value cluster_Program_next_sym-3377577844511369682-value outer_cluster_Program_next_sym-2546176790493825425-value cluster_Program_next_sym-2546176790493825425-value outer_cluster_Program_next_sym-0-value cluster_Program_next_sym-0-value outer_cluster_Program_next_sym-4208978898528913939-value cluster_Program_next_sym-4208978898528913939-value outer_cluster_Program_next_sym-5871781006564002453-value cluster_Program_next_sym-5871781006564002453-value outer_cluster_Program_next_sym-11743562013128004906-value cluster_Program_next_sym-11743562013128004906-value outer_cluster_Program_next_sym-14289738803621830331-value cluster_Program_next_sym-14289738803621830331-value outer_cluster_Program_next_sym-1714775736476281168-value cluster_Program_next_sym-1714775736476281168-value outer_cluster_Program_next_sym-17615343019692007359-value cluster_Program_next_sym-17615343019692007359-value outer_cluster_Program_next_sym-9249358851075372135-value cluster_Program_next_sym-9249358851075372135-value outer_cluster_Program_next_sym-15952540911656918845-value cluster_Program_next_sym-15952540911656918845-value Program_statement-5996666920560749382:s->Program___init__-3134044275977402624 Program_statement-5996666920560749382:s->Program___add__-1081172882011038115 Program___init__-3134044275977402624:s->bool-0 Program___init__-3134044275977402624:s->String-313937328870544552 Program___add__-1081172882011038115:s->Program___init__-9005825282541405077 Program___add__-1081172882011038115:s->Program___init__-11609550517566447851 Program___add__-2671062704490572354:s->Program___init__-3134044275977402624 Program___add__-2671062704490572354:s->Program_parent-5040379952546458196 Program_parent-5040379952546458196:s->Program___add__-1081172882011038115 Program_parent-5871781006564002453:s->Program___init__-3134044275977402624 Program_parent-1714775736476281168:s->Program_parent-5040379952546458196 Program___init__-12238976891847021781:s->String-718359051288455306 Program___init__-12238976891847021781:s->bool-0 Program_function_two-11239823400800561233:s->Program_parent-15121139857639374588 Program_function_two-11239823400800561233:s->Program___init__-9005825282541405077 Program_function_two-11239823400800561233:s->Program___init__-16291883994304651111 Program_function_two-11239823400800561233:s->String-10787254494509254142 Program_parent-15121139857639374588:s->Program_parent-3377577844511369682 Program___init__-9005825282541405077:s->String-313937328870544552 Program___init__-9005825282541405077:s->bool-5871781006564002453 Program___init__-16291883994304651111:s->bool-5871781006564002453 Program___init__-16291883994304651111:s->String-5935442095835774456 Program_parent-11743562013128004906:s->Program___init__-9005825282541405077 Program_parent-2546176790493825425:s->Program___init__-16291883994304651111 Program_parent-8417957797057827878:s->Program_parent-15121139857639374588 Program___init__-11609550517566447851:s->bool-0 Program___init__-11609550517566447851:s->String-17954539539390402795 Program___add__-15952540911656918845:s->Program___init__-9582703652367694439 Program___add__-15952540911656918845:s->Program_parent-10080759905092916392 Program___init__-9582703652367694439:s->bool-0 Program___init__-9582703652367694439:s->String-6617174651184748400 Program_parent-10080759905092916392:s->Program_parent-4208978898528913939 Program_parent-0:s->Program___init__-9582703652367694439 Program_parent-15952540911656918845:s->Program_parent-10080759905092916392 Program___add__-2651793105796594534:s->Program_parent-0 Program___add__-2651793105796594534:s->Program___init__-17280709677866812621 Program___init__-17280709677866812621:s->bool-0 Program___init__-17280709677866812621:s->String-14586553597486223834 Program_parent-3377577844511369682:s->Program_parent-0 Program_parent-9249358851075372135:s->Program___init__-17280709677866812621 Program_expr_to_statement-5040379952546458196:s->Program___add__-1081172882011038115 Program_assign-10080759905092916392:s->Program___add__-13241269951358007050 Program___add__-13241269951358007050:s->Program___add__-2671062704490572354 Program___add__-13241269951358007050:s->Program_parent-10912160959110460649 Program_parent-4208978898528913939:s->Program_parent-10912160959110460649 Program_parent-10912160959110460649:s->Program___add__-2671062704490572354 Program_parent-17615343019692007359:s->Program___init__-11609550517566447851 Program___add__-13095445380246898500:s->Program_statement-5996666920560749382 Program___add__-13095445380246898500:s->Program___init__-12238976891847021781 Program_parent-16783941965674463102:s->Program___init__-12238976891847021781 Program___add__-13761752264459356387:s->Program___init__-16291883994304651111 Program___add__-13761752264459356387:s->Program_parent-3377577844511369682 Program_expr-11743562013128004906:s->Program___init__-9005825282541405077 Program_statements-10912160959110460649:s->Program_parent-1714775736476281168 Program_expr-15121139857639374588:s->Program_parent-3377577844511369682 Program_expr-0:s->Program___init__-9582703652367694439 Program_expr-1714775736476281168:s->Program_parent-5040379952546458196 Program_expr-17615343019692007359:s->Program___init__-11609550517566447851 Program_statements-11743562013128004906:s->Program___init__-9005825282541405077 Program_expr-5040379952546458196:s->Program_parent-17615343019692007359 Program_statements-8417957797057827878:s->Program_parent-15121139857639374588 Program_expr-10912160959110460649:s->Program___add__-2671062704490572354 Program_statements-5871781006564002453:s->Program___init__-3134044275977402624 Program_expr-3377577844511369682:s->Program_parent-0 Program_expr-5871781006564002453:s->Program___init__-3134044275977402624 Program_statements-17615343019692007359:s->Program___init__-11609550517566447851 Program_expr-2546176790493825425:s->Program___init__-16291883994304651111 Program_expr-14289738803621830331:s->Program_parent-11743562013128004906 Program_expr-15952540911656918845:s->Program_parent-10080759905092916392 Program_expr-4208978898528913939:s->Program_parent-10912160959110460649 Program_statements-16783941965674463102:s->Program___init__-12238976891847021781 Program_statements-4208978898528913939:s->Program_parent-16783941965674463102 Program_expr-16783941965674463102:s->Program___init__-12238976891847021781 Program_statements-0:s->Program___init__-9582703652367694439 Program_statements-9249358851075372135:s->Program___init__-17280709677866812621 Program_statements-15121139857639374588:s->Program_parent-9249358851075372135 Program_statements-1714775736476281168:s->Program_expr_to_statement-5040379952546458196 Program_statements-14289738803621830331:s->Program_function_two-11239823400800561233 Program_statements-15952540911656918845:s->Program_parent-10080759905092916392 Program_statements-10080759905092916392:s->Program_parent-4208978898528913939 Program_expr-10080759905092916392:s->Program_parent-4208978898528913939 Program_statements-5040379952546458196:s->Program_parent-17615343019692007359 Program_statements-3377577844511369682:s->Program_parent-15952540911656918845 Program_statements-2546176790493825425:s->Program___init__-16291883994304651111 Program_expr-9249358851075372135:s->Program___init__-17280709677866812621 Program_expr-8417957797057827878:s->Program_parent-15121139857639374588 Program_compile-1351883367118893594:s->Program_statement-5996666920560749382 Program_compile-1351883367118893594:s->i64-0 Program_compile-395596399104602408:s->Program___add__-1081172882011038115 Program_compile-395596399104602408:s->i64-0 Program_compile-2868860904042873558:s->Program___init__-11609550517566447851 Program_compile-2868860904042873558:s->i64-0 Program_compile-0:s->Program___init__-9582703652367694439 Program_compile-0:s->i64-0 Program_compile-5145996267849227305:s->Program_parent-0 Program_compile-5145996267849227305:s->i64-0 Program_compile-10457086705503540980:s->Program___init__-16291883994304651111 Program_compile-10457086705503540980:s->i64-0 Program_compile-5706686836758916083:s->Program_parent-10912160959110460649 Program_compile-5706686836758916083:s->i64-0 Program_compile-12930351210441812130:s->Program_parent-3377577844511369682 Program_compile-12930351210441812130:s->i64-0 Program_compile-9896396136593852202:s->Program_parent-5040379952546458196 Program_compile-9896396136593852202:s->i64-0 Program_compile-8179951341697187233:s->Program___init__-12238976891847021781 Program_compile-8179951341697187233:s->i64-0 Program_compile-11413373673517832166:s->Program_parent-15121139857639374588 Program_compile-11413373673517832166:s->i64-0 Program_compile-12369660641532123352:s->Program_parent-11743562013128004906 Program_compile-12369660641532123352:s->i64-0 Program_compile-1912573936028582372:s->Program___init__-9005825282541405077 Program_compile-1912573936028582372:s->i64-0 Program_compile-6662973804773207269:s->Program_parent-4208978898528913939 Program_compile-6662973804773207269:s->i64-0 Program_compile-956286968014291186:s->Program___init__-3134044275977402624 Program_compile-956286968014291186:s->i64-0 Program_compile-17845845248991523397:s->Program___init__-17280709677866812621 Program_compile-17845845248991523397:s->i64-5871781006564002453 Program_compile-7619260772787498455:s->Program_parent-10080759905092916392 Program_compile-7619260772787498455:s->i64-0 Program_is_identifer-3377577844511369682:s->Program___add__-15952540911656918845 Program_is_identifer-8417957797057827878:s->Program___add__-13761752264459356387 Program_is_identifer-4208978898528913939:s->Program___add__-13095445380246898500 Program_is_identifer-9249358851075372135:s->Program___init__-17280709677866812621 Program_is_identifer-14289738803621830331:s->Program_parent-11743562013128004906 Program_is_identifer-1714775736476281168:s->Program_expr_to_statement-5040379952546458196 Program_is_identifer-15121139857639374588:s->Program___add__-2651793105796594534 Program_is_identifer-16783941965674463102:s->Program___init__-12238976891847021781 Program_is_identifer-15952540911656918845:s->Program_assign-10080759905092916392 Program_is_identifer-0:s->Program___init__-9582703652367694439 Program_is_identifer-2546176790493825425:s->Program___init__-16291883994304651111 Program_is_identifer-10912160959110460649:s->Program_statement-5996666920560749382 Program_is_identifer-10080759905092916392:s->Program___add__-13241269951358007050 Program_is_identifer-5040379952546458196:s->Program_parent-17615343019692007359 Program_is_identifer-11743562013128004906:s->Program___init__-9005825282541405077 Program_is_identifer-5871781006564002453:s->Program___init__-3134044275977402624 Program_is_identifer-17615343019692007359:s->Program___init__-11609550517566447851 Program_next_sym-5040379952546458196:s->Program_parent-17615343019692007359 Program_next_sym-16783941965674463102:s->Program___init__-12238976891847021781 Program_next_sym-8417957797057827878:s->Program___add__-13761752264459356387 Program_next_sym-10912160959110460649:s->Program_parent-1714775736476281168 Program_next_sym-15121139857639374588:s->Program_parent-9249358851075372135 Program_next_sym-10080759905092916392:s->Program___add__-13241269951358007050 Program_next_sym-3377577844511369682:s->Program_parent-15952540911656918845 Program_next_sym-2546176790493825425:s->Program___init__-16291883994304651111 Program_next_sym-0:s->Program___init__-9582703652367694439 Program_next_sym-4208978898528913939:s->Program_parent-16783941965674463102 Program_next_sym-5871781006564002453:s->Program___init__-3134044275977402624 Program_next_sym-11743562013128004906:s->Program___init__-9005825282541405077 Program_next_sym-14289738803621830331:s->Program_parent-2546176790493825425 Program_next_sym-1714775736476281168:s->Program_expr_to_statement-5040379952546458196 Program_next_sym-17615343019692007359:s->Program___init__-11609550517566447851 Program_next_sym-9249358851075372135:s->Program___init__-17280709677866812621 Program_next_sym-15952540911656918845:s->Program_assign-10080759905092916392 Program_statement-5996666920560749382 String('·').statement Program___init__-3134044275977402624 Program Program___add__-1081172882011038115 String('·') + '·' Program___add__-2671062704490572354 String('·') + '·' Program_parent-5040379952546458196 String('·').parent Program_parent-5871781006564002453 String('·').parent Program_parent-1714775736476281168 String('·').parent Program___init__-12238976891847021781 Program String-718359051288455306 " + " bool-0 false Program_function_two-11239823400800561233 String('·').function_two Program_parent-15121139857639374588 String('·').parent Program___init__-9005825282541405077 Program Program___init__-16291883994304651111 Program String-10787254494509254142 "__fn" Program_parent-11743562013128004906 String('·').parent Program_parent-2546176790493825425 String('·').parent Program_parent-8417957797057827878 String('·').parent Program___init__-11609550517566447851 Program String-17954539539390402795 "[1] = 10" String-313937328870544552 "x" bool-5871781006564002453 true Program___add__-15952540911656918845 String('·') + '·' Program___init__-9582703652367694439 Program Program_parent-10080759905092916392 String('·').parent Program_parent-0 String('·').parent Program_parent-15952540911656918845 String('·').parent Program___add__-2651793105796594534 String('·') + '·' Program___init__-17280709677866812621 Program Program_parent-3377577844511369682 String('·').parent Program_parent-9249358851075372135 String('·').parent Program_expr_to_statement-5040379952546458196 String('·').expr_to_statement Program_assign-10080759905092916392 String('·').assign Program___add__-13241269951358007050 String('·') + '·' Program_parent-4208978898528913939 String('·').parent Program_parent-10912160959110460649 String('·').parent Program_parent-17615343019692007359 String('·').parent Program___add__-13095445380246898500 String('·') + '·' Program_parent-16783941965674463102 String('·').parent String-6617174651184748400 "sum(" String-14586553597486223834 ") + " String-5935442095835774456 "y" Program___add__-13761752264459356387 String('·') + '·' Program_expr-11743562013128004906 String('·').expr Program_expr-11743562013128004906-value "x" Program_statements-10912160959110460649 String('·').statements Program_statements-10912160959110460649-value "x[1] = 10 " Program_expr-15121139857639374588 String('·').expr Program_expr-15121139857639374588-value "sum(_0) + " Program_expr-0 String('·').expr Program_expr-0-value "sum(" Program_expr-1714775736476281168 String('·').expr Program_expr-1714775736476281168-value "" Program_expr-17615343019692007359 String('·').expr Program_expr-17615343019692007359-value "[1] = 10" Program_statements-11743562013128004906 String('·').statements Program_statements-11743562013128004906-value "" Program_expr-5040379952546458196 String('·').expr Program_expr-5040379952546458196-value "x[1] = 10" Program_statements-8417957797057827878 String('·').statements Program_statements-8417957797057827878-value "x[1] = 10 _0 = x + x " Program_expr-10912160959110460649 String('·').expr Program_expr-10912160959110460649-value "x" Program_statements-5871781006564002453 String('·').statements Program_statements-5871781006564002453-value "" Program_expr-3377577844511369682 String('·').expr Program_expr-3377577844511369682-value "sum(_0" Program_expr-5871781006564002453 String('·').expr Program_expr-5871781006564002453-value "x" Program_statements-17615343019692007359 String('·').statements Program_statements-17615343019692007359-value "" Program_expr-2546176790493825425 String('·').expr Program_expr-2546176790493825425-value "y" Program_expr-14289738803621830331 String('·').expr Program_expr-14289738803621830331-value "__fn" Program_expr-15952540911656918845 String('·').expr Program_expr-15952540911656918845-value "_0" Program_expr-4208978898528913939 String('·').expr Program_expr-4208978898528913939-value "x + " Program_statements-16783941965674463102 String('·').statements Program_statements-16783941965674463102-value "" Program_statements-4208978898528913939 String('·').statements Program_statements-4208978898528913939-value "x[1] = 10 " Program_expr-16783941965674463102 String('·').expr Program_expr-16783941965674463102-value " + " Program_statements-0 String('·').statements Program_statements-0-value "" Program_statements-9249358851075372135 String('·').statements Program_statements-9249358851075372135-value "" Program_statements-15121139857639374588 String('·').statements Program_statements-15121139857639374588-value "x[1] = 10 _0 = x + x " Program_statements-1714775736476281168 String('·').statements Program_statements-1714775736476281168-value "x[1] = 10 " Program_statements-14289738803621830331 String('·').statements Program_statements-14289738803621830331-value "def __fn(x, y):    x[1] = 10    _0 = x + x    return sum(_0) + y " Program_statements-15952540911656918845 String('·').statements Program_statements-15952540911656918845-value "x[1] = 10 _0 = x + x " Program_statements-10080759905092916392 String('·').statements Program_statements-10080759905092916392-value "x[1] = 10 " Program_expr-10080759905092916392 String('·').expr Program_expr-10080759905092916392-value "x + x" Program_statements-5040379952546458196 String('·').statements Program_statements-5040379952546458196-value "" Program_statements-3377577844511369682 String('·').statements Program_statements-3377577844511369682-value "x[1] = 10 _0 = x + x " Program_statements-2546176790493825425 String('·').statements Program_statements-2546176790493825425-value "" Program_expr-9249358851075372135 String('·').expr Program_expr-9249358851075372135-value ") + " Program_expr-8417957797057827878 String('·').expr Program_expr-8417957797057827878-value "sum(_0) + y" Program_compile-1351883367118893594 String('·').compile i64-0 0 Program_compile-1351883367118893594-value () Program_compile-395596399104602408 String('·').compile Program_compile-395596399104602408-value () Program_compile-2868860904042873558 String('·').compile Program_compile-2868860904042873558-value () Program_compile-0 String('·').compile Program_compile-0-value () Program_compile-5145996267849227305 String('·').compile Program_compile-5145996267849227305-value () Program_compile-10457086705503540980 String('·').compile Program_compile-10457086705503540980-value () Program_compile-5706686836758916083 String('·').compile Program_compile-5706686836758916083-value () Program_compile-12930351210441812130 String('·').compile Program_compile-12930351210441812130-value () Program_compile-9896396136593852202 String('·').compile Program_compile-9896396136593852202-value () Program_compile-8179951341697187233 String('·').compile Program_compile-8179951341697187233-value () Program_compile-11413373673517832166 String('·').compile Program_compile-11413373673517832166-value () Program_compile-12369660641532123352 String('·').compile Program_compile-12369660641532123352-value () Program_compile-1912573936028582372 String('·').compile Program_compile-1912573936028582372-value () Program_compile-6662973804773207269 String('·').compile Program_compile-6662973804773207269-value () Program_compile-956286968014291186 String('·').compile Program_compile-956286968014291186-value () Program_compile-17845845248991523397 String('·').compile i64-5871781006564002453 1 Program_compile-17845845248991523397-value () Program_compile-7619260772787498455 String('·').compile Program_compile-7619260772787498455-value () Program_is_identifer-3377577844511369682 String('·').is_identifer Program_is_identifer-3377577844511369682-value false Program_is_identifer-8417957797057827878 String('·').is_identifer Program_is_identifer-8417957797057827878-value false Program_is_identifer-4208978898528913939 String('·').is_identifer Program_is_identifer-4208978898528913939-value false Program_is_identifer-9249358851075372135 String('·').is_identifer Program_is_identifer-9249358851075372135-value false Program_is_identifer-14289738803621830331 String('·').is_identifer Program_is_identifer-14289738803621830331-value true Program_is_identifer-1714775736476281168 String('·').is_identifer Program_is_identifer-1714775736476281168-value false Program_is_identifer-15121139857639374588 String('·').is_identifer Program_is_identifer-15121139857639374588-value false Program_is_identifer-16783941965674463102 String('·').is_identifer Program_is_identifer-16783941965674463102-value false Program_is_identifer-15952540911656918845 String('·').is_identifer Program_is_identifer-15952540911656918845-value true Program_is_identifer-0 String('·').is_identifer Program_is_identifer-0-value false Program_is_identifer-2546176790493825425 String('·').is_identifer Program_is_identifer-2546176790493825425-value true Program_is_identifer-10912160959110460649 String('·').is_identifer Program_is_identifer-10912160959110460649-value false Program_is_identifer-10080759905092916392 String('·').is_identifer Program_is_identifer-10080759905092916392-value false Program_is_identifer-5040379952546458196 String('·').is_identifer Program_is_identifer-5040379952546458196-value false Program_is_identifer-11743562013128004906 String('·').is_identifer Program_is_identifer-11743562013128004906-value true Program_is_identifer-5871781006564002453 String('·').is_identifer Program_is_identifer-5871781006564002453-value false Program_is_identifer-17615343019692007359 String('·').is_identifer Program_is_identifer-17615343019692007359-value false Program_next_sym-5040379952546458196 String('·').next_sym Program_next_sym-5040379952546458196-value 0 Program_next_sym-16783941965674463102 String('·').next_sym Program_next_sym-16783941965674463102-value 0 Program_next_sym-8417957797057827878 String('·').next_sym Program_next_sym-8417957797057827878-value 1 Program_next_sym-10912160959110460649 String('·').next_sym Program_next_sym-10912160959110460649-value 0 Program_next_sym-15121139857639374588 String('·').next_sym Program_next_sym-15121139857639374588-value 1 Program_next_sym-10080759905092916392 String('·').next_sym Program_next_sym-10080759905092916392-value 0 Program_next_sym-3377577844511369682 String('·').next_sym Program_next_sym-3377577844511369682-value 1 Program_next_sym-2546176790493825425 String('·').next_sym Program_next_sym-2546176790493825425-value 0 Program_next_sym-0 String('·').next_sym Program_next_sym-0-value 0 Program_next_sym-4208978898528913939 String('·').next_sym Program_next_sym-4208978898528913939-value 0 Program_next_sym-5871781006564002453 String('·').next_sym Program_next_sym-5871781006564002453-value 0 Program_next_sym-11743562013128004906 String('·').next_sym Program_next_sym-11743562013128004906-value 0 Program_next_sym-14289738803621830331 String('·').next_sym Program_next_sym-14289738803621830331-value 0 Program_next_sym-1714775736476281168 String('·').next_sym Program_next_sym-1714775736476281168-value 0 Program_next_sym-17615343019692007359 String('·').next_sym Program_next_sym-17615343019692007359-value 0 Program_next_sym-9249358851075372135 String('·').next_sym Program_next_sym-9249358851075372135-value 1 Program_next_sym-15952540911656918845 String('·').next_sym Program_next_sym-15952540911656918845-value 1

PyObject - Python objects in EGraphs#

We can also add Python objects directly to the e-graph as primitives and run rewrite rules that call back into Python

Future Work#

The next milestone use case is to be able to optimize functional array programs and rewrite them.

To implement this we need to at least support functions as values and ideally also generic types.

Example#

There is a concrete example provided by Siu from the Numba project.

We would want users to be able to write code like this:

def linalg_norm_loopnest_egglog(X: enp.NDArray, axis: enp.TupleInt) -> enp.NDArray:
    # peel off the outer shape for result array
    outshape = ShapeAPI(X.shape).deselect(axis).to_tuple()
    # get only the inner shape for reduction
    reduce_axis = ShapeAPI(X.shape).select(axis).to_tuple()

    return enp.NDArray.from_fn(
        outshape,
        X.dtype,
        lambda k: enp.sqrt(
            LoopNestAPI.from_tuple(reduce_axis)
            .unwrap()
            .reduce(lambda carry, i: carry + enp.real(enp.conj(x := X[i + k]) * x), init=0.0)
        ).to_value(),
    )

Which would then be rewritten to:

def linalg_norm_array_api(X: enp.NDArray, axis: enp.TupleInt) -> enp.NDArray:
    outdim = enp.range_(X.ndim).filter(lambda x: ~axis.contains(x))
    outshape = convert(convert(X.shape, enp.NDArray)[outdim], enp.TupleInt)
    row_axis, col_axis = axis
    return enp.NDArray.from_fn(
        outshape,
        X.dtype,
        lambda k: enp.sqrt(
            enp.int_product(enp.range_(X.shape[row_axis]), enp.range_(X.shape[col_axis]))
            .map_to_ndarray(lambda rc: enp.real(enp.conj(x := X[rc + k]) * x))
            .sum()
        ).to_value(),
    )

And finally could be lowered to Python as:

def linalg_norm_low_level(
    X: np.ndarray[tuple, np.dtype[np.float64]], axis: tuple[int, int]
) -> np.ndarray[tuple, np.dtype[np.float64]]:
    # # If X ndim>=3 and axis is a 2-tuple
    assert X.ndim >= 3
    assert len(axis) == 2
    #  Y - 2
    outdim = [dim for dim in range(X.ndim) if dim not in axis]

    outshape = tuple(np.asarray(X.shape)[outdim])

    res = np.zeros(outshape, dtype=X.dtype)
    row_axis, col_axis = axis
    for k in np.ndindex(outshape):
        tmp = 0.0
        for row in range(X.shape[row_axis]):
            for col in range(X.shape[col_axis]):
                idx = (row, col, *k)
                x = X[idx]
                tmp += (x.conj() * x).real
        res[k] = np.sqrt(tmp)
    return res

Conclusion#

In this talk I have gone through some details of what is needed to connect data science users to egglog:

Overall, the idea is that if we can get egglog in more users hands, in particular for data intensive workloads where the tradeoff of time for pre-computation is worth it, than this can help drive exciting future research directions and also build meaningful useful tools for the scientific open source ecosystem in Python.

If you are building DSLs in Python, or more generally want to play with e-graphs, try out egglog-python!

Around on the e-graphs Zulip for any questions.