# # 02 - Datalog # # _[This tutorial is translated from egglog.](https://egraphs-good.github.io/egglog-tutorial/02-datalog.html)_ # # Datalog is a relational language for deductive reasoning. In the last lesson, we write our first # equality saturation program in egglog, but you can also write rules for deductive reasoning a la Datalog. # In this lesson, we will write several classic Datalog programs in egglog. One of the benifits # of egglog being a language for program optimization is that it can talk about terms natively, # so in egglog we get Datalog with terms for free. # mypy: disable-error-code="empty-body" from __future__ import annotations from collections.abc import Iterable from typing import TypeAlias from egglog import * # Let's first define relations edge and path. # We use `edge(a, b)` to mean the tuple (a, b) is in the `edge` relation. # `edge(a, b)` means there are directed edges from `a` to `b`, # and we will use it to compute the `path` relation, # where `path(a, b)` means there is a (directed) path from `a` to `b`. edge = relation("edge", i64, i64) path = relation("path", i64, i64) # We can insert edges into our relation by asserting facts: egraph = EGraph() egraph.register( edge(i64(1), i64(2)), edge(i64(2), i64(3)), edge(i64(3), i64(4)), ) # Fact definitions are similar to definitions using `egraph.let` definitions in the last lesson, # in that facts are immediately added to relations. # # Now let's tell egglog how to derive the `path` relation. # # First, if an edge from a to b exists, then it is already a proof # that there exists a path from a to b. @egraph.register def _(a: i64, b: i64) -> Iterable[RewriteOrRule]: yield rule(edge(a, b)).then(path(a, b)) # A rule has the form `rule(atom1, atom2 ..).then(action1 action2 ..)`. # # For the rule we have just defined, the only atom is `path(a, b)`, which asks egglog to search # for possible `a` and `b`s such that `path(a, b)` is a fact in the database. # # We call the first part the "query" of a rule, and the second part the "body" of a rule. # In Datalog terminology, confusingly, the first part is called the "body" of the rule # while the second part is called the "head" of the rule. This is because Datalog rules # are usually written as `head :- body`. To avoid confusion, we will refrain from using # Datalog terminology. # # The rule above defines the base case of the path relation. The inductive case reads as follows: # if we know there is a path from `a` to `b`, and there is an edge from `b` to `c`, then # there is also a path from `a` to `c`. # This can be expressed as egglog rule below: @egraph.register def _(a: i64, b: i64, c: i64) -> Iterable[RewriteOrRule]: yield rule(path(a, b), edge(b, c)).then(path(a, c)) # Again, defining a rule does not mean running it in egglog, which may be a surprise to those familiar with Datalog. # The user still needs to run the program. # For instance, the following check would fail at this point. egraph.check_fail(path(i64(1), i64(4))) # But it passes after we run our rules for 10 iterations. egraph.run(10) egraph.check(path(i64(1), i64(4))) # For many deductive rules, we do not know the number of iterations # needed to reach a fixpoint. The egglog language provides the `saturate` scheduling primitive to run the rules until fixpoint. egraph.run(run().saturate()) egraph # We will cover more details about schedules later in the tutorial. # Our last example determines whether there is a path from one node to another, # but we don't know the details about the path. # Let's slightly extend our program to obtain the length of the shortest path between any two nodes. # + @function def edge_len(from_: i64Like, to: i64Like) -> i64: ... @function(merge=lambda old, new: old.min(new)) def path_len(from_: i64Like, to: i64Like) -> i64: ... # - # Here, we use a new decorator called `function` to define a table that respects the functional dependency. # A relation is just a function with output domain `Unit`. # By defining `edge_len` and `path_len` with `function`, we can associate a length to each path. # # What happens it the same tuple of a function is mapped to two values? # In the case of relation, this is easy: `Unit` only has one value, so the two values must be identical. # But in general, that would be a violation of functional dependency, the property that `a = b` implies `f(a) = f(b)`. # Egglog allows us to specify how to reconcile two values that are mapped from the same tuple using _merge expressions_. # # For instance, for `path`, the merge expression is `old.min(new)`. The merge function is passed two special values # `old` and `new` that denotes the current output of the tuple and the output of the new, to-be-inserted value. # The merge expression for `path` says that, when there are two paths from `a` to `b` with lengths `old` and `new`, # we keep the shorter one, i.e., `old.min(new)`. # # For `edge_len`, we can define the merge expression the same as `path_len`, which means that we only keep the shortest edge # if there are multiple edges. But we can also assert that `edge_len` does not have a merge expression, # which is the default if none is provided. # This means we don't expect there will be multiple edges between two nodes. More generally, it is the user's # responsibility to ensure no tuples with conflicting output values exist. If a conflict happens, egglog will # raise an error. # Now let's insert the same edges as before, but we will assign a length to each edge. This is done using the `set` action, # which takes a tuple and an output value: egraph = EGraph() egraph.register( set_(edge_len(1, 2)).to(i64(10)), set_(edge_len(2, 3)).to(i64(10)), set_(edge_len(1, 3)).to(i64(30)), ) # Let us define the reflexive rule and transitive rule for the `path` function. # In this rule, we use the `set` action to set the output value of the `path` function. # On the query side, we use `a == b` (or `eq(a).to(b)` if equality is overloaded) to bind the output value of a function. @egraph.register def _(a: i64, b: i64, c: i64, ab: i64, bc: i64) -> Iterable[RewriteOrRule]: yield rule(edge_len(a, b) == ab).then(set_(path_len(a, b)).to(ab)) yield rule(path_len(a, b) == ab, edge_len(b, c) == bc).then(set_(path_len(a, c)).to(ab + bc)) # Let's run our rules and check we get the desired shortest path egraph.run(run().saturate()) egraph.check(eq(path_len(1, 3)).to(20)) egraph # Now let us combine the knowledge we have learned in lessons 1 and 2 to write a program that combines # both equality saturation and Datalog. # # We reuse our path example, but this time the nodes are terms constructed using the `Node` constructor, # # We start by defining a new, union-able sort (created by sub-classing expression) with a new constructor # + class Node(Expr): def __init__(self, x: i64Like) -> None: ... def edge(self, other: NodeLike) -> Unit: ... def path(self, other: NodeLike) -> Unit: ... NodeLike: TypeAlias = Node | i64Like converter(i64, Node, Node) # - # Note: We could have equivalently written # # ```python # class Node(Sort): ... # # @function # def mk(x: i64Like) -> Node: ... # edge_node = relation("edge_node", Node, Node) # path_node = relation("path_node", Node, Node) # ``` # # All methods of classes are syntactic sugar for creating functions. Note that properties, classmethods, and classvars # are also all supported ways of defining functions. # + egraph = EGraph() @egraph.register def _(x: Node, y: Node, z: Node) -> Iterable[RewriteOrRule]: yield rule(x.edge(y)).then(x.path(y)) yield rule(x.path(y), y.edge(z)).then(x.path(z)) egraph.register( Node(1).edge(2), Node(2).edge(3), Node(3).edge(1), Node(5).edge(6), ) egraph # - # Because we defined our nodes using custom expression `sort`, we can "union" two nodes. # This makes them indistinguishable to rules in `egglog`. egraph.register(union(Node(3)).with_(Node(5))) egraph # `union` is a new function here, but it is our old friend: `rewrite`s are implemented as rules whose # actions are `union`s. For instance, `rewrite(a + b).to(b + a)` is lowered to the following # rule: # # ```python # rule(a + b == e).then(union(e).with_(b + a))) # ``` # + egraph.run(run().saturate()) egraph.check( Node(3).edge(6), Node(1).path(6), ) egraph # - # We can also give a new meaning to equivalence by adding the following rule. @egraph.register def _(x: Node, y: Node) -> Iterable[RewriteOrRule]: yield rule(x.path(y), y.path(x)).then(union(x).with_(y)) # This rule says that if there is a path from `x` to `y` and from `y` to `x`, then # `x` and `y` are equivalent. # This rule allows us to tell if `a` and `b` are in the same connected component by checking # `egraph.check(Node(a) == Node(b))`. egraph.run(run().saturate()) egraph.check( Node(1) == Node(2), Node(1) == Node(3), Node(2) == Node(3), ) egraph