Skip to content
This repository has been archived by the owner on Mar 16, 2024. It is now read-only.

Commit

Permalink
progress port, update bounding box (#153)
Browse files Browse the repository at this point in the history
* progress port, update bounding box

* Feature/remove reference py loader (#154)

* remove reference to py module loader

* Remove all redbaron refs (#155)

* Remove all redbaron refs

* update nbs, indices, etc

* remove extra print

* modify req version

* fix sorting, update readme
  • Loading branch information
emrgnt-cmplxty authored Jul 8, 2023
1 parent fdffaca commit d139aa6
Show file tree
Hide file tree
Showing 34 changed files with 504 additions and 848 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
<img src="https://img.shields.io/static/v1?label=license&message=Apache 2.0&color=white" alt="License">
</a> |
[![Documentation Status](https://readthedocs.org/projects/automata/badge/?version=latest)](https://automata.readthedocs.io/en/latest/?badge=latest)
[![GitHub star chart](https://img.shields.io/github/stars/emrgnt-cmplxty/Automata?style=social)](https://star-history.com/#emrgnt-cmplxty/Automata)
[![Discord](https://img.shields.io/discord/1120774652915105934?logo=discord)](https://discord.gg/j9GxfbxqAe)
[![Twitter Follow](https://img.shields.io/twitter/follow/ocolegro?style=social)](https://twitter.com/ocolegro)


[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/emrgnt-cmplxty/Automata)

**Automata's objective is to evolve into a fully autonomous, self-programming Artificial Intelligence system**.

This project is inspired by the theory that code is essentially a form of memory, and when furnished with the right tools, AI can evolve real-time capabilities which can potentially lead to the creation of AGI. The word automata comes from the Greek word αὐτόματος, denoting "self-acting, self-willed, self-moving", and [Automata theory](https://en.wikipedia.org/wiki/Automata_theory) is the study of abstract machines and [automata](https://en.wikipedia.org/wiki/Automaton), as well as the computational problems that can be solved using them. More information follows below.
Expand Down Expand Up @@ -203,4 +207,4 @@ Automata is licensed under the Apache License 2.0.

## Other

This project is an extension of an initial effort between [emrgnt-cmplxty](https://github.com/emrgnt-cmplxty) and [maks-ivanov](https://github.com/maks-ivanov) that began with this [repository](https://github.com/maks-ivanov/automata).
This project is an extension of an initial effort between [emrgnt-cmplxty](https://github.com/emrgnt-cmplxty) and [maks-ivanov](https://github.com/maks-ivanov) that began with this [repository](https://github.com/maks-ivanov/automata).
3 changes: 1 addition & 2 deletions automata/cli/scripts/run_doc_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from automata.memory_store.symbol_code_embedding import SymbolCodeEmbeddingHandler
from automata.memory_store.symbol_doc_embedding import SymbolDocEmbeddingHandler
from automata.singletons.dependency_factory import dependency_factory
from automata.singletons.py_module_loader import py_module_loader, pyast_module_loader
from automata.singletons.py_module_loader import py_module_loader
from automata.symbol.graph import SymbolGraph
from automata.symbol.symbol_utils import get_rankable_symbols

Expand All @@ -18,7 +18,6 @@

def initialize_providers(embedding_level, **kwargs):
py_module_loader.initialize()
pyast_module_loader.initialize()

embedding_provider = OpenAIEmbeddingProvider()

Expand Down
9 changes: 7 additions & 2 deletions automata/core/base/database/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,17 @@ def entry_to_key(self, entry: V) -> K:
# TODO - PyLance is complaining about the type of the ids parameter below
# Can we constrain the TypeVar to be a Chroma compatible type (e.g. ID)?

def contains(self, key: str) -> bool:
def contains(self, key: K) -> bool:
result = self._collection.get(ids=[key])
return len(result["ids"]) != 0

def discard(self, key: K) -> None:
self._collection.delete(ids=[key])
try:
self._collection.delete(ids=[key])
except RuntimeError as e:
# FIXME - It seems an error in Chroma is causing this to be raised falsely
if str(e) != "The requested to delete element is already deleted":
raise

def batch_discard(self, keys: List[K]) -> None:
    """
    Delete every entry whose id is in ``keys`` with a single collection call.

    Args:
        keys (List[K]): The ids to delete; passed unchanged as ``ids``.

    Any exception raised by the underlying ``delete`` call propagates to the caller.
    """
    self._collection.delete(ids=keys)
19 changes: 1 addition & 18 deletions automata/core/base/patterns/singleton.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import abc
from typing import Any, Dict, List
from typing import Any, Dict


class Singleton(abc.ABCMeta, type):
Expand All @@ -14,20 +14,3 @@ def __call__(cls, *args, **kwargs):
if cls not in cls._instances:
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
return cls._instances[cls]


class Doubleton(abc.ABCMeta, type):
    """
    Doubleton metaclass for ensuring at most two instances of a class.

    The first two calls to a class using this metaclass each construct a new
    instance. Every later call constructs nothing, ignores its arguments, and
    returns the second (most recently created) instance.
    """

    # Maps each class that uses this metaclass to the instances created so far.
    # The keys are the class objects themselves, not their names.
    _instances: Dict[type, List[Any]] = {}

    def __call__(cls, *args, **kwargs):
        """
        Create an instance while fewer than two exist, then return the newest.

        Args:
            *args: Positional arguments forwarded to the class constructor.
            **kwargs: Keyword arguments forwarded to the class constructor.

        Returns:
            The most recently created instance of ``cls``.
        """
        instances = cls._instances.setdefault(cls, [])
        if len(instances) < 2:
            instances.append(super().__call__(*args, **kwargs))
        return instances[-1]  # Always return the last created instance
9 changes: 1 addition & 8 deletions automata/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@
import networkx as nx
import openai
import yaml
from redbaron import ClassNode, DefNode, Node, RedBaron, StringNode

from automata.symbol.base import Symbol

FSTNode = Union[Node, RedBaron]


def set_openai_api_key(override_key: Optional[str] = None) -> None:
"""Sets the OpenAI API key from the environment variable OPENAI_API_KEY"""
Expand Down Expand Up @@ -165,7 +162,7 @@ def get_logging_config(
return cast(dict[str, Any], logging_config)


def get_docstring_from_node(node: Optional[Union[FSTNode, AST]]) -> str:
def get_docstring_from_node(node: Optional[AST]) -> str:
"""
Gets the docstring from the specified node
Expand All @@ -175,10 +172,6 @@ def get_docstring_from_node(node: Optional[Union[FSTNode, AST]]) -> str:
if not node:
return "No result found."

if isinstance(node, (ClassNode, DefNode, RedBaron)):
filtered_nodes = node.filtered() # get rid of extra whitespace
if isinstance(filtered_nodes[0], StringNode):
return filtered_nodes[0].value.replace('"""', "").replace("'''", "")
elif isinstance(node, (FunctionDef, ClassDef, AsyncFunctionDef)):
doc_string = get_docstring(node)
if doc_string:
Expand Down
5 changes: 3 additions & 2 deletions automata/embedding/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from enum import Enum
from typing import Any, Dict, Sequence

import astunparse
import numpy as np

from automata.core.base.database.vector import VectorDatabaseProvider
Expand Down Expand Up @@ -55,10 +56,10 @@ def build(self, source_text: str, symbol: Symbol) -> Any:
def fetch_embedding_source_code(self, symbol: Symbol) -> str:
"""An abstract method for embedding the context is the source code itself."""
from automata.symbol.symbol_utils import ( # imported late for mocking
convert_to_fst_object,
convert_to_ast_object,
)

return str(convert_to_fst_object(symbol))
return astunparse.unparse(convert_to_ast_object(symbol))


class EmbeddingHandler(abc.ABC):
Expand Down
12 changes: 4 additions & 8 deletions automata/experimental/search/symbol_search.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from ast import unparse as pyast_unparse
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
from redbaron import RedBaron

from automata.embedding.base import EmbeddingSimilarityCalculator
from automata.experimental.search.rank import SymbolRank, SymbolRankConfig
from automata.singletons.py_module_loader import py_module_loader
from automata.symbol.base import Symbol, SymbolReference
from automata.symbol.graph import SymbolGraph
from automata.symbol.parser import parse_symbol
from automata.symbol.symbol_utils import convert_to_fst_object
from automata.symbol.symbol_utils import convert_to_ast_object
from automata.symbol_embedding.handler import SymbolEmbeddingHandler

SymbolReferencesResult = Dict[str, List[SymbolReference]]
Expand Down Expand Up @@ -73,7 +72,7 @@ def symbol_references(self, symbol_uri: str) -> SymbolReferencesResult:

def retrieve_source_code_by_symbol(self, symbol_uri: str) -> SourceCodeResult:
"""Finds the raw text of a module, class, method, or standalone function."""
node = convert_to_fst_object(parse_symbol(symbol_uri))
node = convert_to_ast_object(parse_symbol(symbol_uri))
return str(node) if node else None

def exact_search(self, pattern: str) -> ExactSearchResult:
Expand Down Expand Up @@ -114,10 +113,7 @@ def _find_pattern_in_modules(self, pattern: str) -> Dict[str, List[int]]:
matches = {}
for module_path, module in py_module_loader.items():
if module:
if isinstance(module, RedBaron):
lines = module.dumps().splitlines()
else:
lines = pyast_unparse(module).splitlines()
lines = pyast_unparse(module).splitlines()
line_numbers = [i + 1 for i, line in enumerate(lines) if pattern in line.strip()]
if line_numbers:
matches[module_path] = line_numbers
Expand Down
1 change: 0 additions & 1 deletion automata/memory_store/symbol_code_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def update_existing_embedding(self, source_code: str, symbol: Symbol) -> None:
of the existing embedding. If there are differences, update the embedding.
"""
existing_embedding = self.embedding_db.get(symbol.dotpath)

if existing_embedding.document != source_code:
self.embedding_db.discard(symbol.dotpath)
symbol_embedding = self.embedding_builder.build(source_code, symbol)
Expand Down
156 changes: 20 additions & 136 deletions automata/navigation/py/navigation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,8 @@
from ast import AST, AsyncFunctionDef, ClassDef, FunctionDef
from ast import Module as ModuleNode
from ast import iter_child_nodes
from typing import List, Optional, Union

from redbaron import (
ClassNode,
DefNode,
FromImportNode,
ImportNode,
Node,
NodeList,
RedBaron,
)

from automata.navigation.py.dotpath_map import DotPathMap
from dataclasses import dataclass
from typing import List, Union

logger = logging.getLogger(__name__)

Expand All @@ -40,134 +29,29 @@ def find_subnode(node, obj_name):
return None


def find_syntax_tree_node(
    code_obj: Optional[Union[RedBaron, ClassNode, ModuleNode]], object_path: Optional[str]
) -> Optional[Union[Node, RedBaron, AST]]:
    """
    Find a module, or find a function, method, or class inside a module.

    Args:
        code_obj (Optional[Union[RedBaron, ClassNode, ModuleNode]]): The syntax tree
            to search: a RedBaron FST object, a RedBaron class node, or a Python
            ``ast`` module.
        object_path (Optional[str]): The dot-separated object path
            (e.g., 'ClassName.method_name'). If None or empty, ``code_obj`` itself
            is returned.

    Returns:
        Optional[Union[Node, RedBaron, AST]]: The found node, ``code_obj`` itself when
            no path is given, or None if ``code_obj`` is falsy or nothing matches.
    """
    if not code_obj:
        return None

    if not object_path:
        return code_obj

    obj_parts = object_path.split(DotPathMap.DOT_SEP)

    if isinstance(code_obj, RedBaron) or isinstance(code_obj, ClassNode):
        # RedBaron path: descend one path component at a time, stopping early
        # as soon as a component cannot be found (node becomes falsy).
        node = code_obj
        while node and obj_parts:
            obj_name = obj_parts.pop(0)
            node = _find_subnode(node, obj_name)
        return node
    else:
        # Anything else is handed to the Python-ast lookup with the remaining parts.
        return find_syntax_tree_node_pyast(code_obj, obj_parts)

@dataclass
class LineItem:
"""A class to represent a line item in a bounding box."""

def find_import_syntax_tree_nodes(module: RedBaron) -> Optional[NodeList]:
"""
Find all imports in a module.
line: int
column: int

Args:
module (RedBaron): The module to search.

Returns:
Optional[NodeList]: A list of ImportNode and FromImportNode objects.
"""
return module.find_all(lambda identifier: identifier in ("import", "from_import"))
@dataclass
class BoundingBox:
    """A class to represent the bounding box of a symbol."""

    # Line/column position where the span begins.
    top_left: LineItem
    # Line/column position where the span ends.
    bottom_right: LineItem

def find_import_syntax_tree_node_by_name(
module: RedBaron, import_name: str
) -> Optional[Union[ImportNode, FromImportNode]]:
"""
Find an import by name.

Args:
module (RedBaron): The module to search.
import_name (str): The name of the import to find.
def construct_bounding_box(node: AST) -> BoundingBox:
if not node.end_lineno:
raise ValueError(f"{node} does not have an end line number")
elif not node.end_col_offset:
raise ValueError(f"{node} does not have an end column offset")

Returns:
Optional[Union[ImportNode, FromImportNode]]: The found import, or None if not found.
"""
return module.find(
lambda identifier: identifier in ("import", "from_import"), name=import_name
return BoundingBox(
top_left=LineItem(line=node.lineno, column=node.col_offset),
bottom_right=LineItem(line=node.end_lineno, column=node.end_col_offset),
)


def find_all_function_and_class_syntax_tree_nodes(module: RedBaron) -> NodeList:
    """
    Find all class and function definitions in a module.

    Args:
        module (RedBaron): The module to search.

    Returns:
        NodeList: A list of ClassNode and DefNode objects.
    """
    # Select nodes whose identifier is "class" or "def".
    return module.find_all(lambda identifier: identifier in ("class", "def"))


def find_method_call_by_location(
    module: RedBaron, line_number: int, column_number: int
) -> Optional[RedBaron]:
    """
    Find the first call node in a module whose bounding box contains a location.

    Args:
        module (RedBaron): The module to search.
        line_number (int): The line number of the symbol reference.
        column_number (int): The column number of the symbol reference.

    Returns:
        Optional[RedBaron]: The first call node (in ``find_all`` order) that spans
            the location, or None if not found.
    """
    try:
        # Find all CallNode instances
        all_calls = module.find_all("call")
        # Every bounding-box coordinate is shifted by -1 before being compared.
        # NOTE(review): presumably this converts 1-based RedBaron positions to a
        # 0-based line_number/column_number; confirm against the callers.
        return next(
            (
                call
                for call in all_calls
                # The call starts at or before the location ...
                if (
                    call.absolute_bounding_box.top_left.line - 1 < line_number
                    or (
                        call.absolute_bounding_box.top_left.line - 1 == line_number
                        and call.absolute_bounding_box.top_left.column - 1 <= column_number
                    )
                )
                # ... and ends at or after it.
                and (
                    call.absolute_bounding_box.bottom_right.line - 1 > line_number
                    or (
                        call.absolute_bounding_box.bottom_right.line - 1 == line_number
                        and call.absolute_bounding_box.bottom_right.column - 1 >= column_number
                    )
                )
            ),
            None,
        )
    except IndexError:
        # NOTE(review): next() with a default does not raise IndexError itself;
        # presumably this guards an error from RedBaron's lookups. Confirm.
        return None


def _find_subnode(code_obj: RedBaron, obj_name: str) -> Optional[Union[DefNode, ClassNode]]:
    """
    Find a DefNode or ClassNode node with the specified name within the given
    FST code object.

    Args:
        code_obj (RedBaron): The FST code object (RedBaron or Node) to search.
        obj_name (str): The name of the object to find.

    Returns:
        Optional[Union[DefNode, ClassNode]]: The found node, or None.
    """
    # Match on node kind ("def" or "class") and on the node's name.
    return code_obj.find(lambda identifier: identifier in ("def", "class"), name=obj_name)
2 changes: 1 addition & 1 deletion automata/retrievers/py/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def process_imports(self, symbol: Symbol) -> None:
while not os.path.isdir(os.path.dirname(file_path)):
file_path = os.path.dirname(file_path)

# Load the source code with RedBaron
# Load the source code with AST
with open(f"{file_path}.py", "r") as f:
ast = pyast_parse(f.read())

Expand Down
Loading

0 comments on commit d139aa6

Please sign in to comment.