Skip to content

Commit

Permalink
Merge pull request #90 from Chemellia/at/docs
Browse files Browse the repository at this point in the history
At/docs
  • Loading branch information
rkurchin authored Jun 28, 2021
2 parents c5bde65 + 4def213 commit 6123ed5
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ makedocs(
"Atoms Objects"=>"types/atoms.md",
"Feature Descriptors"=>"types/feature_descriptors.md",
"Featurization"=>"types/featurizations.md",
"Codec"=>"types/codecs.md",
],
"Contributing"=>"contributing.md",
"Changelog"=>"changelog.md",
Expand Down
5 changes: 5 additions & 0 deletions docs/src/types/codecs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Codecs

```@docs
Codec.OneHotOneCold
```
4 changes: 2 additions & 2 deletions src/ChemistryFeaturization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ export ElementFeatureDescriptor

include("atoms/atoms.jl")
export Atoms
using .Atoms: AtomGraph
export AtomGraph
using .Atoms: AtomGraph, visualize
export AtomGraph, visualize

include("featurizations/featurizations.jl")
export Featurization
Expand Down
24 changes: 23 additions & 1 deletion src/atoms/atomgraph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,28 @@ AtomGraph(
function AtomGraph(adj::Array{R}, elements::Vector{String}, id = "") where {R<:Real}
    # Convenience constructor: wrap the raw real-valued adjacency array in a
    # SimpleWeightedGraph, then defer to the graph-based AtomGraph method.
    weighted_graph = SimpleWeightedGraph(adj)
    return AtomGraph(weighted_graph, elements, id)
end

"""
AtomGraph(input_file_path; id="", output_file_path=nothing, featurization=nothing, overwrite_file=false, use_voronoi=false, cutoff_radius=8.0, max_num_nbr=12, dist_decay_func=inverse_square, normalize_weights=true)
Construct an AtomGraph object from a structure file.
# Required Arguments
- `input_file_path::String`: path to file containing structure (must be readable by ASE.io.read)
# Optional Arguments
- `id::String=""`: ID associated with structure (e.g. identifier from online database)
- `output_file_path=nothing`: If provided, structure will be serialized to file at this location
- `featurization`: If provided, features will be encoded using it
- `overwrite_file::Bool=false`: whether to overwrite an existing file at `output_file_path`
- `use_voronoi::Bool=false`: Whether to build neighbor lists using Voronoi decompositions
- `cutoff_radius::Real=8.0`: If not using Voronoi neighbor lists, longest allowable distance to a neighbor, in Angstroms
- `max_num_nbr::Integer=12`: If not using Voronoi neighbor lists, largest allowable number of neighbors
- `dist_decay_func=inverse_square`: Function by which to assign edge weights according to distance between neighbors
- `normalize_weights::Bool=true`: Whether to normalize weights such that the largest is 1.0
# Note
`max_num_nbr` is a "soft" limit – if multiple neighbors are at the same distance, the full neighbor list may be longer.
"""
function AtomGraph(
input_file_path::String,
id::String = "",
Expand Down Expand Up @@ -258,7 +280,7 @@ end
"Visualize a given graph."
function visualize(ag::AtomGraph)
# gplot doesn't work on weighted graphs
sg = SimpleGraph(adjacency_matrix(ag))
sg = SimpleGraph(adjacency_matrix(ag.graph))
plt = gplot(
sg,
nodefillc = graph_colors(ag.elements),
Expand Down
41 changes: 35 additions & 6 deletions src/features/elementfeature.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,14 @@ include("abstractfeatures.jl")

# TODO: figure out what scheme would look like that is flexible to direct-value encoding (may just need a different feature type since it'll have to handle normalization, etc. too)
"""
ElementFeatureDescriptor(feature_name, encode_f, decode_f, categorical, contextual, length, encodable_elements)
ElementFeatureDescriptor
Construct a feature object that encodes features associated with individual atoms that depend only upon their elemental identity (if you want to encode a feature that depends upon an atom's environment, you should use SpeciesFeatureDescriptor!)
Describe features associated with individual atoms that depend only upon their elemental identity
## Arguments
- `name::String`: the name of the feature
## Fields
- `name::String`: Name of the feature
- `encoder_decoder::AbstractCodec`: Codec that handles the feature's encoding and decoding logic
- `categorical::Bool`: flag for whether the feature is categorical or continuous-valued
- `length::Int`: length of encoded vector
- `logspaced::Bool`: whether onehot-style bins should be logarithmically spaced or not
- `lookup_table::DataFrame`: table containing values of feature for every encodable element
"""
struct ElementFeatureDescriptor <: AbstractAtomFeatureDescriptor
Expand All @@ -29,6 +28,21 @@ struct ElementFeatureDescriptor <: AbstractAtomFeatureDescriptor
lookup_table::DataFrame
end


"""
ElementFeatureDescriptor(feature_name, lookup_table, categorical, contextual, length, encodable_elements)
Construct a feature object that encodes features associated with individual atoms that depend only upon their elemental identity.
If a Codec isn't explicitly specified, [OneHotOneCold](@ref) with [default_efd_encode](@ref) and [default_efd_decode](@ref)
as the encoding and decoding functions respectively is the default choice.
## Arguments
- `feature_name::String`: the name of the feature
- `lookup_table::DataFrame`: table containing values of feature for every encodable element
- `nbins::Integer`: Number of bins to use for one-cold decoding of continuous-valued features
- `logspaced::Bool`: whether onehot-style bins should be logarithmically spaced or not
- `categorical::Bool`: flag for whether the feature is categorical or continuous-valued
"""
function ElementFeatureDescriptor(
feature_name::String,
lookup_table::DataFrame = atom_data_df;
Expand Down Expand Up @@ -91,6 +105,12 @@ function (ed::OneHotOneCold)(
end
end

"""
    output_shape(efd::ElementFeatureDescriptor)

Return the output shape for `efd`, as determined by the codec stored in its
`encoder_decoder` field.
"""
function output_shape(efd::ElementFeatureDescriptor)
    codec = efd.encoder_decoder
    return output_shape(efd, codec)
end

function output_shape(efd::ElementFeatureDescriptor, ed::OneHotOneCold)
Expand All @@ -102,6 +122,11 @@ function (ed::OneHotOneCold)(efd::ElementFeatureDescriptor, encoded_feature)
ed.decode_f(efd, encoded_feature, ed.nbins, ed.logspaced)
end

"""
default_efd_encode(efd::ElementFeatureDescriptor, a::AbstractAtoms, nbins::Integer, logspaced::Bool)
Default one-hot encoding function for an ElementFeatureDescriptor object.
"""
function default_efd_encode(
efd::ElementFeatureDescriptor,
a::AbstractAtoms,
Expand All @@ -124,7 +149,11 @@ function default_efd_encode(
)
end

"""
default_efd_decode(efd::ElementFeatureDescriptor, encoded_feature, nbins::Integer, logspaced::Bool)
Default one-cold decoding logic for an ElementFeatureDescriptor object.
"""
default_efd_decode(efd::ElementFeatureDescriptor, encoded_feature, nbins, logspaced) =
onecold_decoder(
encoded_feature,
Expand Down
32 changes: 32 additions & 0 deletions test/module_tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using Test
using ChemistryFeaturization.AbstractType: AbstractFeaturization, AbstractFeatureDescriptor

# Each call below is expected to raise a MethodError when invoked on an
# arbitrary value or on a bare "fake" subtype of the abstract types.
@testset "Modules and Abstract methods" begin
    @testset "top-level module" begin
        # `nothing` stands in for an arbitrary value of type ::Any
        @test_throws MethodError encodable_elements(nothing)
        @test_throws MethodError decode(nothing, nothing)
    end

    @testset "featurizations module" begin
        # Minimal featurization subtype with no methods of its own
        struct FakeFeaturization <: AbstractFeaturization end
        fake_featurization = FakeFeaturization()

        # Two "F" nodes joined by a single edge
        f2_graph = AtomGraph(Float32.([0 1; 1 0]), ["F", "F"])

        @test_throws MethodError encodable_elements(fake_featurization)
        @test_throws MethodError featurize!(f2_graph, fake_featurization)
        @test_throws MethodError decode(fake_featurization, nothing)
    end

    # @testset "atoms module" begin
    # TBD cleanest way to test generic decode(::AbstractAtoms) - either another "fake" class, or maybe the `invoke` function
    # end

    @testset "features module" begin
        # Minimal feature-descriptor subtype with no methods of its own
        struct FakeFD <: AbstractFeatureDescriptor end
        fake_descriptor = FakeFD()

        @test_throws MethodError encodable_elements(fake_descriptor)
        @test_throws MethodError decode(fake_descriptor, nothing)
    end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using Test
const testdir = dirname(@__FILE__)

tests = [
"module_tests",
"utils/ElementFeatureUtils_tests",
"utils/GraphBuilding_tests",
"atoms/AtomGraph_tests",
Expand Down

0 comments on commit 6123ed5

Please sign in to comment.