diff --git a/Project.toml b/Project.toml index 4cfc01d0..b7a07370 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ChemistryFeaturization" uuid = "6c925690-434a-421d-aea7-51398c5b007a" authors = ["Rachel Kurchin ", "Sean Sun"] -version = "0.3.0" +version = "0.3.1" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" diff --git a/src/ChemistryFeaturization.jl b/src/ChemistryFeaturization.jl index 2212c482..9b231f3f 100644 --- a/src/ChemistryFeaturization.jl +++ b/src/ChemistryFeaturization.jl @@ -2,6 +2,9 @@ module ChemistryFeaturization using SimpleWeightedGraphs +encodable_elements(a::Any) = throw(MethodError(encodable_elements, a)) +decode(a::Any, encoded_features) = throw(MethodError(decode, a)) + include("utils/Utils.jl") export Utils @@ -11,20 +14,20 @@ export AbstractType include("codecs/codecs.jl") export Codec -encodable_elements(a::Any) = throw(MethodError(encodable_elements, a)) -decode(a::Any, encoded_features) = throw(MethodError(decode, a)) - include("features/features.jl") export FeatureDescriptor +using .FeatureDescriptor: ElementFeatureDescriptor export ElementFeatureDescriptor include("atoms/atoms.jl") export Atoms +using .Atoms: AtomGraph export AtomGraph include("featurizations/featurizations.jl") export Featurization -export GraphNodeFeaturization +using .Featurization: GraphNodeFeaturization, featurize! +export GraphNodeFeaturization, featurize! export encodable_elements, decode diff --git a/src/atoms/atomgraph.jl b/src/atoms/atomgraph.jl index 44db0a73..560146b1 100644 --- a/src/atoms/atomgraph.jl +++ b/src/atoms/atomgraph.jl @@ -62,16 +62,18 @@ function AtomGraph( num_atoms = size(graph)[1] @assert length(elements) == num_atoms "Element list length doesn't match graph size!" - # check that features is the right dimensions (# features x # nodes) - expected_feature_length = sum(f.num_bins for f in featurization) - @assert size(features) == (expected_feature_length, num_atoms) "Feature matrix is of wrong dimension! It should be of size (# features, # nodes)" + # TO CONSIDER: add `validate_features` function or something like that for when this constructor is used + # that we can then dispatch on different featurization types. Alternatively, remove this constructor? + + # check that features is the right dimensions (# features x # nodes) -> commented out because doesn't work with generic fzn + # expected_feature_length = sum(f.num_bins for f in featurization) + # @assert size(features) == (expected_feature_length, num_atoms) "Feature matrix is of wrong dimension! It should be of size (# features, # nodes)" # if all these are good, calculate laplacian and build the thing laplacian = normalized_laplacian(graph) AtomGraph(graph, elements, laplacian, features, featurization, id) end - # one without features or featurization initialized yet function AtomGraph( graph::SimpleWeightedGraph{A,B}, diff --git a/src/featurizations/graphnodefeaturization.jl b/src/featurizations/graphnodefeaturization.jl index d61c0b04..d94d3b28 100644 --- a/src/featurizations/graphnodefeaturization.jl +++ b/src/featurizations/graphnodefeaturization.jl @@ -34,7 +34,7 @@ end function GraphNodeFeaturization( feature_names::Vector{String}, lookup_table::Union{DataFrame,Nothing} = nothing; - nbins::Union{Vector{Integer},Integer,Nothing} = nothing, + nbins::Union{Vector{<:Integer},Integer,Nothing} = nothing, logspaced::Union{Vector{Bool},Bool,Nothing} = nothing, categorical::Union{Vector{Bool},Bool,Nothing} = nothing, ) diff --git a/test/featurizations/GraphNodeFeaturization_tests.jl b/test/featurizations/GraphNodeFeaturization_tests.jl index 6d23036d..d6fc2321 100644 --- a/test/featurizations/GraphNodeFeaturization_tests.jl +++ b/test/featurizations/GraphNodeFeaturization_tests.jl @@ -28,9 +28,14 @@ using ChemistryFeaturization.Featurization featurize!(F2, fzn3) decoded_matrix = decode(fzn3, F2.encoded_features) decoded_ag = decode(F2) + enc1 = F2.encoded_features @test all( map(d -> d[1]["Block"] == d[2]["Block"] == "p", [decoded_matrix, decoded_ag]), ) + fzn4 = GraphNodeFeaturization(fnames, nbins = [2, 4, 2]) + F2 = AtomGraph(Float32.([0 1; 1 0]), ["F", "F"]) + featurize!(F2, fzn4) + @test all(F2.encoded_features .== enc1) end # encodable_elements diff --git a/test/utils/ElementFeatureUtils_tests.jl b/test/utils/ElementFeatureUtils_tests.jl index 5e91764c..98f210c7 100644 --- a/test/utils/ElementFeatureUtils_tests.jl +++ b/test/utils/ElementFeatureUtils_tests.jl @@ -11,12 +11,6 @@ using ChemistryFeaturization.Utils.ElementFeatureUtils @test fea_minmax("Group") == [1, 18] @test fea_minmax("MeaningOfLife", df) == [-1, 42] - # default_log - @test default_log("Block") == false # not numbers - @test default_log("MeaningOfLife", df) == false # values span 0 - @test default_log("Valence") == false # extremal value 0 - @test default_log("Atomic mass") == true - @test default_log("Atomic mass", threshold = 3) == false # default_log @test default_log("Block") == false # not numbers @test default_log("MeaningOfLife", df) == false # values span 0