diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index ea7b806..484d775 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -15,10 +15,7 @@ jobs:
       matrix:
         julia-version: ['1']
         julia-arch: [x64, x86]
-        os: [ubuntu-latest, macOS-latest]
-        exclude:
-          - os: macOS-latest
-            julia-arch: x86
+        os: [ubuntu-latest]
       fail-fast: false
     steps:
       - uses: actions/checkout@v2
diff --git a/.github/workflows/format_check.yml b/.github/workflows/format_check.yml
new file mode 100644
index 0000000..7438212
--- /dev/null
+++ b/.github/workflows/format_check.yml
@@ -0,0 +1,41 @@
+name: format-check
+
+on:
+  push:
+    branches:
+      - 'main'
+    tags: '*'
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        julia-version: [1.6.0]
+        julia-arch: [x86]
+        os: [ubuntu-latest]
+    steps:
+      - uses: julia-actions/setup-julia@latest
+        with:
+          version: ${{ matrix.julia-version }}
+
+      - uses: actions/checkout@v2
+      - name: Install JuliaFormatter and format
+        # This will use the latest version by default but you can set the version like so:
+        #
+        # julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", version="0.13.0"))'
+        run: |
+          julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter"))'
+          julia -e 'using JuliaFormatter; format(".", verbose=true)'
+      - name: Format check
+        run: |
+          julia -e '
+          out = Cmd(`git diff --name-only`) |> read |> String
+          if out == ""
+              exit(0)
+          else
+              @error "Some files have not been formatted !!!"
+              write(stdout, out)
+              exit(1)
+          end'
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..58e041c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,51 @@
+# Contribution Guide
+
+We very happily accept contributions from the community to make our packages better! For the smoothest experience, please read this document and follow the guidelines, and we can hopefully get your PR merged in a jiffy! We've tried to keep the guidelines lightweight, reasonable, and not too onerous. :)
+
+(Don't know what a PR is? Know how to write Julia code but never contributed to a package before? Refer to the [Getting Started](#getting-started) section further on down the page.)
+
+Thanks to the [OpenMM contribution guide](https://github.com/openmm/openmm/blob/master/CONTRIBUTING.md) and the [SciML ColPrac document](http://colprac.sciml.ai), which were the main inspirations/starting points for the suggestions contained herein.
+
+## Guidelines
+
+* Commit frequently and make the commit messages detailed, ideally specifying the directory/file affected as well as the nature of the changes! A sample commit message format could be:
+```
+directory of file affected: changes introduced
+
+...commit message explicitly stating the changes made; this should be concise and crisp enough that the maintainers can understand the changes this commit introduces without having to go through the diff logs...
+
+Signed-off/Co-authored with/Suggested-by messages for credit where it's due
+```
+* In general, unless a change is very minor (e.g. fixing a typo), open an issue before opening a pull request that fixes that issue. This allows open discussion, collaboration, and prioritization of changes to the code. Please also label the issue appropriately.
+We use a set of labels that is slightly expanded from the [GitHub standard set](https://docs.github.com/en/github/managing-your-work-on-github/managing-labels#about-default-labels):
+
+| Label | Description |
+| ------------- | ------------- |
+| `breaking` | Indicates a pull request that introduces breaking changes |
+| `bug` | Indicates an unexpected problem or unintended behavior |
+| `documentation` | Indicates a need for improvements or additions to documentation |
+| `duplicate` | Indicates similar issues or pull requests |
+| `enhancement` | Indicates new feature requests |
+| `good first issue` | Indicates a good issue for first-time contributors |
+| `help wanted` | Indicates that a maintainer wants help on an issue or pull request |
+| `invalid` | Indicates that an issue or pull request is no longer relevant |
+| `longterm` | Indicates a feature that we intend to implement, but is not high-priority right now (generally only to be used by maintainers) |
+| `performance` | Indicates an issue/PR to improve code performance |
+| `priority` | Indicates an issue that is high-priority (generally only to be used by maintainers) |
+| `question` | Indicates that an issue or pull request needs more information |
+| `wontfix` | Indicates that work won't continue on an issue or pull request |
+
+* If you are adding/changing features, make sure to add/update tests (DO NOT comment out tests!) and documentation accordingly! Ideally, if relevant, include example usage.
+* Keep things modular! If you are fixing/adding multiple things, do so via separate issues/PRs to streamline review and merging.
+* It is recommended that you set up [JuliaFormatter](https://domluna.github.io/JuliaFormatter.jl/dev/) (also see the [VSCode extension](https://marketplace.visualstudio.com/items?itemName=singularitti.vscode-julia-formatter)). A GitHub action will check that code adheres to the style guide.
+
+## Getting Started
+
+We welcome contributions of well-written code from folks with all levels of software engineering experience! There are a TON of great guides out there for all aspects of collaborative development, so rather than reinventing the wheel, here are a few starting points for common things folks need/want to learn:
+
+* [How to Contribute to Open Source](https://opensource.guide/how-to-contribute/): An awesome high-level introduction covering philosophy, communication, and some best practices, and including links to other more detailed resources.
+
+* [`first-contributions`](https://github.com/firstcontributions/first-contributions): A GitHub project designed to walk beginners through making a first contribution!
+
+* [Resources to learn Git](https://try.github.io) compiled by GitHub.
+
+* Is something missing here? Open a PR to add it! :slightly_smiling_face:
diff --git a/README.md b/README.md
index 5d1cb3f..ce240dd 100755
--- a/README.md
+++ b/README.md
@@ -8,14 +8,14 @@ Documentation is in progress [over here](https://aced-differentiate.github.io/At
 
 ## Getting Started
 
-1. Clone this package to wherever you want to play.
+1. To install the latest tagged version, in your Julia REPL, do `]add AtomicGraphNets`. Alternatively, you can play with the latest version on the `main` branch by skipping to step 2 and then doing `]add /path/to/repo`, replacing the dummy path with the location of your clone.
 
-2. Go and try out the example in examples/example1/ – it has its own README file with detailed instructions.
+2. Clone this package to wherever you want to play.
-* more network architectures (see issues for some ideas) +3. Go and try out the example in examples/example1/ – it has its own README file with detailed instructions. -## Contact -Please feel free to fork and play, and reach out here on GitHub or to rkurchin [at] cmu [dot] edu with suggestions, etc.! +## Contributing +We welcome community contributions! Please refer to [contribution guide](CONTRIBUTING.md) for suggestions on how to go about things. ## Acknowledgements Many thanks to [Dhairya Gandhi](https://github.com/DhairyaLGandhi) for helping out with some adjoints to actually make these layers trainable! :D diff --git a/docs/make.jl b/docs/make.jl index 49f7d5a..91426b3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,32 +11,31 @@ if haskey(ENV, "DOCSARGS") end makedocs( - sitename = "AtomicGraphNets.jl", - modules = [AtomicGraphNets], - pages = Any[ - "Home" => "index.md", - "Basic Graph Theory" => "graph_theory.md", - "GCNNs" => "gcnns.md", - "Comparison with cgcnn.py" => "comparison.md", - "Examples" => Any[ - "Example 1" => "examples/example_1.md", - "Example 2" => "examples/example_2.md"], - "Functions" => Any[ - "Layers" => "functions/layers.md", - "Models" => "functions/models.md"], - "Changelog" => "changelog.md" - ], - format = Documenter.HTML( - # Use clean URLs, unless built as a "local" build - prettyurls = !("local" in ARGS), - canonical = "https://aced-differentiate.github.io/AtomicGraphNets.jl/stable/", - ), - linkcheck = "linkcheck" in ARGS, + sitename = "AtomicGraphNets.jl", + modules = [AtomicGraphNets], + pages = Any[ + "Home"=>"index.md", + "Basic Graph Theory"=>"graph_theory.md", + "GCNNs"=>"gcnns.md", + "Comparison with cgcnn.py"=>"comparison.md", + "Examples"=>Any[ + "Example 1"=>"examples/example_1.md", + "Example 2"=>"examples/example_2.md", + ], + "Functions"=>Any["Layers"=>"functions/layers.md", "Models"=>"functions/models.md"], + "Changelog"=>"changelog.md", + ], + format = Documenter.HTML( + # Use clean URLs, unless built as a "local" build + prettyurls = !("local" in ARGS), + canonical = "https://aced-differentiate.github.io/AtomicGraphNets.jl/stable/", + ), + linkcheck = "linkcheck" in ARGS, ) deploydocs( - repo = "github.com/aced-differentiate/AtomicGraphNets.jl.git", - target = "build", - branch = "gh-pages", - devbranch = "main", - push_preview = true + repo = "github.com/aced-differentiate/AtomicGraphNets.jl.git", + target = "build", + branch = "gh-pages", + devbranch = "main", + push_preview = true, ) diff --git a/examples/1_formation_energy/formation_energy.jl b/examples/1_formation_energy/formation_energy.jl index 4885567..8a3956e 100755 --- a/examples/1_formation_energy/formation_energy.jl +++ b/examples/1_formation_energy/formation_energy.jl @@ -30,7 +30,8 @@ num_bins = [18, 9, 4, 16, 10, 10] num_features = sum(num_bins) # we'll use this later logspaced = [false, false, false, true, true, false] # returns actual vectors (in a dict with keys of elements) plus Vector of AtomFeat objects describing featurization metadata -atom_feature_vecs, featurization = make_feature_vectors(features, nbins=num_bins, logspaced=logspaced) +atom_feature_vecs, featurization = + make_feature_vectors(features, nbins = num_bins, logspaced = logspaced) # model hyperparameters – keeping it pretty simple for now num_conv = 3 # how many convolutional layers? 
@@ -39,12 +40,12 @@ num_hidden_layers = 1 # how many fully-connected layers after convolution and po opt = ADAM(0.001) # optimizer # dataset...first, read in outputs -info = CSV.read(string(datadir,prop,".csv"), DataFrame) +info = CSV.read(string(datadir, prop, ".csv"), DataFrame) y = Array(Float32.(info[!, Symbol(prop)])) # shuffle data and pick out subset -indices = shuffle(1:size(info,1))[1:num_pts] -info = info[indices,:] +indices = shuffle(1:size(info, 1))[1:num_pts] +info = info[indices, :] output = y[indices] # next, make graphs and build input features (matrices of dimension (# features, # nodes)) @@ -70,13 +71,25 @@ train_data = zip(train_input, train_output) # build the model println("Building the network...") -model = build_CGCNN(num_features, num_conv=num_conv, atom_conv_feature_length=crys_fea_len, pooled_feature_length=(Int(crys_fea_len/2)), num_hidden_layers=1) +model = build_CGCNN( + num_features, + num_conv = num_conv, + atom_conv_feature_length = crys_fea_len, + pooled_feature_length = (Int(crys_fea_len / 2)), + num_hidden_layers = 1, +) # define loss function and a callback to monitor progress -loss(x,y) = Flux.Losses.mse(model(x), y) +loss(x, y) = Flux.Losses.mse(model(x), y) evalcb() = @show(mean(loss.(test_input, test_output))) evalcb() # train println("Training!") -@epochs num_epochs Flux.train!(loss, params(model), train_data, opt, cb = Flux.throttle(evalcb, 5)) +@epochs num_epochs Flux.train!( + loss, + params(model), + train_data, + opt, + cb = Flux.throttle(evalcb, 5), +) diff --git a/examples/2_qm9/qm9.jl b/examples/2_qm9/qm9.jl index 79c547b..185d961 100644 --- a/examples/2_qm9/qm9.jl +++ b/examples/2_qm9/qm9.jl @@ -34,7 +34,8 @@ num_bins = [18, 9, 4, 16, 10, 10] num_features = sum(num_bins) # we'll use this later logspaced = [false, false, false, true, true, false] # returns actual vectors (in a dict with keys of elements) plus Vector of AtomFeat objects describing featurization metadata -atom_feature_vecs, featurization = make_feature_vectors(features, nbins=num_bins, logspaced=logspaced) +atom_feature_vecs, featurization = + make_feature_vectors(features, nbins = num_bins, logspaced = logspaced) # model hyperparameters – keeping it pretty simple for now num_conv = 5 # how many convolutional layers? 
@@ -45,8 +46,8 @@ num_hidden_layers = 2 # how many fully-connected layers after convolution and po opt = ADAM(0.003) # optimizer # shuffle data and pick out subset -indices = shuffle(1:size(info,1))[1:num_pts] -info = info[indices,:] +indices = shuffle(1:size(info, 1))[1:num_pts] +info = info[indices, :] output = y[indices] # next, make graphs and build input features (matrices of dimension (# features, # nodes)) @@ -74,14 +75,27 @@ train_data = zip(train_input, train_output) # build the model println("Building the network...") -model = Xie_model(num_features, num_conv=num_conv, atom_conv_feature_length=atom_fea_len, pool_type=pool_type, pooled_feature_length=crys_fea_len, num_hidden_layers=num_hidden_layers) +model = Xie_model( + num_features, + num_conv = num_conv, + atom_conv_feature_length = atom_fea_len, + pool_type = pool_type, + pooled_feature_length = crys_fea_len, + num_hidden_layers = num_hidden_layers, +) # define loss function -loss(x,y) = Flux.Losses.mse(model(x), y) +loss(x, y) = Flux.Losses.mse(model(x), y) # and a callback to see training progress evalcb() = @show(mean(loss.(test_input, test_output))) evalcb() # train println("Training!") -@epochs num_epochs Flux.train!(loss, params(model), train_data, opt, cb = Flux.throttle(evalcb, 10)) +@epochs num_epochs Flux.train!( + loss, + params(model), + train_data, + opt, + cb = Flux.throttle(evalcb, 10), +) diff --git a/examples/4_sgcnn/sgcnn.jl b/examples/4_sgcnn/sgcnn.jl index c9c4da0..9a65b90 100644 --- a/examples/4_sgcnn/sgcnn.jl +++ b/examples/4_sgcnn/sgcnn.jl @@ -10,18 +10,16 @@ using Random using ChemistryFeaturization using AtomicGraphNets using Serialization -<<<<<<< HEAD #cd(@__DIR__) graph_dir = "../../../data/OCP/traj_test_graphs/" bulk_graph_dir = "../../../data/OCP/traj_test_bulk_graphs/" -bulk_graphs_files = readdir(bulk_graph_dir, join=true) -surf_graphs_files = readdir(graph_dir, join=true) +bulk_graphs_files = readdir(bulk_graph_dir, join = true) +surf_graphs_files = readdir(graph_dir, join = true) # read in the graphs -======= using CSV, DataFrames using Statistics @@ -44,17 +42,17 @@ info = CSV.File(csv_path) |> DataFrame y = Array(Float32.(info[!, Symbol("energy")])) # and the graphs -bulk_graphs_files = readdir(bulk_graph_dir, join=true) -surf_graphs_files = readdir(graph_dir, join=true) +bulk_graphs_files = readdir(bulk_graph_dir, join = true) +surf_graphs_files = readdir(graph_dir, join = true) bulk_graphs = read_graphs_batch(bulk_graph_dir) surf_graphs = read_graphs_batch(graph_dir) # pick out the indices for which we have bulk graphs constructed keep_inds = [] -for i in 1:length(surf_graphs_files) +for i = 1:length(surf_graphs_files) fn = splitpath(surf_graphs_files[i])[end] - if isfile(joinpath(bulk_graph_dir, fn)) && fn[end-3:end]==".jls" + if isfile(joinpath(bulk_graph_dir, fn)) && fn[end-3:end] == ".jls" append!(keep_inds, [i]) end end @@ -64,12 +62,12 @@ y = y[keep_inds] keep_inds = [] # now cut out any with NaN laplacians in either set -for i in 1:length(bulk_graphs) - +for i = 1:length(bulk_graphs) + end # shuffle data and pick out subset -indices = shuffle(1:size(info,1))[1:num_pts] +indices = shuffle(1:size(info, 1))[1:num_pts] info = info[indices, :] output = y[indices] bulk_graphs = bulk_graphs[indices] @@ -80,7 +78,8 @@ features = Symbol.(["Group", "Row", "Block", "Atomic mass", "Atomic radius", "X" num_bins = [18, 9, 4, 16, 10, 10] num_features = sum(num_bins) # we'll use this later logspaced = [false, false, false, true, true, false] -atom_feature_vecs, featurization = 
make_feature_vectors(features, nbins=num_bins, logspaced=logspaced) +atom_feature_vecs, featurization = + make_feature_vectors(features, nbins = num_bins, logspaced = logspaced) # add the features to the graphs for ag in surf_graphs @@ -91,7 +90,7 @@ for ag in bulk_graphs end # now "tuple them up" -@assert length(surf_graphs)==length(bulk_graphs) "List lengths don't match up, something has gone wrong! :(" +@assert length(surf_graphs) == length(bulk_graphs) "List lengths don't match up, something has gone wrong! :(" inputs = zip(bulk_graphs, surf_graphs) @@ -107,12 +106,17 @@ train_data = zip(train_input, train_output) # define model, loss, etc. model = build_SGCNN(num_features) -loss(x,y) = Flux.mse(model(x), y) +loss(x, y) = Flux.mse(model(x), y) evalcb() = @show(mean(loss.(test_input, test_output))) evalcb() # train println("Training!") #Flux.train!(loss, params(model), train_data, opt) -@epochs num_epochs Flux.train!(loss, params(model), train_data, opt, cb = Flux.throttle(evalcb, 5)) ->>>>>>> 14e39047ce8abd34f8a1a5692169371ae22397ae +@epochs num_epochs Flux.train!( + loss, + params(model), + train_data, + opt, + cb = Flux.throttle(evalcb, 5), +) diff --git a/src/layers.jl b/src/layers.jl index 18a4ef6..5a67abc 100755 --- a/src/layers.jl +++ b/src/layers.jl @@ -10,9 +10,9 @@ using ChemistryFeaturization #using DifferentialEquations, DiffEqSensitivity # regularized norm fcn, cut out the dims part -function reg_norm(x::AbstractArray, ϵ=sqrt(eps(Float32))) +function reg_norm(x::AbstractArray, ϵ = sqrt(eps(Float32))) μ′ = mean(x) - σ′ = std(x, mean = μ′, corrected=false) + σ′ = std(x, mean = μ′, corrected = false) return Float32.((x .- μ′) ./ (σ′ + ϵ)) end @@ -42,7 +42,13 @@ struct AGNConv{T,F} σ::F end -function AGNConv(ch::Pair{<:Integer,<:Integer}, σ=softplus; initW=glorot_uniform, initb=zeros, T::DataType=Float32) +function AGNConv( + ch::Pair{<:Integer,<:Integer}, + σ = softplus; + initW = glorot_uniform, + initb = zeros, + T::DataType = Float32, +) selfweight = T.(initW(ch[2], ch[1])) convweight = T.(initW(ch[2], ch[1])) b = T.(initb(ch[2], 1)) @@ -60,24 +66,33 @@ end function (l::AGNConv)(ag::AtomGraph) lapl = ag.lapl X = ag.features - out_mat = Float32.(reg_norm(l.σ.(l.convweight * X * lapl + l.selfweight * X + reduce(hcat,l.bias for i in 1:size(X, 2))))) + out_mat = + Float32.( + reg_norm( + l.σ.( + l.convweight * X * lapl + + l.selfweight * X + + reduce(hcat, l.bias for i = 1:size(X, 2)), + ), + ), + ) AtomGraph(ag.graph, ag.elements, ag.lapl, out_mat, AtomFeat[], ag.id) end # fixes from Dhairya so backprop works @adjoint function SparseMatrixCSC{T,N}(arr) where {T,N} - SparseMatrixCSC{T,N}(arr), Δ -> (collect(Δ),) + SparseMatrixCSC{T,N}(arr), Δ -> (collect(Δ),) end @nograd LinearAlgebra.diagm @adjoint function Broadcast.broadcasted(Float32, a::SparseMatrixCSC{T,N}) where {T,N} - Float32.(a), Δ -> (nothing, T.(Δ), ) + Float32.(a), Δ -> (nothing, T.(Δ)) end @nograd issymmetric @adjoint function softplus(x::Real) - y = softplus(x) - return y, Δ -> (Δ * σ(x),) + y = softplus(x) + return y, Δ -> (Δ * σ(x),) end """ @@ -90,61 +105,79 @@ struct AGNPool dim::Int64 str::Int64 pad::Int64 - function AGNPool(pool_type::String, in_num_features::Int64, out_num_features::Int64, pool_width_frac::Float64) + function AGNPool( + pool_type::String, + in_num_features::Int64, + out_num_features::Int64, + pool_width_frac::Float64, + ) @assert in_num_features >= out_num_features "I don't think you actually want to pool to a LONGER vector, do you?" 
- dim, str, pad = compute_pool_params(in_num_features, out_num_features, Float32(pool_width_frac)) - if pool_type=="max" + dim, str, pad = + compute_pool_params(in_num_features, out_num_features, Float32(pool_width_frac)) + if pool_type == "max" pool_func = Flux.maxpool - elseif pool_type=="mean" + elseif pool_type == "mean" pool_func = Flux.meanpool end new(pool_func, dim, str, pad) end end -pool_out_features(num_f::Int64, dim::Int64, stride::Int64, pad::Int64) = Int64(floor((num_f + 2 * pad - dim) / stride + 1)) +pool_out_features(num_f::Int64, dim::Int64, stride::Int64, pad::Int64) = + Int64(floor((num_f + 2 * pad - dim) / stride + 1)) """ Helper function to work out dim, pad, and stride for desired number of output features, given a fixed pooling width. """ -function compute_pool_params(num_f_in::Int64, num_f_out::Int64, dim_frac::Float32; start_dim=Int64(round(dim_frac*num_f_in)), start_str=Int64(floor(num_f_in/num_f_out))) +function compute_pool_params( + num_f_in::Int64, + num_f_out::Int64, + dim_frac::Float32; + start_dim = Int64(round(dim_frac * num_f_in)), + start_str = Int64(floor(num_f_in / num_f_out)), +) # take starting guesses dim = start_dim str = start_str - p_numer = str*(num_f_out-1) - (num_f_in - dim) + p_numer = str * (num_f_out - 1) - (num_f_in - dim) if p_numer < 0 p_numer == -1 ? dim = dim + 1 : str = str + 1 end - p_numer = str*(num_f_out-1) - (num_f_in - dim) + p_numer = str * (num_f_out - 1) - (num_f_in - dim) if p_numer < 0 error("problem, negative p!") end if p_numer % 2 == 0 - pad = Int64(p_numer/2) + pad = Int64(p_numer / 2) else dim = dim - 1 - pad = Int64((str*(num_f_out-1) - (num_f_in - dim))/2) + pad = Int64((str * (num_f_out - 1) - (num_f_in - dim)) / 2) end out_fea_len = pool_out_features(num_f_in, dim, str, pad) - if !(out_fea_len==num_f_out) + if !(out_fea_len == num_f_out) print("problem, output feature wrong length!") end # check if pad gets comparable to width... - if pad >= 0.8*dim + if pad >= 0.8 * dim @warn "specified pooling width was hard to satisfy without nonsensically large padding relative to width, had to increase from desired width" - dim, str, pad = compute_pool_params(num_f_in, num_f_out, dim_frac, start_dim=Int64(round(1.2*start_dim))) + dim, str, pad = compute_pool_params( + num_f_in, + num_f_out, + dim_frac, + start_dim = Int64(round(1.2 * start_dim)), + ) end dim, str, pad end function (m::AGNPool)(ag::AtomGraph) - # compute what pad and stride need to be... - x = ag.features - x = reshape(x, (size(x)..., 1, 1)) - # do mean pooling across feature direction, average across all nodes in graph - # TODO: decide if this approach makes sense or if there's a smarter way - pdims = PoolDims(x, (m.dim,1); padding=(m.pad,0), stride=(m.str,1)) - mean(m.pool_func(x, pdims), dims=2)[:,:,1,1] + # compute what pad and stride need to be... 
+ x = ag.features + x = reshape(x, (size(x)..., 1, 1)) + # do mean pooling across feature direction, average across all nodes in graph + # TODO: decide if this approach makes sense or if there's a smarter way + pdims = PoolDims(x, (m.dim, 1); padding = (m.pad, 0), stride = (m.str, 1)) + mean(m.pool_func(x, pdims), dims = 2)[:, :, 1, 1] end # following commented out for now because it only runs suuuuper slowly but slows down precompilation a lot diff --git a/src/models.jl b/src/models.jl index 559fab5..14c07e4 100755 --- a/src/models.jl +++ b/src/models.jl @@ -1,5 +1,5 @@ using Flux -using Flux:glorot_uniform +using Flux: glorot_uniform using ChemistryFeaturization using AtomicGraphNets @@ -24,9 +24,45 @@ Network has convolution layers, then pooling to some fixed length, followed by D - `output_length::Integer`: length of output vector - `initW::F`: function to use to initialize weights in trainable layers """ -function build_CGCNN(input_feature_length; num_conv=2, conv_activation=softplus, atom_conv_feature_length=80, pool_type="mean", pool_width=0.1, pooled_feature_length=40, num_hidden_layers=1, hidden_layer_activation=softplus, output_layer_activation=identity, output_length=1, initW=glorot_uniform) +function build_CGCNN( + input_feature_length; + num_conv = 2, + conv_activation = softplus, + atom_conv_feature_length = 80, + pool_type = "mean", + pool_width = 0.1, + pooled_feature_length = 40, + num_hidden_layers = 1, + hidden_layer_activation = softplus, + output_layer_activation = identity, + output_length = 1, + initW = glorot_uniform, +) @assert atom_conv_feature_length >= pooled_feature_length "Feature length after pooling must be <= feature length before pooling!" - model = Chain(AGNConv(input_feature_length=>atom_conv_feature_length, conv_activation, initW=initW), [AGNConv(atom_conv_feature_length=>atom_conv_feature_length, conv_activation, initW=initW) for i in 1:num_conv-1]..., AGNPool(pool_type, atom_conv_feature_length, pooled_feature_length, pool_width), [Dense(pooled_feature_length, pooled_feature_length, hidden_layer_activation, initW=initW) for i in 1:num_hidden_layers-1]..., Dense(pooled_feature_length, output_length, output_layer_activation, initW=initW)) + model = Chain( + AGNConv( + input_feature_length => atom_conv_feature_length, + conv_activation, + initW = initW, + ), + [ + AGNConv( + atom_conv_feature_length => atom_conv_feature_length, + conv_activation, + initW = initW, + ) for i = 1:num_conv-1 + ]..., + AGNPool(pool_type, atom_conv_feature_length, pooled_feature_length, pool_width), + [ + Dense( + pooled_feature_length, + pooled_feature_length, + hidden_layer_activation, + initW = initW, + ) for i = 1:num_hidden_layers-1 + ]..., + Dense(pooled_feature_length, output_length, output_layer_activation, initW = initW), + ) end @@ -46,7 +82,12 @@ end """ This is a helper function to the main model builder below. Takes in the inputs and models for the two "parallel" CGCNN-like models at the start of the SGCNN architecture and outputs the concatenated final result. 
""" -function slab_graph_layer(bulk_graph::AtomGraph, bulk_model, surface_graph::AtomGraph, surface_model) +function slab_graph_layer( + bulk_graph::AtomGraph, + bulk_model, + surface_graph::AtomGraph, + surface_model, +) bulk_output = bulk_model(bulk_graph) surface_output = surface_model(surface_graph) vcat(bulk_output, surface_output) @@ -62,11 +103,69 @@ TODO: change this to use the new Flux.Parallel construct, once v0.12 is released # Arguments: Same as [`build_CGCNN`](@ref) except for additional parameter of `hidden_layer_width` """ -function build_SGCNN(input_feature_length::Integer; num_conv=2, conv_activation=softplus, atom_conv_feature_length=80, pool_type="mean", pool_width=0.1, pooled_feature_length=40, hidden_layer_width=40, num_hidden_layers=3, hidden_layer_activation=softplus, output_layer_activation=identity, output_length=1, initW=glorot_uniform) - bulk_model = Chain(AGNConv(input_feature_length=>atom_conv_feature_length, conv_activation, initW=initW), [AGNConv(atom_conv_feature_length=>atom_conv_feature_length, conv_activation, initW=initW) for i in 1:num_conv-1]..., AGNPool(pool_type, atom_conv_feature_length, pooled_feature_length, pool_width)) - surface_model = Chain(AGNConv(input_feature_length=>atom_conv_feature_length, conv_activation, initW=initW), [AGNConv(atom_conv_feature_length=>atom_conv_feature_length, conv_activation, initW=initW) for i in 1:num_conv-1]..., AGNPool(pool_type, atom_conv_feature_length, pooled_feature_length, pool_width)) - model = Chain(graphs->slab_graph_layer(graphs[1], bulk_model, graphs[2], surface_model), Dense(2*pooled_feature_length, hidden_layer_width, hidden_layer_activation, initW=initW), [Dense(hidden_layer_width, hidden_layer_width, hidden_layer_activation, initW=initW) for i in 1:num_hidden_layers-1]..., Dense(hidden_layer_width, output_length, output_layer_activation, initW=initW)) +function build_SGCNN( + input_feature_length::Integer; + num_conv = 2, + conv_activation = softplus, + atom_conv_feature_length = 80, + pool_type = "mean", + pool_width = 0.1, + pooled_feature_length = 40, + hidden_layer_width = 40, + num_hidden_layers = 3, + hidden_layer_activation = softplus, + output_layer_activation = identity, + output_length = 1, + initW = glorot_uniform, +) + bulk_model = Chain( + AGNConv( + input_feature_length => atom_conv_feature_length, + conv_activation, + initW = initW, + ), + [ + AGNConv( + atom_conv_feature_length => atom_conv_feature_length, + conv_activation, + initW = initW, + ) for i = 1:num_conv-1 + ]..., + AGNPool(pool_type, atom_conv_feature_length, pooled_feature_length, pool_width), + ) + surface_model = Chain( + AGNConv( + input_feature_length => atom_conv_feature_length, + conv_activation, + initW = initW, + ), + [ + AGNConv( + atom_conv_feature_length => atom_conv_feature_length, + conv_activation, + initW = initW, + ) for i = 1:num_conv-1 + ]..., + AGNPool(pool_type, atom_conv_feature_length, pooled_feature_length, pool_width), + ) + model = Chain( + graphs -> slab_graph_layer(graphs[1], bulk_model, graphs[2], surface_model), + Dense( + 2 * pooled_feature_length, + hidden_layer_width, + hidden_layer_activation, + initW = initW, + ), + [ + Dense( + hidden_layer_width, + hidden_layer_width, + hidden_layer_activation, + initW = initW, + ) for i = 1:num_hidden_layers-1 + ]..., + Dense(hidden_layer_width, output_length, output_layer_activation, initW = initW), + ) # when Flux v0.12 is out, use this instead of line above #model = Chain(Join(vcat, surface_model, bulk_model), Dense(2*pooled_feature_length, 
hidden_layer_width, hidden_layer_activation, initW=initW), [Dense(hidden_layer_width, hidden_layer_width, hidden_layer_activation, initW=initW) for i in 1:num_hidden_layers-1]..., Dense(hidden_layer_width, output_length, output_layer_activation, initW=initW)) end - diff --git a/test/layer_tests.jl b/test/layer_tests.jl index fa0f492..d3a7449 100755 --- a/test/layer_tests.jl +++ b/test/layer_tests.jl @@ -8,20 +8,30 @@ using .layers: AGNConv, AGNPool @testset "AGNConv" begin # create simple line graph, populate it with feature of all ones adjmat = Float32.([0 1 0; 1 0 1; 0 1 0]) - dummyfzn = [AtomFeat(:feat, [0]) for i in 1:4] - ag = AtomGraph(SimpleWeightedGraph{Int32, Float32}(adjmat), ["C", "C", "C"], ones(Float32, 4,3), dummyfzn) + dummyfzn = [AtomFeat(:feat, [0]) for i = 1:4] + ag = AtomGraph( + SimpleWeightedGraph{Int32,Float32}(adjmat), + ["C", "C", "C"], + ones(Float32, 4, 3), + dummyfzn, + ) # create a conv layer, initialize weights with ones - l = AGNConv(4=>4, initW=ones) + l = AGNConv(4 => 4, initW = ones) # test output looks as expected output_fea = l(ag).features - @test output_fea[:,1]==output_fea[:,3] - @test isapprox(output_fea[:,1].+output_fea[:,3], .-output_fea[:,2]) + @test output_fea[:, 1] == output_fea[:, 3] + @test isapprox(output_fea[:, 1] .+ output_fea[:, 3], .-output_fea[:, 2]) # and now for a loop adjmat = Float32.([0 1 1; 1 0 1; 1 1 0]) - ag = AtomGraph(SimpleWeightedGraph{Int32, Float32}(adjmat), ["C", "C", "C"], ones(Float32, 4,3), dummyfzn) - l = AGNConv(4=>4, initW=ones) + ag = AtomGraph( + SimpleWeightedGraph{Int32,Float32}(adjmat), + ["C", "C", "C"], + ones(Float32, 4, 3), + dummyfzn, + ) + l = AGNConv(4 => 4, initW = ones) @test all(isapprox.(l(ag).features, zero(Float32))) end @@ -29,9 +39,14 @@ end @testset "pooling" begin # keep our little line graph, but give it more features adjmat = Float32.([0 1 0; 1 0 1; 0 1 0]) - feat = ones(Float32, 50,3) - dummyfzn = [AtomFeat(:feat, [0]) for i in 1:50] - ag = AtomGraph(SimpleWeightedGraph{Int32, Float32}(adjmat), ["C", "C", "C"], feat, dummyfzn) + feat = ones(Float32, 50, 3) + dummyfzn = [AtomFeat(:feat, [0]) for i = 1:50] + ag = AtomGraph( + SimpleWeightedGraph{Int32,Float32}(adjmat), + ["C", "C", "C"], + feat, + dummyfzn, + ) # make some pooling layers meanpool = AGNPool("mean", 50, 10, 0.1) @@ -42,16 +57,16 @@ end @test maxpool(ag) == ones(Float32, 10, 1) # one level up - feat[:,2] .= 0.0 + feat[:, 2] .= 0.0 add_features!(ag, feat, dummyfzn) # they're still the same here because maxpool takes max along features but averages across nodes right now - @test all(isapprox.(meanpool(ag), 2/3)) - @test all(isapprox.(maxpool(ag), 2/3)) + @test all(isapprox.(meanpool(ag), 2 / 3)) + @test all(isapprox.(maxpool(ag), 2 / 3)) # and now make them different - feat[1:5:50,2] .= 4.0 + feat[1:5:50, 2] .= 4.0 add_features!(ag, feat, dummyfzn) - @test all(isapprox.(meanpool(ag), 14/15)) + @test all(isapprox.(meanpool(ag), 14 / 15)) @test all(maxpool(ag) .== 2.0) # make sure it complains when it should diff --git a/test/model_tests.jl b/test/model_tests.jl index fa2ab2e..15e6e08 100755 --- a/test/model_tests.jl +++ b/test/model_tests.jl @@ -8,25 +8,43 @@ using SimpleWeightedGraphs in_fea_len = 40 conv_fea_len = 20 pool_fea_len = 10 - model = build_CGCNN(in_fea_len, atom_conv_feature_length=conv_fea_len, pooled_feature_length=pool_fea_len, num_hidden_layers=2, initW=ones) + model = build_CGCNN( + in_fea_len, + atom_conv_feature_length = conv_fea_len, + pooled_feature_length = pool_fea_len, + num_hidden_layers = 2, + initW = ones, 
+ ) # check that everything is the size it should be - @test length(model)==5 - @test size(model[1].convweight) == size(model[1].selfweight) == (conv_fea_len, in_fea_len) - @test size(model[2].convweight) == size(model[2].selfweight) == (conv_fea_len, conv_fea_len) + @test length(model) == 5 + @test size(model[1].convweight) == + size(model[1].selfweight) == + (conv_fea_len, in_fea_len) + @test size(model[2].convweight) == + size(model[2].selfweight) == + (conv_fea_len, conv_fea_len) @test size(model[4].W) == (pool_fea_len, pool_fea_len) @test size(model[5].W) == (1, pool_fea_len) # check that it evaluates to the right things, basically - dummyfzn = [AtomFeat(:feat, [0]) for i in 1:40] - input = AtomGraph(SimpleWeightedGraph{Int32,Float32}([0 1; 1 0]), ["H", "O"], ones(Float32, 40, 2), dummyfzn) + dummyfzn = [AtomFeat(:feat, [0]) for i = 1:40] + input = AtomGraph( + SimpleWeightedGraph{Int32,Float32}([0 1; 1 0]), + ["H", "O"], + ones(Float32, 40, 2), + dummyfzn, + ) output1 = model[1](input) #println(output1.features) - int_mat = model[2].convweight * output1.features * output1.lapl + model[2].selfweight * output1.features + hcat([model[2].bias for i in 1:size(output1.features, 2)]...) + int_mat = + model[2].convweight * output1.features * output1.lapl + + model[2].selfweight * output1.features + + hcat([model[2].bias for i = 1:size(output1.features, 2)]...) #println(int_mat) #println(model[2].σ.(int_mat)) #println(AtomicGraphNets.reg_norm(model[2].σ.(int_mat))) # TODO: figure out why the reg_norm step gives different results in REPL than in testing - @test all(isapprox.(model[1:2](input).features, zeros(Float32, 20,2), atol=2e-3)) - @test isapprox(model(input)[1], 6.9, atol=3e-2) + @test all(isapprox.(model[1:2](input).features, zeros(Float32, 20, 2), atol = 2e-3)) + @test isapprox(model(input)[1], 6.9, atol = 3e-2) end diff --git a/test/runtests.jl b/test/runtests.jl index 9d75fd4..573fe3a 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,4 +7,4 @@ end @testset "model_tests" begin include("model_tests.jl") -end \ No newline at end of file +end