diff --git a/.buildkite/README.md b/.buildkite/README.md
new file mode 100644
index 0000000000000..b3f74f2b23137
--- /dev/null
+++ b/.buildkite/README.md
@@ -0,0 +1,7 @@
+# Buildkite
+
+This directory contains the Buildkite configuration files for Base Julia CI.
+
+The rootfs image definitions are located in the [rootfs-images](https://github.com/JuliaCI/rootfs-images) repository.
+
+The documentation for the Base Julia CI setup is located in the [base-buildkite-docs](https://github.com/JuliaCI/base-buildkite-docs) repository.
diff --git a/.buildkite/coverage-linux64/0_webui.yml b/.buildkite/coverage-linux64/0_webui.yml
deleted file mode 100644
index b0df8ec41de68..0000000000000
--- a/.buildkite/coverage-linux64/0_webui.yml
+++ /dev/null
@@ -1,16 +0,0 @@
-# This file represents what is put into the webUI.
-# It is purely for keeping track of the changes we make to the webUI configuration; modifying this file has no effect.
-# We use the `cryptic` buildkite plugin to provide secrets management, which requires some integration into the WebUI's steps.
-agents:
-  queue: "julia"
-  sandbox.jl: "true"
-
-steps:
-  - label: ":unlock: Unlock secrets, launch pipelines"
-    plugins:
-      - staticfloat/cryptic:
-          # Our signed pipelines must have a `signature` or `signature_file` parameter that
-          # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
-          signed_pipelines:
-            - pipeline: .buildkite/coverage-linux64/pipeline.yml
-              signature: U2FsdGVkX190BiaBGCTT6GNBDe5cHX3ZAP0IXbBfOWo7ys+1IuD5ymf4ImJbRIeE8NQac+iqud+bqCPHjii6DKmqzX+Jz6vax1NY3AxROhYlO5spUClvKr1wdngUCQON
diff --git a/.buildkite/coverage-linux64/README.md b/.buildkite/coverage-linux64/README.md
deleted file mode 100644
index 8cfb31d698225..0000000000000
--- a/.buildkite/coverage-linux64/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# Coverage pipeline
-
-We run coverage on a separate pipeline, that uses a scheduled build rather than webhooks.
-The pipeline is here: https://buildkite.com/julialang/julia-coverage-linux64
-
-It contains [its own webui steps](0_webuiy.ml) (listed here in this repository for clarity) and its own [pipeline.yml](pipeline.yml).
diff --git a/.buildkite/coverage-linux64/pipeline.yml b/.buildkite/coverage-linux64/pipeline.yml
deleted file mode 100644
index 218212d24ac2a..0000000000000
--- a/.buildkite/coverage-linux64/pipeline.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-# These steps should only run on `sandbox.jl` machines, not `docker`-isolated ones
-# since we need nestable sandboxing.  The rootfs images being used here are built from
-# the `.buildkite/rootfs_images/llvm-passes.jl` file.
-agents:
-  queue: "julia"
-  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
-  sandbox.jl: "true"
-  os: "linux"
-
-steps:
-  - label: ":unlock: :coverage: Run coverage test"
-    plugins:
-      - staticfloat/cryptic:
-          variables:
-            - CODECOV_TOKEN="U2FsdGVkX19l0fhdBabbuiEdysyEabkJLRHfxm7CNRkuGbnwPV365sxxC7Czs/CVcws0N1oB4pVwALRRMe36oA=="
-            - COVERALLS_TOKEN="U2FsdGVkX19zopI0hMNzzi2UUOvNVFD8Y0iisFnO/ryVxU7Tit8ZEaeN+gxodRx4CosUUh192F1+q3dTMWRIvw=="
-      - JuliaCI/julia#v1:
-          version: 1.6
-      - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v1/llvm-passes.tar.gz
-          rootfs_treehash: "f3ed53f159e8f13edfba8b20ebdb8ece73c1b8a8"
-          uid: 1000
-          gid: 1000
-    commands: |
-      echo "--- Build Julia from source"
-      make -j 6
-
-      echo "--- Print Julia version info"
-      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
-      ./julia -e '@info "" Sys.CPU_THREADS'
-      # this is necessary to make sure that the LibGit2 tests passes
-      git config --global init.defaultBranch master
-
-      echo "--- Run some Julia tests in serial"
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/codegen.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/contextual.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/inference.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/inline.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/interpreter_exec.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/irpasses.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/ssair.jl
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_serial.jl test/compiler/validation.jl
-
-      echo "--- Run Julia tests in parallel with code coverage enabled"
-      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/coverage-linux64/run_tests_parallel.jl
-
-      echo "--- Process and upload coverage information"
-      ./julia .buildkite/coverage-linux64/upload_coverage.jl
-    timeout_in_minutes: 600 # 600 minutes = 10 hours
-
-# We must accept the signed job id secret in order to propagate secrets
-env:
-  BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET: ${BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET?}
diff --git a/.buildkite/coverage-linux64/run_tests_parallel.jl b/.buildkite/coverage-linux64/run_tests_parallel.jl
deleted file mode 100644
index 5386828aa4e14..0000000000000
--- a/.buildkite/coverage-linux64/run_tests_parallel.jl
+++ /dev/null
@@ -1,36 +0,0 @@
-# When running this file, make sure to set the `--code-coverage=all` command-line flag.
-
-# Important note: even if one or more tests fail, we will still exit with status code 0.
-
-# The reason for this is that we always want to upload code coverage, even if some of the
-# tests fail. Therefore, even if the `coverage-linux64` pipeline passes, you should not
-# assume that all of the tests passed. If you want to know if all of the tests are passing,
-# please look at the status of the `tester_linux64` pipeline.
-
-const include_tests = String[]
-
-const exclude_tests = String[]
-
-empty!(Base.DEPOT_PATH)
-push!(Base.DEPOT_PATH, mktempdir(; cleanup = true))
-
-module ChooseTests
-    include(joinpath(dirname(dirname(@__DIR__)), "test", "choosetests.jl"))
-end
-
-const tests = ChooseTests.choosetests() |>
-              first |>
-              x -> setdiff(x, exclude_tests) |>
-              x -> vcat(x, include_tests) |>
-              unique |>
-              sort
-
-const ncores = min(Sys.CPU_THREADS, Threads.nthreads())
-
-@info "" ncores Sys.CPU_THREADS Threads.nthreads()
-
-try
-    Base.runtests(tests; ncores)
-catch ex
-    @error "" exception=(ex, catch_backtrace())
-end
diff --git a/.buildkite/coverage-linux64/run_tests_serial.jl b/.buildkite/coverage-linux64/run_tests_serial.jl
deleted file mode 100644
index 6d7380a55e402..0000000000000
--- a/.buildkite/coverage-linux64/run_tests_serial.jl
+++ /dev/null
@@ -1,16 +0,0 @@
-using Test
-
-const repository_root = dirname(dirname(@__DIR__))
-
-for filename in ARGS
-    path = joinpath(repository_root, filename)
-    @info "Starting $(filename)"
-    try
-        @testset "$(filename)" begin
-            include(path)
-        end
-    catch ex
-        @error "" exception=(ex, catch_backtrace())
-    end
-    @info "Finished $(filename)"
-end
diff --git a/.buildkite/cryptic_repo_keys/.gitignore b/.buildkite/cryptic_repo_keys/.gitignore
new file mode 100644
index 0000000000000..8d18931dbcf7c
--- /dev/null
+++ b/.buildkite/cryptic_repo_keys/.gitignore
@@ -0,0 +1,5 @@
+# Ignore the unencrypted repo_key
+repo_key
+
+# Ignore any agent keys (public or private) we have stored
+agent_key*
diff --git a/.buildkite/llvm_passes.yml b/.buildkite/llvm_passes.yml
deleted file mode 100644
index 7453af82c1e73..0000000000000
--- a/.buildkite/llvm_passes.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-# These steps should only run on `sandbox.jl` machines, not `docker`-isolated ones
-# since we need nestable sandboxing.  The rootfs images being used here are built from
-# the `.buildkite/rootfs_images/llvm-passes.jl` file.
-agents:
-  queue: "julia"
-  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
-  sandbox.jl: "true"
-  os: "linux"
-
-steps:
-  - label: "analyzegc"
-    key: "analyzegc"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: 1.6
-      - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v1/llvm-passes.tar.gz
-          rootfs_treehash: "f3ed53f159e8f13edfba8b20ebdb8ece73c1b8a8"
-          workspaces:
-            # Include `/cache/repos` so that our `git` version introspection works.
-            - "/cache/repos:/cache/repos"
-    commands: |
-      echo "--- Install in-tree LLVM dependencies"
-      make -j$${JULIA_NUM_CORES} -C deps install-llvm install-clang install-llvm-tools install-libuv install-utf8proc install-unwind
-      echo "+++ run clangsa/analyzegc"
-      make -j$${JULIA_NUM_CORES} -C test/clangsa
-      make -j$${JULIA_NUM_CORES} -C src analyzegc
-    timeout_in_minutes: 60
-    notify:
-      - github_commit_status:
-          context: "analyzegc"
-
-  - label: "llvmpasses"
-    key: "llvmpasses"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: 1.6
-      - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v1/llvm-passes.tar.gz
-          rootfs_treehash: "f3ed53f159e8f13edfba8b20ebdb8ece73c1b8a8"
-          uid: 1000
-          gid: 1000
-          workspaces:
-            - "/cache/repos:/cache/repos"
-    commands: |
-      echo "--- make release"
-      make -j$${JULIA_NUM_CORES} release JULIA_PRECOMPILE=0
-      echo "--- make src/install-analysis-deps"
-      make -j$${JULIA_NUM_CORES} -C src install-analysis-deps
-      echo "+++ make test/llvmpasses"
-      make -j$${JULIA_NUM_CORES} -C test/llvmpasses
-    timeout_in_minutes: 60
-    notify:
-      - github_commit_status:
-          context: "llvmpasses"
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
deleted file mode 100644
index 99473055f2eeb..0000000000000
--- a/.buildkite/pipeline.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-# This file launches all the build jobs that _don't_ require secrets access.
-# These jobs can pass their output off to jobs that do require secrets access,
-# but those privileged steps require signing before they can be run.
-#
-# Yes, this is creating another layer of indirection; the flow now looks like:
-#
-#   [webui] -> pipeline.yml -> llvm_passes.yml
-#
-# when we could theoretically just have the `webui` launch `llvm_passes.yml`,
-# however this raises the bar for contributors to add new (unsigned) steps to
-# our CI configuration, so I'd rather live with an extra layer of indirection
-# and only need to touch the webui configuration when we need to alter
-# something about the privileged steps.
-steps:
-  - label: ":buildkite: Launch unsigned pipelines"
-    commands: |
-      # We launch whitespace first, because we want that pipeline to finish as quickly as possible.
-      # The remaining unsigned pipelines are launched in alphabetical order.
-      buildkite-agent pipeline upload .buildkite/whitespace.yml
-      buildkite-agent pipeline upload .buildkite/embedding.yml
-      buildkite-agent pipeline upload .buildkite/llvm_passes.yml
-      buildkite-agent pipeline upload .buildkite/sanitizers.yml
-    agents:
-      queue: julia
diff --git a/.buildkite/pipelines/experimental/0_webui.yml b/.buildkite/pipelines/experimental/0_webui.yml
new file mode 100644
index 0000000000000..eee9743d39f6a
--- /dev/null
+++ b/.buildkite/pipelines/experimental/0_webui.yml
@@ -0,0 +1,22 @@
+# This file represents what is put into the webUI.
+# It is purely for keeping track of the changes we make to the webUI configuration; modifying this file has no effect.
+# We use the `cryptic` buildkite plugin to provide secrets management, which requires some integration into the WebUI's steps.
+agents:
+  queue: "julia"
+  sandbox.jl: "true"
+steps:
+  - label: ":unlock: Unlock secrets, launch pipelines"
+    plugins:
+      - staticfloat/cryptic:
+          # Our list of pipelines that should be launched (but don't require a signature).
+          # These pipelines can be modified by any contributor and CI will still run.
+          # Build secrets will not be available in these pipelines (or their children),
+          # but some of our signed pipelines can wait upon the completion of these unsigned
+          # pipelines.
+          unsigned_pipelines:
+            - .buildkite/pipelines/experimental/launch_unsigned_builders.yml
+          # Our signed pipelines must have a `signature` or `signature_file` parameter that
+          # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
+          # signed_pipelines:
+          #   - pipeline: .buildkite/pipelines/experimental/misc/foo_bar_baz.yml
+          #     signature_file: .buildkite/pipelines/experimental/misc/foo_bar_baz.yml.signature
diff --git a/.buildkite/pipelines/experimental/README.md b/.buildkite/pipelines/experimental/README.md
new file mode 100644
index 0000000000000..f92aac7a1af02
--- /dev/null
+++ b/.buildkite/pipelines/experimental/README.md
@@ -0,0 +1,7 @@
+## Experimental pipeline (`master` branch only)
+
+This is the [`julia-master->experimental`](https://buildkite.com/julialang/julia-master-experimental) pipeline.
+
+We use this pipeline for builders that are not yet stable enough to go into the main pipeline.
+
+These builders are triggered by GitHub webhook events, such as pushes and pull requests.
diff --git a/.buildkite/pipelines/experimental/launch_unsigned_builders.yml b/.buildkite/pipelines/experimental/launch_unsigned_builders.yml
new file mode 100644
index 0000000000000..04d82a6e39a5e
--- /dev/null
+++ b/.buildkite/pipelines/experimental/launch_unsigned_builders.yml
@@ -0,0 +1,6 @@
+steps:  # Launcher step for the experimental pipeline's unsigned builders.
+  - label: ":buildkite: Launch unsigned pipelines"
+    commands: |  # Placeholder: nothing is uploaded here; `true` is a no-op that simply succeeds.
+      true
+    agents:
+      queue: julia
diff --git a/.buildkite/0_webui.yml b/.buildkite/pipelines/main/0_webui.yml
similarity index 80%
rename from .buildkite/0_webui.yml
rename to .buildkite/pipelines/main/0_webui.yml
index 440d2d443ce7e..af68158f9a51f 100644
--- a/.buildkite/0_webui.yml
+++ b/.buildkite/pipelines/main/0_webui.yml
@@ -4,7 +4,6 @@
 agents:
   queue: "julia"
   sandbox.jl: "true"
-
 steps:
   - label: ":unlock: Unlock secrets, launch pipelines"
     plugins:
@@ -15,10 +14,10 @@ steps:
           # but some of our signed pipelines can wait upon the completion of these unsigned
           # pipelines.
           unsigned_pipelines:
-            - .buildkite/pipeline.yml
+            - .buildkite/pipelines/main/launch_unsigned_builders.yml
 
           # Our signed pipelines must have a `signature` or `signature_file` parameter that
           # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
           signed_pipelines:
-            - pipeline: .buildkite/signed_pipeline_test.yml
-              signature: "U2FsdGVkX18ZrMlORSIi0TvW99eZ3JyUEhntMyXjrOSJ9jAtUohgHc8+dMnWUc6qWjYt8k63wfPrth33SGWDiF814Bm1W5Zk3+R6HgVu6UCrQEI5sLm+SJPbrvET+Jkn"
+            - pipeline: .buildkite/pipelines/main/misc/signed_pipeline_test.yml
+              signature_file: .buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature
diff --git a/.buildkite/pipelines/main/README.md b/.buildkite/pipelines/main/README.md
new file mode 100644
index 0000000000000..6b9d67bd7cc3a
--- /dev/null
+++ b/.buildkite/pipelines/main/README.md
@@ -0,0 +1,15 @@
+## Main pipeline
+
+This is the main pipeline. It contains most of the builders. These builders are triggered by GitHub webhook events, such as pushes and pull requests.
+
+We have a different main pipeline for each permanent branch.
+
+For example:
+
+| Permanent Branch | Pipeline                                                                         |
+| ---------------- | -------------------------------------------------------------------------------- |
+| `master`         | [`julia-master`](https://buildkite.com/julialang/julia-master)                   |
+| `release-1.6`    | [`julia-release-1.6`](https://buildkite.com/julialang/julia-release-1-dot-6)     |
+| `release-1.7`    | [`julia-release-1.7`](https://buildkite.com/julialang/julia-release-1-dot-7)     |
+
+(This is not a complete list.)
diff --git a/.buildkite/pipelines/main/launch_unsigned_builders.yml b/.buildkite/pipelines/main/launch_unsigned_builders.yml
new file mode 100644
index 0000000000000..2b6794ed13bd1
--- /dev/null
+++ b/.buildkite/pipelines/main/launch_unsigned_builders.yml
@@ -0,0 +1,33 @@
+# This file launches all the build jobs that _don't_ require secrets access.
+# These jobs can pass their output off to jobs that do require secrets access,
+# but those privileged steps require signing before they can be run.
+#
+# Yes, this is creating another layer of indirection; the flow now looks like:
+#
+#   [webui] -> launch_unsigned_builders.yml -> misc/whitespace.yml
+#
+# when we could theoretically just have the `webui` launch `misc/whitespace.yml`;
+# however, that would raise the bar for contributors to add new (unsigned) steps to
+# our CI configuration, so I'd rather live with an extra layer of indirection
+# and only need to touch the webui configuration when we need to alter
+# something about the privileged steps.
+
+steps:
+  - label: ":buildkite: Launch unsigned jobs"
+    commands: |
+      # Launch the miscellaneous jobs in alphabetical order.
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/doctest.yml
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/embedding.yml
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/llvmpasses.yml
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/sanitizers.yml
+
+      # Launch all of the platform jobs.
+      bash .buildkite/utilities/platforms/platforms.sh .buildkite/pipelines/main/platforms/package_linux.arches .buildkite/pipelines/main/platforms/package_linux.yml
+      bash .buildkite/utilities/platforms/platforms.sh .buildkite/pipelines/main/platforms/tester_linux.arches .buildkite/pipelines/main/platforms/tester_linux.yml
+
+      # Launch the `whitespace` job last. Uploading it last actually causes it to start
+      # first. We want this job to start first because we want it to finish as quickly
+      # as possible.
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/whitespace.yml
+    agents:
+      queue: julia
diff --git a/.buildkite/pipelines/main/misc/doctest.yml b/.buildkite/pipelines/main/misc/doctest.yml
new file mode 100644
index 0000000000000..b83139ddc1f9b
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/doctest.yml
@@ -0,0 +1,34 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+steps:
+  - label: "doctest"  # Builds Julia and its docs from source, then runs the doctests.
+    key: "doctest"
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "2a058481b567f0e91b9aa3ce4ad4f09e6419355a"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- Build Julia from source"
+      make --output-sync -j 6
+
+      echo "--- Print Julia version info"
+      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+
+      echo "--- Build Julia docs"
+      make docs
+
+      echo "--- Run Julia doctests"
+      JULIA_NUM_THREADS=1 make -C doc doctest=true
+    timeout_in_minutes: 45
diff --git a/.buildkite/embedding.yml b/.buildkite/pipelines/main/misc/embedding.yml
similarity index 50%
rename from .buildkite/embedding.yml
rename to .buildkite/pipelines/main/misc/embedding.yml
index 5cf6b985573f5..bdd2a0a6065f5 100644
--- a/.buildkite/embedding.yml
+++ b/.buildkite/pipelines/main/misc/embedding.yml
@@ -1,21 +1,19 @@
-# These steps should only run on `sandbox.jl` machines, not `docker`-isolated ones
-# since we need nestable sandboxing.  The rootfs images being used here are built from
-# the `.buildkite/rootfs_images/llvm-passes.jl` file.
 agents:
   queue: "julia"
   # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
   sandbox.jl: "true"
   os: "linux"
-
 steps:
   - label: "embedding"
     key: "embedding"
     plugins:
       - JuliaCI/julia#v1:
-          version: 1.6
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
       - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v1/llvm-passes.tar.gz
-          rootfs_treehash: "f3ed53f159e8f13edfba8b20ebdb8ece73c1b8a8"
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "2a058481b567f0e91b9aa3ce4ad4f09e6419355a"
           uid: 1000
           gid: 1000
           workspaces:
@@ -23,15 +21,11 @@ steps:
             - "/cache/repos:/cache/repos"
     commands: |
       prefix="/tmp/prefix"
-      echo "+++ Build julia, deploy to $${prefix}"
-      make -j$${JULIA_NUM_CORES} JULIA_PRECOMPILE=0 prefix=$${prefix} install
+      echo "+++ Build julia, deploy to $${prefix:?}"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} JULIA_PRECOMPILE=0 prefix=$${prefix:?} install
 
       embedding_output="/tmp/embedding-test"
-      echo "+++ Run embedding tests, deploy to $${embedding_output}"
-      mkdir -p "$${embedding_output}"
-      make -j$${JULIA_NUM_CORES} -C test/embedding JULIA="$${prefix}/bin/julia" BIN="$${embedding_output}"
-
+      echo "+++ Run embedding tests, deploy to $${embedding_output:?}"
+      mkdir -p "$${embedding_output:?}"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} -C test/embedding JULIA="$${prefix:?}/bin/julia" BIN="$${embedding_output:?}"
     timeout_in_minutes: 60
-    notify:
-      - github_commit_status:
-          context: "embedding"
diff --git a/.buildkite/pipelines/main/misc/llvmpasses.yml b/.buildkite/pipelines/main/misc/llvmpasses.yml
new file mode 100644
index 0000000000000..1f6d89014eb0d
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/llvmpasses.yml
@@ -0,0 +1,48 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+steps:
+  - label: "analyzegc"
+    key: "analyzegc"
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/llvm_passes.x86_64.tar.gz
+          rootfs_treehash: "c7a289a8cc544b234b1e2d7cbcce3e6815359ecd"
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- Install in-tree LLVM dependencies"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} -C deps install-llvm install-clang install-llvm-tools install-libuv install-utf8proc install-unwind
+      echo "+++ run clangsa/analyzegc"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} -C test/clangsa
+      make --output-sync -j$${JULIA_CPU_THREADS:?} -C src analyzegc
+    timeout_in_minutes: 60
+  - label: "llvmpasses"
+    key: "llvmpasses"
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "2a058481b567f0e91b9aa3ce4ad4f09e6419355a"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- make release"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} release JULIA_PRECOMPILE=0
+      echo "--- make src/install-analysis-deps"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} -C src install-analysis-deps
+      echo "+++ make test/llvmpasses"
+      make --output-sync -j$${JULIA_CPU_THREADS:?} -C test/llvmpasses
+    timeout_in_minutes: 60
diff --git a/.buildkite/pipelines/main/misc/sanitizers.yml b/.buildkite/pipelines/main/misc/sanitizers.yml
new file mode 100644
index 0000000000000..a0c40dda7e12f
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/sanitizers.yml
@@ -0,0 +1,47 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+steps:
+  - label: "asan"
+    key: "asan"
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/llvm_passes.x86_64.tar.gz
+          rootfs_treehash: "c7a289a8cc544b234b1e2d7cbcce3e6815359ecd"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            - "/cache/repos:/cache/repos"
+    timeout_in_minutes: 120
+    if: | # We only run the `asan` job on Julia 1.8 and later.
+      (pipeline.slug != "julia-release-1-dot-6") && (pipeline.slug != "julia-release-1-dot-7")
+    soft_fail: true # TODO: delete this line (and thus disallow failures) once JuliaLang/julia#42540 is fixed
+    commands: |
+      echo "--- Build julia-debug with ASAN"
+      contrib/asan/build.sh ./tmp/test-asan -j$${JULIA_CPU_THREADS:?} debug
+  - label: "tsan"
+    key: "tsan"
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/llvm_passes.x86_64.tar.gz
+          rootfs_treehash: "c7a289a8cc544b234b1e2d7cbcce3e6815359ecd"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            - "/cache/repos:/cache/repos"
+    timeout_in_minutes: 120
+    if: | # We only run the `tsan` job on Julia 1.8 and later.
+      (pipeline.slug != "julia-release-1-dot-6") && (pipeline.slug != "julia-release-1-dot-7")
+    commands: |
+      echo "--- Build julia-debug runtime with TSAN"
+      contrib/tsan/build.sh ./tmp/test-tsan -j$${JULIA_CPU_THREADS:?} julia-src-debug
diff --git a/.buildkite/signed_pipeline_test.yml b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml
similarity index 63%
rename from .buildkite/signed_pipeline_test.yml
rename to .buildkite/pipelines/main/misc/signed_pipeline_test.yml
index fb13ac15a8d65..1d59253d43bce 100644
--- a/.buildkite/signed_pipeline_test.yml
+++ b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml
@@ -5,6 +5,10 @@ agents:
 ## pipeline that showcases decryption of environment variable
 steps:
   - label: ":lock: :rocket: Signed pipeline test"
+    # We must accept the signed job id secret in order to propagate secrets
+    env:
+      BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET: ${BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET?}
+    depends_on:
     plugins:
       - staticfloat/cryptic#v1:
           variables:
@@ -12,6 +16,3 @@ steps:
     commands: |
       echo "SECRET_KEY: $${SECRET_KEY}"
 
-# We must accept the signed job id secret in order to propagate secrets
-env:
-  BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET: ${BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET?}
diff --git a/.buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature
new file mode 100644
index 0000000000000..b0844748c486f
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature
@@ -0,0 +1 @@
+Salted__���J0�Q?���rۀ�g�~�d��ۛŧ�ө��o���Ujʀ���p�)�$�U$����y@gZM}{�m��,۠�K��e�r�
\ No newline at end of file
diff --git a/.buildkite/whitespace.yml b/.buildkite/pipelines/main/misc/whitespace.yml
similarity index 56%
rename from .buildkite/whitespace.yml
rename to .buildkite/pipelines/main/misc/whitespace.yml
index bd625e855c5b5..673221e54cfdc 100644
--- a/.buildkite/whitespace.yml
+++ b/.buildkite/pipelines/main/misc/whitespace.yml
@@ -1,26 +1,24 @@
-# These steps should only run on `sandbox.jl` machines, not `docker`-isolated ones
-# since we need nestable sandboxing.  The rootfs images being used here are built from
-# the `.buildkite/rootfs_images/llvm-passes.jl` file.
 agents:
   queue: "julia"
   # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
   sandbox.jl: "true"
   os: "linux"
-
 steps:
   - label: "whitespace"
     key: "whitespace"
     plugins:
       - JuliaCI/julia#v1:
-          version: 1.6
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
       - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v1/llvm-passes.tar.gz
-          rootfs_treehash: "f3ed53f159e8f13edfba8b20ebdb8ece73c1b8a8"
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "2a058481b567f0e91b9aa3ce4ad4f09e6419355a"
           workspaces:
             - "/cache/repos:/cache/repos"
-    commands: |
-      make -j$${JULIA_NUM_CORES} check-whitespace
     timeout_in_minutes: 10
     notify:
       - github_commit_status:
           context: "whitespace"
+    commands: |
+      make --output-sync -j$${JULIA_CPU_THREADS:?} check-whitespace
diff --git a/.buildkite/pipelines/main/platforms/package_linux.arches b/.buildkite/pipelines/main/platforms/package_linux.arches
new file mode 100644
index 0000000000000..dec82f530a832
--- /dev/null
+++ b/.buildkite/pipelines/main/platforms/package_linux.arches
@@ -0,0 +1,7 @@
+# PLATFORM    LABEL       GROUP    ALLOW_FAIL    ARCH        ARCH_ROOTFS    MAKE_FLAGS     TIMEOUT_BK    TIMEOUT_RR     RETRIES    IS_RR    IS_ST    IS_MT    ROOTFS_TAG    ROOTFS_HASH
+linux         32          .        .             32          i686           .              .             .              .          .        .        .        v4.8          b6dffc772ab4c2cd7fd4f83459308f6f0d89b957
+linux         64          .        .             64          x86_64         .              .             .              .          .        .        .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+# linux       aarch64     .        .             aarch64     aarch64        .              .             .              .          .        .        .        ....          ........................................
+# linux       armv7l      .        .             armv7l      armv7l         .              .             .              .          .        .        .        ....          ........................................
+# linux       ppc64le     .        .             ppc64le     powerpc64le    .              .             .              .          .        .        .        ....          ........................................
+musl          64          .        .             64          x86_64         .              .             .              .          .        .        .        v4.8          d13a47c87c38005bd5d97132e51789cafd852f90
diff --git a/.buildkite/pipelines/main/platforms/package_linux.yml b/.buildkite/pipelines/main/platforms/package_linux.yml
new file mode 100644
index 0000000000000..ce778c393b064
--- /dev/null
+++ b/.buildkite/pipelines/main/platforms/package_linux.yml
@@ -0,0 +1,54 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+steps:
+  - label: "package_${PLATFORM?}${LABEL?}"
+    key: package_${PLATFORM?}${LABEL?}
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/${ROOTFS_TAG?}/package_${PLATFORM?}.${ARCH_ROOTFS?}.tar.gz
+          rootfs_treehash: "${ROOTFS_HASH?}"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    timeout_in_minutes: ${TIMEOUT_BK?}
+    commands: |
+      echo "--- Print the full and short commit hashes"
+      SHORT_COMMIT_LENGTH=10
+      SHORT_COMMIT=`echo $${BUILDKITE_COMMIT:?} | cut -c1-$${SHORT_COMMIT_LENGTH:?}`
+      ARTIFACT_FILE_EXTENSION="tar.gz"
+      ARTIFACT_FILENAME="julia-$${SHORT_COMMIT:?}-${PLATFORM?}${ARCH?}.$${ARTIFACT_FILE_EXTENSION:?}"
+      JULIA_BINARYDIST_FILENAME=`make print-JULIA_BINARYDIST_FILENAME ${MAKE_FLAGS?} | cut -c27- | tr -s ' '`
+      JULIA_BINARYDIST="$${JULIA_BINARYDIST_FILENAME:?}.$${ARTIFACT_FILE_EXTENSION:?}"
+
+      echo "The full commit is:                     $${BUILDKITE_COMMIT:?}"
+      echo "The short commit is:                    $${SHORT_COMMIT:?}"
+      echo "The artifact filename will be:    $${ARTIFACT_FILENAME:?}"
+
+      echo "--- Build Julia from source"
+      rm -rf $${ARTIFACT_FILENAME:?}
+      make --output-sync -j 8 ${MAKE_FLAGS?}
+
+      echo "--- Check that the working directory is clean"
+      if [ -z "$(git status --short)" ]; then echo "INFO: The working directory is clean."; else echo "ERROR: The working directory is dirty."; echo "Output of git status:"; git status; exit 1; fi
+
+      echo "--- Print Julia version info"
+      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+
+      echo "--- Create build artifacts"
+      make --output-sync -j 8 binary-dist ${MAKE_FLAGS?}
+      ls -l $${JULIA_BINARYDIST:?}
+      if [[ "$${JULIA_BINARYDIST:?}" != "$${ARTIFACT_FILENAME:?}" ]]; then
+          mv $${JULIA_BINARYDIST:?} $${ARTIFACT_FILENAME:?}
+      fi
+      ls -l $${ARTIFACT_FILENAME:?}
+      echo "--- Upload build artifacts"
+      buildkite-agent artifact upload $${ARTIFACT_FILENAME:?}
diff --git a/.buildkite/pipelines/main/platforms/tester_linux.arches b/.buildkite/pipelines/main/platforms/tester_linux.arches
new file mode 100644
index 0000000000000..d1304563cb815
--- /dev/null
+++ b/.buildkite/pipelines/main/platforms/tester_linux.arches
@@ -0,0 +1,25 @@
+# PLATFORM    LABEL         GROUP    ALLOW_FAIL    ARCH        ARCH_ROOTFS    MAKE_FLAGS     TIMEOUT_BK    TIMEOUT_RR     RETRIES    IS_RR    IS_ST    IS_MT    ROOTFS_TAG    ROOTFS_HASH
+linux         32_g1         g1       .             32          i686           .              .             .              .          .        .        .        v4.8          b6dffc772ab4c2cd7fd4f83459308f6f0d89b957
+linux         32_g2         g2       .             32          i686           .              .             .              3          .        .        .        v4.8          b6dffc772ab4c2cd7fd4f83459308f6f0d89b957
+
+linux         64_g1_mt      g1       .             64          x86_64         .              .             .              .          .        .        yes      v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64_g2_mt      g2       .             64          x86_64         .              .             .              3          .        .        yes      v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+
+linux         64_g1_st      g1       .             64          x86_64         .              .             .              .          .        yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64_g2_st      g2       .             64          x86_64         .              .             .              3          .        yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+
+linux         64_g1_rrst    g1       .             64          x86_64         .              300           240            .          yes      yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64_g2_rrst    g2       .             64          x86_64         .              180           120            3          yes      yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64_g3_st      g3       .             64          x86_64         .              .             .              3          .        yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+
+# linux       aarch64_g1    g1       true          aarch64     aarch64        .              .             .              .          .        .        .        ----          ----------------------------------------
+# linux       aarch64_g2    g2       true          aarch64     aarch64        .              .             .              .          .        .        .        ----          ----------------------------------------
+
+# linux       armv7l_g1     g1       true          armv7l      armv7l         .              .             .              .          .        .        .        ----          ----------------------------------------
+# linux       armv7l_g2     g2       true          armv7l      armv7l         .              .             .              .          .        .        .        ----          ----------------------------------------
+
+# linux       ppc64le_g1    g1       true          ppc64le     powerpc64le    .              .             .              .          .        .        .        ----          ----------------------------------------
+# linux       ppc64le_g2    g2       true          ppc64le     powerpc64le    .              .             .              .          .        .        .        ----          ----------------------------------------
+
+musl          64_g1         g1       true          64          x86_64         .              .             .              .          .        .        .        v4.8          d13a47c87c38005bd5d97132e51789cafd852f90
+musl          64_g2         g2       true          64          x86_64         .              .             .              .          .        .        .        v4.8          d13a47c87c38005bd5d97132e51789cafd852f90
diff --git a/.buildkite/pipelines/main/platforms/tester_linux.yml b/.buildkite/pipelines/main/platforms/tester_linux.yml
new file mode 100644
index 0000000000000..150a00e098239
--- /dev/null
+++ b/.buildkite/pipelines/main/platforms/tester_linux.yml
@@ -0,0 +1,120 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+steps:
+  - label: "tester_${PLATFORM?}${LABEL?}"
+    key: tester_${PLATFORM?}${LABEL?}
+    depends_on: package_${PLATFORM?}${ARCH?}
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/${ROOTFS_TAG?}/package_${PLATFORM?}.${ARCH_ROOTFS?}.tar.gz
+          # rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/${ROOTFS_TAG?}/tester_${PLATFORM?}.${ARCH_ROOTFS?}.tar.gz
+          rootfs_treehash: "${ROOTFS_HASH?}"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    env:
+      JULIA_SHELL: "/bin/bash"
+    timeout_in_minutes: ${TIMEOUT_BK?}
+    retry:
+      automatic:
+        - exit_status: "*"
+          limit: ${RETRIES?}
+    soft_fail: ${ALLOW_FAIL?}
+    commands: |
+      echo "--- Print the full and short commit hashes"
+      SHORT_COMMIT_LENGTH=10
+      SHORT_COMMIT=`echo $${BUILDKITE_COMMIT:?} | cut -c1-$${SHORT_COMMIT_LENGTH:?}`
+      JULIA_DIR="julia-$${SHORT_COMMIT:?}"
+      JULIA_BINARY="$${JULIA_DIR:?}/bin/julia"
+      ARTIFACT_FILE_EXTENSION="tar.gz"
+      ARTIFACT_FILENAME="julia-$${SHORT_COMMIT:?}-${PLATFORM?}${ARCH?}.$${ARTIFACT_FILE_EXTENSION:?}"
+      echo "The full commit is:                     $${BUILDKITE_COMMIT:?}"
+      echo "The short commit is:                    $${SHORT_COMMIT:?}"
+      echo "The artifact filename will be:    $${ARTIFACT_FILENAME:?}"
+      echo "The Julia directory name will be: $${JULIA_DIR:?}"
+      echo "The Julia binary will be:         $${JULIA_BINARY:?}"
+
+      echo "--- Download build artifacts"
+      rm -rf $${ARTIFACT_FILENAME:?}
+      buildkite-agent artifact download $${ARTIFACT_FILENAME:?} .
+
+      echo "--- Extract build artifacts"
+      rm -rf $${JULIA_DIR:?}/
+      tar xzf $${ARTIFACT_FILENAME:?} $${JULIA_DIR:?}/
+
+      echo "--- Print Julia version info"
+      $${JULIA_BINARY:?} -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+      echo "JULIA_CPU_THREADS is: $${JULIA_CPU_THREADS:?}"
+      $${JULIA_BINARY:?} -e '@info "" Sys.CPU_THREADS'
+
+      echo "--- Set some environment variables"
+      export OPENBLAS_NUM_THREADS=8
+      unset JULIA_DEPOT_PATH
+      unset JULIA_PKG_SERVER
+
+      # Point TMPDIR at a directory under the current working directory so that temp files
+      # and temp directories are created in a location that is backed by real storage.
+      export TMPDIR="$(pwd)/tmp"
+      mkdir -p "$${TMPDIR:?}"
+
+      export NETWORK_RELATED_TESTS="Artifacts Downloads download LazyArtifacts LibGit2/online Pkg"
+
+      if [[   "${GROUP?}" == "all" ]]; then
+        export TESTS="all LibGit2/online --ci"
+      elif [[   "${GROUP?}" == "all_except_pkg" ]]; then
+        export TESTS="all LibGit2/online --ci --skip Pkg"
+      elif [[   "${GROUP?}" == "g1" ]]; then
+        # Group 1: ALL tests EXCEPT the network-related tests.
+        export TESTS="all --ci --skip $${NETWORK_RELATED_TESTS:?}"
+      elif [[ "${GROUP?}" == "g2" ]]; then
+        # Group 2: ONLY the network-related tests.
+        # In Group 2, we use whatever the default setting is with regards to the Pkg server.
+        export TESTS="$${NETWORK_RELATED_TESTS:?} --ci"
+      elif [[ "${GROUP?}" == "g3" ]]; then
+        # Group 3: only Pkg.
+        # In Group 3, we explicitly opt-out of the Pkg server.
+        # The purpose of group 3 is to test the non-Pkg-server codepaths of Pkg.
+        export TESTS="Pkg --ci"
+        export JULIA_PKG_SERVER=""
+      else
+        echo "Invalid value for GROUP: ${GROUP?}"
+        exit 1
+      fi
+
+      export JULIA_TEST_RR_TIMEOUT="${TIMEOUT_RR?}"
+
+      if [[ "${IS_RR?}" == "yes" ]]; then
+        export JULIA_CMD_FOR_TESTS="$${JULIA_BINARY:?} .buildkite/utilities/rr/rr_capture.jl $${JULIA_BINARY:?}"
+        export NCORES_FOR_TESTS="parse(Int, ENV[\"JULIA_RRCAPTURE_NUM_CORES\"])"
+      else
+        export JULIA_CMD_FOR_TESTS="$${JULIA_BINARY:?}"
+        export NCORES_FOR_TESTS="Sys.CPU_THREADS"
+      fi
+
+      if [[ "${IS_ST?}"   == "yes" ]]; then
+        export JULIA_NUM_THREADS=1
+      fi
+
+      if [[ "${IS_MT?}" == "yes" ]]; then
+        export JULIA_NUM_THREADS=16
+      fi
+
+      echo "--- Print the test group, list of test sets, and other useful environment variables"
+      echo "JULIA_CMD_FOR_TESTS is:    $${JULIA_CMD_FOR_TESTS:?}"
+      echo "JULIA_NUM_THREADS is:      $${JULIA_NUM_THREADS}" # Note: this environment variable might not be set
+      echo "NCORES_FOR_TESTS is:       $${NCORES_FOR_TESTS:?}"
+      echo "OPENBLAS_NUM_THREADS is:   $${OPENBLAS_NUM_THREADS:?}"
+      echo "GROUP is:                  ${GROUP?}"
+      echo "TESTS is:                  $${TESTS:?}"
+
+      echo "--- Run the Julia test suite"
+      $${JULIA_CMD_FOR_TESTS:?} -e "Base.runtests(\"$${TESTS:?}\"; ncores = $${NCORES_FOR_TESTS:?})"
diff --git a/.buildkite/pipelines/scheduled/0_webui.yml b/.buildkite/pipelines/scheduled/0_webui.yml
new file mode 100644
index 0000000000000..78031b49d9f17
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/0_webui.yml
@@ -0,0 +1,27 @@
+# This file represents what is put into the webUI.
+# It is purely for keeping track of the changes we make to the webUI configuration; modifying this file has no effect.
+# We use the `cryptic` buildkite plugin to provide secrets management, which requires some integration into the WebUI's steps.
+agents:
+  queue: "julia"
+  sandbox.jl: "true"
+steps:
+  - label: ":unlock: Unlock secrets, launch pipelines"
+    plugins:
+      - staticfloat/cryptic:
+          # Our list of pipelines that should be launched (but don't require a signature)
+          # These pipelines can be modified by any contributor and CI will still run.
+          # Build secrets will not be available in these pipelines (or their children)
+          # but some of our signed pipelines can wait upon the completion of these unsigned
+          # pipelines.
+          unsigned_pipelines:
+            - .buildkite/pipelines/scheduled/launch_unsigned_jobs.yml
+
+          # Our signed pipelines must have a `signature` or `signature_file` parameter that
+          # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
+          signed_pipelines:
+            - pipeline: .buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
+              signature_file: .buildkite/pipelines/scheduled/coverage/coverage_linux64.yml.signature
+              inputs:
+                - .buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
+                - .buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
+                - .buildkite/pipelines/scheduled/coverage/upload_coverage.jl
diff --git a/.buildkite/pipelines/scheduled/README.md b/.buildkite/pipelines/scheduled/README.md
new file mode 100644
index 0000000000000..ca071dceb2a44
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/README.md
@@ -0,0 +1,5 @@
+## Scheduled pipeline (`master` branch only)
+
+This is the [`julia-master->scheduled`](https://buildkite.com/julialang/julia-master-scheduled) pipeline.
+
+We use this pipeline for scheduled builds. The builders in this pipeline run on a schedule once per day. They are not triggered by GitHub webhooks.
diff --git a/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml b/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
new file mode 100644
index 0000000000000..1ff88577e2be4
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
@@ -0,0 +1,44 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+steps:
+  - label: ":unlock: :coverage: Run coverage test"
+    # We must accept the signed job id secret in order to propagate secrets
+    env:
+      BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET: ${BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET?}
+    depends_on:
+    plugins:
+      - staticfloat/cryptic:
+          variables:
+            - CODECOV_TOKEN="U2FsdGVkX19l0fhdBabbuiEdysyEabkJLRHfxm7CNRkuGbnwPV365sxxC7Czs/CVcws0N1oB4pVwALRRMe36oA=="
+            - COVERALLS_TOKEN="U2FsdGVkX19zopI0hMNzzi2UUOvNVFD8Y0iisFnO/ryVxU7Tit8ZEaeN+gxodRx4CosUUh192F1+q3dTMWRIvw=="
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: '1.6'
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v4.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "2a058481b567f0e91b9aa3ce4ad4f09e6419355a"
+          uid: 1000
+          gid: 1000
+    timeout_in_minutes: 360 # 360 minutes = 6 hours
+    commands: |
+      echo "--- Build Julia from source"
+      make --output-sync -j 6
+
+      echo "--- Print Julia version info"
+      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+      ./julia -e '@info "" Sys.CPU_THREADS'
+      # this is necessary to make sure that the LibGit2 tests passes
+      git config --global init.defaultBranch master
+
+      echo "--- Run Julia tests in parallel with code coverage enabled"
+      export JULIA_NUM_THREADS=1
+      export JULIA_WORKER_TIMEOUT=1200 # 1200 seconds = 20 minutes
+      ./julia -e 'import Distributed; @info "" Distributed.worker_timeout()'
+      ./julia .buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
+
+      echo "--- Process and upload coverage information"
+      ./julia .buildkite/pipelines/scheduled/coverage/upload_coverage.jl
diff --git a/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml.signature b/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml.signature
new file mode 100644
index 0000000000000..4ecec8e8bb72c
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml.signature
@@ -0,0 +1 @@
+Salted__�I�y�֌��>y�NckB�v�n�+�Hvrލ��Ƽ�r����/�uY�����u����I�iiE��(��v�L��!��?��v�
\ No newline at end of file
diff --git a/.buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl b/.buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
new file mode 100644
index 0000000000000..b6eed225f652d
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
@@ -0,0 +1,29 @@
+# Important note: even if one or more tests fail, we will still exit with status code 0.
+#
+# The reason for this is that we always want to upload code coverage, even if some of the
+# tests fail. Therefore, even if the `coverage_linux64` builder passes, you should not
+# assume that all of the tests passed. If you want to know if all of the tests are passing,
+# please look at the status of the `tester_*` builders (e.g. `tester_linux64`).
+
+const ncores = Sys.CPU_THREADS
+@info "" Sys.CPU_THREADS
+@info "" ncores
+
+script_native_yes = """
+    Base.runtests(["cmdlineargs"]; ncores = $(ncores))
+"""
+script_native_no = """
+    Base.runtests(["all", "--skip", "cmdlineargs"]; ncores = $(ncores))
+"""
+
+base_cmd       = `$(Base.julia_cmd()) --code-coverage=all`
+cmd_native_yes = `$(base_cmd) --sysimage-native-code=yes -e $(script_native_yes)`
+cmd_native_no  = `$(base_cmd) --sysimage-native-code=no  -e $(script_native_no)`
+
+@info "Running command" cmd_native_yes
+p1 = run(pipeline(cmd_native_yes; stdin, stdout, stderr); wait = false)
+wait(p1)
+
+@info "Running command" cmd_native_no
+p2 = run(pipeline(cmd_native_no; stdin, stdout, stderr); wait = false)
+wait(p2)
diff --git a/.buildkite/coverage-linux64/upload_coverage.jl b/.buildkite/pipelines/scheduled/coverage/upload_coverage.jl
similarity index 94%
rename from .buildkite/coverage-linux64/upload_coverage.jl
rename to .buildkite/pipelines/scheduled/coverage/upload_coverage.jl
index 8d14cded56140..d995e97fc17fb 100644
--- a/.buildkite/coverage-linux64/upload_coverage.jl
+++ b/.buildkite/pipelines/scheduled/coverage/upload_coverage.jl
@@ -91,7 +91,7 @@ function print_coverage_summary(
         cov_pct = floor(Int, cov_lines/tot_lines * 100)
     end
     @info "$(description): $(cov_pct)% ($(cov_lines)/$(tot_lines))"
-    return nothing
+    return (; cov_pct)
 end
 
 function buildkite_env(name::String)
@@ -198,7 +198,7 @@ end;
 sort!(fcs; by = fc -> fc.filename);
 
 print_coverage_summary.(fcs);
-print_coverage_summary(fcs, "Total")
+const total_cov_pct = print_coverage_summary(fcs, "Total").cov_pct
 
 let
     git_info = coveralls_buildkite_query_git_info()
@@ -217,3 +217,12 @@ let
     # In order to upload to Codecov, you need to have the `CODECOV_TOKEN` environment variable defined.
     Coverage.Codecov.submit_generic(fcs, kwargs)
 end
+
+if total_cov_pct < 50
+    msg = string(
+        "The total coverage is less than 50%. This should never happen, ",
+        "so it means that something has probably gone wrong with the code coverage job.",
+    )
+    @error msg total_cov_pct
+    throw(ErrorException(msg))
+end
diff --git a/.buildkite/pipelines/scheduled/launch_unsigned_jobs.yml b/.buildkite/pipelines/scheduled/launch_unsigned_jobs.yml
new file mode 100644
index 0000000000000..300c8d8466aea
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/launch_unsigned_jobs.yml
@@ -0,0 +1,8 @@
+steps:
+  - label: ":buildkite: Launch unsigned jobs"
+    commands: |
+      # Launch all of the `USE_BINARYBUILDER=0` jobs.
+      bash .buildkite/utilities/platforms/platforms.sh .buildkite/pipelines/scheduled/no_bb/no_bb_package_linux.arches .buildkite/pipelines/main/platforms/package_linux.yml
+      bash .buildkite/utilities/platforms/platforms.sh .buildkite/pipelines/scheduled/no_bb/no_bb_tester_linux.arches .buildkite/pipelines/main/platforms/tester_linux.yml
+    agents:
+      queue: julia
diff --git a/.buildkite/pipelines/scheduled/no_bb/no_bb_package_linux.arches b/.buildkite/pipelines/scheduled/no_bb/no_bb_package_linux.arches
new file mode 100644
index 0000000000000..dff2aab4591e2
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/no_bb/no_bb_package_linux.arches
@@ -0,0 +1,2 @@
+# PLATFORM    LABEL       GROUP    ALLOW_FAIL    ARCH        ARCH_ROOTFS    MAKE_FLAGS             TIMEOUT_BK    TIMEOUT_RR     RETRIES    IS_RR    IS_ST    IS_MT    ROOTFS_TAG    ROOTFS_HASH
+linux         64src       .        .             64src       x86_64         USE_BINARYBUILDER=0    180           .              .          .        .        .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
diff --git a/.buildkite/pipelines/scheduled/no_bb/no_bb_tester_linux.arches b/.buildkite/pipelines/scheduled/no_bb/no_bb_tester_linux.arches
new file mode 100644
index 0000000000000..0b1fbdf63b796
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/no_bb/no_bb_tester_linux.arches
@@ -0,0 +1,10 @@
+# PLATFORM    LABEL            GROUP    ALLOW_FAIL    ARCH     ARCH_ROOTFS    MAKE_FLAGS     TIMEOUT_BK    TIMEOUT_RR     RETRIES    IS_RR    IS_ST    IS_MT    ROOTFS_TAG    ROOTFS_HASH
+linux         64src_g1_mt      g1       .             64src    x86_64         .              .             .              .          .        .        yes      v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64src_g2_mt      g2       .             64src    x86_64         .              .             .              3          .        .        yes      v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+
+linux         64src_g1_st      g1       .             64src    x86_64         .              .             .              .          .        yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64src_g2_st      g2       .             64src    x86_64         .              .             .              3          .        yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+
+linux         64src_g1_rrst    g1       .             64src    x86_64         .              300           240            .          yes      yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64src_g2_rrst    g2       .             64src    x86_64         .              180           120            3          yes      yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
+linux         64src_g3_st      g3       .             64src    x86_64         .              .             .              3          .        yes      .        v4.8          2a058481b567f0e91b9aa3ce4ad4f09e6419355a
diff --git a/.buildkite/rootfs_images/Manifest.toml b/.buildkite/rootfs_images/Manifest.toml
deleted file mode 100644
index d24e9a4ee166e..0000000000000
--- a/.buildkite/rootfs_images/Manifest.toml
+++ /dev/null
@@ -1,134 +0,0 @@
-# This file is machine-generated - editing it directly is not advised
-
-[[ArgTools]]
-uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
-
-[[Artifacts]]
-uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
-
-[[Base64]]
-uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-
-[[Dates]]
-deps = ["Printf"]
-uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
-
-[[Downloads]]
-deps = ["ArgTools", "LibCURL", "NetworkOptions"]
-uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
-
-[[InteractiveUtils]]
-deps = ["Markdown"]
-uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-
-[[JLLWrappers]]
-deps = ["Preferences"]
-git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
-uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
-version = "1.3.0"
-
-[[LibCURL]]
-deps = ["LibCURL_jll", "MozillaCACerts_jll"]
-uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
-
-[[LibCURL_jll]]
-deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
-uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-
-[[LibGit2]]
-deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
-uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
-
-[[LibSSH2_jll]]
-deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
-uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
-
-[[Libdl]]
-uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-
-[[Logging]]
-uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
-
-[[Markdown]]
-deps = ["Base64"]
-uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
-
-[[MbedTLS_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-
-[[MozillaCACerts_jll]]
-uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
-
-[[NetworkOptions]]
-uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
-
-[[Pkg]]
-deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
-uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-
-[[Preferences]]
-deps = ["TOML"]
-git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
-uuid = "21216c6a-2e73-6563-6e65-726566657250"
-version = "1.2.2"
-
-[[Printf]]
-deps = ["Unicode"]
-uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-
-[[REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
-uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
-
-[[Random]]
-deps = ["Serialization"]
-uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[[SHA]]
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
-[[Scratch]]
-deps = ["Dates"]
-git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda"
-uuid = "6c6a2e73-6563-6170-7368-637461726353"
-version = "1.1.0"
-
-[[Serialization]]
-uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-
-[[Sockets]]
-uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
-
-[[TOML]]
-deps = ["Dates"]
-uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
-
-[[Tar]]
-deps = ["ArgTools", "SHA"]
-uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
-
-[[UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[Unicode]]
-uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
-
-[[Zlib_jll]]
-deps = ["Libdl"]
-uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-
-[[ghr_jll]]
-deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "f5c8cb306d4fe2d1fff90443a088fc5ba536c134"
-uuid = "07c12ed4-43bc-5495-8a2a-d5838ef8d533"
-version = "0.13.0+1"
-
-[[nghttp2_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-
-[[p7zip_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/.buildkite/rootfs_images/Project.toml b/.buildkite/rootfs_images/Project.toml
deleted file mode 100644
index 1dbde5ed9df66..0000000000000
--- a/.buildkite/rootfs_images/Project.toml
+++ /dev/null
@@ -1,5 +0,0 @@
-[deps]
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-Scratch = "6c6a2e73-6563-6170-7368-637461726353"
-ghr_jll = "07c12ed4-43bc-5495-8a2a-d5838ef8d533"
diff --git a/.buildkite/rootfs_images/README.md b/.buildkite/rootfs_images/README.md
deleted file mode 100644
index 1d3962c2bee3e..0000000000000
--- a/.buildkite/rootfs_images/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-## Rootfs images
-
-Our CI setup makes use of rootfs images that contain our build tools.
-These rootfs images are built using the fairly simple scripts held within this directory.
-Most images are based on Debian, making use of `debootstrap` to provide a quick and easy rootfs with packages installed through an initial `apt` invocation.
diff --git a/.buildkite/rootfs_images/llvm-passes.jl b/.buildkite/rootfs_images/llvm-passes.jl
deleted file mode 100755
index bc6d57eb2f87e..0000000000000
--- a/.buildkite/rootfs_images/llvm-passes.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env julia
-
-## This rootfs includes enough of a host toolchain to build the LLVM passes.
-## Eventually, this image will probably be replaced with the actual builder image,
-## as that will have the necessary toolchains as well, but that image is not built yet.
-
-include("rootfs_utils.jl")
-
-const tag_name, force_overwrite = get_arguments(ARGS, @__FILE__)
-
-# Build debian-based image with the following extra packages:
-packages = [
-    "bash",
-    "build-essential",
-    "cmake",
-    "curl",
-    "gfortran",
-    "git",
-    "less",
-    "libatomic1",
-    "m4",
-    "perl",
-    "pkg-config",
-    "python",
-    "python3",
-    "wget",
-]
-tarball_path = debootstrap("llvm-passes"; packages)
-
-# Upload it
-upload_rootfs_image(tarball_path; tag_name, force_overwrite)
diff --git a/.buildkite/rootfs_images/rootfs_utils.jl b/.buildkite/rootfs_images/rootfs_utils.jl
deleted file mode 100644
index 82baeec7a0933..0000000000000
--- a/.buildkite/rootfs_images/rootfs_utils.jl
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env julia
-
-# This is an example invocation of `debootstrap` to generate a Debian/Ubuntu-based rootfs
-using Scratch, Pkg, Pkg.Artifacts, ghr_jll, SHA, Dates
-
-# Utility functions
-getuid() = ccall(:getuid, Cint, ())
-getgid() = ccall(:getgid, Cint, ())
-
-function debootstrap(name::String; release::String="buster", variant::String="minbase",
-                     packages::Vector{String}=String[], force::Bool=false)
-    if Sys.which("debootstrap") === nothing
-        error("Must install `debootstrap`!")
-    end
-
-    tarball_path = joinpath(@get_scratch!("rootfs-images"), "$(name).tar.gz")
-    if !force && isfile(tarball_path)
-        @error("Refusing to overwrite tarball without `force` set", tarball_path)
-        error()
-    end
-
-    artifact_hash = create_artifact() do rootfs
-        packages_string = join(push!(packages, "locales"), ",")
-        @info("Running debootstrap", release, variant, packages)
-        run(`sudo debootstrap --variant=$(variant) --include=$(packages_string) $(release) "$(rootfs)"`)
-
-        # Remove special `dev` files
-        @info("Cleaning up `/dev`")
-        for f in readdir(joinpath(rootfs, "dev"); join=true)
-            # Keep the symlinks around (such as `/dev/fd`), as they're useful
-            if !islink(f)
-                run(`sudo rm -rf "$(f)"`)
-            end
-        end
-
-        # take ownership of the entire rootfs
-        @info("Chown'ing rootfs")
-        run(`sudo chown $(getuid()):$(getgid()) -R "$(rootfs)"`)
-
-        # Write out rootfs-info to contain a minimally-identifying string
-        open(joinpath(rootfs, "etc", "rootfs-info"), write=true) do io
-            write(io, """
-            rootfs_type=debootstrap
-            release=$(release)
-            variant=$(variant)
-            packages=$(packages_string)
-            build_date=$(Dates.now())
-            """)
-        end
-
-        # Write out a reasonable default resolv.conf
-        open(joinpath(rootfs, "etc", "resolv.conf"), write=true) do io
-            write(io, """
-            nameserver 1.1.1.1
-            nameserver 8.8.8.8
-            """)
-        end
-
-        # Remove `_apt` user so that `apt` doesn't try to `setgroups()`
-        @info("Removing `_apt` user")
-        open(joinpath(rootfs, "etc", "passwd"), write=true, read=true) do io
-            filtered_lines = filter(l -> !startswith(l, "_apt:"), readlines(io))
-            truncate(io, 0)
-            seek(io, 0)
-            for l in filtered_lines
-                println(io, l)
-            end
-        end
-
-        # Set up the one true locale
-        @info("Setting up UTF-8 locale")
-        open(joinpath(rootfs, "etc", "locale.gen"), "a") do io
-            println(io, "en_US.UTF-8 UTF-8")
-        end
-        run(`sudo chroot --userspec=$(getuid()):$(getgid()) $(rootfs) locale-gen`)
-    end
-
-    # Archive it into a `.tar.gz` file
-    @info("Archiving", tarball_path, artifact_hash)
-    archive_artifact(artifact_hash, tarball_path)
-
-    return tarball_path
-end
-
-function upload_rootfs_image(tarball_path::String;
-                             github_repo::String="JuliaCI/rootfs-images",
-                             tag_name::String,
-                             force_overwrite::Bool)
-    # Upload it to `github_repo`
-    tarball_url = "https://github.com/$(github_repo)/releases/download/$(tag_name)/$(basename(tarball_path))"
-    @info("Uploading to $(github_repo)@$(tag_name)", tarball_url)
-    cmd = ghr_jll.ghr()
-    append!(cmd.exec, ["-u", dirname(github_repo), "-r", basename(github_repo)])
-    force_overwrite && push!(cmd.exec, "-replace")
-    append!(cmd.exec, [tag_name, tarball_path])
-    run(cmd)
-    return tarball_url
-end
-
-# process command-line arguments
-
-function get_arguments(args::AbstractVector, script_file::AbstractString)
-    usage = "Usage: $(basename(script_file)) <tag_name> [--force-overwrite]"
-    length(args) < 1 && throw(ArgumentError(usage))
-    length(args) > 2 && throw(ArgumentError(usage))
-    tag_name        = get_tag_name(args; usage)
-    force_overwrite = get_force_overwrite(args; usage)
-    return (; tag_name, force_overwrite)
-end
-
-function get_tag_name(args::AbstractVector; usage::AbstractString)
-    tag_name = convert(String, strip(args[1]))::String
-    isempty(tag_name)          && throw(ArgumentError(usage))
-    startswith(tag_name, "--") && throw(ArgumentError(usage))
-    return tag_name
-end
-
-function get_force_overwrite(args::AbstractVector; usage::AbstractString)
-    force_overwrite_string = strip(get(args, 2, ""))
-    force_overwrite_string == ""                  && return false
-    force_overwrite_string == "--force-overwrite" && return true
-    throw(ArgumentError(usage))
-end
diff --git a/.buildkite/sanitizers.yml b/.buildkite/sanitizers.yml
deleted file mode 100644
index f29ed4e42a4a6..0000000000000
--- a/.buildkite/sanitizers.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-# These steps should only run on `sandbox.jl` machines, not `docker`-isolated ones
-# since we need nestable sandboxing.  The rootfs images being used here are built from
-# the `.buildkite/rootfs_images/llvm-passes.jl` file.
-agents:
-  queue: "julia"
-  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
-  sandbox.jl: "true"
-  os: "linux"
-
-steps:
-  - label: "asan"
-    key: asan
-    plugins:
-      - JuliaCI/julia#v1:
-          version: 1.6
-      - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v1/llvm-passes.tar.gz
-          rootfs_treehash: "f3ed53f159e8f13edfba8b20ebdb8ece73c1b8a8"
-          uid: 1000
-          gid: 1000
-          workspaces:
-            - "/cache/repos:/cache/repos"
-      # `contrib/check-asan.jl` needs a `julia` binary:
-      - JuliaCI/julia#v1:
-          version: 1.6
-    commands: |
-      echo "--- Build julia-debug with ASAN"
-      contrib/asan/build.sh ./tmp/test-asan -j$${JULIA_NUM_CORES} debug
-      echo "--- Test that ASAN is enabled"
-      contrib/asan/check.jl ./tmp/test-asan/asan/usr/bin/julia-debug
-    timeout_in_minutes: 120
-    notify:
-      - github_commit_status:
-          context: "asan"
diff --git a/.buildkite/secrets/.gitignore b/.buildkite/secrets/.gitignore
new file mode 100644
index 0000000000000..2a84f48682a04
--- /dev/null
+++ b/.buildkite/secrets/.gitignore
@@ -0,0 +1,11 @@
+# Ignore everything
+*
+
+# Don't ignore this `.gitignore` file
+!.gitignore
+
+# Don't ignore encrypted files
+!*.encrypted
+
+# Don't ignore public keys, that's fine to include
+!*.pub
diff --git a/.buildkite/secrets/ssh_docs_deploy.encrypted b/.buildkite/secrets/ssh_docs_deploy.encrypted
new file mode 100644
index 0000000000000..8b7e2ffe27940
Binary files /dev/null and b/.buildkite/secrets/ssh_docs_deploy.encrypted differ
diff --git a/.buildkite/secrets/ssh_docs_deploy.pub b/.buildkite/secrets/ssh_docs_deploy.pub
new file mode 100644
index 0000000000000..eaea073e5257b
--- /dev/null
+++ b/.buildkite/secrets/ssh_docs_deploy.pub
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2HupO7+uq6NE//ZCCYS1szwDVutH8ZVtF0wjJJGmfRLzDe6l2Kcx+CY+i3k2HoxrfNlmri3RdWrfDbEruPZlWaz9HH/Hi8S8ZkyQO932dbag7u5JGKw3Mb/3x05O2QaL+0HGItUyfFMFT9NBZ1na+AH/ZPWamXR98PLh39ic1HFw2x2hacYA/4w0ylxwrojRfCqcjK/YVJUCdQ/XwsmSjs+0+rIfdVwSLbJKeHj5JYLX6CmF4zf4WzJKKDXx1k6gwaSS6oY5XOVit2I1u80cxZRiQhrMfYPKywY5+Y6gqjrGABLYSq/JJRKsgdJxs39V8O1ZjXVsGxbR+1r3F9ISH buildkite-docs-deploy
diff --git a/.buildkite/utilities/platforms/platforms.sh b/.buildkite/utilities/platforms/platforms.sh
new file mode 100755
index 0000000000000..9a47c18e9855b
--- /dev/null
+++ b/.buildkite/utilities/platforms/platforms.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+ARCHES="$1"
+YAML="$2"
+
+if [[ ! -f "${ARCHES:?}" ]] ; then
+  echo "Arches file does not exist: ${ARCHES:?}"
+  exit 1
+fi
+
+if [[ ! -f "${YAML:?}" ]] ; then
+  echo "YAML file does not exist: ${YAML:?}"
+  exit 1
+fi
+
+cat "${ARCHES:?}" | tr -s ' ' | while read _line; do
+  # Remove whitespace from the beginning and end of each line
+  line=`echo $_line | tr -s ' '`
+
+  # Skip any line that begins with the `#` character
+  if [[ $line == \#* ]]; then
+    continue
+  fi
+
+  # Skip any empty line
+  if [[ $line == "" ]]; then
+    continue
+  fi
+
+  export PLATFORM=`echo $line    | cut -d ' ' -f 1  | tr -s ' '`
+  export LABEL=`echo $line       | cut -d ' ' -f 2  | tr -s ' '`
+  export GROUP=`echo $line       | cut -d ' ' -f 3  | tr -s ' '`
+
+  export ALLOW_FAIL=`echo $line  | cut -d ' ' -f 4  | tr -s ' '`
+  export ARCH=`echo $line        | cut -d ' ' -f 5  | tr -s ' '`
+  export ARCH_ROOTFS=`echo $line | cut -d ' ' -f 6  | tr -s ' '`
+
+  export MAKE_FLAGS=`echo $line  | cut -d ' ' -f 7  | tr -s ' '`
+  export TIMEOUT_BK=`echo $line  | cut -d ' ' -f 8  | tr -s ' '`
+  export TIMEOUT_RR=`echo $line  | cut -d ' ' -f 9  | tr -s ' '`
+  export RETRIES=`echo $line     | cut -d ' ' -f 10 | tr -s ' '`
+  export IS_RR=`echo $line       | cut -d ' ' -f 11 | tr -s ' '`
+  export IS_ST=`echo $line       | cut -d ' ' -f 12 | tr -s ' '`
+  export IS_MT=`echo $line       | cut -d ' ' -f 13 | tr -s ' '`
+  export ROOTFS_TAG=`echo $line  | cut -d ' ' -f 14 | tr -s ' '`
+  export ROOTFS_HASH=`echo $line | cut -d ' ' -f 15 | tr -s ' '`
+
+  if [[   "${IS_ST:?}"   == "yes" ]]; then
+    if [[ "${IS_MT:?}"   == "yes" ]]; then
+      echo "You cannot set both IS_ST and IS_MT to yes"
+      exit 1
+    fi
+  fi
+
+  if [[ "${ALLOW_FAIL:?}" == "." ]]; then
+    export ALLOW_FAIL="false"
+  fi
+
+  if [[ "${MAKE_FLAGS:?}" == "." ]]; then
+    export MAKE_FLAGS=""
+  fi
+
+  if [[ "${TIMEOUT_BK:?}" == "." ]]; then
+    export TIMEOUT_BK="90" # minutes
+  fi
+
+  if [[ "${TIMEOUT_RR:?}" == "." ]]; then
+    export TIMEOUT_RR="60" # minutes
+  fi
+
+  if [[ "${RETRIES:?}" == "." ]]; then
+    export RETRIES="0"
+  fi
+
+  buildkite-agent pipeline upload "${YAML:?}"
+done
diff --git a/.buildkite/utilities/rr/rr_capture.jl b/.buildkite/utilities/rr/rr_capture.jl
new file mode 100644
index 0000000000000..37bf3ca124271
--- /dev/null
+++ b/.buildkite/utilities/rr/rr_capture.jl
@@ -0,0 +1,200 @@
+import Dates
+import Pkg
+import Tar
+
+function get_bool_from_env(name::AbstractString, default_value::Bool)
+    value = get(ENV, name, "$(default_value)") |> strip |> lowercase
+    result = parse(Bool, value)::Bool
+    return result
+end
+
+const is_buildkite         = get_bool_from_env("BUILDKITE",                  false)
+const always_save_rr_trace = get_bool_from_env("JULIA_ALWAYS_SAVE_RR_TRACE", false)
+
+function get_from_env(name::AbstractString)
+    if is_buildkite
+        value = ENV[name]
+    else
+        value = get(ENV, name, "")
+    end
+    result = convert(String, strip(value))::String
+    return result
+end
+
+function my_exit(process::Base.Process)
+    wait(process)
+
+    @info(
+        "",
+        process.exitcode,
+        process.termsignal,
+    )
+
+    # Pass the exit code back up
+    if process.termsignal != 0
+        ccall(:raise, Cvoid, (Cint,), process.termsignal)
+
+        # If for some reason the signal did not cause an exit, we'll exit manually.
+        # We need to make sure that we exit with a non-zero exit code.
+        if process.exitcode != 0
+            exit(process.exitcode)
+        else
+            exit(1)
+        end
+    end
+    exit(process.exitcode)
+end
+
+if Base.VERSION < v"1.6"
+    throw(ErrorException("The `$(basename(@__FILE__))` script requires Julia 1.6 or greater"))
+end
+
+if length(ARGS) < 1
+    throw(ErrorException("Usage: julia $(basename(@__FILE__)) [command...]"))
+end
+
+@info "We will run the command under rr"
+
+const build_number                      = get_from_env("BUILDKITE_BUILD_NUMBER")
+const job_name                          = get_from_env("BUILDKITE_STEP_KEY")
+const commit_full                       = get_from_env("BUILDKITE_COMMIT")
+const commit_short                      = first(commit_full, 10)
+const JULIA_TEST_RR_TIMEOUT             = get(ENV,  "JULIA_TEST_RR_TIMEOUT", "120")
+const timeout_minutes                   = parse(Int, JULIA_TEST_RR_TIMEOUT)
+const JULIA_TEST_NUM_CORES              = get(ENV,  "JULIA_TEST_NUM_CORES", "8")
+const julia_test_num_cores_int          = parse(Int, JULIA_TEST_NUM_CORES)
+const num_cores = min(
+    8,
+    Sys.CPU_THREADS,
+    julia_test_num_cores_int + 1,
+)
+
+ENV["JULIA_RRCAPTURE_NUM_CORES"] = "$(num_cores)"
+
+@info(
+    "",
+    build_number,
+    job_name,
+    commit_full,
+    commit_short,
+    timeout_minutes,
+    num_cores,
+)
+
+const dumps_dir       = joinpath(pwd(), "dumps")
+const temp_parent_dir = joinpath(pwd(), "temp_for_rr")
+
+mkpath(dumps_dir)
+mkpath(temp_parent_dir)
+
+proc = nothing
+
+mktempdir(temp_parent_dir) do dir
+    Pkg.activate(dir)
+    Pkg.add("rr_jll")
+    Pkg.add("Zstd_jll")
+
+    rr_jll = Base.require(Base.PkgId(Base.UUID((0xe86bdf43_55f7_5ea2_9fd0_e7daa2c0f2b4)), "rr_jll"))
+    zstd_jll = Base.require(Base.PkgId(Base.UUID((0x3161d3a3_bdf6_5164_811a_617609db77b4)), "Zstd_jll"))
+    rr(func) = Base.invokelatest(rr_jll.rr, func; adjust_LIBPATH=false)
+    rr() do rr_path
+        capture_script_path = joinpath(dir, "capture_output.sh")
+        loader = Sys.WORD_SIZE == 64 ? "/lib64/ld-linux-x86-64.so.2" : "/lib/ld-linux.so.2"
+        open(capture_script_path, "w") do io
+            write(io, """
+            #!/bin/bash
+
+            $(rr_path) record --nested=detach "\$@" > >(tee -a $(dir)/stdout.log) 2> >(tee -a $(dir)/stderr.log >&2)
+            """)
+        end
+        chmod(capture_script_path, 0o755)
+
+        new_env = copy(ENV)
+        new_env["_RR_TRACE_DIR"] = joinpath(dir, "rr_traces")
+        new_env["RR_LOG"]="all:debug"
+        new_env["RR_LOG_BUFFER"]="100000"
+        new_env["JULIA_RR"] = capture_script_path
+        t_start = time()
+        global proc = run(setenv(`$(rr_path) record --num-cores=$(num_cores) $ARGS`, new_env), (stdin, stdout, stderr); wait=false)
+
+        # Start asynchronous timer that will kill `rr`
+        @async begin
+            sleep(timeout_minutes * 60)
+
+            # If we've exceeded the timeout and `rr` is still running, kill it.
+            if isopen(proc)
+                println(stderr, "\n\nProcess timed out (with a timeout of $(timeout_minutes) minutes). Signalling `rr` for force-cleanup!")
+                kill(proc, Base.SIGTERM)
+
+                # Give `rr` a chance to cleanup and upload.
+                # Note: this time period includes the time to upload the `rr` trace files
+                # as Buildkite artifacts, so make sure it is long enough to allow the
+                # uploads to finish.
+                cleanup_minutes = 30
+                sleep(cleanup_minutes * 60)
+
+                if isopen(proc)
+                    println(stderr, "\n\n`rr` failed to cleanup and upload within $(cleanup_minutes) minutes, killing and exiting immediately!")
+                    kill(proc, Base.SIGKILL)
+                    exit(1)
+                end
+            end
+        end
+
+        # Wait for `rr` to finish, either through naturally finishing its run, or `SIGTERM`.
+        wait(proc)
+        process_failed = !success(proc)
+
+        if process_failed || always_save_rr_trace || is_buildkite
+            println(stderr, "`rr` returned $(proc.exitcode), packing and uploading traces...")
+
+            if !isdir(joinpath(dir, "rr_traces"))
+                println(stderr, "No `rr_traces` directory!  Did `rr` itself fail?")
+                exit(1)
+            end
+
+            # Clean up non-traces
+            rm(joinpath(dir, "rr_traces", "latest-trace"))
+            rm(joinpath(dir, "rr_traces", "cpu_lock"))
+
+            # Create a directory for the pack files to go
+            pack_dir = joinpath(dir, "pack")
+            mkdir(pack_dir)
+
+            # Pack all traces
+            trace_dirs = [joinpath(dir, "rr_traces", f) for f in readdir(joinpath(dir, "rr_traces"))]
+            filter!(isdir, trace_dirs)
+            run(ignorestatus(`$(rr_path) pack --pack-dir=$pack_dir $(trace_dirs)`))
+
+            # Tar it up
+            mkpath(dumps_dir)
+            date_str = Dates.format(Dates.now(), Dates.dateformat"yyyy_mm_dd_HH_MM_SS")
+            dst_file_name = string(
+                "rr",
+                "--build_$(build_number)",
+                "--$(job_name)",
+                "--commit_$(commit_short)",
+                "--$(date_str)",
+                ".tar.zst",
+            )
+            dst_full_path = joinpath(dumps_dir, dst_file_name)
+            zstd_jll.zstdmt() do zstdp
+                tarproc = open(`$(zstdp) -o $(dst_full_path)`, "w")
+                Tar.create(dir, tarproc)
+                close(tarproc.in)
+            end
+
+            @info "The `rr` trace file has been saved to: $(dst_full_path)"
+            if is_buildkite
+                @info "Since this is a Buildkite run, we will upload the `rr` trace file."
+                cd(dumps_dir) do
+                    run(`buildkite-agent artifact upload $(dst_file_name)`)
+                end
+            end
+        end
+
+    end
+end
+
+@info "Finished running the command under rr"
+my_exit(proc)
diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 0000000000000..35cde5cd5e854
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,4 @@
+coverage:
+  status:
+    project: off
+    patch: off
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6cab5b68b11e9..d2da8839ddb39 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,6 @@
-CODEOWNERS @JuliaLang/github-actions 
-/.github/ @JuliaLang/github-actions 
+CODEOWNERS @JuliaLang/github-actions
+/.github/ @JuliaLang/github-actions
 /.buildkite/ @JuliaLang/github-actions
+
+/.github/workflows/rerun_failed.yml @DilumAluthge
+/.github/workflows/statuses.yml @DilumAluthge
diff --git a/.github/workflows/rerun_failed.yml b/.github/workflows/rerun_failed.yml
new file mode 100644
index 0000000000000..7d022920658a9
--- /dev/null
+++ b/.github/workflows/rerun_failed.yml
@@ -0,0 +1,92 @@
+# Please ping @DilumAluthge when making any changes to this file.
+
+# Here are some steps that we take in this workflow file for security reasons:
+# 1. We do not checkout any code.
+# 2. We only run actions that are defined in a repository in the `JuliaLang` GitHub organization.
+# 3. We do not give the `GITHUB_TOKEN` any permissions.
+# 4. We only give the Buildkite API token (`BUILDKITE_API_TOKEN_RETRY`) the minimum necessary
+#    set of permissions.
+
+# Important note to Buildkite maintainers:
+# In order to make this work, you need to tell Buildkite that it should NOT create a brand-new
+# build when someone closes and reopens a pull request. To do so:
+# 1. Go to the relevant pipeline (e.g. https://buildkite.com/julialang/julia-master).
+# 2. Click on the "Pipeline Settings" button.
+# 3. In the left sidebar, under "Pipeline Settings", click on "GitHub".
+# 4. In the "GitHub Settings", under "Build Pull Requests", make sure that the "Skip pull
+#    request builds for existing commits" checkbox is checked. This is the setting that tells
+#    Buildkite that it should NOT create a brand-new build when someone closes and reopens a
+#    pull request.
+# 5. At the bottom of the page, click the "Save GitHub Settings" button.
+
+name: Rerun Failed Buildkite Jobs
+
+# There are two ways that a user can rerun the failed Buildkite jobs:
+# 1. Close and reopen the pull request.
+#    In order to use this approach, the user must be in one of the following three categories:
+#        (i)   Author of the pull request
+#        (ii)  Commit permissions
+#        (iii) Triage permissions
+# 2. Post a comment on the pull request with exactly the following contents: /buildkite rerun failed
+#    In order to use this approach, the user must be in the following category:
+#        - A member of the JuliaLang GitHub organization (the membership must be publicized)
+
+on:
+  # When using the `pull_request_target` event, all PRs will get access to secret environment
+  # variables (such as the `BUILDKITE_API_TOKEN_RETRY` secret environment variable), even if
+  # the PR is from a fork. Therefore, for security reasons, we do not checkout any code in
+  # this workflow.
+  pull_request_target:
+    types: [ reopened ]
+  issue_comment:
+    types: [ created ]
+
+# We do not give the `GITHUB_TOKEN` any permissions.
+# Therefore, the `GITHUB_TOKEN` only has the same access as any member of the public.
+permissions:
+  contents: none
+
+jobs:
+  rerun-failed-buildkite-jobs:
+    name: Rerun Failed Buildkite Jobs
+    runs-on: ubuntu-latest
+    if: (github.repository == 'JuliaLang/julia') && ((github.event_name == 'pull_request_target' && github.event.action == 'reopened') || (github.event_name == 'issue_comment' && github.event.issue.pull_request && github.event.comment.body == '/buildkite rerun failed'))
+    steps:
+      # For security reasons, we do not checkout any code in this workflow.
+      - name: Check organization membership
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
+            if [[ "${{ github.event.action }}" == "reopened" ]]; then
+              echo "This is a \"reopened\" event, so we do not need to check the user's organization membership."
+              echo "GOOD_TO_PROCEED=yes" >> ${GITHUB_ENV:?}
+              echo "PULL_REQUEST_NUMBER=${{ github.event.number }}" >> ${GITHUB_ENV:?}
+            else
+              echo "ERROR: The github.event_name is \"pull_request_target\", but the github.event.action is not \"reopened\"."
+              exit 1
+            fi
+          else
+            curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}"
+            curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}/orgs"
+            export USER_IS_ORGANIZATION_MEMBER=`curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}/orgs" | jq '[.[] | .login] | index("JuliaLang") != null' | tr -s ' '`
+            if [[ "${USER_IS_ORGANIZATION_MEMBER:?}"   == "true" ]]; then
+              echo "The \"${{ github.event.sender.login }}\" user is a public member of the JuliaLang organization."
+              echo "GOOD_TO_PROCEED=yes" >> ${GITHUB_ENV:?}
+              echo "PULL_REQUEST_NUMBER=${{ github.event.issue.number }}" >> ${GITHUB_ENV:?}
+            else
+              echo "ERROR: the \"${{ github.event.sender.login }}\" user is NOT a public member of the JuliaLang organization."
+              echo "If you are a member, please make sure that you have publicized your membership."
+              exit 1
+            fi
+          fi
+      - run: |
+          echo "GOOD_TO_PROCEED: ${{ env.GOOD_TO_PROCEED }}"
+          echo "PULL_REQUEST_NUMBER: ${{ env.PULL_REQUEST_NUMBER }}"
+      - uses: JuliaLang/buildkite-rerun-failed@057f6f2d37aa29a57b7679fd2af0df1d9f9188b4
+        if: env.GOOD_TO_PROCEED == 'yes'
+        with:
+          buildkite_api_token: ${{ secrets.BUILDKITE_API_TOKEN_RETRY }}
+          buildkite_organization_slug: 'julialang'
+          buildkite_pipeline_slug: 'julia-master'
+          pr_number: ${{ env.PULL_REQUEST_NUMBER }}
diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
new file mode 100644
index 0000000000000..16c07f0f040cc
--- /dev/null
+++ b/.github/workflows/statuses.yml
@@ -0,0 +1,66 @@
+# Please ping @DilumAluthge when making any changes to this file.
+
+# This is just a short-term solution until we have migrated all of CI to Buildkite.
+#
+# 1. TODO: delete this file once we have migrated all of CI to Buildkite.
+
+# Here are some steps that we take in this workflow file for security reasons:
+# 1. We do not checkout any code.
+# 2. We do not run any external actions.
+# 3. We only give the `GITHUB_TOKEN` the minimum necessary set of permissions.
+
+name: Create Buildbot Statuses
+
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release-*'
+  # When using the `pull_request_target` event, all PRs will get a `GITHUB_TOKEN` that has
+  # write permissions, even if the PR is from a fork.
+  # Therefore, for security reasons, we do not checkout any code in this workflow.
+  pull_request_target:
+    types: [opened, synchronize]
+    branches:
+      - 'master'
+      - 'release-*'
+
+# These are the permissions for the `GITHUB_TOKEN`.
+# We should only give the token the minimum necessary set of permissions.
+permissions:
+  statuses: write
+
+jobs:
+  create-buildbot-statuses:
+    name: Create Buildbot Statuses
+    runs-on: ubuntu-latest
+    if: github.repository == 'JuliaLang/julia'
+    steps:
+      # For security reasons, we do not checkout any code in this workflow.
+      - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
+        if: github.event_name == 'pull_request_target'
+      - run: echo "SHA=${{ github.sha }}" >> $GITHUB_ENV
+        if: github.event_name != 'pull_request_target'
+      - run: echo "The SHA is ${{ env.SHA }}"
+
+      # As we incrementally migrate individual jobs from Buildbot to Buildkite, we should
+      # remove them from the `CONTEXT_LIST` array below.
+      - run: |
+          declare -a CONTEXT_LIST=(
+                "buildbot/tester_freebsd64"
+                "buildbot/tester_macos64"
+                "buildbot/tester_win32"
+                "buildbot/tester_win64"
+                )
+          for CONTEXT in "${CONTEXT_LIST[@]}"
+          do
+            curl \
+              -X POST \
+              -H "Authorization: token $GITHUB_TOKEN" \
+              -H "Accept: application/vnd.github.v3+json" \
+              -d "{\"context\": \"$CONTEXT\", \"state\": \"$STATE\"}" \
+            https://api.github.com/repos/JuliaLang/julia/statuses/${{ env.SHA }}
+          done
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          STATE: "pending"
diff --git a/.gitignore b/.gitignore
index 2c5ee63bc3ee9..ca14ec31874d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@
 *.so
 *.dylib
 *.dSYM
+*.h.gen
 *.jl.cov
 *.jl.*.cov
 *.jl.mem
@@ -32,3 +33,9 @@
 .DS_Store
 .idea/*
 .vscode/*
+
+# Buildkite: cryptic plugin
+# Ignore the unencrypted repo_key
+repo_key
+# Ignore any agent keys (public or private) we have stored
+agent_key*
diff --git a/.mailmap b/.mailmap
index bcb3c842a7605..204c08bb26b61 100644
--- a/.mailmap
+++ b/.mailmap
@@ -257,3 +257,6 @@ Curtis Vogt <curtis.vogt@gmail.com> <curtis.vogt@invenia.ca>
 
 Rafael Fourquet <fourquet.rafael@gmail.com> <fourquet.rafael@gmail.com>
 Rafael Fourquet <fourquet.rafael@gmail.com> <fourquet.rafael+github@gmail.com>
+
+Nathan Daly <NHDaly@gmail.com> <NHDaly@gmail.com>
+Nathan Daly <NHDaly@gmail.com> <nhDaly@gmail.com>
\ No newline at end of file
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000000000..a25d61b69d849
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,40 @@
+cff-version: 1.2.0
+message: "Cite this paper whenever you use Julia"
+authors:
+- family-names: "Bezanson"
+  given-names: "Jeff"
+- family-names: "Edelman"
+  given-names: "Alan"
+- family-names: "Karpinski"
+  given-names: "Stefan"
+- family-names: "Shah"
+  given-names: "Viral B."
+title: "Julia: A fresh approach to numerical computing"
+version: "v1"
+license: "MIT"
+doi: "10.1137/141000671"
+date-released: 2017-02-07
+url: "https://julialang.org"
+preferred-citation:
+  authors:
+    - family-names: "Bezanson"
+      given-names: "Jeff"
+    - family-names: "Edelman"
+      given-names: "Alan"
+    - family-names: "Karpinski"
+      given-names: "Stefan"
+    - family-names: "Shah"
+      given-names: "Viral B."
+  doi: "10.1137/141000671"
+  journal: "SIAM Review"
+  month: 9
+  start: 65
+  end: 98
+  pages: 33
+  title: "Julia: A fresh approach to numerical computing"
+  type: article
+  volume: 59
+  issue: 1
+  year: 2017
+  publisher:
+    name: "SIAM"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 83d9bdd48f3a7..2c924b2cdabb9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -158,7 +158,9 @@ Examples written within docstrings can be used as testcases known as "doctests"
     "DOCSTRING TEST"
     ```
 
-A doctest needs to match an interactive REPL including the `julia>` prompt. To run doctests you need to run `make -C doc doctest=true` from the root directory. It is recommended to add the header `# Examples` above the doctests.
+A doctest needs to match an interactive REPL including the `julia>` prompt. It is recommended to add the header `# Examples` above the doctests.
+
+To run doctests you need to run `make -C doc doctest=true` from the root directory. You can use `make -C doc doctest=true revise=true` if you are modifying the doctests and don't want to rebuild Julia after each change (see details below about the Revise.jl workflow).
 
 #### News-worthy changes
 
@@ -246,6 +248,51 @@ process before running the corresponding test. This can be useful as a shortcut
 on the command line (since tests aren't always designed to be run outside the
 runtest harness).
 
+### Contributing to patch releases
+
+The process of creating a patch release is roughly as follows:
+
+1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release
+   branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well.
+
+2. Add commits, nominally from `master` (hence "backports"), to that branch.
+   See below for more information on this process.
+
+3. Run the [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl) benchmark
+   suite and [PkgEval.jl](https://github.com/JuliaCI/PkgEval.jl) package ecosystem
+   exerciser against that branch. Nominally BaseBenchmarks.jl and PkgEval.jl are
+   invoked via [Nanosoldier.jl](https://github.com/JuliaCI/Nanosoldier.jl) from
+   the pull request associated with the backports branch. Fix any issues.
+
+4. Once all test and benchmark reports look good, merge the backports branch into
+   the corresponding release branch (e.g. merge `backports-release-1.6` into
+   `release-1.6`).
+
+5. Open a pull request that bumps the version of the relevant minor release to the
+   next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718).
+
+6. Ping `@JuliaLang/releases` to tag the patch release and update the website.
+
+7. Open a pull request that bumps the version of the relevant minor release to the
+   next prerelease patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37724).
+
+Step 2 above, i.e. backporting commits to the `backports-release-X.Y` branch, has largely
+been automated via [`Backporter`](https://github.com/KristofferC/Backporter): Backporter
+searches for merged pull requests with the relevant `backport-X.Y` tag, and attempts to
+cherry-pick the commits from those pull requests onto the `backports-release-X.Y` branch.
+Some commits apply successfully without intervention, others not so much. The latter
+commits require "manual" backporting, with which help is generally much appreciated.
+Backporter generates a report identifying those commits it managed to backport automatically
+and those that require manual backporting; this report is usually copied into the first
+post of the pull request associated with `backports-release-X.Y` and maintained as
+additional commits are automatically and/or manually backported.
+
+When contributing a manual backport, if you have the necessary permissions, please push the
+backport directly to the `backports-release-X.Y` branch. If you lack the relevant
+permissions, please open a pull request against the `backports-release-X.Y` branch with the
+manual backport. Once the manual backport is live on the `backports-release-X.Y` branch,
+please remove the `backport-X.Y` tag from the originating pull request for the commits.
+
 ### Code Formatting Guidelines
 
 #### General Formatting Guidelines for Julia code contributions
@@ -287,6 +334,11 @@ runtest harness).
    - To remove whitespace relative to the `master` branch, run
      `git rebase --whitespace=fix master`.
 
+#### Git Recommendations For Pull Request Reviewers
+
+- When merging, we generally prefer `squash+merge`. The exception is the rare PR with carefully staged individual commits that you want to keep separately in the history; in that case `merge` is acceptable.
+
+
 ## Resources
 
 * Julia
@@ -297,7 +349,7 @@ runtest harness).
   - **Code coverage:** <https://coveralls.io/r/JuliaLang/julia>
 
 * Design of Julia
-  - [Julia: A Fresh Approach to Numerical Computing](https://julialang.org/research/julia-fresh-approach-BEKS.pdf)
+  - [Julia: A Fresh Approach to Numerical Computing](https://julialang.org/assets/research/julia-fresh-approach-BEKS.pdf)
   - [Julia: Dynamism and Performance Reconciled by Design](http://janvitek.org/pubs/oopsla18b.pdf)
   - [All Julia Publications](https://julialang.org/research)
 
diff --git a/HISTORY.md b/HISTORY.md
index b5ebc03bbf1d9..74d15b48d1b28 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -37,7 +37,7 @@ Language changes
 * The default random number generator has changed, so all random numbers will be different (even with the
   same seed) unless an explicit RNG object is used.
   See the section on the `Random` standard library below ([#40546]).
-* `Iterators.peel(itr)` now returns `nothing` when `itr` is empty instead of throwing a `BoundsError` ([#39569]).
+* `Iterators.peel(itr)` now returns `nothing` when `itr` is empty instead of throwing a `BoundsError` ([#39607]).
 * Multiple successive semicolons in an array expresion were previously ignored (e.g., `[1 ;; 2] == [1 ; 2]`).
   This syntax is now used to separate dimensions (see **New language features**).
 
@@ -75,7 +75,7 @@ New library functions
 ---------------------
 
 * Two argument methods `findmax(f, domain)`, `argmax(f, domain)` and the corresponding
-  `min` versions ([#27613]).
+  `min` versions ([#35316]).
 * `isunordered(x)` returns true if `x` is a value that is normally unordered, such as
   `NaN` or `missing` ([#35316]).
 * New `keepat!(vector, inds)` function which is the inplace equivalent of `vector[inds]`
@@ -143,6 +143,8 @@ Standard library changes
 * `replace(::String)` now accepts multiple patterns, which will be applied left-to-right simultaneously,
   so only one pattern will be applied to any character, and the patterns will only be applied to the input
   text, not the replacements ([#40484]).
+* New `replace` methods to replace elements of a `Tuple` ([#38216]).
+
 
 #### Package Manager
 
@@ -296,7 +298,6 @@ Tooling Improvements
 
 
 <!--- generated by NEWS-update.jl: -->
-[#27613]: https://github.com/JuliaLang/julia/issues/27613
 [#29901]: https://github.com/JuliaLang/julia/issues/29901
 [#30676]: https://github.com/JuliaLang/julia/issues/30676
 [#31829]: https://github.com/JuliaLang/julia/issues/31829
@@ -311,6 +312,7 @@ Tooling Improvements
 [#37971]: https://github.com/JuliaLang/julia/issues/37971
 [#37978]: https://github.com/JuliaLang/julia/issues/37978
 [#38041]: https://github.com/JuliaLang/julia/issues/38041
+[#38216]: https://github.com/JuliaLang/julia/issues/38216
 [#38379]: https://github.com/JuliaLang/julia/issues/38379
 [#38438]: https://github.com/JuliaLang/julia/issues/38438
 [#38574]: https://github.com/JuliaLang/julia/issues/38574
@@ -333,9 +335,9 @@ Tooling Improvements
 [#39436]: https://github.com/JuliaLang/julia/issues/39436
 [#39455]: https://github.com/JuliaLang/julia/issues/39455
 [#39463]: https://github.com/JuliaLang/julia/issues/39463
-[#39569]: https://github.com/JuliaLang/julia/issues/39569
 [#39588]: https://github.com/JuliaLang/julia/issues/39588
 [#39594]: https://github.com/JuliaLang/julia/issues/39594
+[#39607]: https://github.com/JuliaLang/julia/issues/39607
 [#39710]: https://github.com/JuliaLang/julia/issues/39710
 [#39758]: https://github.com/JuliaLang/julia/issues/39758
 [#39794]: https://github.com/JuliaLang/julia/issues/39794
diff --git a/LICENSE.md b/LICENSE.md
index 79127224d049b..1083622cdc2eb 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -23,4 +23,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 end of terms and conditions
 
-Please see THIRDPARTY.md for license information for other software used in this project.
+Please see [THIRDPARTY.md](./THIRDPARTY.md) for license information for other software used in this project.
diff --git a/Make.inc b/Make.inc
index 19c9629e0423e..154faa029abd6 100644
--- a/Make.inc
+++ b/Make.inc
@@ -41,6 +41,7 @@ USE_SYSTEM_LIBM:=0
 USE_SYSTEM_OPENLIBM:=0
 UNTRUSTED_SYSTEM_LIBM:=0
 USE_SYSTEM_DSFMT:=0
+USE_SYSTEM_LIBBLASTRAMPOLINE:=0
 USE_SYSTEM_BLAS:=0
 USE_SYSTEM_LAPACK:=0
 USE_SYSTEM_GMP:=0
@@ -54,6 +55,7 @@ USE_SYSTEM_NGHTTP2:=0
 USE_SYSTEM_CURL:=0
 USE_SYSTEM_LIBGIT2:=0
 USE_SYSTEM_PATCHELF:=0
+USE_SYSTEM_LIBWHICH:=0
 USE_SYSTEM_ZLIB:=0
 USE_SYSTEM_P7ZIP:=0
 
@@ -75,6 +77,9 @@ HAVE_SSP := 0
 WITH_GC_VERIFY := 0
 WITH_GC_DEBUG_ENV := 0
 
+# Enable DTrace support
+WITH_DTRACE := 0
+
 # Prevent picking up $ARCH from the environment variables
 ARCH:=
 
@@ -87,6 +92,10 @@ endef
 COMMA:=,
 SPACE:=$(eval) $(eval)
 
+# force a sane / stable configuration
+export LC_ALL=C
+export LANG=C
+
 # We need python for things like BB triplet recognition and relative path computation.
 # We don't really care about version, generally, so just find something that works:
 PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo "{python|python3|python2} not found")"
@@ -328,6 +337,7 @@ BUILD_LLVM_CLANG := 0
 # set to 1 to get lldb (often does not work, no chance with llvm3.2 and earlier)
 # see http://lldb.llvm.org/build.html for dependencies
 BUILD_LLDB := 0
+BUILD_LIBCXX := 0
 
 # Options to enable Polly and its code-generation options
 USE_POLLY := 0
@@ -440,32 +450,14 @@ endif
 
 # Compiler specific stuff
 
-ifeq ($(USEMSVC), 1)
-USEGCC := 0
-USECLANG := 0
-USEICC := 0
-else
-USEMSVC := 0
 ifeq ($(USECLANG), 1)
 USEGCC := 0
-USEICC := 0
-else
-ifeq ($(USEICC), 1)
-USEGCC := 0
-USECLANG := 0
 else  # default to gcc
 USEGCC := 1
 USECLANG := 0
-USEICC := 0
-endif
-endif
 endif
 
-ifeq ($(USEIFC), 1)
-FC := ifort
-else
 FC := $(CROSS_COMPILE)gfortran
-endif
 
 ifeq ($(OS), FreeBSD)
 USEGCC := 0
@@ -521,19 +513,6 @@ JCPPFLAGS += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1
 endif
 endif
 
-ifeq ($(USEICC),1)
-ifeq ($(SANITIZE),1)
-$(error Sanitizers only supported with clang. Try setting SANITIZE=0)
-endif
-CC  := icc
-CXX := icpc
-JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -fp-model precise -fp-model except -no-ftz
-JCPPFLAGS :=
-JCXXFLAGS := -pipe $(fPIC) -fno-rtti -std=c++14
-DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector
-SHIPFLAGS := -O3 -g -falign-functions
-endif
-
 ifeq ($(USECCACHE), 1)
 # Expand CC, CXX and FC here already because we want the original definition and not the ccache version.
 CC_ARG   := $(CC)
@@ -569,29 +548,14 @@ FC_ARG := $(shell echo $(FC) | cut -s -d' ' -f2-)
 endif
 
 JFFLAGS := -O2 $(fPIC)
-ifneq ($(USEMSVC),1)
 CPP := $(CC) -E
 AR := $(CROSS_COMPILE)ar
 AS := $(CROSS_COMPILE)as
 LD := $(CROSS_COMPILE)ld
-else #USEMSVC
-CPP := $(CC) -EP
-AR := lib
-ifeq ($(ARCH),x86_64)
-AS := ml64
-else
-AS := ml
-endif #ARCH
-LD := link
-endif #USEMSVC
 RANLIB := $(CROSS_COMPILE)ranlib
 OBJCOPY := $(CROSS_COMPILE)objcopy
 
-ifneq ($(USEMSVC), 1)
 CPP_STDOUT := $(CPP) -P
-else
-CPP_STDOUT := $(CPP) -E
-endif
 
 # file extensions
 ifeq ($(OS), WINNT)
@@ -626,7 +590,7 @@ define SONAME_FLAGS
 endef
 endif
 
-ifeq ($(OS)$(USEMSVC),WINNT0)
+ifeq ($(OS),WINNT)
 define IMPLIB_FLAGS
   -Wl,--out-implib,$(build_libdir)/$(notdir $1).a
 endef
@@ -755,6 +719,13 @@ JCXXFLAGS += -DGC_DEBUG_ENV
 JCFLAGS += -DGC_DEBUG_ENV
 endif
 
+ifeq ($(WITH_DTRACE), 1)
+JCXXFLAGS += -DUSE_DTRACE
+JCFLAGS += -DUSE_DTRACE
+DTRACE := dtrace
+else
+endif
+
 # ===========================================================================
 
 # Select the cpu architecture to target, or automatically detects the user's compiler
@@ -920,6 +891,7 @@ OPENBLAS_DYNAMIC_ARCH:=0
 OPENBLAS_TARGET_ARCH:=ARMV8
 USE_BLAS64:=1
 BINARY:=64
+HAVE_SSP:=1
 ifeq ($(OS),Darwin)
 # Apple Chips are all at least A12Z
 MCPU:=apple-a12
@@ -969,7 +941,6 @@ JCXXFLAGS += -D_FILE_OFFSET_BITS=64
 endif
 
 # Set some ARCH-specific flags
-ifneq ($(USEICC),1)
 ifeq ($(ISX86),1)
 CC += -m$(BINARY)
 CXX += -m$(BINARY)
@@ -978,7 +949,6 @@ CC_ARG += -m$(BINARY)
 CXX_ARG += -m$(BINARY)
 FC_ARG += -m$(BINARY)
 endif
-endif
 
 ifeq ($(OS),WINNT)
 ifneq ($(ARCH),x86_64)
@@ -1059,6 +1029,12 @@ else
 PATCHELF := $(build_depsbindir)/patchelf
 endif
 
+ifeq ($(USE_SYSTEM_LIBWHICH), 1)
+LIBWHICH := libwhich
+else
+LIBWHICH := $(build_depsbindir)/libwhich
+endif
+
 # On aarch64 and powerpc64le, we assume the page size is 64K.  Our binutils linkers
 # and such already assume this, but `patchelf` seems to be behind the times.  We
 # explicitly tell it to use this large page size so that when we rewrite rpaths and
@@ -1168,7 +1144,10 @@ BB_TRIPLET := $(subst $(SPACE),-,$(filter-out cxx%,$(filter-out libgfortran%,$(s
 LIBGFORTRAN_VERSION := $(subst libgfortran,,$(filter libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN))))
 
 # This is the set of projects that BinaryBuilder dependencies are hooked up for.
-BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP CSL
+# Note: we explicitly _do not_ define `CSL` here, since it requires some more
+# advanced techniques to decide whether it should be installed from a BB source
+# or not.  See `deps/csl.mk` for more detail.
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
@@ -1225,19 +1204,19 @@ else ifeq ($(OS), Darwin)
   RPATH := -Wl,-rpath,'@executable_path/$(build_libdir_rel)'
   RPATH_ORIGIN := -Wl,-rpath,'@loader_path/'
   RPATH_ESCAPED_ORIGIN := $(RPATH_ORIGIN)
-  RPATH_LIB := -Wl,-rpath,'@loader_path/julia/' -Wl,-rpath,'@loader_path/'
+  RPATH_LIB := -Wl,-rpath,'@loader_path/'
 else
   RPATH := -Wl,-rpath,'$$ORIGIN/$(build_libdir_rel)' -Wl,-rpath,'$$ORIGIN/$(build_private_libdir_rel)' -Wl,-rpath-link,$(build_shlibdir) -Wl,-z,origin
   RPATH_ORIGIN := -Wl,-rpath,'$$ORIGIN' -Wl,-z,origin
   RPATH_ESCAPED_ORIGIN := -Wl,-rpath,'\$$\$$ORIGIN' -Wl,-z,origin -Wl,-rpath-link,$(build_shlibdir)
-  RPATH_LIB := -Wl,-rpath,'$$ORIGIN/julia' -Wl,-rpath,'$$ORIGIN' -Wl,-z,origin
+  RPATH_LIB := -Wl,-rpath,'$$ORIGIN/' -Wl,-z,origin
 endif
 
 # --whole-archive
 ifeq ($(OS), Darwin)
   WHOLE_ARCHIVE := -Xlinker -all_load
   NO_WHOLE_ARCHIVE :=
-else ifneq ($(USEMSVC), 1)
+else
   WHOLE_ARCHIVE := -Wl,--whole-archive
   NO_WHOLE_ARCHIVE := -Wl,--no-whole-archive
 endif
@@ -1292,18 +1271,13 @@ JLIBLDFLAGS := -Wl,-compatibility_version,$(SOMAJOR) -Wl,-current_version,$(JULI
 endif
 
 ifeq ($(OS), WINNT)
-ifneq ($(USEMSVC), 1)
 HAVE_SSP := 1
 OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \
-	$(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32
+	$(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic
 JLDFLAGS := -Wl,--stack,8388608
 ifeq ($(ARCH),i686)
 JLDFLAGS += -Wl,--large-address-aware
 endif
-else #USEMSVC
-OSLIBS += kernel32.lib ws2_32.lib psapi.lib advapi32.lib iphlpapi.lib shell32.lib winmm.lib userenv.lib secur32.lib
-JLDFLAGS := -stack:8388608
-endif
 JCPPFLAGS += -D_WIN32_WINNT=0x0502
 UNTRUSTED_SYSTEM_LIBM := 1
 endif
@@ -1353,6 +1327,7 @@ endif
 
 # Custom libcxx
 ifeq ($(BUILD_CUSTOM_LIBCXX),1)
+$(error BUILD_CUSTOM_LIBCXX is currently not supported, BUILD_LIBCXX will provide LIBCXX but not link it)
 LDFLAGS += -L$(build_libdir)
 CXXLDFLAGS += -L$(build_libdir) -lc++abi -lc++
 ifeq ($(USECLANG),1)
@@ -1363,11 +1338,6 @@ $(error BUILD_CUSTOM_LIBCXX is currently only supported with Clang. Try setting
 endif
 endif # Clang
 CUSTOM_LD_LIBRARY_PATH := LD_LIBRARY_PATH="$(build_libdir)"
-ifeq ($(USEICC),1)
-CXXFLAGS += -cxxlib-nostd -static-intel
-CLDFLAGS += -static-intel
-LDFLAGS += -cxxlib-nostd -static-intel
-endif
 endif
 
 # Some special restrictions on BB usage:
@@ -1481,6 +1451,12 @@ LIBJULIAINTERNAL_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlib
 LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
 LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
 
+LIBJULIACODEGEN_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
+
+LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
+
 ifeq ($(OS),WINNT)
 ifeq ($(BINARY),32)
 LIBGCC_NAME := libgcc_s_sjlj-1.$(SHLIB_EXT)
@@ -1520,16 +1496,19 @@ LIBM_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBMN
 # We list:
 #  * libgcc_s, because FreeBSD needs to load ours, not the system one.
 #  * libopenlibm, because Windows has an untrustworthy libm, and we want to use ours more than theirs
-#  * libjulia, which must always come last.
+#  * libjulia-internal, which must always come second-to-last.
+#  * libjulia-codegen, which must always come last
 #
 # We need these four separate variables because:
 #  * debug builds must link against libjuliadebug, not libjulia
 #  * install time relative paths are not equal to build time relative paths (../lib vs. ../lib/julia)
 # That second point will no longer be true for most deps once they are placed within Artifacts directories.
-LOADER_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):$(LIBJULIAINTERNAL_BUILD_DEPLIB)
-LOADER_DEBUG_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):$(LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB)
-LOADER_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):$(LIBJULIAINTERNAL_INSTALL_DEPLIB)
-LOADER_DEBUG_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):$(LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB)
+# Note that we prefix `libjulia-codegen` and `libjulia-internal` with `@` to signify to the loader that it
+# should not automatically dlopen() it in its loading loop.
+LOADER_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):@$(LIBJULIAINTERNAL_BUILD_DEPLIB):@$(LIBJULIACODEGEN_BUILD_DEPLIB):
+LOADER_DEBUG_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):@$(LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB):@$(LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB):
+LOADER_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):@$(LIBJULIAINTERNAL_INSTALL_DEPLIB):@$(LIBJULIACODEGEN_INSTALL_DEPLIB):
+LOADER_DEBUG_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):@$(LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB):@$(LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB):
 
 # Colors for make
 ifndef VERBOSE
@@ -1548,6 +1527,7 @@ LINKCOLOR:="\033[34;1m"
 PERLCOLOR:="\033[35m"
 FLISPCOLOR:="\033[32m"
 JULIACOLOR:="\033[32;1m"
+DTRACECOLOR:="\033[32;1m"
 
 SRCCOLOR:="\033[33m"
 BINCOLOR:="\033[37;1m"
@@ -1561,6 +1541,7 @@ PRINT_LINK = printf '    %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$(GOAL)
 PRINT_PERL = printf '    %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_FLISP = printf '    %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_JULIA = printf '    %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
+PRINT_DTRACE = printf '    %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 
 else
 QUIET_MAKE =
@@ -1570,6 +1551,7 @@ PRINT_LINK = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1)
+PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1)
 
 endif
 
diff --git a/Makefile b/Makefile
index 51bb09b094f1c..3452310148fcf 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ all: debug release
 # sort is used to remove potential duplicates
 DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir))
 ifneq ($(BUILDROOT),$(JULIAHOME))
-BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/embedding test/llvmpasses)
+BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/llvmpasses)
 BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk
 DIRS := $(DIRS) $(BUILDDIRS)
 $(BUILDDIRMAKE): | $(BUILDDIRS)
@@ -165,13 +165,14 @@ JL_TARGETS += julia-debug
 endif
 
 # private libraries, that are installed in $(prefix)/lib/julia
-JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest libjulia-internal libblastrampoline
+JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest libjulia-internal libjulia-codegen
 ifeq ($(BUNDLE_DEBUG_LIBS),1)
-JL_PRIVATE_LIBS-0 += libjulia-internal-debug
+JL_PRIVATE_LIBS-0 += libjulia-internal-debug libjulia-codegen-debug
 endif
 ifeq ($(USE_GPL_LIBS), 1)
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack
 endif
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBBLASTRAMPOLINE) += libblastrampoline
 JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8
 JL_PRIVATE_LIBS-$(USE_SYSTEM_DSFMT) += libdSFMT
 JL_PRIVATE_LIBS-$(USE_SYSTEM_GMP) += libgmp libgmpxx
@@ -455,8 +456,10 @@ endif
 	-ls stdlib/srccache/*.tar.gz >> light-source-dist.tmp
 	-ls stdlib/*/StdlibArtifacts.toml >> light-source-dist.tmp
 
-	# Exclude git, github and CI config files
-	git ls-files | sed -E -e '/^\..+/d' -e '/\/\..+/d' -e '/appveyor.yml/d' >> light-source-dist.tmp
+	# Include all git-tracked filenames
+	git ls-files >> light-source-dist.tmp
+	
+	# Include documentation filenames
 	find doc/_build/html >> light-source-dist.tmp
 
 # Make tarball with only Julia code + stdlib tarballs
@@ -573,3 +576,6 @@ endif
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
+
+print-locale:
+	@locale
diff --git a/NEWS.md b/NEWS.md
index c6feadc7e2f3c..3ca8f0aa540c4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,21 +5,46 @@ Julia v1.8 Release Notes
 New language features
 ---------------------
 
-* `Module(:name, false, false)` can be used to create a `module` that does not import `Core`. ([#40110])
-* `@inline` and `@noinline` annotations may now be used in function bodies. ([#41312])
+* `Module(:name, false, false)` can be used to create a `module` that contains no names (it does not import `Base` or `Core` and does not contain a reference to itself). ([#40110], [#42154])
+* `@inline` and `@noinline` annotations can be used within a function body to give an extra
+  hint about the inlining cost to the compiler. ([#41312])
+* `@inline` and `@noinline` annotations can now be applied to a function callsite or block
+  to enforce the involved function calls to be (or not to be) inlined. ([#41312])
 * The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551])
+* New function `eachsplit(str)` for iteratively performing `split(str)`.
+* `∀`, `∃`, and `∄` are now allowed as identifier characters ([#42314]).
+* `try`-blocks can now optionally have an `else`-block which is executed right after the main body only if
+  no errors were thrown. ([#42211])
 
 Language changes
 ----------------
 
+* Newly created Task objects (`@spawn`, `@async`, etc.) now adopt the world-age for methods from their parent
+  Task upon creation, instead of using the global latest world at start. This is done to enable inference to
+  eventually optimize these calls. Places that wish for the old behavior may use `Base.invokelatest`. ([#41449])
+* `@time` and `@timev` now take an optional description to allow annotating the source of time reports,
+  e.g. `@time "Evaluating foo" foo()` ([#42431])
+* New `@showtime` macro to show both the line being evaluated and the `@time` report ([#42431])
+* Iterating an `Iterators.Reverse` now falls back on reversing the eachindex iterator, if possible ([#43110]).
+* Unbalanced Unicode bidirectional formatting directives are now disallowed within strings and comments,
+  to mitigate the ["trojan source"](https://www.trojansource.codes) vulnerability ([#42918]).
 
 Compiler/Runtime improvements
 -----------------------------
 
+* The LLVM-based compiler has been separated from the run-time library into a new library,
+  `libjulia-codegen`. It is loaded by default, so normal usage should see no changes.
+  In deployments that do not need the compiler (e.g. system images where all needed code
+  is precompiled), this library (and its LLVM dependency) can simply be excluded ([#41936]).
 
 Command-line option changes
 ---------------------------
 
+* New option `--strip-metadata` to remove docstrings, source location information, and local
+  variable names when building a system image ([#42513]).
+* New option `--strip-ir` to remove the compiler's IR (intermediate representation) of source
+  code when building a system image. The resulting image will only work if `--compile=all` is
+  used, or if all needed code is precompiled ([#42925]).
 
 Multi-threading changes
 -----------------------
@@ -32,22 +57,40 @@ Build system changes
 New library functions
 ---------------------
 
+* `hardlink(src, dst)` can be used to create hard links. ([#41639])
+* `diskstat(path=pwd())` can be used to return statistics about the disk. ([#42248])
 
 New library features
 --------------------
 
+* `@test_throws "some message" triggers_error()` can now be used to check whether the displayed error text
+  contains "some message" regardless of the specific exception type.
+  Regular expressions, lists of strings, and matching functions are also supported. ([#41888])
+* `@testset foo()` can now be used to create a test set from a given function. The name of the test set
+  is the name of the called function. The called function can contain `@test` and other `@testset`
+  definitions, including to other function calls, while recording all intermediate test results. ([#42518])
+* Keys with value `nothing` are now removed from the environment in `addenv` ([#43271]).
 
 Standard library changes
 ------------------------
 
+* `range` accepts either `stop` or `length` as a sole keyword argument ([#39241])
+* `precision` and `setprecision` now accept a `base` keyword ([#42428]).
 * The `length` function on certain ranges of certain specific element types no longer checks for integer
   overflow in most cases. The new function `checked_length` is now available, which will try to use checked
   arithmetic to error if the result may be wrapping. Or use a package such as SaferIntegers.jl when
   constructing the range. ([#40382])
+* TCP socket objects now expose `closewrite` functionality and support half-open mode usage ([#40783]).
+* `intersect` now returns a result whose eltype is the type-promoted eltype of the two inputs ([#41769]).
+* `Iterators.countfrom` now accepts any type that defines `+`. ([#37747])
+
+#### InteractiveUtils
+* A new macro `@time_imports` for reporting any time spent importing packages and their dependencies ([#41612])
 
 #### Package Manager
 
 #### LinearAlgebra
+* The BLAS submodule now supports the level-2 BLAS subroutine `spr!` ([#42830]).
 
 * `cholesky[!]` now supports `LinearAlgebra.PivotingStrategy` (singleton type) values
   as its optional `pivot` argument: the default is `cholesky(A, NoPivot())` (vs.
@@ -58,9 +101,25 @@ Standard library changes
 #### Printf
 * Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).
 
+#### Profile
+* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
+  grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering.
+  Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
+  each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external
+  profiling data consumers, but can be included with the `include_meta` kwarg. ([#41742])
+
 #### Random
 
 #### REPL
+* `RadioMenu` now supports optional `keybindings` to directly select options ([#41576]).
+* ` ?(x, y` followed by TAB displays all methods that can be called
+  with arguments `x, y, ...`. (The space at the beginning prevents entering help-mode.)
+  `MyModule.?(x, y` limits the search to `MyModule`. TAB requires that at least one
+  argument have a type more specific than `Any`; use SHIFT-TAB instead of TAB
+  to allow any compatible methods.
+
+* New `err` global variable in `Main` set when an expression throws an exception, akin to `ans`. Typing `err` reprints
+  the exception information.
 
 #### SparseArrays
 
@@ -86,6 +145,14 @@ Standard library changes
 * The standard log levels `BelowMinLevel`, `Debug`, `Info`, `Warn`, `Error`,
   and `AboveMaxLevel` are now exported from the Logging stdlib ([#40980]).
 
+#### Unicode
+* Added function `isequal_normalized` to check for Unicode equivalence without
+  explicitly constructing normalized strings ([#42493]).
+* The `Unicode.normalize` function now accepts a `chartransform` keyword that can
+  be used to supply custom character mappings, and a `Unicode.julia_chartransform`
+  function is provided to reproduce the mapping used in identifier normalization
+  by the Julia parser ([#42561]).
+
 
 Deprecated or removed
 ---------------------
diff --git a/README.md b/README.md
index 1021f58331087..b48c7092df848 100644
--- a/README.md
+++ b/README.md
@@ -6,18 +6,30 @@
 </div>
 
 Documentation:
-[![version 1][docs-img]](https://docs.julialang.org)
+[![Documentation][docs-img]][docs-url]
 
-Code coverage:
-[![coveralls][coveralls-img]](https://coveralls.io/r/JuliaLang/julia?branch=master)
-[![codecov][codecov-img]](https://codecov.io/github/JuliaLang/julia?branch=master)
+[docs-img]: https://img.shields.io/badge/docs-v1-blue.svg "Documentation (version 1)"
+[docs-url]: https://docs.julialang.org
+
+Continuous integration:
+[![Continuous integration (master)][buildkite-master-img]][buildkite-master-url]
 
-Continuous integration: [![Build status](https://badge.buildkite.com/f28e0d28b345f9fad5856ce6a8d64fffc7c70df8f4f2685cd8.svg?branch=master)](https://buildkite.com/julialang/julia)
+<!--
+To change the badge to point to a different pipeline, it is not sufficient to simply change the `?branch=` part.
+You need to go to the Buildkite website and get the SVG URL for the correct pipeline.
+-->
+[buildkite-master-img]: https://badge.buildkite.com/f28e0d28b345f9fad5856ce6a8d64fffc7c70df8f4f2685cd8.svg?branch=master "Continuous integration (master)"
+[buildkite-master-url]: https://buildkite.com/julialang/julia-master
+
+Code coverage:
+[![Code coverage (Coveralls)][coveralls-img]][coveralls-url]
+[![Code coverage (Codecov)][codecov-img]][codecov-url]
 
-[docs-img]: https://img.shields.io/badge/docs-v1-blue.svg
-[coveralls-img]: https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls
-[codecov-img]: https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov
+[coveralls-img]: https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls "Code coverage (Coveralls)"
+[coveralls-url]: https://coveralls.io/r/JuliaLang/julia?branch=master
 
+[codecov-img]: https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov "Code coverage (Codecov)"
+[codecov-url]: https://codecov.io/github/JuliaLang/julia?branch=master
 
 ## The Julia Language
 
@@ -71,17 +83,17 @@ recommend you use the official Julia binaries instead.
 ## Building Julia
 
 First, make sure you have all the [required
-dependencies](https://github.com/JuliaLang/julia/blob/master/doc/build/build.md#required-build-tools-and-external-libraries) installed.
+dependencies](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/build.md#required-build-tools-and-external-libraries) installed.
 Then, acquire the source code by cloning the git repository:
 
     git clone https://github.com/JuliaLang/julia.git
 
 By default you will be building the latest unstable version of
-Julia. However, most users should use the most recent stable version
+Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by changing to the Julia directory
 and running:
 
-    git checkout v1.6.1
+    git checkout v1.7.0
 
 Now run `make` to build the `julia` executable.
 
@@ -103,8 +115,8 @@ You can read about [getting
 started](https://docs.julialang.org/en/v1/manual/getting-started/)
 in the manual.
 
-In case this default build path did not work, detailed build instructions
-are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/build).
+Detailed build instructions, should they be necessary,
+are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/).
 
 ### Uninstalling Julia
 
@@ -122,10 +134,9 @@ The Julia source code is organized as follows:
 | `base/`           | source code for the Base module (part of Julia's standard library) |
 | `stdlib/`         | source code for other standard library packages                    |
 | `cli/`            | source for the command line interface/REPL                         |
-| `contrib/`        | editor support for Julia source, miscellaneous scripts             |
+| `contrib/`        | miscellaneous scripts                                              |
 | `deps/`           | external dependencies                                              |
-| `doc/src/manual/` | source for the user manual                                         |
-| `doc/build/`      | detailed notes for building Julia                                  |
+| `doc/src/`        | source for the user manual                                         |
 | `src/`            | source for Julia language core                                     |
 | `test/`           | test suites                                                        |
 | `usr/`            | binaries and shared libraries loaded by Julia's standard libraries |
@@ -146,7 +157,8 @@ Support for editing Julia is available for many
 [Sublime Text](https://github.com/JuliaEditorSupport/Julia-sublime), and many
 others.
 
-Supported IDEs include: [julia-vscode](https://github.com/JuliaEditorSupport/julia-vscode) (VS
-Code plugin), [Juno](http://junolab.org/) (Atom plugin). [Jupyter](https://jupyter.org/)
-notebooks are available through the [IJulia](https://github.com/JuliaLang/IJulia.jl) package, and
-[Pluto](https://github.com/fonsp/Pluto.jl) notebooks through the Pluto.jl package.
+For users who prefer IDEs, we recommend using VS Code with the
+[julia-vscode](https://www.julia-vscode.org/) plugin.
+For notebook users, [Jupyter](https://jupyter.org/) notebook support is available through the
+[IJulia](https://github.com/JuliaLang/IJulia.jl) package, and
+the [Pluto.jl](https://github.com/fonsp/Pluto.jl) package provides Pluto notebooks.
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
index 87304437183d6..9112c3e6278ad 100644
--- a/THIRDPARTY.md
+++ b/THIRDPARTY.md
@@ -1,51 +1,51 @@
-The Julia language is licensed under the MIT License (see `LICENSE.md`). The "language" consists
+The Julia language is licensed under the MIT License (see [LICENSE.md](./LICENSE.md)). The "language" consists
 of the compiler (the contents of src/), most of the standard library (base/),
 and some utilities (most of the rest of the files in this repository). See below
 for exceptions.
 
 - [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
 - [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
-- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/jitlayers.cpp and src/disasm.cpp) [BSD-3, effectively]
-- [MUSL](https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) (for getopt implementation on Windows) [MIT]
+- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
 - [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
 - [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
-- [Python](https://docs.python.org/3/license.html) (for strtod and joinpath implementation on Windows) [BSD-3, effectively]
-- [Google Benchmark](https://github.com/google/benchmark) (for cyclecount implementation) [Apache 2.0]
+- [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
+- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
 
 The following components included in Julia `Base` have their own separate licenses:
 
 - base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
-- base/grisu/* [BSD-3] (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE))
-- base/special/{exp,rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+- base/special/{rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
 
 The Julia language links to the following external libraries, which have their
 own licenses:
 
-- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
-- [LIBUNWIND](https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=blob_plain;f=LICENSE;hb=master) [MIT]
-- [LIBUV](https://github.com/joyent/libuv/blob/master/LICENSE) [MIT]
-- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [BSD-3, effectively]
+- [LIBUNWIND](https://github.com/libunwind/libunwind/blob/master/LICENSE) [MIT]
+- [LIBUV](https://github.com/JuliaLang/libuv/blob/julia-uv2-1.39.0/LICENSE) [MIT]
+- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [UIUC]
 - [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
 
 Julia's `stdlib` uses the following external libraries, which have their own licenses:
 
-- [DSFMT](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/LICENSE.txt) [BSD-3]
+- [DSFMT](https://github.com/MersenneTwister-Lab/dSFMT/blob/master/LICENSE.txt) [BSD-3]
 - [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
 - [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
 - [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
 - [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
 - [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
-- [MBEDTLS](https://tls.mbed.org/how-to-get) [either GPLv2 or Apache 2.0]
+- [MBEDTLS](https://github.com/ARMmbed/mbedtls/blob/development/LICENSE) [Apache 2.0]
 - [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
 - [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
 - [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
 - [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
-- [SUITESPARSE](http://suitesparse.com) [mix of LGPL2+ and GPL2+; see individual module licenses]
+- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of LGPL2+ and GPL2+; see individual module licenses]
+- [LIBBLASTRAMPOLINE](https://github.com/staticfloat/libblastrampoline/blob/main/LICENSE) [MIT]
+- [NGHTTP2](https://github.com/nghttp2/nghttp2/blob/master/COPYING) [MIT]
 
 Julia's build process uses the following external tools:
 
 - [PATCHELF](https://nixos.org/patchelf.html)
 - [OBJCONV](https://www.agner.org/optimize/#objconv)
+- [LIBWHICH](https://github.com/vtjnash/libwhich/blob/master/LICENSE) [MIT]
 
 Julia bundles the following external programs and libraries:
 
diff --git a/base/Base.jl b/base/Base.jl
index 42a506479326b..91094f94ec213 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -20,46 +20,42 @@ include(path::String) = include(Base, path)
 const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module)
 
-# The real @inline macro is not available until after array.jl, so this
-# internal macro splices the meta Expr directly into the function body.
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-macro _noinline_meta()
-    Expr(:meta, :noinline)
-end
+# The @inline/@noinline macros that can be applied to a function declaration are not available
+# until after array.jl, and so we will mark them within a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
 
 # Try to help prevent users from shooting them-selves in the foot
 # with ambiguities by defining a few common and critical operations
 # (and these don't need the extra convert code)
-getproperty(x::Module, f::Symbol) = (@_inline_meta; getfield(x, f))
+getproperty(x::Module, f::Symbol) = (@inline; getfield(x, f))
 setproperty!(x::Module, f::Symbol, v) = setfield!(x, f, v) # to get a decent error
-getproperty(x::Type, f::Symbol) = (@_inline_meta; getfield(x, f))
+getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f))
 setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed")
-getproperty(x::Tuple, f::Int) = (@_inline_meta; getfield(x, f))
+getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f))
 setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error
 
-getproperty(x, f::Symbol) = (@_inline_meta; getfield(x, f))
+getproperty(x, f::Symbol) = (@inline; getfield(x, f))
 setproperty!(x, f::Symbol, v) = setfield!(x, f, convert(fieldtype(typeof(x), f), v))
 
 dotgetproperty(x, f) = getproperty(x, f)
 
-getproperty(x::Module, f::Symbol, order::Symbol) = (@_inline_meta; getfield(x, f, order))
+getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
 setproperty!(x::Module, f::Symbol, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
-getproperty(x::Type, f::Symbol, order::Symbol) = (@_inline_meta; getfield(x, f, order))
+getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
 setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed")
-getproperty(x::Tuple, f::Int, order::Symbol) = (@_inline_meta; getfield(x, f, order))
+getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order))
 setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
 
-getproperty(x, f::Symbol, order::Symbol) = (@_inline_meta; getfield(x, f, order))
-setproperty!(x, f::Symbol, v, order::Symbol) = (@_inline_meta; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x, f::Symbol, v, order::Symbol) = (@inline; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
 
 swapproperty!(x, f::Symbol, v, order::Symbol=:notatomic) =
-    (@_inline_meta; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+    (@inline; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
 modifyproperty!(x, f::Symbol, op, v, order::Symbol=:notatomic) =
-    (@_inline_meta; Core.modifyfield!(x, f, op, v, order))
+    (@inline; Core.modifyfield!(x, f, op, v, order))
 replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:notatomic, fail_order::Symbol=success_order) =
-    (@_inline_meta; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order))
+    (@inline; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order))
 
 
 include("coreio.jl")
@@ -107,6 +103,9 @@ include("options.jl")
 include("promotion.jl")
 include("tuple.jl")
 include("expr.jl")
+Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B} = (@inline; Pair{A, B}(convert(A, a)::A, convert(B, b)::B))
+#Pair{Any, B}(@nospecialize(a::Any), b) where {B} = (@inline; Pair{Any, B}(a, Base.convert(B, b)::B))
+#Pair{A, Any}(a, @nospecialize(b::Any)) where {A} = (@inline; Pair{A, Any}(Base.convert(A, a)::A, b))
 include("pair.jl")
 include("traits.jl")
 include("range.jl")
@@ -213,8 +212,6 @@ let os = ccall(:jl_get_UNAME, Any, ())
     if os === :Darwin || os === :Apple
         if Base.DARWIN_FRAMEWORK
             push!(DL_LOAD_PATH, "@loader_path/Frameworks")
-        else
-            push!(DL_LOAD_PATH, "@loader_path/julia")
         end
         push!(DL_LOAD_PATH, "@loader_path")
     end
@@ -317,7 +314,7 @@ let SOURCE_PATH = ""
 end
 
 # reduction along dims
-include("reducedim.jl")  # macros in this file relies on string.jl
+include("reducedim.jl")  # macros in this file rely on string.jl
 include("accumulate.jl")
 
 include("permuteddimsarray.jl")
@@ -462,19 +459,6 @@ end
 
 if is_primary_base_module
 function __init__()
-    # try to ensuremake sure OpenBLAS does not set CPU affinity (#1070, #9639)
-    if !haskey(ENV, "OPENBLAS_MAIN_FREE") && !haskey(ENV, "GOTOBLAS_MAIN_FREE")
-        ENV["OPENBLAS_MAIN_FREE"] = "1"
-    end
-    # And try to prevent openblas from starting too many threads, unless/until specifically requested
-    if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
-        cpu_threads = Sys.CPU_THREADS::Int
-        if cpu_threads > 8 # always at most 8
-            ENV["OPENBLAS_NUM_THREADS"] = "8"
-        elseif haskey(ENV, "JULIA_CPU_THREADS") # or exactly as specified
-            ENV["OPENBLAS_NUM_THREADS"] = cpu_threads
-        end # otherwise, trust that openblas will pick CPU_THREADS anyways, without any intervention
-    end
     # for the few uses of Libc.rand in Base:
     Libc.srand()
     # Base library init
diff --git a/base/Makefile b/base/Makefile
index 56e1cbebf21bf..82954b85c348e 100644
--- a/base/Makefile
+++ b/base/Makefile
@@ -164,7 +164,7 @@ endif
 
 define symlink_system_library
 libname_$2 := $$(notdir $(call versioned_libname,$2,$3))
-libpath_$2 := $$(shell $$(call spawn,$$(build_depsbindir)/libwhich) -p $$(libname_$2) 2>/dev/null)
+libpath_$2 := $$(shell $$(call spawn,$$(LIBWHICH)) -p $$(libname_$2) 2>/dev/null)
 symlink_$2: $$(build_private_libdir)/$$(libname_$2)
 $$(build_private_libdir)/$$(libname_$2):
 	@if [ -e "$$(libpath_$2)" ]; then \
@@ -185,6 +185,19 @@ SYMLINK_SYSTEM_LIBRARIES += symlink_$2
 endif
 endef
 
+# libexec executables
+symlink_p7zip: $(build_bindir)/7z$(EXE)
+
+ifneq ($(USE_SYSTEM_P7ZIP),0)
+SYMLINK_SYSTEM_LIBRARIES += symlink_p7zip
+7Z_PATH := $(shell which 7z$(EXE))
+endif
+
+$(build_bindir)/7z$(EXE):
+	[ -e "$(7Z_PATH)" ] && \
+	([ ! -e "$@" ] || rm "$@") && \
+	ln -svf "$(7Z_PATH)" "$@"
+
 # the following excludes: libuv.a, libutf8proc.a
 
 ifneq ($(USE_SYSTEM_LIBM),0)
@@ -205,6 +218,7 @@ $(eval $(call symlink_system_library,CSL,libatomic,1,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,CSL,libgomp,1,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,PCRE,libpcre2-8))
 $(eval $(call symlink_system_library,DSFMT,libdSFMT))
+$(eval $(call symlink_system_library,LIBBLASTRAMPOLINE,libblastrampoline))
 $(eval $(call symlink_system_library,BLAS,$(LIBBLASNAME)))
 ifneq ($(LIBLAPACKNAME),$(LIBBLASNAME))
 $(eval $(call symlink_system_library,LAPACK,$(LIBLAPACKNAME)))
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index d5d47fe855bd5..2733b52222e37 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -70,7 +70,7 @@ ix[(begin+1):end]  # works for generalized indexes
 ```
 """
 function axes(A::AbstractArray{T,N}, d) where {T,N}
-    @_inline_meta
+    @inline
     d::Integer <= N ? axes(A)[d] : OneTo(1)
 end
 
@@ -91,7 +91,7 @@ julia> axes(A)
 ```
 """
 function axes(A)
-    @_inline_meta
+    @inline
     map(oneto, size(A))
 end
 
@@ -113,7 +113,7 @@ require_one_based_indexing(A...) = !has_offset_axes(A...) || throw(ArgumentError
 # for d=1. 1d arrays are heavily used, and the first dimension comes up
 # in other applications.
 axes1(A::AbstractArray{<:Any,0}) = OneTo(1)
-axes1(A::AbstractArray) = (@_inline_meta; axes(A)[1])
+axes1(A::AbstractArray) = (@inline; axes(A)[1])
 axes1(iter) = oneto(length(iter))
 
 """
@@ -212,6 +212,12 @@ eltype(::Type{<:AbstractArray{E}}) where {E} = @isdefined(E) ? E : Any
 Compute the memory stride in bytes between consecutive elements of `eltype`
 stored inside the given `type`, if the array elements are stored densely with a
 uniform linear stride.
+
+# Examples
+```jldoctest
+julia> Base.elsize(rand(Float32, 10))
+4
+```
 """
 elsize(A::AbstractArray) = elsize(typeof(A))
 
@@ -270,13 +276,13 @@ julia> length([1 2; 3 4])
 4
 ```
 """
-length(t::AbstractArray) = (@_inline_meta; prod(size(t)))
+length(t::AbstractArray) = (@inline; prod(size(t)))
 
 # `eachindex` is mostly an optimization of `keys`
 eachindex(itrs...) = keys(itrs...)
 
 # eachindex iterates over all indices. IndexCartesian definitions are later.
-eachindex(A::AbstractVector) = (@_inline_meta(); axes1(A))
+eachindex(A::AbstractVector) = (@inline(); axes1(A))
 
 
 @noinline function throw_eachindex_mismatch_indices(::IndexLinear, inds...)
@@ -300,7 +306,7 @@ If you supply more than one `AbstractArray` argument, `eachindex` will create an
 iterable object that is fast for all arguments (a [`UnitRange`](@ref)
 if all inputs have fast linear indexing, a [`CartesianIndices`](@ref)
 otherwise).
-If the arrays have different sizes and/or dimensionalities, a DimensionMismatch exception
+If the arrays have different sizes and/or dimensionalities, a `DimensionMismatch` exception
 will be thrown.
 # Examples
 ```jldoctest
@@ -321,27 +327,27 @@ CartesianIndex(1, 1)
 CartesianIndex(2, 1)
 ```
 """
-eachindex(A::AbstractArray) = (@_inline_meta(); eachindex(IndexStyle(A), A))
+eachindex(A::AbstractArray) = (@inline(); eachindex(IndexStyle(A), A))
 
 function eachindex(A::AbstractArray, B::AbstractArray)
-    @_inline_meta
+    @inline
     eachindex(IndexStyle(A,B), A, B)
 end
 function eachindex(A::AbstractArray, B::AbstractArray...)
-    @_inline_meta
+    @inline
     eachindex(IndexStyle(A,B...), A, B...)
 end
-eachindex(::IndexLinear, A::AbstractArray) = (@_inline_meta; oneto(length(A)))
-eachindex(::IndexLinear, A::AbstractVector) = (@_inline_meta; axes1(A))
+eachindex(::IndexLinear, A::AbstractArray) = (@inline; oneto(length(A)))
+eachindex(::IndexLinear, A::AbstractVector) = (@inline; axes1(A))
 function eachindex(::IndexLinear, A::AbstractArray, B::AbstractArray...)
-    @_inline_meta
+    @inline
     indsA = eachindex(IndexLinear(), A)
     _all_match_first(X->eachindex(IndexLinear(), X), indsA, B...) ||
         throw_eachindex_mismatch_indices(IndexLinear(), eachindex(A), eachindex.(B)...)
     indsA
 end
 function _all_match_first(f::F, inds, A, B...) where F<:Function
-    @_inline_meta
+    @inline
     (inds == f(A)) & _all_match_first(f, inds, B...)
 end
 _all_match_first(f::F, inds) where F<:Function = true
@@ -369,8 +375,8 @@ julia> lastindex(rand(3,4,5), 2)
 4
 ```
 """
-lastindex(a::AbstractArray) = (@_inline_meta; last(eachindex(IndexLinear(), a)))
-lastindex(a, d) = (@_inline_meta; last(axes(a, d)))
+lastindex(a::AbstractArray) = (@inline; last(eachindex(IndexLinear(), a)))
+lastindex(a, d) = (@inline; last(axes(a, d)))
 
 """
     firstindex(collection) -> Integer
@@ -392,8 +398,8 @@ julia> firstindex(rand(3,4,5), 2)
 1
 ```
 """
-firstindex(a::AbstractArray) = (@_inline_meta; first(eachindex(IndexLinear(), a)))
-firstindex(a, d) = (@_inline_meta; first(axes(a, d)))
+firstindex(a::AbstractArray) = (@inline; first(eachindex(IndexLinear(), a)))
+firstindex(a, d) = (@inline; first(axes(a, d)))
 
 first(a::AbstractArray) = a[first(eachindex(a))]
 
@@ -583,7 +589,7 @@ function trailingsize(inds::Indices, n)
 end
 # This version is type-stable even if inds is heterogeneous
 function trailingsize(inds::Indices)
-    @_inline_meta
+    @inline
     prod(map(length, inds))
 end
 
@@ -631,18 +637,18 @@ false
 ```
 """
 function checkbounds(::Type{Bool}, A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     checkbounds_indices(Bool, axes(A), I)
 end
 
 # Linear indexing is explicitly allowed when there is only one (non-cartesian) index
 function checkbounds(::Type{Bool}, A::AbstractArray, i)
-    @_inline_meta
+    @inline
     checkindex(Bool, eachindex(IndexLinear(), A), i)
 end
 # As a special extension, allow using logical arrays that match the source array exactly
 function checkbounds(::Type{Bool}, A::AbstractArray{<:Any,N}, I::AbstractArray{Bool,N}) where N
-    @_inline_meta
+    @inline
     axes(A) == axes(I)
 end
 
@@ -652,7 +658,7 @@ end
 Throw an error if the specified indices `I` are not in bounds for the given array `A`.
 """
 function checkbounds(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     checkbounds(Bool, A, I...) || throw_boundserror(A, I)
     nothing
 end
@@ -678,17 +684,17 @@ of `IA`.
 See also [`checkbounds`](@ref).
 """
 function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple)
-    @_inline_meta
+    @inline
     checkindex(Bool, IA[1], I[1])::Bool & checkbounds_indices(Bool, tail(IA), tail(I))
 end
 function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple)
-    @_inline_meta
+    @inline
     checkindex(Bool, OneTo(1), I[1])::Bool & checkbounds_indices(Bool, (), tail(I))
 end
-checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@_inline_meta; all(x->length(x)==1, IA))
+checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, IA))
 checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true
 
-throw_boundserror(A, I) = (@_noinline_meta; throw(BoundsError(A, I)))
+throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))
 
 # check along a single dimension
 """
@@ -722,7 +728,7 @@ end
 checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractVector{Bool}) = indx == axes1(I)
 checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractArray{Bool}) = false
 function checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractArray)
-    @_inline_meta
+    @inline
     b = true
     for i in I
         b &= checkindex(Bool, inds, i)
@@ -752,7 +758,7 @@ neither mutable nor support 2 dimensions:
 
 ```julia-repl
 julia> similar(1:10, 1, 4)
-1×4 Array{Int64,2}:
+1×4 Matrix{Int64}:
  4419743872  4374413872  4419743888  0
 ```
 
@@ -771,7 +777,7 @@ different element type it will create a regular `Array` instead:
 
 ```julia-repl
 julia> similar(falses(10), Float64, 2, 4)
-2×4 Array{Float64,2}:
+2×4 Matrix{Float64}:
  2.18425e-314  2.18425e-314  2.18425e-314  2.18425e-314
  2.18425e-314  2.18425e-314  2.18425e-314  2.18425e-314
 ```
@@ -1163,7 +1169,7 @@ end
 # convenience in cases that work.
 pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, x)
 function pointer(x::AbstractArray{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     unsafe_convert(Ptr{T}, x) + Int(_memory_offset(x, i))::Int
 end
 
@@ -1221,7 +1227,7 @@ end
 @propagate_inbounds getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...]
 
 function unsafe_getindex(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     @inbounds r = getindex(A, I...)
     r
 end
@@ -1240,7 +1246,7 @@ _getindex(::IndexStyle, A::AbstractArray, I...) =
 _getindex(::IndexLinear, A::AbstractVector, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))  # ambiguity resolution in case packages specialize this (to be avoided if at all possible, but see Interpolations.jl)
 _getindex(::IndexLinear, A::AbstractArray, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))
 function _getindex(::IndexLinear, A::AbstractArray, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...) # generally _to_linear_index requires bounds checking
     @inbounds r = getindex(A, _to_linear_index(A, I...))
     r
@@ -1248,11 +1254,11 @@ end
 _to_linear_index(A::AbstractArray, i::Integer) = i
 _to_linear_index(A::AbstractVector, i::Integer, I::Integer...) = i
 _to_linear_index(A::AbstractArray) = first(LinearIndices(A))
-_to_linear_index(A::AbstractArray, I::Integer...) = (@_inline_meta; _sub2ind(A, I...))
+_to_linear_index(A::AbstractArray, I::Integer...) = (@inline; _sub2ind(A, I...))
 
 ## IndexCartesian Scalar indexing: Canonical method is full dimensionality of Ints
 function _getindex(::IndexCartesian, A::AbstractArray, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...) # generally _to_subscript_indices requires bounds checking
     @inbounds r = getindex(A, _to_subscript_indices(A, I...)...)
     r
@@ -1261,13 +1267,13 @@ function _getindex(::IndexCartesian, A::AbstractArray{T,N}, I::Vararg{Int, N}) w
     @_propagate_inbounds_meta
     getindex(A, I...)
 end
-_to_subscript_indices(A::AbstractArray, i::Integer) = (@_inline_meta; _unsafe_ind2sub(A, i))
-_to_subscript_indices(A::AbstractArray{T,N}) where {T,N} = (@_inline_meta; fill_to_length((), 1, Val(N)))
+_to_subscript_indices(A::AbstractArray, i::Integer) = (@inline; _unsafe_ind2sub(A, i))
+_to_subscript_indices(A::AbstractArray{T,N}) where {T,N} = (@inline; fill_to_length((), 1, Val(N)))
 _to_subscript_indices(A::AbstractArray{T,0}) where {T} = ()
 _to_subscript_indices(A::AbstractArray{T,0}, i::Integer) where {T} = ()
 _to_subscript_indices(A::AbstractArray{T,0}, I::Integer...) where {T} = ()
 function _to_subscript_indices(A::AbstractArray{T,N}, I::Integer...) where {T,N}
-    @_inline_meta
+    @inline
     J, Jrem = IteratorsMD.split(I, Val(N))
     _to_subscript_indices(A, J, Jrem)
 end
@@ -1275,15 +1281,15 @@ _to_subscript_indices(A::AbstractArray, J::Tuple, Jrem::Tuple{}) =
     __to_subscript_indices(A, axes(A), J, Jrem)
 function __to_subscript_indices(A::AbstractArray,
         ::Tuple{AbstractUnitRange,Vararg{AbstractUnitRange}}, J::Tuple, Jrem::Tuple{})
-    @_inline_meta
+    @inline
     (J..., map(first, tail(_remaining_size(J, axes(A))))...)
 end
 _to_subscript_indices(A, J::Tuple, Jrem::Tuple) = J # already bounds-checked, safe to drop
 _to_subscript_indices(A::AbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} = I
 _remaining_size(::Tuple{Any}, t::Tuple) = t
-_remaining_size(h::Tuple, t::Tuple) = (@_inline_meta; _remaining_size(tail(h), tail(t)))
+_remaining_size(h::Tuple, t::Tuple) = (@inline; _remaining_size(tail(h), tail(t)))
 _unsafe_ind2sub(::Tuple{}, i) = () # _ind2sub may throw(BoundsError()) in this case
-_unsafe_ind2sub(sz, i) = (@_inline_meta; _ind2sub(sz, i))
+_unsafe_ind2sub(sz, i) = (@inline; _ind2sub(sz, i))
 
 ## Setindex! is defined similarly. We first dispatch to an internal _setindex!
 # function that allows dispatch on array storage
@@ -1315,7 +1321,7 @@ function setindex!(A::AbstractArray, v, I...)
     _setindex!(IndexStyle(A), A, v, to_indices(A, I)...)
 end
 function unsafe_setindex!(A::AbstractArray, v, I...)
-    @_inline_meta
+    @inline
     @inbounds r = setindex!(A, v, I...)
     r
 end
@@ -1333,7 +1339,7 @@ _setindex!(::IndexStyle, A::AbstractArray, v, I...) =
 ## IndexLinear Scalar indexing
 _setindex!(::IndexLinear, A::AbstractArray, v, i::Int) = (@_propagate_inbounds_meta; setindex!(A, v, i))
 function _setindex!(::IndexLinear, A::AbstractArray, v, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_linear_index(A, I...))
     r
@@ -1345,7 +1351,7 @@ function _setindex!(::IndexCartesian, A::AbstractArray{T,N}, v, I::Vararg{Int, N
     setindex!(A, v, I...)
 end
 function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_subscript_indices(A, I...)...)
     r
@@ -1412,7 +1418,7 @@ much more common case where aliasing does not occur. By default,
 `Base.unaliascopy(A)`.
 """
 unaliascopy(A::Array) = copy(A)
-unaliascopy(A::AbstractArray)::typeof(A) = (@_noinline_meta; _unaliascopy(A, copy(A)))
+unaliascopy(A::AbstractArray)::typeof(A) = (@noinline; _unaliascopy(A, copy(A)))
 _unaliascopy(A::T, C::T) where {T} = C
 _unaliascopy(A, C) = throw(ArgumentError("""
     an array of type `$(typename(typeof(A)).wrapper)` shares memory with another argument
@@ -2550,12 +2556,12 @@ end
 # _sub2ind and _ind2sub
 # fallbacks
 function _sub2ind(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     _sub2ind(axes(A), I...)
 end
 
 function _ind2sub(A::AbstractArray, ind)
-    @_inline_meta
+    @inline
     _ind2sub(axes(A), ind)
 end
 
@@ -2563,25 +2569,25 @@ end
 _sub2ind(::Tuple{}) = 1
 _sub2ind(::DimsInteger) = 1
 _sub2ind(::Indices) = 1
-_sub2ind(::Tuple{}, I::Integer...) = (@_inline_meta; _sub2ind_recurse((), 1, 1, I...))
+_sub2ind(::Tuple{}, I::Integer...) = (@inline; _sub2ind_recurse((), 1, 1, I...))
 
 # Generic cases
-_sub2ind(dims::DimsInteger, I::Integer...) = (@_inline_meta; _sub2ind_recurse(dims, 1, 1, I...))
-_sub2ind(inds::Indices, I::Integer...) = (@_inline_meta; _sub2ind_recurse(inds, 1, 1, I...))
+_sub2ind(dims::DimsInteger, I::Integer...) = (@inline; _sub2ind_recurse(dims, 1, 1, I...))
+_sub2ind(inds::Indices, I::Integer...) = (@inline; _sub2ind_recurse(inds, 1, 1, I...))
 # In 1d, there's a question of whether we're doing cartesian indexing
 # or linear indexing. Support only the former.
 _sub2ind(inds::Indices{1}, I::Integer...) =
     throw(ArgumentError("Linear indexing is not defined for one-dimensional arrays"))
-_sub2ind(inds::Tuple{OneTo}, I::Integer...) = (@_inline_meta; _sub2ind_recurse(inds, 1, 1, I...)) # only OneTo is safe
+_sub2ind(inds::Tuple{OneTo}, I::Integer...) = (@inline; _sub2ind_recurse(inds, 1, 1, I...)) # only OneTo is safe
 _sub2ind(inds::Tuple{OneTo}, i::Integer)    = i
 
 _sub2ind_recurse(::Any, L, ind) = ind
 function _sub2ind_recurse(::Tuple{}, L, ind, i::Integer, I::Integer...)
-    @_inline_meta
+    @inline
     _sub2ind_recurse((), L, ind+(i-1)*L, I...)
 end
 function _sub2ind_recurse(inds, L, ind, i::Integer, I::Integer...)
-    @_inline_meta
+    @inline
     r1 = inds[1]
     _sub2ind_recurse(tail(inds), nextL(L, r1), ind+offsetin(i, r1)*L, I...)
 end
@@ -2592,20 +2598,20 @@ nextL(L, r::Slice) = L*length(r.indices)
 offsetin(i, l::Integer) = i-1
 offsetin(i, r::AbstractUnitRange) = i-first(r)
 
-_ind2sub(::Tuple{}, ind::Integer) = (@_inline_meta; ind == 1 ? () : throw(BoundsError()))
-_ind2sub(dims::DimsInteger, ind::Integer) = (@_inline_meta; _ind2sub_recurse(dims, ind-1))
-_ind2sub(inds::Indices, ind::Integer)     = (@_inline_meta; _ind2sub_recurse(inds, ind-1))
+_ind2sub(::Tuple{}, ind::Integer) = (@inline; ind == 1 ? () : throw(BoundsError()))
+_ind2sub(dims::DimsInteger, ind::Integer) = (@inline; _ind2sub_recurse(dims, ind-1))
+_ind2sub(inds::Indices, ind::Integer)     = (@inline; _ind2sub_recurse(inds, ind-1))
 _ind2sub(inds::Indices{1}, ind::Integer) =
     throw(ArgumentError("Linear indexing is not defined for one-dimensional arrays"))
 _ind2sub(inds::Tuple{OneTo}, ind::Integer) = (ind,)
 
 _ind2sub_recurse(::Tuple{}, ind) = (ind+1,)
 function _ind2sub_recurse(indslast::NTuple{1}, ind)
-    @_inline_meta
+    @inline
     (_lookup(ind, indslast[1]),)
 end
 function _ind2sub_recurse(inds, ind)
-    @_inline_meta
+    @inline
     r1 = inds[1]
     indnext, f, l = _div(ind, r1)
     (ind-l*indnext+f, _ind2sub_recurse(tail(inds), indnext)...)
@@ -2636,7 +2642,7 @@ function _sub2ind_vecs(inds, I::AbstractVector...)
 end
 
 function _sub2ind!(Iout, inds, Iinds, I)
-    @_noinline_meta
+    @noinline
     for i in Iinds
         # Iout[i] = _sub2ind(inds, map(Ij -> Ij[i], I)...)
         Iout[i] = sub2ind_vec(inds, i, I)
@@ -2644,8 +2650,8 @@ function _sub2ind!(Iout, inds, Iinds, I)
     Iout
 end
 
-sub2ind_vec(inds, i, I) = (@_inline_meta; _sub2ind(inds, _sub2ind_vec(i, I...)...))
-_sub2ind_vec(i, I1, I...) = (@_inline_meta; (I1[i], _sub2ind_vec(i, I...)...))
+sub2ind_vec(inds, i, I) = (@inline; _sub2ind(inds, _sub2ind_vec(i, I...)...))
+_sub2ind_vec(i, I1, I...) = (@inline; (I1[i], _sub2ind_vec(i, I...)...))
 _sub2ind_vec(i) = ()
 
 function _ind2sub(inds::Union{DimsInteger{N},Indices{N}}, ind::AbstractVector{<:Integer}) where N
@@ -3067,29 +3073,9 @@ end
 
 ## keepat! ##
 
-"""
-    keepat!(a::AbstractVector, inds)
-
-Remove the items at all the indices which are not given by `inds`,
-and return the modified `a`.
-Items which are kept are shifted to fill the resulting gaps.
+# NOTE: since these use `@inbounds`, they are actually only intended for Vector and BitVector
 
-`inds` must be an iterator of sorted and unique integer indices.
-See also [`deleteat!`](@ref).
-
-!!! compat "Julia 1.7"
-    This function is available as of Julia 1.7.
-
-# Examples
-```jldoctest
-julia> keepat!([6, 5, 4, 3, 2, 1], 1:2:5)
-3-element Vector{Int64}:
- 6
- 4
- 2
-```
-"""
-function keepat!(a::AbstractVector, inds)
+function _keepat!(a::AbstractVector, inds)
     local prev
     i = firstindex(a)
     for k in inds
@@ -3106,3 +3092,29 @@ function keepat!(a::AbstractVector, inds)
     deleteat!(a, i:lastindex(a))
     return a
 end
+
+function _keepat!(a::AbstractVector, m::AbstractVector{Bool})  # keep `a[i]` wherever `m[i]` is true
+    length(m) == length(a) || throw(BoundsError(a, m))
+    dest = firstindex(a)  # next slot that receives a kept element
+    for src in eachindex(a, m)
+        # compact kept elements towards the front, skipping pointless self-assignment
+        @inbounds if m[src]
+            src == dest || (a[dest] = a[src])
+            dest = nextind(a, dest)
+        end
+    end
+    # everything from `dest` onwards is stale: drop it and hand back what `deleteat!` returns
+    return deleteat!(a, dest:lastindex(a))
+end
+
+## 1-d circshift: rotate `a` in place by `shift` (same direction as `circshift`), returning `a` ##
+function circshift!(a::AbstractVector, shift::Integer)
+    n = length(a)
+    n == 0 && return a  # always return `a`, even when there is nothing to rotate
+    shift = mod(shift, n)  # fold negative and oversized shifts into 0:n-1
+    shift == 0 && return a
+    reverse!(a, firstindex(a), lastindex(a)-shift)  # reverse the leading n-shift elements,
+    reverse!(a, lastindex(a)-shift+1, lastindex(a))  # then the trailing `shift` elements,
+    reverse!(a)  # then the whole vector: the tail now leads, i.e. a rotation by `shift`
+    return a
+end
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index 4dd24214a63fc..9690fc0f2e4c4 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -95,11 +95,127 @@ _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),))
 
 ## Unary operators ##
 
-conj(x::AbstractArray{<:Real}) = x
+"""
+    conj!(A)
+
+Transform an array to its complex conjugate in-place.
+
+See also [`conj`](@ref).
+
+# Examples
+```jldoctest
+julia> A = [1+im 2-im; 2+2im 3+im]
+2×2 Matrix{Complex{Int64}}:
+ 1+1im  2-1im
+ 2+2im  3+1im
+
+julia> conj!(A);
+
+julia> A
+2×2 Matrix{Complex{Int64}}:
+ 1-1im  2+1im
+ 2-2im  3-1im
+```
+"""
+conj!(A::AbstractArray{<:Number}) = (@inbounds broadcast!(conj, A, A); A)
 conj!(x::AbstractArray{<:Real}) = x
 
-real(x::AbstractArray{<:Real}) = x
-imag(x::AbstractArray{<:Real}) = zero(x)
+"""
+    conj(A::AbstractArray)
+
+Return an array containing the complex conjugate of each entry in array `A`.
+
+Equivalent to `conj.(A)`, except that when `eltype(A) <: Real`
+`A` is returned without copying, and that when `A` has zero dimensions,
+a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> conj([1, 2im, 3 + 4im])
+3-element Vector{Complex{Int64}}:
+ 1 + 0im
+ 0 - 2im
+ 3 - 4im
+
+julia> conj(fill(2 - im))
+0-dimensional Array{Complex{Int64}, 0}:
+2 + 1im
+```
+"""
+conj(A::AbstractArray) = broadcast_preserving_zero_d(conj, A)
+conj(A::AbstractArray{<:Real}) = A
+
+"""
+    real(A::AbstractArray)
+
+Return an array containing the real part of each entry in array `A`.
+
+Equivalent to `real.(A)`, except that when `eltype(A) <: Real`
+`A` is returned without copying, and that when `A` has zero dimensions,
+a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> real([1, 2im, 3 + 4im])
+3-element Vector{Int64}:
+ 1
+ 0
+ 3
+
+julia> real(fill(2 - im))
+0-dimensional Array{Int64, 0}:
+2
+```
+"""
+real(A::AbstractArray) = broadcast_preserving_zero_d(real, A)
+real(A::AbstractArray{<:Real}) = A
+
+"""
+    imag(A::AbstractArray)
+
+Return an array containing the imaginary part of each entry in array `A`.
+
+Equivalent to `imag.(A)`, except that when `A` has zero dimensions,
+a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> imag([1, 2im, 3 + 4im])
+3-element Vector{Int64}:
+ 0
+ 2
+ 4
+
+julia> imag(fill(2 - im))
+0-dimensional Array{Int64, 0}:
+-1
+```
+"""
+imag(A::AbstractArray) = broadcast_preserving_zero_d(imag, A)
+imag(A::AbstractArray{<:Real}) = zero(A)
+
+"""
+    reim(A::AbstractArray)
+
+Return a tuple of two arrays containing respectively the real and the imaginary
+part of each entry in `A`.
+
+Equivalent to `(real.(A), imag.(A))`, except that when `eltype(A) <: Real`
+`A` is returned without copying to represent the real part, and that when `A` has
+zero dimensions, a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> reim([1, 2im, 3 + 4im])
+([1, 0, 3], [0, 2, 4])
+
+julia> reim(fill(2 - im))
+(fill(2), fill(-1))
+```
+"""
+reim(A::AbstractArray)
+
+-(A::AbstractArray) = broadcast_preserving_zero_d(-, A)
 
 +(x::AbstractArray{<:Number}) = x
 *(x::AbstractArray{<:Number,2}) = x
@@ -385,7 +501,6 @@ function repeat_outer(arr::AbstractArray{<:Any,N}, dims::NTuple{N,Any}) where {N
 end
 
 function repeat_inner(arr, inner)
-    basedims = size(arr)
     outsize = map(*, size(arr), inner)
     out = similar(arr, outsize)
     for I in CartesianIndices(arr)
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 4304a13e71a66..74a73a9ad348a 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -14,7 +14,7 @@ end
 
 const secret_table_token = :__c782dbf1cf4d6a2e5e3865d7e95634f2e09b5902__
 
-haskey(d::AbstractDict, k) = in(k, keys(d))
+haskey(d, k) = in(k, keys(d))
 
 function in(p::Pair, a::AbstractDict, valcmp=(==))
     v = get(a, p.first, secret_table_token)
@@ -66,6 +66,8 @@ function iterate(v::Union{KeySet,ValueIterator}, state...)
     return (y[1][isa(v, KeySet) ? 1 : 2], y[2])
 end
 
+copy(v::KeySet) = copymutable(v)
+
 in(k, v::KeySet) = get(v.dict, k, secret_table_token) !== secret_table_token
 
 """
@@ -134,6 +136,38 @@ values(a::AbstractDict) = ValueIterator(a)
 Return an iterator over `key => value` pairs for any
 collection that maps a set of keys to a set of values.
 This includes arrays, where the keys are the array indices.
+
+# Examples
+```jldoctest
+julia> a = Dict(zip(["a", "b", "c"], [1, 2, 3]))
+Dict{String, Int64} with 3 entries:
+  "c" => 3
+  "b" => 2
+  "a" => 1
+
+julia> pairs(a)
+Dict{String, Int64} with 3 entries:
+  "c" => 3
+  "b" => 2
+  "a" => 1
+
+julia> foreach(println, pairs(["a", "b", "c"]))
+1 => "a"
+2 => "b"
+3 => "c"
+
+julia> (;a=1, b=2, c=3) |> pairs |> collect
+3-element Vector{Pair{Symbol, Int64}}:
+ :a => 1
+ :b => 2
+ :c => 3
+
+julia> (;a=1, b=2, c=3) |> collect
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+```
 """
 pairs(collection) = Generator(=>, keys(collection), values(collection))
 
diff --git a/base/abstractset.jl b/base/abstractset.jl
index 442bbcb4bff48..bec4a84b19d15 100644
--- a/base/abstractset.jl
+++ b/base/abstractset.jl
@@ -11,30 +11,36 @@ copy!(dst::AbstractSet, src::AbstractSet) = union!(empty!(dst), src)
     union(s, itrs...)
     ∪(s, itrs...)
 
-Construct the union of sets. Maintain order with arrays.
+Construct an object containing all distinct elements from all of the arguments.
 
-See also: [`intersect`](@ref), [`isdisjoint`](@ref), [`vcat`](@ref), [`Iterators.flatten`](@ref).
+The first argument controls what kind of container is returned.
+If this is an array, it maintains the order in which elements first appear.
+
+Unicode `∪` can be typed by writing `\\cup` then pressing tab in the Julia REPL, and in many editors.
+This is an infix operator, allowing `s ∪ itr`.
+
+See also [`unique`](@ref), [`intersect`](@ref), [`isdisjoint`](@ref), [`vcat`](@ref), [`Iterators.flatten`](@ref).
 
 # Examples
 ```jldoctest
-julia> union([1, 2], [3, 4])
-4-element Vector{Int64}:
+julia> union([1, 2], [3])
+3-element Vector{Int64}:
  1
  2
  3
- 4
 
-julia> union([1, 2], [2, 4])
-3-element Vector{Int64}:
- 1
- 2
- 4
+julia> union([4 2 3 4 4], 1:3, 3.0)
+4-element Vector{Float64}:
+ 4.0
+ 2.0
+ 3.0
+ 1.0
 
-julia> union([4, 2], 1:2)
-3-element Vector{Int64}:
- 4
- 2
- 1
+julia> (0, 0.0) ∪ (-0.0, NaN)
+3-element Vector{Real}:
+   0
+  -0.0
+ NaN
 
 julia> union(Set([1, 2]), 2:3)
 Set{Int64} with 3 elements:
@@ -45,8 +51,6 @@ Set{Int64} with 3 elements:
 """
 function union end
 
-_in(itr) = x -> x in itr
-
 union(s, sets...) = union!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
 union(s::AbstractSet) = copy(s)
 
@@ -55,14 +59,14 @@ const ∪ = union
 """
     union!(s::Union{AbstractSet,AbstractVector}, itrs...)
 
-Construct the union of passed in sets and overwrite `s` with the result.
+Construct the [`union`](@ref) of the passed-in sets and overwrite `s` with the result.
 Maintain order with arrays.
 
 # Examples
 ```jldoctest
-julia> a = Set([1, 3, 4, 5]);
+julia> a = Set([3, 4, 5]);
 
-julia> union!(a, 1:2:8);
+julia> union!(a, 1:2:7);
 
 julia> a
 Set{Int64} with 5 elements:
@@ -104,10 +108,19 @@ end
     intersect(s, itrs...)
     ∩(s, itrs...)
 
-Construct the intersection of sets.
-Maintain order with arrays.
+Construct the set containing those elements which appear in all of the arguments.
+
+The first argument controls what kind of container is returned.
+If this is an array, it maintains the order in which elements first appear.
+
+Unicode `∩` can be typed by writing `\\cap` then pressing tab in the Julia REPL, and in many editors.
+This is an infix operator, allowing `s ∩ itr`.
+
+See also [`setdiff`](@ref), [`isdisjoint`](@ref), [`issubset`](@ref Base.issubset), [`issetequal`](@ref).
 
-See also: [`setdiff`](@ref), [`isdisjoint`](@ref), [`issubset`](@ref Base.issubset), [`issetequal`](@ref).
+!!! compat "Julia 1.8"
+    As of Julia 1.8, `intersect` returns a result whose eltype is the
+    type-promoted eltype of the two inputs.
 
 # Examples
 ```jldoctest
@@ -115,19 +128,49 @@ julia> intersect([1, 2, 3], [3, 4, 5])
 1-element Vector{Int64}:
  3
 
-julia> intersect([1, 4, 4, 5, 6], [4, 6, 6, 7, 8])
+julia> intersect([1, 4, 4, 5, 6], [6, 4, 6, 7, 8])
 2-element Vector{Int64}:
  4
  6
 
-julia> intersect(Set([1, 2]), BitSet([2, 3]))
-Set{Int64} with 1 element:
-  2
+julia> intersect(1:16, 7:99)
+7:16
+
+julia> (0, 0.0) ∩ (-0.0, 0)
+1-element Vector{Real}:
+ 0
+
+julia> intersect(Set([1, 2]), BitSet([2, 3]), 1.0:10.0)
+Set{Float64} with 1 element:
+  2.0
 ```
 """
-intersect(s::AbstractSet, itr, itrs...) = intersect!(intersect(s, itr), itrs...)
+function intersect(s::AbstractSet, itr, itrs...)
+    # Heuristic: when `s` is large and all lengths are known, reorder so the shortest of the other arguments is intersected with `s` first
+    if length(s)>50 && haslength(itr) && all(haslength, itrs)
+        min_length, min_idx = findmin(length, itrs)  # shortest of the trailing arguments and its position
+        if length(itr) > min_length
+            new_itrs = setindex(itrs, itr, min_idx)  # put `itr` into the slot the shortest one occupied
+            return intersect(s, itrs[min_idx], new_itrs...)  # retry with the shortest one in second position
+        end
+    end
+    T = promote_eltype(s, itr, itrs...)  # element type promoted over every argument
+    if T == promote_eltype(s, itr)
+        out = intersect(s, itr)  # the pairwise result is requested with this same promoted eltype
+    else
+        out = union!(emptymutable(s, T), s)  # otherwise start from the elements of `s` in a container requested with eltype `T`
+        intersect!(out, itr)
+    end
+    return intersect!(out, itrs...)  # narrow the result in place by each remaining argument
+end
 intersect(s) = union(s)
-intersect(s::AbstractSet, itr) = mapfilter(_in(s), push!, itr, emptymutable(s))
+function intersect(s::AbstractSet, itr)
+    out = emptymutable(s, promote_eltype(s, itr))  # result container shared by both branches
+    if haslength(itr) && hasfastin(itr) && length(s) < length(itr)
+        return mapfilter(in(itr), push!, s, out)  # `s` is shorter: scan `s`, test membership in `itr`
+    end
+    return mapfilter(in(s), push!, itr, out)  # default: scan `itr`, test membership in `s`
+end
 
 const ∩ = intersect
 
@@ -143,7 +186,7 @@ function intersect!(s::AbstractSet, itrs...)
     end
     return s
 end
-intersect!(s::AbstractSet, s2::AbstractSet) = filter!(_in(s2), s)
+intersect!(s::AbstractSet, s2::AbstractSet) = filter!(in(s2), s)
 intersect!(s::AbstractSet, itr) =
     intersect!(s, union!(emptymutable(s, eltype(itr)), itr))
 
@@ -276,21 +319,21 @@ issubset, ⊆, ⊇
 
 const FASTIN_SET_THRESHOLD = 70
 
-function issubset(l, r)
-    if haslength(r) && (isa(l, AbstractSet) || !hasfastin(r))
-        rlen = length(r) # conditions above make this length computed only when needed
-        # check l for too many unique elements
-        if isa(l, AbstractSet) && length(l) > rlen
+function issubset(a, b)
+    if haslength(b) && (isa(a, AbstractSet) || !hasfastin(b))
+        blen = length(b) # conditions above make this length computed only when needed
+        # check a for too many unique elements
+        if isa(a, AbstractSet) && length(a) > blen
             return false
         end
-        # when `in` would be too slow and r is big enough, convert it to a Set
+        # when `in` would be too slow and b is big enough, convert it to a Set
         # this threshold was empirically determined (cf. #26198)
-        if !hasfastin(r) && rlen > FASTIN_SET_THRESHOLD
-            return issubset(l, Set(r))
+        if !hasfastin(b) && blen > FASTIN_SET_THRESHOLD
+            return issubset(a, Set(b))
         end
     end
-    for elt in l
-        elt in r || return false
+    for elt in a
+        elt in b || return false
     end
     return true
 end
@@ -308,7 +351,7 @@ hasfastin(::Type) = false
 hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true
 hasfastin(x) = hasfastin(typeof(x))
 
-⊇(l, r) = r ⊆ l
+⊇(a, b) = b ⊆ a
 
 ## strict subset comparison
 
@@ -333,9 +376,11 @@ false
 """
 ⊊, ⊋
 
-⊊(l::AbstractSet, r) = length(l) < length(r) && l ⊆ r
-⊊(l, r) = Set(l) ⊊ r
-⊋(l, r) = r ⊊ l
+⊊(a::AbstractSet, b::AbstractSet) = length(a) < length(b) && a ⊆ b
+⊊(a::AbstractSet, b) = a ⊊ Set(b)
+⊊(a, b::AbstractSet) = Set(a) ⊊ b
+⊊(a, b) = Set(a) ⊊ Set(b)
+⊋(a, b) = b ⊊ a
 
 function ⊈ end
 function ⊉ end
@@ -358,8 +403,8 @@ false
 """
 ⊈, ⊉
 
-⊈(l, r) = !⊆(l, r)
-⊉(l, r) = r ⊈ l
+⊈(a, b) = !⊆(a, b)
+⊉(a, b) = b ⊈ a
 
 ## set equality comparison
 
@@ -380,56 +425,65 @@ julia> issetequal([1, 2], [2, 1])
 true
 ```
 """
-issetequal(l::AbstractSet, r::AbstractSet) = l == r
-issetequal(l::AbstractSet, r) = issetequal(l, Set(r))
+issetequal(a::AbstractSet, b::AbstractSet) = a == b
+issetequal(a::AbstractSet, b) = issetequal(a, Set(b))
 
-function issetequal(l, r::AbstractSet)
-    if haslength(l)
-        # check r for too many unique elements
-        length(l) < length(r) && return false
+function issetequal(a, b::AbstractSet)
+    if haslength(a)
+        # check b for too many unique elements
+        length(a) < length(b) && return false
     end
-    return issetequal(Set(l), r)
+    return issetequal(Set(a), b)
 end
 
-function issetequal(l, r)
-    haslength(l) && return issetequal(l, Set(r))
-    haslength(r) && return issetequal(r, Set(l))
-    return issetequal(Set(l), Set(r))
+function issetequal(a, b)
+    haslength(a) && return issetequal(a, Set(b))
+    haslength(b) && return issetequal(b, Set(a))
+    return issetequal(Set(a), Set(b))
 end
 
 ## set disjoint comparison
 """
-    isdisjoint(v1, v2) -> Bool
+    isdisjoint(a, b) -> Bool
 
-Return whether the collections `v1` and `v2` are disjoint, i.e. whether
-their intersection is empty.
+Determine whether the collections `a` and `b` are disjoint.
+Equivalent to `isempty(a ∩ b)` but more efficient when possible.
 
-See also: [`issetequal`](@ref), [`intersect`](@ref).
+See also: [`intersect`](@ref), [`isempty`](@ref), [`issetequal`](@ref).
 
 !!! compat "Julia 1.5"
     This function requires at least Julia 1.5.
+
+# Examples
+```jldoctest
+julia> isdisjoint([1, 2], [2, 3, 4])
+false
+
+julia> isdisjoint([3, 1], [2, 4])
+true
+```
 """
-function isdisjoint(l, r)
-    function _isdisjoint(l, r)
-        hasfastin(r) && return !any(in(r), l)
-        hasfastin(l) && return !any(in(l), r)
-        haslength(r) && length(r) < FASTIN_SET_THRESHOLD &&
-            return !any(in(r), l)
-        return !any(in(Set(r)), l)
+function isdisjoint(a, b)
+    function _isdisjoint(a, b)
+        hasfastin(b) && return !any(in(b), a)
+        hasfastin(a) && return !any(in(a), b)
+        haslength(b) && length(b) < FASTIN_SET_THRESHOLD &&
+            return !any(in(b), a)
+        return !any(in(Set(b)), a)
     end
-    if haslength(l) && haslength(r) && length(r) < length(l)
-        return _isdisjoint(r, l)
+    if haslength(a) && haslength(b) && length(b) < length(a)
+        return _isdisjoint(b, a)
     end
-    _isdisjoint(l, r)
+    _isdisjoint(a, b)
 end
 
 ## partial ordering of sets by containment
 
-==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
+==(a::AbstractSet, b::AbstractSet) = length(a) == length(b) && a ⊆ b
 # convenience functions for AbstractSet
 # (if needed, only their synonyms ⊊ and ⊆ must be specialized)
-<( l::AbstractSet, r::AbstractSet) = l ⊊ r
-<=(l::AbstractSet, r::AbstractSet) = l ⊆ r
+<( a::AbstractSet, b::AbstractSet) = a ⊊ b
+<=(a::AbstractSet, b::AbstractSet) = a ⊆ b
 
 ## filtering sets
 
diff --git a/base/accumulate.jl b/base/accumulate.jl
index 6f0b6e7d05ba3..663bd850695a8 100644
--- a/base/accumulate.jl
+++ b/base/accumulate.jl
@@ -116,35 +116,29 @@ end
 """
     cumsum(itr)
 
-Cumulative sum an iterator. See also [`cumsum!`](@ref)
-to use a preallocated output array, both for performance and to control the precision of the
-output (e.g. to avoid overflow).
+Cumulative sum of an iterator.
+
+See also [`accumulate`](@ref) to apply functions other than `+`.
 
 !!! compat "Julia 1.5"
     `cumsum` on a non-array iterator requires at least Julia 1.5.
 
 # Examples
 ```jldoctest
-julia> cumsum([1, 1, 1])
+julia> cumsum(1:3)
 3-element Vector{Int64}:
  1
- 2
  3
+ 6
+
+julia> cumsum((true, false, true, false, true))
+(1, 1, 2, 2, 3)
 
-julia> cumsum([fill(1, 2) for i in 1:3])
+julia> cumsum(fill(1, 2) for i in 1:3)
 3-element Vector{Vector{Int64}}:
  [1, 1]
  [2, 2]
  [3, 3]
-
-julia> cumsum((1, 1, 1))
-(1, 2, 3)
-
-julia> cumsum(x^2 for x in 1:3)
-3-element Vector{Int64}:
-  1
-  5
- 14
 ```
 """
 cumsum(x::AbstractVector) = cumsum(x, dims=1)
@@ -177,10 +171,7 @@ to control the precision of the output (e.g. to avoid overflow).
 
 # Examples
 ```jldoctest
-julia> a = [1 2 3; 4 5 6]
-2×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
+julia> a = Int8[1 2 3; 4 5 6];
 
 julia> cumprod(a, dims=1)
 2×3 Matrix{Int64}:
@@ -200,9 +191,7 @@ end
 """
     cumprod(itr)
 
-Cumulative product of an iterator. See also
-[`cumprod!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow).
+Cumulative product of an iterator.
 
 See also [`cumprod!`](@ref), [`accumulate`](@ref), [`cumsum`](@ref).
 
@@ -217,20 +206,16 @@ julia> cumprod(fill(1//2, 3))
  1//4
  1//8
 
-julia> cumprod([fill(1//3, 2, 2) for i in 1:3])
-3-element Vector{Matrix{Rational{Int64}}}:
- [1//3 1//3; 1//3 1//3]
- [2//9 2//9; 2//9 2//9]
- [4//27 4//27; 4//27 4//27]
+julia> cumprod((1, 2, 1, 3, 1))
+(1, 2, 2, 6, 6)
 
-julia> cumprod((1, 2, 1))
-(1, 2, 2)
-
-julia> cumprod(x^2 for x in 1:3)
-3-element Vector{Int64}:
-  1
-  4
- 36
+julia> cumprod("julia")
+5-element Vector{String}:
+ "j"
+ "ju"
+ "jul"
+ "juli"
+ "julia"
 ```
 """
 cumprod(x::AbstractVector) = cumprod(x, dims=1)
@@ -243,8 +228,11 @@ cumprod(itr) = accumulate(mul_prod, itr)
 Cumulative operation `op` along the dimension `dims` of `A` (providing `dims` is optional
 for vectors). An initial value `init` may optionally be provided by a keyword argument. See
 also [`accumulate!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow). For common operations
-there are specialized variants of `accumulate`, see: [`cumsum`](@ref), [`cumprod`](@ref)
+to control the precision of the output (e.g. to avoid overflow).
+
+For common operations there are specialized variants of `accumulate`,
+see [`cumsum`](@ref), [`cumprod`](@ref). For a lazy version, see
+[`Iterators.accumulate`](@ref).
 
 !!! compat "Julia 1.5"
     `accumulate` on a non-array iterator requires at least Julia 1.5.
@@ -257,35 +245,28 @@ julia> accumulate(+, [1,2,3])
  3
  6
 
-julia> accumulate(*, [1,2,3])
-3-element Vector{Int64}:
- 1
- 2
- 6
+julia> accumulate(min, (1, -2, 3, -4, 5), init=0)
+(0, -2, -2, -4, -4)
 
-julia> accumulate(+, [1,2,3]; init=100)
-3-element Vector{Int64}:
- 101
- 103
- 106
+julia> accumulate(/, (2, 4, Inf), init=100)
+(50.0, 12.5, 0.0)
 
-julia> accumulate(min, [1,2,-1]; init=0)
-3-element Vector{Int64}:
-  0
-  0
- -1
-
-julia> accumulate(+, fill(1, 3, 3), dims=1)
-3×3 Matrix{Int64}:
- 1  1  1
- 2  2  2
- 3  3  3
-
-julia> accumulate(+, fill(1, 3, 3), dims=2)
-3×3 Matrix{Int64}:
- 1  2  3
- 1  2  3
- 1  2  3
+julia> accumulate(=>, i^2 for i in 1:3)
+3-element Vector{Any}:
+          1
+        1 => 4
+ (1 => 4) => 9
+
+julia> accumulate(+, fill(1, 3, 4))
+3×4 Matrix{Int64}:
+ 1  4  7  10
+ 2  5  8  11
+ 3  6  9  12
+
+julia> accumulate(+, fill(1, 2, 5), dims=2, init=100.0)
+2×5 Matrix{Float64}:
+ 101.0  102.0  103.0  104.0  105.0
+ 101.0  102.0  103.0  104.0  105.0
 ```
 """
 function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...)
@@ -318,41 +299,39 @@ end
 
 Cumulative operation `op` on `A` along the dimension `dims`, storing the result in `B`.
 Providing `dims` is optional for vectors.  If the keyword argument `init` is given, its
-value is used to instantiate the accumulation. See also [`accumulate`](@ref).
+value is used to initialize the accumulation.
+
+See also [`accumulate`](@ref), [`cumsum!`](@ref), [`cumprod!`](@ref).
 
 # Examples
 ```jldoctest
 julia> x = [1, 0, 2, 0, 3];
 
-julia> y = [0, 0, 0, 0, 0];
+julia> y = rand(5);
 
 julia> accumulate!(+, y, x);
 
 julia> y
-5-element Vector{Int64}:
- 1
- 1
- 3
- 3
- 6
+5-element Vector{Float64}:
+ 1.0
+ 1.0
+ 3.0
+ 3.0
+ 6.0
 
-julia> A = [1 2; 3 4];
+julia> A = [1 2 3; 4 5 6];
 
-julia> B = [0 0; 0 0];
+julia> B = similar(A);
 
-julia> accumulate!(-, B, A, dims=1);
-
-julia> B
-2×2 Matrix{Int64}:
-  1   2
- -2  -2
-
-julia> accumulate!(-, B, A, dims=2);
+julia> accumulate!(-, B, A, dims=1)
+2×3 Matrix{Int64}:
+  1   2   3
+ -3  -3  -3
 
-julia> B
-2×2 Matrix{Int64}:
- 1  -1
- 3  -1
+julia> accumulate!(*, B, A, dims=2, init=10)
+2×3 Matrix{Int64}:
+ 10   20    60
+ 40  200  1200
 ```
 """
 function accumulate!(op, B, A; dims::Union{Integer, Nothing} = nothing, kw...)
diff --git a/base/array.jl b/base/array.jl
index f669333201c06..18a13e89c41c2 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -150,7 +150,7 @@ end
 size(a::Array, d::Integer) = arraysize(a, convert(Int, d))
 size(a::Vector) = (arraysize(a,1),)
 size(a::Matrix) = (arraysize(a,1), arraysize(a,2))
-size(a::Array{<:Any,N}) where {N} = (@_inline_meta; ntuple(M -> size(a, M), Val(N))::Dims)
+size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims)
 
 asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...)
 
@@ -174,7 +174,7 @@ isbitsunion(u::Union) = allocatedinline(u)
 isbitsunion(x) = false
 
 function _unsetindex!(A::Array{T}, i::Int) where {T}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, i)
     t = @_gc_preserve_begin A
     p = Ptr{Ptr{Cvoid}}(pointer(A, i))
@@ -217,7 +217,7 @@ elsize(::Type{<:Array{T}}) where {T} = aligned_sizeof(T)
 sizeof(a::Array) = Core.sizeof(a)
 
 function isassigned(a::Array, i::Int...)
-    @_inline_meta
+    @inline
     ii = (_sub2ind(size(a), i...) % UInt) - 1
     @boundscheck ii < length(a) % UInt || return false
     ccall(:jl_array_isassigned, Cint, (Any, UInt), a, ii) == 1
@@ -336,7 +336,7 @@ end
 # occurs, see discussion in #27874.
 # It is also mitigated by using a constant string.
 function _throw_argerror()
-    @_noinline_meta
+    @noinline
     throw(ArgumentError("Number of elements to copy must be nonnegative."))
 end
 
@@ -408,10 +408,10 @@ function getindex(::Type{T}, vals...) where T
     return a
 end
 
-getindex(::Type{T}) where {T} = (@_inline_meta; Vector{T}())
-getindex(::Type{T}, x) where {T} = (@_inline_meta; a = Vector{T}(undef, 1); @inbounds a[1] = x; a)
-getindex(::Type{T}, x, y) where {T} = (@_inline_meta; a = Vector{T}(undef, 2); @inbounds (a[1] = x; a[2] = y); a)
-getindex(::Type{T}, x, y, z) where {T} = (@_inline_meta; a = Vector{T}(undef, 3); @inbounds (a[1] = x; a[2] = y; a[3] = z); a)
+getindex(::Type{T}) where {T} = (@inline; Vector{T}())
+getindex(::Type{T}, x) where {T} = (@inline; a = Vector{T}(undef, 1); @inbounds a[1] = x; a)
+getindex(::Type{T}, x, y) where {T} = (@inline; a = Vector{T}(undef, 2); @inbounds (a[1] = x; a[2] = y); a)
+getindex(::Type{T}, x, y, z) where {T} = (@inline; a = Vector{T}(undef, 3); @inbounds (a[1] = x; a[2] = y; a[3] = z); a)
 
 function getindex(::Type{Any}, @nospecialize vals...)
     a = Vector{Any}(undef, length(vals))
@@ -431,14 +431,74 @@ to_dim(d::Integer) = d
 to_dim(d::OneTo) = last(d)
 
 """
-    fill(x, dims::Tuple)
-    fill(x, dims...)
+    fill(value, dims::Tuple)
+    fill(value, dims...)
 
-Create an array filled with the value `x`. For example, `fill(1.0, (5,5))` returns a 5×5
-array of floats, with each element initialized to `1.0`.
+Create an array of size `dims` with every location set to `value`.
 
-`dims` may be specified as either a tuple or a sequence of arguments. For example,
-the common idiom `fill(x)` creates a zero-dimensional array containing the single value `x`.
+For example, `fill(1.0, (5,5))` returns a 5×5 array of floats,
+with `1.0` in every location of the array.
+
+The dimension lengths `dims` may be specified as either a tuple or a sequence of arguments.
+An `N`-length tuple or `N` arguments following the `value` specify an `N`-dimensional
+array. Thus, a common idiom for creating a zero-dimensional array with its only location
+set to `x` is `fill(x)`.
+
+Every location of the returned array is set to (and is thus [`===`](@ref) to)
+the `value` that was passed; this means that if the `value` is itself modified,
+all elements of the `fill`ed array will reflect that modification because they're
+_still_ that very `value`. This is of no concern with `fill(1.0, (5,5))` as the
+`value` `1.0` is immutable and cannot itself be modified, but can be unexpected
+with mutable values like — most commonly — arrays.  For example, `fill([], 3)`
+places _the very same_ empty array in all three locations of the returned vector:
+
+```jldoctest
+julia> v = fill([], 3)
+3-element Vector{Vector{Any}}:
+ []
+ []
+ []
+
+julia> v[1] === v[2] === v[3]
+true
+
+julia> value = v[1]
+Any[]
+
+julia> push!(value, 867_5309)
+1-element Vector{Any}:
+ 8675309
+
+julia> v
+3-element Vector{Vector{Any}}:
+ [8675309]
+ [8675309]
+ [8675309]
+```
+
+To create an array of many independent inner arrays, use a [comprehension](@ref man-comprehensions) instead.
+This creates a new and distinct array on each iteration of the loop:
+
+```jldoctest
+julia> v2 = [[] for _ in 1:3]
+3-element Vector{Vector{Any}}:
+ []
+ []
+ []
+
+julia> v2[1] === v2[2] === v2[3]
+false
+
+julia> push!(v2[1], 8675309)
+1-element Vector{Any}:
+ 8675309
+
+julia> v2
+3-element Vector{Vector{Any}}:
+ [8675309]
+ []
+ []
+```
 
 See also: [`fill!`](@ref), [`zeros`](@ref), [`ones`](@ref), [`similar`](@ref).
 
@@ -452,15 +512,15 @@ julia> fill(1.0, (2,3))
 julia> fill(42)
 0-dimensional Array{Int64, 0}:
 42
-```
 
-If `x` is an object reference, all elements will refer to the same object:
-```jldoctest
-julia> A = fill(zeros(2), 2);
+julia> A = fill(zeros(2), 2) # sets both elements to the same [0.0, 0.0] vector
+2-element Vector{Vector{Float64}}:
+ [0.0, 0.0]
+ [0.0, 0.0]
 
-julia> A[1][1] = 42; # modifies both A[1][1] and A[2][1]
+julia> A[1][1] = 42; # modifies the filled value to be [42.0, 0.0]
 
-julia> A
+julia> A # both A[1] and A[2] are the very same vector
 2-element Vector{Vector{Float64}}:
  [42.0, 0.0]
  [42.0, 0.0]
@@ -583,23 +643,38 @@ julia> collect(Float64, 1:2:5)
 """
 collect(::Type{T}, itr) where {T} = _collect(T, itr, IteratorSize(itr))
 
-_collect(::Type{T}, itr, isz::HasLength) where {T} = copyto!(Vector{T}(undef, Int(length(itr)::Integer)), itr)
-_collect(::Type{T}, itr, isz::HasShape) where {T}  = copyto!(similar(Array{T}, axes(itr)), itr)
+_collect(::Type{T}, itr, isz::Union{HasLength,HasShape}) where {T} =
+    copyto!(_array_for(T, isz, _similar_shape(itr, isz)), itr)
 function _collect(::Type{T}, itr, isz::SizeUnknown) where T
     a = Vector{T}()
     for x in itr
-        push!(a,x)
+        push!(a, x)
     end
     return a
 end
 
 # make a collection similar to `c` and appropriate for collecting `itr`
-_similar_for(c::AbstractArray, ::Type{T}, itr, ::SizeUnknown) where {T} = similar(c, T, 0)
-_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasLength) where {T} =
-    similar(c, T, Int(length(itr)::Integer))
-_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasShape) where {T} =
-    similar(c, T, axes(itr))
-_similar_for(c, ::Type{T}, itr, isz) where {T} = similar(c, T)
+_similar_for(c, ::Type{T}, itr, isz, shp) where {T} = similar(c, T)
+
+_similar_shape(itr, ::SizeUnknown) = nothing
+_similar_shape(itr, ::HasLength) = length(itr)::Integer
+_similar_shape(itr, ::HasShape) = axes(itr)
+
+_similar_for(c::AbstractArray, ::Type{T}, itr, ::SizeUnknown, ::Nothing) where {T} =
+    similar(c, T, 0)
+_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasLength, len::Integer) where {T} =
+    similar(c, T, len)
+_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasShape, axs) where {T} =
+    similar(c, T, axs)
+
+# make a collection appropriate for collecting `itr::Generator`
+_array_for(::Type{T}, ::SizeUnknown, ::Nothing) where {T} = Vector{T}(undef, 0)
+_array_for(::Type{T}, ::HasLength, len::Integer) where {T} = Vector{T}(undef, Int(len))
+_array_for(::Type{T}, ::HasShape{N}, axs) where {T,N} = similar(Array{T,N}, axs)
+
+# used by syntax lowering for simple typed comprehensions
+_array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(itr, isz))
+
 
 """
     collect(collection)
@@ -638,10 +713,10 @@ collect(A::AbstractArray) = _collect_indices(axes(A), A)
 collect_similar(cont, itr) = _collect(cont, itr, IteratorEltype(itr), IteratorSize(itr))
 
 _collect(cont, itr, ::HasEltype, isz::Union{HasLength,HasShape}) =
-    copyto!(_similar_for(cont, eltype(itr), itr, isz), itr)
+    copyto!(_similar_for(cont, eltype(itr), itr, isz, _similar_shape(itr, isz)), itr)
 
 function _collect(cont, itr, ::HasEltype, isz::SizeUnknown)
-    a = _similar_for(cont, eltype(itr), itr, isz)
+    a = _similar_for(cont, eltype(itr), itr, isz, nothing)
     for x in itr
         push!(a,x)
     end
@@ -679,10 +754,11 @@ if isdefined(Core, :Compiler)
         I = esc(itr)
         return quote
             if $I isa Generator && ($I).f isa Type
-                ($I).f
+                T = ($I).f
             else
-                Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)})
+                T = Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)})
             end
+            promote_typejoin_union(T)
         end
     end
 else
@@ -690,7 +766,7 @@ else
         I = esc(itr)
         return quote
             if $I isa Generator && ($I).f isa Type
-                ($I).f
+                promote_typejoin_union($I.f)
             else
                 Any
             end
@@ -698,34 +774,44 @@ else
     end
 end
 
-_array_for(::Type{T}, itr, ::HasLength) where {T} = Vector{T}(undef, Int(length(itr)::Integer))
-_array_for(::Type{T}, itr, ::HasShape{N}) where {T,N} = similar(Array{T,N}, axes(itr))
-
 function collect(itr::Generator)
     isz = IteratorSize(itr.iter)
     et = @default_eltype(itr)
     if isa(isz, SizeUnknown)
         return grow_to!(Vector{et}(), itr)
     else
+        shp = _similar_shape(itr, isz)
         y = iterate(itr)
         if y === nothing
-            return _array_for(et, itr.iter, isz)
+            return _array_for(et, isz, shp)
         end
         v1, st = y
-        collect_to_with_first!(_array_for(typeof(v1), itr.iter, isz), v1, itr, st)
+        dest = _array_for(typeof(v1), isz, shp)
+        # The typeassert gives inference a helping hand on the element type and dimensionality
+        # (work-around for #28382)
+        et′ = et <: Type ? Type : et
+        RT = dest isa AbstractArray ? AbstractArray{<:et′, ndims(dest)} : Any
+        collect_to_with_first!(dest, v1, itr, st)::RT
     end
 end
 
 _collect(c, itr, ::EltypeUnknown, isz::SizeUnknown) =
-    grow_to!(_similar_for(c, @default_eltype(itr), itr, isz), itr)
+    grow_to!(_similar_for(c, @default_eltype(itr), itr, isz, nothing), itr)
 
 function _collect(c, itr, ::EltypeUnknown, isz::Union{HasLength,HasShape})
+    et = @default_eltype(itr)
+    shp = _similar_shape(itr, isz)
     y = iterate(itr)
     if y === nothing
-        return _similar_for(c, @default_eltype(itr), itr, isz)
+        return _similar_for(c, et, itr, isz, shp)
     end
     v1, st = y
-    collect_to_with_first!(_similar_for(c, typeof(v1), itr, isz), v1, itr, st)
+    dest = _similar_for(c, typeof(v1), itr, isz, shp)
+    # The typeassert gives inference a helping hand on the element type and dimensionality
+    # (work-around for #28382)
+    et′ = et <: Type ? Type : et
+    RT = dest isa AbstractArray ? AbstractArray{<:et′, ndims(dest)} : Any
+    collect_to_with_first!(dest, v1, itr, st)::RT
 end
 
 function collect_to_with_first!(dest::AbstractArray, v1, itr, st)
@@ -740,7 +826,7 @@ function collect_to_with_first!(dest, v1, itr, st)
 end
 
 function setindex_widen_up_to(dest::AbstractArray{T}, el, i) where T
-    @_inline_meta
+    @inline
     new = similar(dest, promote_typejoin(T, typeof(el)))
     f = first(LinearIndices(dest))
     copyto!(new, first(LinearIndices(new)), dest, f, i-f)
@@ -776,7 +862,7 @@ function grow_to!(dest, itr)
 end
 
 function push_widen(dest, el)
-    @_inline_meta
+    @inline
     new = sizehint!(empty(dest, promote_typejoin(eltype(dest), typeof(el))), length(dest))
     if new isa AbstractSet
         # TODO: merge back these two branches when copy! is re-enabled for sets/vectors
@@ -806,7 +892,7 @@ end
 
 ## Iteration ##
 
-iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
+iterate(A::Array, i=1) = (@inline; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
 
 ## Indexing: getindex ##
 
@@ -833,11 +919,11 @@ function getindex end
 
 # This is more complicated than it needs to be in order to get Win64 through bootstrap
 @eval getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)
-@eval getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@_inline_meta; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))
+@eval getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@inline; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))
 
-# Faster contiguous indexing using copyto! for UnitRange and Colon
+# Faster contiguous indexing using copyto! for AbstractUnitRange and Colon
 function getindex(A::Array, I::AbstractUnitRange{<:Integer})
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I)
     lI = length(I)
     X = similar(A, axes(I))
@@ -876,7 +962,7 @@ function setindex! end
 
 @eval setindex!(A::Array{T}, x, i1::Int) where {T} = arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1)
 @eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
-    (@_inline_meta; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
+    (@inline; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
 
 # This is redundant with the abstract fallbacks but needed and helpful for bootstrap
 function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
@@ -895,8 +981,8 @@ function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
 end
 
 # Faster contiguous setindex! with copyto!
-function setindex!(A::Array{T}, X::Array{T}, I::UnitRange{Int}) where T
-    @_inline_meta
+function setindex!(A::Array{T}, X::Array{T}, I::AbstractUnitRange{Int}) where T
+    @inline
     @boundscheck checkbounds(A, I)
     lI = length(I)
     @boundscheck setindex_shape_check(X, lI)
@@ -906,7 +992,7 @@ function setindex!(A::Array{T}, X::Array{T}, I::UnitRange{Int}) where T
     return A
 end
 function setindex!(A::Array{T}, X::Array{T}, c::Colon) where T
-    @_inline_meta
+    @inline
     lI = length(A)
     @boundscheck setindex_shape_check(X, lI)
     if lI > 0
@@ -1393,14 +1479,24 @@ julia> deleteat!([6, 5, 4, 3, 2, 1], 2)
  1
 ```
 """
-deleteat!(a::Vector, i::Integer) = (_deleteat!(a, i, 1); a)
-
-function deleteat!(a::Vector, r::UnitRange{<:Integer})
-    n = length(a)
-    isempty(r) || _deleteat!(a, first(r), length(r))
+function deleteat!(a::Vector, i::Integer)
+    i isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
+    _deleteat!(a, i, 1)
     return a
 end
 
+function deleteat!(a::Vector, r::AbstractUnitRange{<:Integer})
+    if eltype(r) === Bool
+        return invoke(deleteat!, Tuple{Vector, AbstractVector{Bool}}, a, r) # defer to the Bool-vector method
+    else
+        # Integer range: drop the contiguous span of `length(r)` items starting at `first(r)` in one call.
+        f = first(r)
+        f isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
+        isempty(r) || _deleteat!(a, f, length(r)) # an empty range deletes nothing
+        return a
+    end
+end
+
 """
     deleteat!(a::Vector, inds)
 
@@ -1561,7 +1657,7 @@ Remove items at specified indices, and return a collection containing
 the removed items.
 Subsequent items are shifted left to fill the resulting gaps.
 If specified, replacement values from an ordered collection will be spliced in
-place of the removed items; in this case, `indices` must be a `UnitRange`.
+place of the removed items; in this case, `indices` must be an `AbstractUnitRange`.
 
 To insert `replacement` before an index `n` without removing any items, use
 `splice!(collection, n:n-1, replacement)`.
@@ -1569,6 +1665,9 @@ To insert `replacement` before an index `n` without removing any items, use
 !!! compat "Julia 1.5"
     Prior to Julia 1.5, `indices` must always be a `UnitRange`.
 
+!!! compat "Julia 1.8"
+    Prior to Julia 1.8, `indices` must be a `UnitRange` if splicing in replacement values.
+
 # Examples
 ```jldoctest
 julia> A = [-1, -2, -3, 5, 4, 3, -1]; splice!(A, 4:3, 2)
@@ -1586,7 +1685,7 @@ julia> A
  -1
 ```
 """
-function splice!(a::Vector, r::UnitRange{<:Integer}, ins=_default_splice)
+function splice!(a::Vector, r::AbstractUnitRange{<:Integer}, ins=_default_splice)
     v = a[r]
     m = length(ins)
     if m == 0
@@ -1824,18 +1923,7 @@ julia> findnext(A, CartesianIndex(1, 1))
 CartesianIndex(2, 1)
 ```
 """
-function findnext(A, start)
-    l = last(keys(A))
-    i = oftype(l, start)
-    i > l && return nothing
-    while true
-        A[i] && return i
-        i == l && break
-        # nextind(A, l) can throw/overflow
-        i = nextind(A, i)
-    end
-    return nothing
-end
+findnext(A, start) = findnext(identity, A, start)
 
 """
     findfirst(A)
@@ -1872,14 +1960,7 @@ julia> findfirst(A)
 CartesianIndex(2, 1)
 ```
 """
-function findfirst(A)
-    for (i, a) in pairs(A)
-        if a
-            return i
-        end
-    end
-    return nothing
-end
+findfirst(A) = findfirst(identity, A)
 
 # Needed for bootstrap, and allows defining only an optimized findnext method
 findfirst(A::AbstractArray) = findnext(A, first(keys(A)))
@@ -1971,7 +2052,7 @@ findfirst(p::Union{Fix2{typeof(isequal),Int},Fix2{typeof(==),Int}}, r::OneTo{Int
     1 <= p.x <= r.stop ? p.x : nothing
 
 findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitRange) where {T<:Integer} =
-    first(r) <= p.x <= last(r) ? 1+Int(p.x - first(r)) : nothing
+    first(r) <= p.x <= last(r) ? firstindex(r) + Int(p.x - first(r)) : nothing
 
 function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::StepRange{T,S}) where {T,S}
     isempty(r) && return nothing
@@ -2015,18 +2096,7 @@ julia> findprev(A, CartesianIndex(2, 1))
 CartesianIndex(2, 1)
 ```
 """
-function findprev(A, start)
-    f = first(keys(A))
-    i = oftype(f, start)
-    i < f && return nothing
-    while true
-        A[i] && return i
-        i == f && break
-        # prevind(A, f) can throw/underflow
-        i = prevind(A, i)
-    end
-    return nothing
-end
+findprev(A, start) = findprev(identity, A, start)
 
 """
     findlast(A)
@@ -2064,14 +2134,7 @@ julia> findlast(A)
 CartesianIndex(2, 1)
 ```
 """
-function findlast(A)
-    for (i, a) in Iterators.reverse(pairs(A))
-        if a
-            return i
-        end
-    end
-    return nothing
-end
+findlast(A) = findlast(identity, A)
 
 # Needed for bootstrap, and allows defining only an optimized findprev method
 findlast(A::AbstractArray) = findprev(A, last(keys(A)))
@@ -2264,6 +2327,7 @@ Int64[]
 function findall(A)
     collect(first(p) for p in pairs(A) if last(p))
 end
+
 # Allocating result upfront is faster (possible only when collection can be iterated twice)
 function findall(A::AbstractArray{Bool})
     n = count(A)
@@ -2503,6 +2567,54 @@ function filter!(f, a::AbstractVector)
     return a
 end
 
+"""
+    keepat!(a::Vector, inds)
+
+Remove the items at all the indices which are not given by `inds`,
+and return the modified `a`.
+Items which are kept are shifted to fill the resulting gaps.
+
+`inds` must be an iterator of sorted and unique integer indices.
+See also [`deleteat!`](@ref).
+
+!!! compat "Julia 1.7"
+    This function is available as of Julia 1.7.
+
+# Examples
+```jldoctest
+julia> keepat!([6, 5, 4, 3, 2, 1], 1:2:5)
+3-element Vector{Int64}:
+ 6
+ 4
+ 2
+```
+"""
+keepat!(a::Vector, inds) = _keepat!(a, inds)
+
+"""
+    keepat!(a::Vector, m::AbstractVector{Bool})
+
+The in-place version of logical indexing `a = a[m]`. That is, `keepat!(a, m)` on
+vectors of equal length `a` and `m` will remove all elements from `a` for which
+`m` at the corresponding index is `false`.
+
+# Examples
+```jldoctest
+julia> a = [:a, :b, :c];
+
+julia> keepat!(a, [true, false, true])
+2-element Vector{Symbol}:
+ :a
+ :c
+
+julia> a
+2-element Vector{Symbol}:
+ :a
+ :c
+```
+"""
+keepat!(a::Vector, m::AbstractVector{Bool}) = _keepat!(a, m)
+
 # set-like operators for vectors
 # These are moderately efficient, preserve order, and remove dupes.
 
@@ -2536,19 +2648,27 @@ function _shrink!(shrinker!, v::AbstractVector, itrs)
     seen = Set{eltype(v)}()
     filter!(_grow_filter!(seen), v)
     shrinker!(seen, itrs...)
-    filter!(_in(seen), v)
+    filter!(in(seen), v)
 end
 
 intersect!(v::AbstractVector, itrs...) = _shrink!(intersect!, v, itrs)
 setdiff!(  v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs)
 
-vectorfilter(f, v::AbstractVector) = filter(f, v) # TODO: do we want this special case?
-vectorfilter(f, v) = [x for x in v if f(x)]
+vectorfilter(T::Type, f, v) = T[x for x in v if f(x)]
 
 function _shrink(shrinker!, itr, itrs)
-    keep = shrinker!(Set(itr), itrs...)
-    vectorfilter(_shrink_filter!(keep), itr)
+    T = promote_eltype(itr, itrs...)
+    keep = shrinker!(Set{T}(itr), itrs...)
+    vectorfilter(T, _shrink_filter!(keep), itr)
 end
 
 intersect(itr, itrs...) = _shrink(intersect!, itr, itrs)
 setdiff(  itr, itrs...) = _shrink(setdiff!, itr, itrs)
+
+function intersect(v::AbstractVector, r::AbstractRange)
+    T = promote_eltype(v, r)
+    common = Iterators.filter(in(r), v)
+    seen = Set{T}(common)
+    return vectorfilter(T, _shrink_filter!(seen), common)
+end
+intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r)
diff --git a/base/arraymath.jl b/base/arraymath.jl
index e75e98bf9dd62..62dc3772e4938 100644
--- a/base/arraymath.jl
+++ b/base/arraymath.jl
@@ -1,36 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-## Unary operators ##
-
-"""
-    conj!(A)
-
-Transform an array to its complex conjugate in-place.
-
-See also [`conj`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1+im 2-im; 2+2im 3+im]
-2×2 Matrix{Complex{Int64}}:
- 1+1im  2-1im
- 2+2im  3+1im
-
-julia> conj!(A);
-
-julia> A
-2×2 Matrix{Complex{Int64}}:
- 1-1im  2+1im
- 2-2im  3-1im
-```
-"""
-conj!(A::AbstractArray{<:Number}) = (@inbounds broadcast!(conj, A, A); A)
-
-for f in (:-, :conj, :real, :imag)
-    @eval ($f)(A::AbstractArray) = broadcast_preserving_zero_d($f, A)
-end
-
-
 ## Binary arithmetic operators ##
 
 for f in (:+, :-)
diff --git a/base/arrayshow.jl b/base/arrayshow.jl
index 1e9f3e59729e6..0d480b64bb32d 100644
--- a/base/arrayshow.jl
+++ b/base/arrayshow.jl
@@ -59,7 +59,8 @@ column going across the screen.
 """
 function alignment(io::IO, @nospecialize(X::AbstractVecOrMat),
         rows::AbstractVector{T}, cols::AbstractVector{V},
-        cols_if_complete::Integer, cols_otherwise::Integer, sep::Integer) where {T,V}
+        cols_if_complete::Integer, cols_otherwise::Integer, sep::Integer,
+        #= `size(X)` may not infer, set this in caller =# ncols::Integer=size(X, 2)) where {T,V}
     a = Tuple{T, V}[]
     for j in cols # need to go down each column one at a time
         l = r = 0
@@ -78,7 +79,7 @@ function alignment(io::IO, @nospecialize(X::AbstractVecOrMat),
             break
         end
     end
-    if 1 < length(a) < length(axes(X,2))
+    if 1 < length(a) < ncols
         while sum(map(sum,a)) + sep*length(a) >= cols_otherwise
             pop!(a)
         end
@@ -95,7 +96,8 @@ is specified as string sep.
 """
 function print_matrix_row(io::IO,
         @nospecialize(X::AbstractVecOrMat), A::Vector,
-        i::Integer, cols::AbstractVector, sep::AbstractString)
+        i::Integer, cols::AbstractVector, sep::AbstractString,
+        #= `axes(X)` may not infer, set this in caller =# idxlast::Integer=last(axes(X, 2)))
     for (k, j) = enumerate(cols)
         k > length(A) && break
         if isassigned(X,Int(i),Int(j)) # isassigned accepts only `Int` indices
@@ -114,7 +116,7 @@ function print_matrix_row(io::IO,
             sx = undef_ref_str
         end
         l = repeat(" ", A[k][1]-a[1]) # pad on left and right as needed
-        r = j == axes(X, 2)[end] ? "" : repeat(" ", A[k][2]-a[2])
+        r = j == idxlast ? "" : repeat(" ", A[k][2]-a[2])
         prettysx = replace_in_print_matrix(X,i,j,sx)
         print(io, l, prettysx, r)
         if k < length(A); print(io, sep); end
@@ -171,6 +173,7 @@ end
 
 function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, hdots, vdots, ddots, hmod, vmod, rowsA, colsA)
     hmod, vmod = Int(hmod)::Int, Int(vmod)::Int
+    ncols, idxlast = length(colsA), last(colsA)
     if !(get(io, :limit, false)::Bool)
         screenheight = screenwidth = typemax(Int)
     else
@@ -201,26 +204,26 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
     else
 	    colsA = [colsA;]
     end
-    A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize)
+    A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize, ncols)
     # Nine-slicing is accomplished using print_matrix_row repeatedly
     if m <= screenheight # rows fit vertically on screen
         if n <= length(A) # rows and cols fit so just print whole matrix in one piece
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,A,i,colsA,sep)
+                print_matrix_row(io, X,A,i,colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != last(rowsA); println(io); end
             end
         else # rows fit down screen but cols don't, so need horizontal ellipsis
             c = div(screenwidth-length(hdots)::Int+1,2)+1  # what goes to right of ellipsis
-            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize)) # alignments for right
+            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize, ncols)) # alignments for right
             c = screenwidth - sum(map(sum,Ralign)) - (length(Ralign)-1)*sepsize - length(hdots)::Int
-            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize) # alignments for left of ellipsis
+            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize, ncols) # alignments for left of ellipsis
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep)
+                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep,idxlast)
                 print(io, (i - first(rowsA)) % hmod == 0 ? hdots : repeat(" ", length(hdots)::Int))
-                print_matrix_row(io, X, Ralign, i, (n - length(Ralign)) .+ colsA, sep)
+                print_matrix_row(io, X, Ralign, i, (n - length(Ralign)) .+ colsA, sep, idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != last(rowsA); println(io); end
             end
@@ -229,7 +232,7 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
         if n <= length(A) # rows don't fit, cols do, so only vertical ellipsis
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,A,i,colsA,sep)
+                print_matrix_row(io, X,A,i,colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != rowsA[end] || i == rowsA[halfheight]; println(io); end
                 if i == rowsA[halfheight]
@@ -240,15 +243,15 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
             end
         else # neither rows nor cols fit, so use all 3 kinds of dots
             c = div(screenwidth-length(hdots)::Int+1,2)+1
-            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize))
+            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize, ncols))
             c = screenwidth - sum(map(sum,Ralign)) - (length(Ralign)-1)*sepsize - length(hdots)::Int
-            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize)
+            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize, ncols)
             r = mod((length(Ralign)-n+1),vmod) # where to put dots on right half
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep)
+                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep,idxlast)
                 print(io, (i - first(rowsA)) % hmod == 0 ? hdots : repeat(" ", length(hdots)::Int))
-                print_matrix_row(io, X,Ralign,i,(n-length(Ralign)).+colsA,sep)
+                print_matrix_row(io, X,Ralign,i,(n-length(Ralign)).+colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != rowsA[end] || i == rowsA[halfheight]; println(io); end
                 if i == rowsA[halfheight]
diff --git a/base/asyncevent.jl b/base/asyncevent.jl
index c8d2c404b0a09..5229ca58ab03d 100644
--- a/base/asyncevent.jl
+++ b/base/asyncevent.jl
@@ -15,7 +15,7 @@ mutable struct AsyncCondition
     handle::Ptr{Cvoid}
     cond::ThreadSynchronizer
     isopen::Bool
-    set::Bool
+    @atomic set::Bool
 
     function AsyncCondition()
         this = new(Libc.malloc(_sizeof_uv_async), ThreadSynchronizer(), true, false)
@@ -60,22 +60,26 @@ end
 
 Create a timer that wakes up tasks waiting for it (by calling [`wait`](@ref) on the timer object).
 
-Waiting tasks are woken after an initial delay of `delay` seconds, and then repeating with the given
-`interval` in seconds. If `interval` is equal to `0`, the timer is only triggered once. When
-the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use [`isopen`](@ref)
-to check whether a timer is still active.
+Waiting tasks are woken after an initial delay of at least `delay` seconds, and then repeatedly
+each time at least `interval` seconds elapse again. If `interval` is equal to `0`, the timer is only triggered
+once. When the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use
+[`isopen`](@ref) to check whether a timer is still active.
+
+Note: `interval` is subject to accumulating time skew. If you need precise events at a particular
+absolute time, create a new timer at each expiration with the difference to the next time computed.
 """
 mutable struct Timer
     handle::Ptr{Cvoid}
     cond::ThreadSynchronizer
     isopen::Bool
-    set::Bool
+    @atomic set::Bool
 
     function Timer(timeout::Real; interval::Real = 0.0)
         timeout ≥ 0 || throw(ArgumentError("timer cannot have negative timeout of $timeout seconds"))
         interval ≥ 0 || throw(ArgumentError("timer cannot have negative repeat interval of $interval seconds"))
-        timeout = UInt64(round(timeout * 1000)) + 1
-        interval = UInt64(ceil(interval * 1000))
+        # libuv has a tendency to timeout 1 ms early, so we need +1 on the timeout (in milliseconds), unless it is zero
+        timeoutms = ceil(UInt64, timeout * 1000) + !iszero(timeout)
+        intervalms = ceil(UInt64, interval * 1000)
         loop = eventloop()
 
         this = new(Libc.malloc(_sizeof_uv_timer), ThreadSynchronizer(), true, false)
@@ -87,7 +91,7 @@ mutable struct Timer
         ccall(:uv_update_time, Cvoid, (Ptr{Cvoid},), loop)
         err = ccall(:uv_timer_start, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, UInt64, UInt64),
             this, @cfunction(uv_timercb, Cvoid, (Ptr{Cvoid},)),
-            timeout, interval)
+            timeoutms, intervalms)
         @assert err == 0
         iolock_end()
         return this
@@ -124,7 +128,7 @@ function _trywait(t::Union{Timer, AsyncCondition})
         end
         iolock_end()
     end
-    t.set = false
+    @atomic :monotonic t.set = false
     return set
 end
 
@@ -182,7 +186,7 @@ function uv_asynccb(handle::Ptr{Cvoid})
     async = @handle_as handle AsyncCondition
     lock(async.cond)
     try
-        async.set = true
+        @atomic :monotonic async.set = true
         notify(async.cond, true)
     finally
         unlock(async.cond)
@@ -194,7 +198,7 @@ function uv_timercb(handle::Ptr{Cvoid})
     t = @handle_as handle Timer
     lock(t.cond)
     try
-        t.set = true
+        @atomic :monotonic t.set = true
         if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0
             # timer is stopped now
             close(t)
@@ -222,18 +226,18 @@ end
 """
     Timer(callback::Function, delay; interval = 0)
 
-Create a timer that wakes up tasks waiting for it (by calling [`wait`](@ref) on the timer object) and
-calls the function `callback`.
+Create a timer that runs the function `callback` at each timer expiration.
 
-Waiting tasks are woken and the function `callback` is called after an initial delay of `delay` seconds,
-and then repeating with the given `interval` in seconds. If `interval` is equal to `0`, the timer
-is only triggered once. The function `callback` is called with a single argument, the timer itself.
-When the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use [`isopen`](@ref)
-to check whether a timer is still active.
+Waiting tasks are woken and the function `callback` is called after an initial delay of `delay`
+seconds, and then repeating with the given `interval` in seconds. If `interval` is equal to `0`, the
+callback is only run once. The function `callback` is called with a single argument, the timer
+itself. Stop a timer by calling `close`. The `callback` may still be run one final time, if the timer has
+already expired.
 
 # Examples
 
-Here the first number is printed after a delay of two seconds, then the following numbers are printed quickly.
+Here the first number is printed after a delay of two seconds, then the following numbers are
+printed quickly.
 
 ```julia-repl
 julia> begin
diff --git a/base/asyncmap.jl b/base/asyncmap.jl
index 976ce6c7b85ca..10dcf23420c16 100644
--- a/base/asyncmap.jl
+++ b/base/asyncmap.jl
@@ -305,20 +305,7 @@ end
 function iterate(itr::AsyncCollector)
     itr.ntasks = verify_ntasks(itr.enumerator, itr.ntasks)
     itr.batch_size = verify_batch_size(itr.batch_size)
-    if itr.batch_size !== nothing
-        exec_func = batch -> begin
-            # extract indices from the input tuple
-            batch_idxs = map(x->x[1], batch)
-
-            # and the args tuple....
-            batched_args = map(x->x[2], batch)
 
-            results = f(batched_args)
-            foreach(x -> (itr.results[batch_idxs[x[1]]] = x[2]), enumerate(results))
-        end
-    else
-        exec_func = (i,args) -> (itr.results[i]=itr.f(args...))
-    end
     chnl, worker_tasks = setup_chnl_and_tasks((i,args) -> (itr.results[i]=itr.f(args...)), itr.ntasks, itr.batch_size)
     return iterate(itr, AsyncCollectorState(chnl, worker_tasks))
 end
diff --git a/base/atomics.jl b/base/atomics.jl
index 97405d88fd408..e6d62c3fc807b 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -356,13 +356,13 @@ for typ in atomictypes
     rt = "$lt, $lt*"
     irt = "$ilt, $ilt*"
     @eval getindex(x::Atomic{$typ}) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
                  ret $lt %rv
                  """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
     @eval setindex!(x::Atomic{$typ}, v::$typ) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
                  ret void
@@ -371,7 +371,7 @@ for typ in atomictypes
     # Note: atomic_cas! succeeded (i.e. it stored "new") if and only if the result is "cmp"
     if typ <: Integer
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                      %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
                      %rv = extractvalue { $lt, i1 } %rs, 0
@@ -380,7 +380,7 @@ for typ in atomictypes
                      unsafe_convert(Ptr{$typ}, x), cmp, new)
     else
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                      %icmp = bitcast $lt %1 to $ilt
                      %inew = bitcast $lt %2 to $ilt
@@ -403,7 +403,7 @@ for typ in atomictypes
         if rmwop in arithmetic_ops && !(typ <: ArithmeticTypes) continue end
         if typ <: Integer
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                          %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
                          ret $lt %rv
@@ -411,7 +411,7 @@ for typ in atomictypes
         else
             rmwop === :xchg || continue
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                          %ival = bitcast $lt %1 to $ilt
                          %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index aff1de4a80993..cfd7412faf656 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -706,7 +706,7 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
         libstdcxx_version = get_field(m, libstdcxx_version_mapping)
         cxxstring_abi = get_field(m, cxxstring_abi_mapping)
         function split_tags(tagstr)
-            tag_fields = filter(!isempty, split(tagstr, "-"))
+            tag_fields = split(tagstr, "-"; keepempty=false)
             if isempty(tag_fields)
                 return Pair{String,String}[]
             end
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 1db84cad37a1c..33e2715572018 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -703,7 +703,7 @@ end
 indexoffset(i) = first(i)-1
 indexoffset(::Colon) = 0
 
-@propagate_inbounds function setindex!(B::BitArray, X::AbstractArray, J0::Union{Colon,UnitRange{Int}})
+@propagate_inbounds function setindex!(B::BitArray, X::AbstractArray, J0::Union{Colon,AbstractUnitRange{Int}})
     _setindex!(IndexStyle(B), B, X, to_indices(B, (J0,))[1])
 end
 
@@ -947,6 +947,7 @@ function _deleteat!(B::BitVector, i::Int)
 end
 
 function deleteat!(B::BitVector, i::Integer)
+    i isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
     i = Int(i)
     n = length(B)
     1 <= i <= n || throw(BoundsError(B, i))
@@ -954,7 +955,7 @@ function deleteat!(B::BitVector, i::Integer)
     return _deleteat!(B, i)
 end
 
-function deleteat!(B::BitVector, r::UnitRange{Int})
+function deleteat!(B::BitVector, r::AbstractUnitRange{Int})
     n = length(B)
     i_f = first(r)
     i_l = last(r)
@@ -987,25 +988,27 @@ function deleteat!(B::BitVector, inds)
 
     (p, s) = y
     checkbounds(B, p)
+    p isa Bool && throw(ArgumentError("invalid index $p of type Bool"))
     q = p+1
     new_l -= 1
     y = iterate(inds, s)
     while y !== nothing
         (i, s) = y
         if !(q <= i <= n)
+            i isa Bool && throw(ArgumentError("invalid index $i of type Bool"))
             i < q && throw(ArgumentError("indices must be unique and sorted"))
             throw(BoundsError(B, i))
         end
         new_l -= 1
         if i > q
-            copy_chunks!(Bc, p, Bc, Int(q), Int(i-q))
+            copy_chunks!(Bc, Int(p), Bc, Int(q), Int(i-q))
             p += i-q
         end
         q = i+1
         y = iterate(inds, s)
     end
 
-    q <= n && copy_chunks!(Bc, p, Bc, Int(q), Int(n-q+1))
+    q <= n && copy_chunks!(Bc, Int(p), Bc, Int(q), Int(n-q+1))
 
     delta_k = num_bit_chunks(new_l) - length(Bc)
     delta_k < 0 && _deleteend!(Bc, -delta_k)
@@ -1019,7 +1022,55 @@ function deleteat!(B::BitVector, inds)
     return B
 end
 
+function deleteat!(B::BitVector, inds::AbstractVector{Bool})
+    length(inds) == length(B) || throw(BoundsError(B, inds))
+
+    n = new_l = length(B)
+    y = findfirst(inds)
+    y === nothing && return B
+
+    Bc = B.chunks
+
+    p = y
+    s = y + 1
+    checkbounds(B, p)
+    q = p + 1
+    new_l -= 1
+    y = findnext(inds, s)
+    while y !== nothing
+        i = y
+        s = y + 1
+        new_l -= 1
+        if i > q
+            copy_chunks!(Bc, Int(p), Bc, Int(q), Int(i-q))
+            p += i - q
+        end
+        q = i + 1
+        y = findnext(inds, s)
+    end
+
+    q <= n && copy_chunks!(Bc, Int(p), Bc, Int(q), Int(n - q + 1))
+
+    delta_k = num_bit_chunks(new_l) - length(Bc)
+    delta_k < 0 && _deleteend!(Bc, -delta_k)
+
+    B.len = new_l
+
+    if new_l > 0
+        Bc[end] &= _msk_end(new_l)
+    end
+
+    return B
+end
+
+keepat!(B::BitVector, inds) = _keepat!(B, inds)
+keepat!(B::BitVector, inds::AbstractVector{Bool}) = _keepat!(B, inds)
+
 function splice!(B::BitVector, i::Integer)
+    # TODO: after deprecation remove the four lines below
+    #       as v = B[i] is enough to do both bounds checking
+    #       and Bool check then just pass Int(i) to _deleteat!
+    i isa Bool && depwarn("passing Bool as an index is deprecated", :splice!)
     i = Int(i)
     n = length(B)
     1 <= i <= n || throw(BoundsError(B, i))
@@ -1031,9 +1082,11 @@ end
 
 const _default_bit_splice = BitVector()
 
-function splice!(B::BitVector, r::Union{UnitRange{Int}, Integer}, ins::AbstractArray = _default_bit_splice)
-    _splice_int!(B, isa(r, UnitRange{Int}) ? r : Int(r), ins)
+function splice!(B::BitVector, r::Union{AbstractUnitRange{Int}, Integer}, ins::AbstractArray = _default_bit_splice)
+    r isa Bool && depwarn("passing Bool as an index is deprecated", :splice!)
+    _splice_int!(B, isa(r, AbstractUnitRange{Int}) ? r : Int(r), ins)
 end
+
 function _splice_int!(B::BitVector, r, ins)
     n = length(B)
     i_f, i_l = first(r), last(r)
@@ -1073,7 +1126,7 @@ function _splice_int!(B::BitVector, r, ins)
     return v
 end
 
-function splice!(B::BitVector, r::Union{UnitRange{Int}, Integer}, ins)
+function splice!(B::BitVector, r::Union{AbstractUnitRange{Int}, Integer}, ins)
     Bins = BitVector(undef, length(ins))
     i = 1
     for x in ins
diff --git a/base/boot.jl b/base/boot.jl
index 98b8cf2e9cf40..b9de755ed8125 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -171,7 +171,7 @@ export
     # key types
     Any, DataType, Vararg, NTuple,
     Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid,
-    AbstractArray, DenseArray, NamedTuple,
+    AbstractArray, DenseArray, NamedTuple, Pair,
     # special objects
     Function, Method,
     Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement,
@@ -192,8 +192,8 @@ export
     Expr, QuoteNode, LineNumberNode, GlobalRef,
     # object model functions
     fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!,
-    nfields, throw, tuple, ===, isdefined, eval, ifelse,
-    # sizeof    # not exported, to avoid conflicting with Base.sizeof
+    nfields, throw, tuple, ===, isdefined, eval,
+    # ifelse, sizeof    # not exported, to avoid conflicting with Base
     # type reflection
     <:, typeof, isa, typeassert,
     # method reflection
@@ -267,20 +267,15 @@ struct ErrorException <: Exception
     msg::AbstractString
 end
 
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-
-macro _noinline_meta()
-    Expr(:meta, :noinline)
-end
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
 
 struct BoundsError <: Exception
     a::Any
     i::Any
     BoundsError() = new()
-    BoundsError(@nospecialize(a)) = (@_noinline_meta; new(a))
-    BoundsError(@nospecialize(a), i) = (@_noinline_meta; new(a,i))
+    BoundsError(@nospecialize(a)) = (@noinline; new(a))
+    BoundsError(@nospecialize(a), i) = (@noinline; new(a,i))
 end
 struct DivideError         <: Exception end
 struct OutOfMemoryError    <: Exception end
@@ -298,8 +293,8 @@ struct InterruptException <: Exception end
 struct DomainError <: Exception
     val
     msg::AbstractString
-    DomainError(@nospecialize(val)) = (@_noinline_meta; new(val, ""))
-    DomainError(@nospecialize(val), @nospecialize(msg)) = (@_noinline_meta; new(val, msg))
+    DomainError(@nospecialize(val)) = (@noinline; new(val, ""))
+    DomainError(@nospecialize(val), @nospecialize(msg)) = (@noinline; new(val, msg))
 end
 struct TypeError <: Exception
     # `func` is the name of the builtin function that encountered a type error,
@@ -320,7 +315,7 @@ struct InexactError <: Exception
     func::Symbol
     T  # Type
     val
-    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@_noinline_meta; new(f, T, val))
+    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@noinline; new(f, T, val))
 end
 struct OverflowError <: Exception
     msg::AbstractString
@@ -433,7 +428,7 @@ eval(Core, :(InterConditional(slot::Int, @nospecialize(vtype), @nospecialize(els
 eval(Core, :(MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) =
     $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))))
 
-Module(name::Symbol=:anonymous, std_imports::Bool=true, using_core::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, using_core)
+Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)
 
 function _Task(@nospecialize(f), reserved_stack::Int, completion_future)
     return ccall(:jl_new_task, Ref{Task}, (Any, Any, Int), f, completion_future, reserved_stack)
@@ -609,26 +604,26 @@ eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} =
 
 import .Intrinsics: eq_int, trunc_int, lshr_int, sub_int, shl_int, bitcast, sext_int, zext_int, and_int
 
-throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@_noinline_meta; throw(InexactError(f, T, val)))
+throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@noinline; throw(InexactError(f, T, val)))
 
 function is_top_bit_set(x)
-    @_inline_meta
+    @inline
     eq_int(trunc_int(UInt8, lshr_int(x, sub_int(shl_int(sizeof(x), 3), 1))), trunc_int(UInt8, 1))
 end
 
 function is_top_bit_set(x::Union{Int8,UInt8})
-    @_inline_meta
+    @inline
     eq_int(lshr_int(x, 7), trunc_int(typeof(x), 1))
 end
 
 function check_top_bit(::Type{To}, x) where {To}
-    @_inline_meta
+    @inline
     is_top_bit_set(x) && throw_inexacterror(:check_top_bit, To, x)
     x
 end
 
 function checked_trunc_sint(::Type{To}, x::From) where {To,From}
-    @_inline_meta
+    @inline
     y = trunc_int(To, x)
     back = sext_int(From, y)
     eq_int(x, back) || throw_inexacterror(:trunc, To, x)
@@ -636,7 +631,7 @@ function checked_trunc_sint(::Type{To}, x::From) where {To,From}
 end
 
 function checked_trunc_uint(::Type{To}, x::From) where {To,From}
-    @_inline_meta
+    @inline
     y = trunc_int(To, x)
     back = zext_int(From, y)
     eq_int(x, back) || throw_inexacterror(:trunc, To, x)
@@ -813,4 +808,16 @@ _parse = nothing
 # support for deprecated uses of internal _apply function
 _apply(x...) = Core._apply_iterate(Main.Base.iterate, x...)
 
+struct Pair{A, B}
+    first::A
+    second::B
+    # Inner constructors: `Pair(a, b)` takes its type parameters from `typeof(a)` and `typeof(b)`;
+    # `Pair{A, B}(a::A, b::B)` only accepts arguments that already have the field types.
+    # The `Pair{Any, Any}` method marks both arguments `@nospecialize`, presumably so that dynamic
+    # call sites do not compile one copy per argument type (NOTE: nothing here is marked `@inline`).
+    Pair(a, b) = new{typeof(a), typeof(b)}(a, b)
+    Pair{A, B}(a::A, b::B) where {A, B} = new(a, b)
+    Pair{Any, Any}(@nospecialize(a::Any), @nospecialize(b::Any)) = new(a, b)
+end
+
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index c6651e28489a3..971727768a4be 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -8,7 +8,7 @@ Module containing the broadcasting implementation.
 module Broadcast
 
 using .Base.Cartesian
-using .Base: Indices, OneTo, tail, to_shape, isoperator, promote_typejoin, @pure,
+using .Base: Indices, OneTo, tail, to_shape, isoperator, promote_typejoin, promote_typejoin_union, @pure,
              _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, unalias
 import .Base: copy, copyto!, axes
 export broadcast, broadcast!, BroadcastStyle, broadcast_axes, broadcastable, dotview, @__dot__, BroadcastFunction
@@ -697,7 +697,7 @@ julia> Broadcast.broadcastable("hello") # Strings break convention of matching i
 Base.RefValue{String}("hello")
 ```
 """
-broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair}) = Ref(x)
+broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair,IO}) = Ref(x)
 broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T)
 broadcastable(x::Union{AbstractArray,Number,AbstractChar,Ref,Tuple,Broadcasted}) = x
 # Default to collecting iterables — which will error for non-iterables
@@ -713,50 +713,6 @@ eltypes(t::Tuple{Any}) = Tuple{_broadcast_getindex_eltype(t[1])}
 eltypes(t::Tuple{Any,Any}) = Tuple{_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2])}
 eltypes(t::Tuple) = Tuple{_broadcast_getindex_eltype(t[1]), eltypes(tail(t)).types...}
 
-function promote_typejoin_union(::Type{T}) where T
-    if T === Union{}
-        return Union{}
-    elseif T isa UnionAll
-        return Any # TODO: compute more precise bounds
-    elseif T isa Union
-        return promote_typejoin(promote_typejoin_union(T.a), promote_typejoin_union(T.b))
-    elseif T <: Tuple
-        return typejoin_union_tuple(T)
-    else
-        return T
-    end
-end
-
-@pure function typejoin_union_tuple(T::Type)
-    u = Base.unwrap_unionall(T)
-    u isa Union && return typejoin(
-            typejoin_union_tuple(Base.rewrap_unionall(u.a, T)),
-            typejoin_union_tuple(Base.rewrap_unionall(u.b, T)))
-    p = (u::DataType).parameters
-    lr = length(p)::Int
-    if lr == 0
-        return Tuple{}
-    end
-    c = Vector{Any}(undef, lr)
-    for i = 1:lr
-        pi = p[i]
-        U = Core.Compiler.unwrapva(pi)
-        if U === Union{}
-            ci = Union{}
-        elseif U isa Union
-            ci = typejoin(U.a, U.b)
-        else
-            ci = U
-        end
-        if i == lr && Core.Compiler.isvarargtype(pi)
-            c[i] = isdefined(pi, :N) ? Vararg{ci, pi.N} : Vararg{ci}
-        else
-            c[i] = ci
-        end
-    end
-    return Base.rewrap_unionall(Tuple{c...}, T)
-end
-
 # Inferred eltype of result of broadcast(f, args...)
 combine_eltypes(f, args::Tuple) =
     promote_typejoin_union(Base._return_type(f, eltypes(args)))
@@ -985,8 +941,8 @@ broadcast_unalias(::Nothing, src) = src
 preprocess(dest, x) = extrude(broadcast_unalias(dest, x))
 
 @inline preprocess_args(dest, args::Tuple) = (preprocess(dest, args[1]), preprocess_args(dest, tail(args))...)
-preprocess_args(dest, args::Tuple{Any}) = (preprocess(dest, args[1]),)
-preprocess_args(dest, args::Tuple{}) = ()
+@inline preprocess_args(dest, args::Tuple{Any}) = (preprocess(dest, args[1]),)
+@inline preprocess_args(dest, args::Tuple{}) = ()
 
 # Specialize this method if all you want to do is specialize on typeof(dest)
 @inline function copyto!(dest::AbstractArray, bc::Broadcasted{Nothing})
@@ -1121,19 +1077,22 @@ end
 
 ## scalar-range broadcast operations ##
 # DefaultArrayStyle and \ are not available at the time of range.jl
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange) = r
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen) = r
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::LinRange) = r
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange) = r
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange) = range(-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange) = range(-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange) = range(-first(r), -last(r), step=-step(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen) = StepRangeLen(-r.ref, -r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::LinRange) = LinRange(-r.start, -r.stop, length(r))
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, length=length(r))
 # For #18336 we need to prevent promotion of the step type:
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange, x::Number) = range(first(r) + x, step=step(r), length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::AbstractRange) = range(x + first(r), step=step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange, x::Integer) = range(first(r) + x, last(r) + x, step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::OrdinalRange) = range(x + first(r), x + last(r), step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Integer) = range(first(r) + x, last(r) + x)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::AbstractUnitRange) = range(x + first(r), x + last(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen{T}, x::Number) where T =
     StepRangeLen{typeof(T(r.ref)+x)}(r.ref + x, r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::StepRangeLen{T}) where T =
@@ -1142,9 +1101,12 @@ broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::LinRange, x::Number) = LinRa
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::LinRange) = LinRange(x + r.start, x + r.stop, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r1::AbstractRange, r2::AbstractRange) = r1 + r2
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Number) = range(first(r)-x, length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r)-x, step=step(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r) - x, step=step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x - first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange, x::Integer) = range(first(r) - x, last(r) - x, step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Integer, r::OrdinalRange) = range(x - first(r), x - last(r), step=-step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Integer) = range(first(r) - x, last(r) - x)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Real) = range(first(r) - x, length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen{T}, x::Number) where T =
     StepRangeLen{typeof(T(r.ref)-x)}(r.ref - x, r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::StepRangeLen{T}) where T =
diff --git a/base/c.jl b/base/c.jl
index a26f41856dc8f..fb0d4e7dc0583 100644
--- a/base/c.jl
+++ b/base/c.jl
@@ -270,6 +270,21 @@ reasonably represented in the target encoding; it always succeeds for
 conversions between UTF-XX encodings, even for invalid Unicode data.
 
 Only conversion to/from UTF-8 is currently supported.
+
+# Examples
+```jldoctest
+julia> str = "αβγ"
+"αβγ"
+
+julia> transcode(UInt16, str)
+3-element Vector{UInt16}:
+ 0x03b1
+ 0x03b2
+ 0x03b3
+
+julia> transcode(String, transcode(UInt16, str))
+"αβγ"
+```
 """
 function transcode end
 
@@ -533,6 +548,12 @@ function expand_ccallable(rt, def)
     error("expected method definition in @ccallable")
 end
 
+"""
+    @ccallable(def)
+
+Make the annotated function be callable from C using its name. This can, for example,
+be used to expose functionality as a C-API when creating a custom Julia sysimage.
+"""
 macro ccallable(def)
     expand_ccallable(nothing, def)
 end
diff --git a/base/channels.jl b/base/channels.jl
index 1557504bbe21e..b0c8c44667e2e 100644
--- a/base/channels.jl
+++ b/base/channels.jl
@@ -33,10 +33,11 @@ mutable struct Channel{T} <: AbstractChannel{T}
     cond_take::Threads.Condition                 # waiting for data to become available
     cond_wait::Threads.Condition                 # waiting for data to become maybe available
     cond_put::Threads.Condition                  # waiting for a writeable slot
-    state::Symbol
+    @atomic state::Symbol
     excp::Union{Exception, Nothing}      # exception to be thrown when state !== :open
 
     data::Vector{T}
+    @atomic n_avail_items::Int           # Available items for taking, can be read without lock
     sz_max::Int                          # maximum size of channel
 
     function Channel{T}(sz::Integer = 0) where T
@@ -46,7 +47,7 @@ mutable struct Channel{T} <: AbstractChannel{T}
         lock = ReentrantLock()
         cond_put, cond_take = Threads.Condition(lock), Threads.Condition(lock)
         cond_wait = (sz == 0 ? Threads.Condition(lock) : cond_take) # wait is distinct from take iff unbuffered
-        return new(cond_take, cond_wait, cond_put, :open, nothing, Vector{T}(), sz)
+        return new(cond_take, cond_wait, cond_put, :open, nothing, Vector{T}(), 0, sz)
     end
 end
 
@@ -121,7 +122,7 @@ julia> chnl = Channel{Char}(1, spawn=true) do ch
                put!(ch, c)
            end
        end
-Channel{Char}(1) (1 item available)
+Channel{Char}(1) (2 items available)
 
 julia> String(collect(chnl))
 "hello world"
@@ -166,6 +167,8 @@ isbuffered(c::Channel) = c.sz_max==0 ? false : true
 
 function check_channel_state(c::Channel)
     if !isopen(c)
+        # the monotonic load saw a non-open state; re-load with acquire ordering before reading c.excp
+        (@atomic :acquire c.state) === :open && concurrency_violation()
         excp = c.excp
         excp !== nothing && throw(excp)
         throw(closed_exception())
@@ -182,8 +185,8 @@ Close a channel. An exception (optionally given by `excp`), is thrown by:
 function close(c::Channel, excp::Exception=closed_exception())
     lock(c)
     try
-        c.state = :closed
         c.excp = excp
+        @atomic :release c.state = :closed
         notify_error(c.cond_take, excp)
         notify_error(c.cond_wait, excp)
         notify_error(c.cond_put, excp)
@@ -192,7 +195,7 @@ function close(c::Channel, excp::Exception=closed_exception())
     end
     nothing
 end
-isopen(c::Channel) = (c.state === :open)
+isopen(c::Channel) = ((@atomic :monotonic c.state) === :open)
 
 """
     bind(chnl::Channel, task::Task)
@@ -298,6 +301,7 @@ struct InvalidStateException <: Exception
     msg::String
     state::Symbol
 end
+showerror(io::IO, ex::InvalidStateException) = print(io, "InvalidStateException: ", ex.msg)
 
 """
     put!(c::Channel, v)
@@ -316,17 +320,36 @@ function put!(c::Channel{T}, v) where T
     return isbuffered(c) ? put_buffered(c, v) : put_unbuffered(c, v)
 end
 
+# Atomically update channel n_avail, *assuming* we hold the channel lock.
+function _increment_n_avail(c, inc)
+    # We hold the channel lock so it's safe to non-atomically read and
+    # increment c.n_avail_items
+    newlen = c.n_avail_items + inc
+    # Atomically store c.n_avail_items to prevent data races with other threads
+    # reading this outside the lock.
+    @atomic :monotonic c.n_avail_items = newlen
+end
+
 function put_buffered(c::Channel, v)
     lock(c)
+    did_buffer = false
     try
+        # Increment channel n_avail eagerly (before push!) to count data in the
+        # buffer as well as offers from tasks which are blocked in wait().
+        _increment_n_avail(c, 1)
         while length(c.data) == c.sz_max
             check_channel_state(c)
             wait(c.cond_put)
         end
+        check_channel_state(c)
         push!(c.data, v)
+        did_buffer = true
         # notify all, since some of the waiters may be on a "fetch" call.
         notify(c.cond_take, nothing, true, false)
     finally
+        # Decrement the available items if this task had an exception before pushing the
+        # item to the buffer (e.g., during `wait(c.cond_put)`):
+        did_buffer || _increment_n_avail(c, -1)
         unlock(c)
     end
     return v
@@ -335,14 +358,17 @@ end
 function put_unbuffered(c::Channel, v)
     lock(c)
     taker = try
+        _increment_n_avail(c, 1)
         while isempty(c.cond_take.waitq)
             check_channel_state(c)
             notify(c.cond_wait)
             wait(c.cond_put)
         end
+        check_channel_state(c)
         # unfair scheduled version of: notify(c.cond_take, v, false, false); yield()
         popfirst!(c.cond_take.waitq)
     finally
+        _increment_n_avail(c, -1)
         unlock(c)
     end
     schedule(taker, v)
@@ -389,6 +415,7 @@ function take_buffered(c::Channel)
             wait(c.cond_take)
         end
         v = popfirst!(c.data)
+        _increment_n_avail(c, -1)
         notify(c.cond_put, nothing, false, false) # notify only one, since only one slot has become available for a put!.
         return v
     finally
@@ -418,8 +445,11 @@ For unbuffered channels returns `true` if there are tasks waiting
 on a [`put!`](@ref).
 """
 isready(c::Channel) = n_avail(c) > 0
-n_avail(c::Channel) = isbuffered(c) ? length(c.data) : length(c.cond_put.waitq)
-isempty(c::Channel) = isbuffered(c) ? isempty(c.data) : isempty(c.cond_put.waitq)
+isempty(c::Channel) = n_avail(c) == 0
+function n_avail(c::Channel)
+    # Lock-free equivalent to `length(c.data) + length(c.cond_put.waitq)`
+    @atomic :monotonic c.n_avail_items
+end
 
 lock(c::Channel) = lock(c.cond_take)
 lock(f, c::Channel) = lock(f, c.cond_take)
@@ -455,7 +485,7 @@ function show(io::IO, ::MIME"text/plain", c::Channel)
                 print(io, " (empty)")
             else
                 s = n == 1 ? "" : "s"
-                print(io, " (", n_avail(c), " item$s available)")
+                print(io, " (", n, " item$s available)")
             end
         end
     end
diff --git a/base/char.jl b/base/char.jl
index 0584471cb6a33..c8b1c28166bbf 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -45,10 +45,10 @@ represents a valid Unicode character.
 """
 Char
 
-@aggressive_constprop (::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
-@aggressive_constprop AbstractChar(x::Number) = Char(x)
-@aggressive_constprop (::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
-@aggressive_constprop (::Type{T})(x::AbstractChar) where {T<:Union{Int32,Int64}} = codepoint(x) % T
+@constprop :aggressive (::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
+@constprop :aggressive AbstractChar(x::Number) = Char(x)
+@constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
+@constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Int32,Int64}} = codepoint(x) % T
 (::Type{T})(x::T) where {T<:AbstractChar} = x
 
 """
@@ -75,7 +75,7 @@ return a different-sized integer (e.g. `UInt8`).
 """
 function codepoint end
 
-@aggressive_constprop codepoint(c::Char) = UInt32(c)
+@constprop :aggressive codepoint(c::Char) = UInt32(c)
 
 struct InvalidCharError{T<:AbstractChar} <: Exception
     char::T
@@ -124,7 +124,7 @@ See also [`decode_overlong`](@ref) and [`show_invalid`](@ref).
 """
 isoverlong(c::AbstractChar) = false
 
-@aggressive_constprop function UInt32(c::Char)
+@constprop :aggressive function UInt32(c::Char)
     # TODO: use optimized inline LLVM
     u = bitcast(UInt32, c)
     u < 0x80000000 && return u >> 24
@@ -148,7 +148,7 @@ that support overlong encodings should implement `Base.decode_overlong`.
 """
 function decode_overlong end
 
-@aggressive_constprop function decode_overlong(c::Char)
+@constprop :aggressive function decode_overlong(c::Char)
     u = bitcast(UInt32, c)
     l1 = leading_ones(u)
     t0 = trailing_zeros(u) & 56
@@ -158,7 +158,7 @@ function decode_overlong end
     ((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
 end
 
-@aggressive_constprop function Char(u::UInt32)
+@constprop :aggressive function Char(u::UInt32)
     u < 0x80 && return bitcast(Char, u << 24)
     u < 0x00200000 || throw_code_point_err(u)
     c = ((u << 0) & 0x0000003f) | ((u << 2) & 0x00003f00) |
@@ -169,14 +169,14 @@ end
     bitcast(Char, c)
 end
 
-@aggressive_constprop @noinline UInt32_cold(c::Char) = UInt32(c)
-@aggressive_constprop function (T::Union{Type{Int8},Type{UInt8}})(c::Char)
+@constprop :aggressive @noinline UInt32_cold(c::Char) = UInt32(c)
+@constprop :aggressive function (T::Union{Type{Int8},Type{UInt8}})(c::Char)
     i = bitcast(Int32, c)
     i ≥ 0 ? ((i >>> 24) % T) : T(UInt32_cold(c))
 end
 
-@aggressive_constprop @noinline Char_cold(b::UInt32) = Char(b)
-@aggressive_constprop function Char(b::Union{Int8,UInt8})
+@constprop :aggressive @noinline Char_cold(b::UInt32) = Char(b)
+@constprop :aggressive function Char(b::Union{Int8,UInt8})
     0 ≤ b ≤ 0x7f ? bitcast(Char, (b % UInt32) << 24) : Char_cold(UInt32(b))
 end
 
diff --git a/base/checked.jl b/base/checked.jl
index ba23d4c5acd2b..ad92a44e1e5bc 100644
--- a/base/checked.jl
+++ b/base/checked.jl
@@ -13,7 +13,7 @@ import Core.Intrinsics:
        checked_srem_int,
        checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int,
        checked_urem_int
-import ..no_op_err, ..@_inline_meta, ..@_noinline_meta, ..checked_length
+import ..no_op_err, ..@inline, ..@noinline, ..checked_length
 
 # define promotion behavior for checked operations
 checked_add(x::Integer, y::Integer) = checked_add(promote(x,y)...)
@@ -86,7 +86,7 @@ The overflow protection may impose a perceptible performance penalty.
 function checked_neg(x::T) where T<:Integer
     checked_sub(T(0), x)
 end
-throw_overflowerr_negation(x) = (@_noinline_meta;
+throw_overflowerr_negation(x) = (@noinline;
     throw(OverflowError(Base.invokelatest(string, "checked arithmetic: cannot compute -x for x = ", x, "::", typeof(x)))))
 if BrokenSignedInt != Union{}
 function checked_neg(x::BrokenSignedInt)
@@ -150,7 +150,7 @@ end
 end
 
 
-throw_overflowerr_binaryop(op, x, y) = (@_noinline_meta;
+throw_overflowerr_binaryop(op, x, y) = (@noinline;
     throw(OverflowError(Base.invokelatest(string, x, " ", op, " ", y, " overflowed for type ", typeof(x)))))
 
 """
@@ -161,7 +161,7 @@ Calculates `x+y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_add(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = add_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:+, x, y)
     z
@@ -218,7 +218,7 @@ Calculates `x-y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_sub(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = sub_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:-, x, y)
     z
@@ -283,7 +283,7 @@ Calculates `x*y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_mul(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = mul_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:*, x, y)
     z
diff --git a/base/client.jl b/base/client.jl
index 7e5f1ab5c5d58..8a3df71dc0b4e 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -84,27 +84,33 @@ end
 
 function scrub_repl_backtrace(bt)
     if bt !== nothing && !(bt isa Vector{Any}) # ignore our sentinel value types
-        bt = stacktrace(bt)
+        bt = bt isa Vector{StackFrame} ? copy(bt) : stacktrace(bt)
         # remove REPL-related frames from interactive printing
         eval_ind = findlast(frame -> !frame.from_c && frame.func === :eval, bt)
         eval_ind === nothing || deleteat!(bt, eval_ind:length(bt))
     end
     return bt
 end
+scrub_repl_backtrace(stack::ExceptionStack) =
+    ExceptionStack(Any[(;x.exception, backtrace = scrub_repl_backtrace(x.backtrace)) for x in stack])
 
-function display_error(io::IO, er, bt)
+istrivialerror(stack::ExceptionStack) =
+    length(stack) == 1 && length(stack[1].backtrace) ≤ 1
+    # frame 1 = top level; assumes already went through scrub_repl_backtrace
+
+function display_error(io::IO, stack::ExceptionStack)
     printstyled(io, "ERROR: "; bold=true, color=Base.error_color())
-    bt = scrub_repl_backtrace(bt)
-    showerror(IOContext(io, :limit => true), er, bt, backtrace = bt!==nothing)
+    show_exception_stack(IOContext(io, :limit => true), stack)
     println(io)
 end
-function display_error(io::IO, stack::ExceptionStack)
+display_error(stack::ExceptionStack) = display_error(stderr, stack)
+
+# these forms are depended on by packages outside Julia
+function display_error(io::IO, er, bt)
     printstyled(io, "ERROR: "; bold=true, color=Base.error_color())
-    bt = Any[ (x[1], scrub_repl_backtrace(x[2])) for x in stack ]
-    show_exception_stack(IOContext(io, :limit => true), bt)
+    showerror(IOContext(io, :limit => true), er, bt, backtrace = bt!==nothing)
     println(io)
 end
-display_error(stack::ExceptionStack) = display_error(stderr, stack)
 display_error(er, bt=nothing) = display_error(stderr, er, bt)
 
 function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
@@ -117,6 +123,8 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                 print(color_normal)
             end
             if lasterr !== nothing
+                lasterr = scrub_repl_backtrace(lasterr)
+                istrivialerror(lasterr) || ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, :err, lasterr)
                 invokelatest(display_error, errio, lasterr)
                 errcount = 0
                 lasterr = nothing
@@ -143,7 +151,8 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                 @error "SYSTEM: display_error(errio, lasterr) caused an error"
             end
             errcount += 1
-            lasterr = current_exceptions()
+            lasterr = scrub_repl_backtrace(current_exceptions())
+            ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, :err, lasterr)
             if errcount > 2
                 @error "It is likely that something important is broken, and Julia will not be able to continue normally" errcount
                 break
@@ -252,13 +261,16 @@ function exec_options(opts)
         invokelatest(Main.Distributed.process_opts, opts)
     end
 
+    interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY)
+    is_interactive::Bool |= interactiveinput
+
     # load ~/.julia/config/startup.jl file
     if startup
         try
             load_julia_startup()
         catch
-            invokelatest(display_error, current_exceptions())
-            !(repl || is_interactive) && exit(1)
+            invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
+            !(repl || is_interactive::Bool) && exit(1)
         end
     end
 
@@ -291,17 +303,14 @@ function exec_options(opts)
         try
             include(Main, PROGRAM_FILE)
         catch
-            invokelatest(display_error, current_exceptions())
+            invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
             if !is_interactive::Bool
                 exit(1)
             end
         end
     end
-    repl |= is_interactive::Bool
-    if repl
-        interactiveinput = isa(stdin, TTY)
+    if repl || is_interactive::Bool
         if interactiveinput
-            global is_interactive = true
             banner = (opts.banner != 0) # --banner!=no
         else
             banner = (opts.banner == 1) # --banner=yes
@@ -472,6 +481,8 @@ Returns the result of the last evaluated expression of the input file. During in
 a task-local include path is set to the directory containing the file. Nested calls to
 `include` will search relative to that path. This function is typically used to load source
 interactively, or to combine files in packages that are broken into multiple source files.
+The argument `path` is normalized using [`normpath`](@ref) which will resolve
+relative path tokens such as `..` and convert `/` to the appropriate path separator.
 
 The optional first argument `mapexpr` can be used to transform the included code before
 it is evaluated: for each parsed expression `expr` in `path`, the `include` function
@@ -494,7 +505,7 @@ function _start()
     try
         exec_options(JLOptions())
     catch
-        invokelatest(display_error, current_exceptions())
+        invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
         exit(1)
     end
     if is_interactive && get(stdout, :color, false)
diff --git a/base/cmd.jl b/base/cmd.jl
index ff52191fc51ff..dc8fae2ab052f 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -167,6 +167,7 @@ rawhandle(x::OS_HANDLE) = x
 if OS_HANDLE !== RawFD
     rawhandle(x::RawFD) = Libc._get_osfhandle(x)
 end
+setup_stdio(stdio::Union{DevNull,OS_HANDLE,RawFD}, ::Bool) = (stdio, false)
 
 const Redirectable = Union{IO, FileRedirect, RawFD, OS_HANDLE}
 const StdIOSet = NTuple{3, Redirectable}
@@ -232,7 +233,7 @@ byteenv(env::Union{AbstractVector{Pair{T,V}}, Tuple{Vararg{Pair{T,V}}}}) where {
     String[cstr(k*"="*string(v)) for (k,v) in env]
 
 """
-    setenv(command::Cmd, env; dir="")
+    setenv(command::Cmd, env; dir)
 
 Set environment variables to use when running the given `command`. `env` is either a
 dictionary mapping strings to strings, an array of strings of the form `"var=val"`, or
@@ -241,13 +242,15 @@ existing environment, create `env` through `copy(ENV)` and then setting `env["va
 as desired, or use [`addenv`](@ref).
 
 The `dir` keyword argument can be used to specify a working directory for the command.
+`dir` defaults to the currently set `dir` for `command` (which is the current working
+directory if not specified already).
 
 See also [`Cmd`](@ref), [`addenv`](@ref), [`ENV`](@ref), [`pwd`](@ref).
 """
-setenv(cmd::Cmd, env; dir="") = Cmd(cmd; env=byteenv(env), dir=dir)
-setenv(cmd::Cmd, env::Pair{<:AbstractString}...; dir="") =
+setenv(cmd::Cmd, env; dir=cmd.dir) = Cmd(cmd; env=byteenv(env), dir=dir)
+setenv(cmd::Cmd, env::Pair{<:AbstractString}...; dir=cmd.dir) =
     setenv(cmd, env; dir=dir)
-setenv(cmd::Cmd; dir="") = Cmd(cmd; dir=dir)
+setenv(cmd::Cmd; dir=cmd.dir) = Cmd(cmd; dir=dir)
 
 """
     addenv(command::Cmd, env...; inherit::Bool = true)
@@ -255,6 +258,7 @@ setenv(cmd::Cmd; dir="") = Cmd(cmd; dir=dir)
 Merge new environment mappings into the given [`Cmd`](@ref) object, returning a new `Cmd` object.
 Duplicate keys are replaced.  If `command` does not contain any environment values set already,
 it inherits the current environment at time of `addenv()` call if `inherit` is `true`.
+Keys with value `nothing` are deleted from the env.
 
 See also [`Cmd`](@ref), [`setenv`](@ref), [`ENV`](@ref).
 
@@ -268,12 +272,16 @@ function addenv(cmd::Cmd, env::Dict; inherit::Bool = true)
             merge!(new_env, ENV)
         end
     else
-        for (k, v) in split.(cmd.env, "=")
+        for (k, v) in eachsplit.(cmd.env, "=")
             new_env[string(k)::String] = string(v)::String
         end
     end
     for (k, v) in env
-        new_env[string(k)::String] = string(v)::String
+        if v === nothing
+            delete!(new_env, string(k)::String)
+        else
+            new_env[string(k)::String] = string(v)::String
+        end
     end
     return setenv(cmd, new_env)
 end
@@ -283,7 +291,7 @@ function addenv(cmd::Cmd, pairs::Pair{<:AbstractString}...; inherit::Bool = true
 end
 
 function addenv(cmd::Cmd, env::Vector{<:AbstractString}; inherit::Bool = true)
-    return addenv(cmd, Dict(k => v for (k, v) in split.(env, "=")); inherit)
+    return addenv(cmd, Dict(k => v for (k, v) in eachsplit.(env, "="; limit=2)); inherit) # split at the first "=" only, so values may contain "="
 end
 
 (&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right)
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
index daa534e068af6..2dd69fbce4c42 100644
--- a/base/combinatorics.jl
+++ b/base/combinatorics.jl
@@ -103,6 +103,18 @@ function swapcols!(a::AbstractMatrix, i, j)
         @inbounds a[k,i],a[k,j] = a[k,j],a[k,i]
     end
 end
+
+# swap rows i and j of a, in-place; an out-of-range row throws a BoundsError naming that row
+function swaprows!(a::AbstractMatrix, i, j)
+    i == j && return # nothing to swap (this returns before any bounds check runs)
+    rows = axes(a,1)
+    @boundscheck i in rows || throw(BoundsError(a, (i,:))) # rows index the first dimension
+    @boundscheck j in rows || throw(BoundsError(a, (j,:)))
+    for k in axes(a,2)
+        @inbounds a[i,k],a[j,k] = a[j,k],a[i,k]
+    end
+end
+
 # like permute!! applied to each row of a, in-place in a (overwriting p).
 function permutecols!!(a::AbstractMatrix, p::AbstractVector{<:Integer})
     require_one_based_indexing(a, p)
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 23b00134c6071..b1d4eceefc2cf 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -16,92 +16,23 @@ const _REF_NAME = Ref.body.name
 call_result_unused(frame::InferenceState) =
     isexpr(frame.src.code[frame.currpc], :call) && isempty(frame.ssavalue_uses[frame.currpc])
 
-# check if this return type is improvable (i.e. whether it's possible that with
-# more information, we might get a more precise type)
-function is_improvable(@nospecialize(rtype))
-    if isa(rtype, Type)
-        # Could always be improved to Const or PartialStruct, unless we're
-        # already at Bottom
-        return rtype !== Union{}
-    end
-    # Could be improved to `Const` or a more precise wrapper
-    return isa(rtype, PartialStruct) || isa(rtype, InterConditional)
-end
-
 function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
-                                  fargs::Union{Nothing,Vector{Any}}, argtypes::Vector{Any}, @nospecialize(atype),
+                                  arginfo::ArgInfo, @nospecialize(atype),
                                   sv::InferenceState, max_methods::Int = InferenceParams(interp).MAX_METHODS)
-    if sv.params.unoptimize_throw_blocks && sv.currpc in sv.throw_blocks
+    if sv.params.unoptimize_throw_blocks && is_stmt_throw_block(get_curr_ssaflag(sv))
         add_remark!(interp, sv, "Skipped call in throw block")
         return CallMeta(Any, false)
     end
-    valid_worlds = WorldRange()
-    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
-    splitunions = 1 < unionsplitcost(argtypes) <= InferenceParams(interp).MAX_UNION_SPLITTING
-    mts = Core.MethodTable[]
-    fullmatch = Bool[]
-    if splitunions
-        split_argtypes = switchtupleunion(argtypes)
-        applicable = Any[]
-        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
-        infos = MethodMatchInfo[]
-        for arg_n in split_argtypes
-            sig_n = argtypes_to_type(arg_n)
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
-            if mt === nothing
-                add_remark!(interp, sv, "Could not identify method table for call")
-                return CallMeta(Any, false)
-            end
-            mt = mt::Core.MethodTable
-            matches = findall(sig_n, method_table(interp); limit=max_methods)
-            if matches === missing
-                add_remark!(interp, sv, "For one of the union split cases, too many methods matched")
-                return CallMeta(Any, false)
-            end
-            push!(infos, MethodMatchInfo(matches))
-            for m in matches
-                push!(applicable, m)
-                push!(applicable_argtypes, arg_n)
-            end
-            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
-            found = false
-            for (i, mt′) in enumerate(mts)
-                if mt′ === mt
-                    fullmatch[i] &= thisfullmatch
-                    found = true
-                    break
-                end
-            end
-            if !found
-                push!(mts, mt)
-                push!(fullmatch, thisfullmatch)
-            end
-        end
-        info = UnionSplitInfo(infos)
-    else
-        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-        if mt === nothing
-            add_remark!(interp, sv, "Could not identify method table for call")
-            return CallMeta(Any, false)
-        end
-        mt = mt::Core.MethodTable
-        matches = findall(atype, method_table(interp, sv); limit=max_methods)
-        if matches === missing
-            # this means too many methods matched
-            # (assume this will always be true, so we don't compute / update valid age in this case)
-            add_remark!(interp, sv, "Too many methods matched")
-            return CallMeta(Any, false)
-        end
-        push!(mts, mt)
-        push!(fullmatch, _any(match->(match::MethodMatch).fully_covers, matches))
-        info = MethodMatchInfo(matches)
-        applicable = matches.matches
-        valid_worlds = matches.valid_worlds
-        applicable_argtypes = nothing
+
+    argtypes = arginfo.argtypes
+    matches = find_matching_methods(argtypes, atype, method_table(interp, sv), InferenceParams(interp).MAX_UNION_SPLITTING, max_methods)
+    if isa(matches, FailedMethodMatch)
+        add_remark!(interp, sv, matches.reason)
+        return CallMeta(Any, false)
     end
+
+    (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
-    applicable = applicable::Array{Any,1}
     napplicable = length(applicable)
     rettype = Bottom
     edges = MethodInstance[]
@@ -119,6 +50,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         end
     end
 
+    fargs = arginfo.fargs
     for i in 1:napplicable
         match = applicable[i]::MethodMatch
         method = match.method
@@ -142,10 +74,14 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                 if edge !== nothing
                     push!(edges, edge)
                 end
-                this_argtypes = applicable_argtypes === nothing ? argtypes : applicable_argtypes[i]
-                const_rt, const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
-                if const_rt !== rt && const_rt ⊑ rt
-                    rt = const_rt
+                this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
+                this_arginfo = ArgInfo(fargs, this_argtypes)
+                const_result = abstract_call_method_with_const_args(interp, result, f, this_arginfo, match, sv, false)
+                if const_result !== nothing
+                    const_rt, const_result = const_result
+                    if const_rt !== rt && const_rt ⊑ rt
+                        rt = const_rt
+                    end
                 end
                 push!(const_results, const_result)
                 if const_result !== nothing
@@ -164,10 +100,14 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             end
             # try constant propagation with argtypes for this match
             # this is in preparation for inlining, or improving the return result
-            this_argtypes = applicable_argtypes === nothing ? argtypes : applicable_argtypes[i]
-            const_this_rt, const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
-            if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
-                this_rt = const_this_rt
+            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
+            this_arginfo = ArgInfo(fargs, this_argtypes)
+            const_result = abstract_call_method_with_const_args(interp, result, f, this_arginfo, match, sv, false)
+            if const_result !== nothing
+                const_this_rt, const_result = const_result
+                if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
+                    this_rt = const_this_rt
+                end
             end
             push!(const_results, const_result)
             if const_result !== nothing
@@ -184,19 +124,10 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                 conditionals = Any[Bottom for _ in 1:length(argtypes)],
                                Any[Bottom for _ in 1:length(argtypes)]
             end
-            condval = maybe_extract_const_bool(this_conditional)
             for i = 1:length(argtypes)
-                fargs[i] isa SlotNumber || continue
-                if this_conditional isa InterConditional && this_conditional.slot == i
-                    vtype = this_conditional.vtype
-                    elsetype = this_conditional.elsetype
-                else
-                    elsetype = vtype = tmeet(argtypes[i], fieldtype(sig, i))
-                    condval === true && (elsetype = Union{})
-                    condval === false && (vtype = Union{})
-                end
-                conditionals[1][i] = tmerge(conditionals[1][i], vtype)
-                conditionals[2][i] = tmerge(conditionals[2][i], elsetype)
+                cnd = conditional_argtype(this_conditional, sig, argtypes, i)
+                conditionals[1][i] = tmerge(conditionals[1][i], cnd.vtype)
+                conditionals[2][i] = tmerge(conditionals[2][i], cnd.elsetype)
             end
         end
         if bail_out_call(interp, rettype, sv)
@@ -211,59 +142,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         info = ConstCallInfo(info, const_results)
     end
 
-    if rettype isa LimitedAccuracy
-        union!(sv.pclimitations, rettype.causes)
-        rettype = rettype.typ
-    end
-    # if we have argument refinement information, apply that now to get the result
-    if is_lattice_bool(rettype) && conditionals !== nothing && fargs !== nothing
-        slot = 0
-        vtype = elsetype = Any
-        condval = maybe_extract_const_bool(rettype)
-        for i in 1:length(fargs)
-            # find the first argument which supports refinment,
-            # and intersect all equvalent arguments with it
-            arg = fargs[i]
-            arg isa SlotNumber || continue # can't refine
-            old = argtypes[i]
-            old isa Type || continue # unlikely to refine
-            id = slot_id(arg)
-            if slot == 0 || id == slot
-                new_vtype = conditionals[1][i]
-                if condval === false
-                    vtype = Union{}
-                elseif new_vtype ⊑ vtype
-                    vtype = new_vtype
-                else
-                    vtype = tmeet(vtype, widenconst(new_vtype))
-                end
-                new_elsetype = conditionals[2][i]
-                if condval === true
-                    elsetype = Union{}
-                elseif new_elsetype ⊑ elsetype
-                    elsetype = new_elsetype
-                else
-                    elsetype = tmeet(elsetype, widenconst(new_elsetype))
-                end
-                if (slot > 0 || condval !== false) && !(old ⊑ vtype) # essentially vtype ⋤ old
-                    slot = id
-                elseif (slot > 0 || condval !== true) && !(old ⊑ elsetype) # essentially elsetype ⋤ old
-                    slot = id
-                else # reset: no new useful information for this slot
-                    vtype = elsetype = Any
-                    if slot > 0
-                        slot = 0
-                    end
-                end
-            end
-        end
-        if vtype === Bottom && elsetype === Bottom
-            rettype = Bottom # accidentally proved this call to be dead / throw !
-        elseif slot > 0
-            rettype = Conditional(SlotNumber(slot), vtype, elsetype) # record a Conditional improvement to this slot
-        end
-    end
-    @assert !(rettype isa InterConditional) "invalid lattice element returned from inter-procedural context"
+    rettype = from_interprocedural!(rettype, sv, arginfo, conditionals)
 
     if call_result_unused(sv) && !(rettype === Bottom)
         add_remark!(interp, sv, "Call result type was widened because the return value is unused")
@@ -275,7 +154,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         # and avoid keeping track of a more complex result type.
         rettype = Any
     end
-    add_call_backedges!(interp, rettype, edges, fullmatch, mts, atype, sv)
+    add_call_backedges!(interp, rettype, edges, matches, atype, sv)
     if !isempty(sv.pclimitations) # remove self, if present
         delete!(sv.pclimitations, sv)
         for caller in sv.callers_in_cycle
@@ -286,24 +165,221 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     return CallMeta(rettype, info)
 end
 
-function add_call_backedges!(interp::AbstractInterpreter,
-                             @nospecialize(rettype),
-                             edges::Vector{MethodInstance},
-                             fullmatch::Vector{Bool}, mts::Vector{Core.MethodTable}, @nospecialize(atype),
-                             sv::InferenceState)
-    if rettype === Any
-        # for `NativeInterpreter`, we don't add backedges when a new method couldn't refine
-        # (widen) this type
-        return
+struct FailedMethodMatch # returned by `find_matching_methods` when the lookup is abandoned
+    reason::String # why the lookup failed; `abstract_call_gf_by_type` passes it to `add_remark!`
+end
+
+struct MethodMatches # result of `find_matching_methods` when the call is not union-split
+    applicable::Vector{Any} # the `matches` field of the `findall` lookup result
+    info::MethodMatchInfo # `MethodMatchInfo` wrapping that lookup result
+    valid_worlds::WorldRange # `valid_worlds` of that lookup result
+    mt::Core.MethodTable # table returned by `jl_method_table_for` for the call signature
+    fullmatch::Bool # whether any match has `fully_covers` set
+end
+
+struct UnionSplitMethodMatches # result of `find_matching_methods` when the call is union-split
+    applicable::Vector{Any} # matches of every split signature, pushed in split order
+    applicable_argtypes::Vector{Vector{Any}} # the split argtypes, one entry per entry of `applicable`
+    info::UnionSplitInfo # wraps one `MethodMatchInfo` per split signature
+    valid_worlds::WorldRange # intersection of the `valid_worlds` of all lookups
+    mts::Vector{Core.MethodTable} # distinct method tables seen across the splits
+    fullmatches::Vector{Bool} # per entry of `mts`: `&` of the full-coverage flags of its splits
+end
+
+function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
+                               union_split::Int, max_methods::Int) # returns MethodMatches, UnionSplitMethodMatches or FailedMethodMatch
+    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
+    if 1 < unionsplitcost(argtypes) <= union_split # union-split path: one lookup per split signature
+        split_argtypes = switchtupleunion(argtypes)
+        infos = MethodMatchInfo[]
+        applicable = Any[]
+        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
+        valid_worlds = WorldRange()
+        mts = Core.MethodTable[]
+        fullmatches = Bool[]
+        for i in 1:length(split_argtypes)
+            arg_n = split_argtypes[i]::Vector{Any}
+            sig_n = argtypes_to_type(arg_n)
+            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
+            mt === nothing && return FailedMethodMatch("Could not identify method table for call")
+            mt = mt::Core.MethodTable
+            matches = findall(sig_n, method_table; limit = max_methods)
+            if matches === missing
+                return FailedMethodMatch("For one of the union split cases, too many methods matched")
+            end
+            push!(infos, MethodMatchInfo(matches))
+            for m in matches
+                push!(applicable, m)
+                push!(applicable_argtypes, arg_n) # keep `applicable_argtypes` index-aligned with `applicable`
+            end
+            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
+            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+            found = false
+            for (i, mt′) in enumerate(mts) # this `i` is local to the inner loop and indexes `mts`/`fullmatches`
+                if mt′ === mt
+                    fullmatches[i] &= thisfullmatch
+                    found = true
+                    break
+                end
+            end
+            if !found # first time this method table is seen
+                push!(mts, mt)
+                push!(fullmatches, thisfullmatch)
+            end
+        end
+        return UnionSplitMethodMatches(applicable,
+                                       applicable_argtypes,
+                                       UnionSplitInfo(infos),
+                                       valid_worlds,
+                                       mts,
+                                       fullmatches)
+    else # single lookup on the whole signature `atype`
+        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
+        if mt === nothing
+            return FailedMethodMatch("Could not identify method table for call")
+        end
+        mt = mt::Core.MethodTable
+        matches = findall(atype, method_table; limit = max_methods)
+        if matches === missing
+            # this means too many methods matched
+            # (assume this will always be true, so we don't compute / update valid age in this case)
+            return FailedMethodMatch("Too many methods matched")
+        end
+        fullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+        return MethodMatches(matches.matches,
+                             MethodMatchInfo(matches),
+                             matches.valid_worlds,
+                             mt,
+                             fullmatch)
+    end
+end
+
+"""
+    from_interprocedural!(rt, sv::InferenceState, arginfo::ArgInfo, maybecondinfo) -> newrt
+
+Converts inter-procedural return type `rt` into a local lattice element `newrt`
+that is appropriate in the context of the current local analysis frame `sv`. In particular, it:
+- unwraps `rt::LimitedAccuracy` and collects its limitations into the current frame `sv`
+- converts boolean `rt` into a boolean `newrt` that can propagate extra conditional
+  refinement information, e.g. translating `rt::InterConditional` into `newrt::Conditional`
+  that holds type constraint information about a variable in `sv`
+
+This function _should_ be used wherever we propagate results returned from
+`abstract_call_method` or `abstract_call_method_with_const_args`.
+
+When `maybecondinfo !== nothing`, this function also tries extra conditional argument type refinement.
+In such cases `maybecondinfo` should be one of:
+- `maybecondinfo::Tuple{Vector{Any},Vector{Any}}`: precomputed argument type refinement information
+- the method call signature tuple type
+When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by
+`tmerge`ing the argument signature types of each method call.
+"""
+function from_interprocedural!(@nospecialize(rt), sv::InferenceState, arginfo::ArgInfo, @nospecialize(maybecondinfo))
+    # strip any `LimitedAccuracy` wrapper first, recording its causes on `sv`
+    rt = collect_limitations!(rt, sv)
+    if is_lattice_bool(rt)
+        # without refinement info we can only widen; otherwise try to build a `Conditional`
+        rt = maybecondinfo === nothing ? widenconditional(rt) :
+             from_interconditional(rt, sv, arginfo, maybecondinfo)
+    end
+    # whichever path was taken, no `InterConditional` may escape into the local frame
+    @assert !(rt isa InterConditional) "invalid lattice element returned from inter-procedural context"
+    return rt
+end
+
+function collect_limitations!(@nospecialize(typ), sv::InferenceState)
+    # only `LimitedAccuracy` wrappers carry limitation causes; anything else passes through
+    isa(typ, LimitedAccuracy) || return typ
+    # record the causes on the current frame, then hand back the unwrapped type
+    union!(sv.pclimitations, typ.causes)
+    return typ.typ
+end
+
+function from_interconditional(@nospecialize(typ), sv::InferenceState, (; fargs, argtypes)::ArgInfo, @nospecialize(maybecondinfo))
+    fargs === nothing && return widenconditional(typ) # no argument expressions to attach a constraint to
+    slot = 0 # id of the slot being refined; 0 means none chosen yet
+    vtype = elsetype = Any # accumulated then-branch / else-branch types for that slot
+    condval = maybe_extract_const_bool(typ)
+    for i in 1:length(fargs)
+        # find the first argument which supports refinement,
+        # and intersect all equivalent arguments with it
+        arg = ssa_def_slot(fargs[i], sv)
+        arg isa SlotNumber || continue # can't refine
+        old = argtypes[i]
+        old isa Type || continue # unlikely to refine
+        id = slot_id(arg)
+        if slot == 0 || id == slot # still searching, or another argument that reads the chosen slot
+            if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}})
+                # if we have already computed argument refinement information, apply that now to get the result
+                new_vtype = maybecondinfo[1][i]
+                new_elsetype = maybecondinfo[2][i]
+            else
+                # otherwise compute it on the fly
+                cnd = conditional_argtype(typ, maybecondinfo, argtypes, i)
+                new_vtype = cnd.vtype
+                new_elsetype = cnd.elsetype
+            end
+            if condval === false
+                vtype = Bottom
+            elseif new_vtype ⊑ vtype
+                vtype = new_vtype
+            else
+                vtype = tmeet(vtype, widenconst(new_vtype))
+            end
+            if condval === true
+                elsetype = Bottom
+            elseif new_elsetype ⊑ elsetype
+                elsetype = new_elsetype
+            else
+                elsetype = tmeet(elsetype, widenconst(new_elsetype))
+            end
+            if (slot > 0 || condval !== false) && vtype ⋤ old
+                slot = id
+            elseif (slot > 0 || condval !== true) && elsetype ⋤ old
+                slot = id
+            else # reset: no new useful information for this slot
+                vtype = elsetype = Any
+                if slot > 0
+                    slot = 0
+                end
+            end
+        end
+    end
+    if vtype === Bottom && elsetype === Bottom
+        return Bottom # accidentally proved this call to be dead / throw !
+    elseif slot > 0
+        return Conditional(SlotNumber(slot), vtype, elsetype) # record a Conditional improvement to this slot
+    end
+    return widenconditional(typ) # no slot could be refined
+end
+
+function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int)
+    # an `InterConditional` that constrains argument `i` is returned as-is
+    isa(rt, InterConditional) && rt.slot == i && return rt
+    # otherwise both branches start from the argtype met with the `i`-th field type of
+    # `sig`, and a branch that a constant boolean `rt` rules out is set to `Bottom`
+    narrowed = tmeet(argtypes[i], fieldtype(sig, i))
+    condval = maybe_extract_const_bool(rt)
+    vtype = condval === false ? Bottom : narrowed
+    elsetype = condval === true ? Bottom : narrowed
+    return InterConditional(i, vtype, elsetype)
+end
+
+function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), edges::Vector{MethodInstance},
+                             matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
+                             sv::InferenceState)
+    # for `NativeInterpreter`, we don't add backedges when a new method couldn't refine (widen) this type
+    rettype === Any && return
     for edge in edges
         add_backedge!(edge, sv)
     end
-    for (thisfullmatch, mt) in zip(fullmatch, mts)
-        if !thisfullmatch
-            # also need an edge to the method table in case something gets
-            # added that did not intersect with any existing method
-            add_mt_backedge!(mt, atype, sv)
+    # also need an edge to the method table in case something gets
+    # added that did not intersect with any existing method
+    if isa(matches, MethodMatches)
+        matches.fullmatch || add_mt_backedge!(matches.mt, atype, sv)
+    else
+        for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
+            thisfullmatch || add_mt_backedge!(mt, atype, sv)
         end
     end
 end
@@ -492,36 +568,41 @@ struct MethodCallResult
 end
 
 function abstract_call_method_with_const_args(interp::AbstractInterpreter, result::MethodCallResult,
-                                              @nospecialize(f), argtypes::Vector{Any}, match::MethodMatch,
+                                              @nospecialize(f), arginfo::ArgInfo, match::MethodMatch,
                                               sv::InferenceState, va_override::Bool)
-    mi = maybe_get_const_prop_profitable(interp, result, f, argtypes, match, sv)
-    mi === nothing && return Any, nothing
+    mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, match, sv)
+    mi === nothing && return nothing
     # try constant prop'
     inf_cache = get_inference_cache(interp)
-    inf_result = cache_lookup(mi, argtypes, inf_cache)
+    inf_result = cache_lookup(mi, arginfo.argtypes, inf_cache)
     if inf_result === nothing
         # if there might be a cycle, check to make sure we don't end up
         # calling ourselves here.
-        if result.edgecycle && _any(InfStackUnwind(sv)) do infstate
-                # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
-                # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
-                # propagate different constant elements if the recursion is finite over the lattice
-                return (result.edgelimited ? match.method === infstate.linfo.def : mi === infstate.linfo) &&
-                        any(infstate.result.overridden_by_const)
-            end
-            add_remark!(interp, sv, "[constprop] Edge cycle encountered")
-            return Any, nothing
-        end
-        inf_result = InferenceResult(mi, argtypes, va_override)
-        frame = InferenceState(inf_result, #=cache=#false, interp)
-        frame === nothing && return Any, nothing # this is probably a bad generated function (unsound), but just ignore it
+        let result = result # prevent capturing
+            if result.edgecycle && _any(InfStackUnwind(sv)) do infstate
+                    # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
+                    # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
+                    # propagate different constant elements if the recursion is finite over the lattice
+                    return (result.edgelimited ? match.method === infstate.linfo.def : mi === infstate.linfo) &&
+                            any(infstate.result.overridden_by_const)
+                end
+                add_remark!(interp, sv, "[constprop] Edge cycle encountered")
+                return nothing
+            end
+        end
+        inf_result = InferenceResult(mi, (arginfo, sv), va_override)
+        if !any(inf_result.overridden_by_const)
+            add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
+            return nothing
+        end
+        frame = InferenceState(inf_result, #=cache=#:local, interp)
+        frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
         frame.parent = sv
-        push!(inf_cache, inf_result)
-        typeinf(interp, frame) || return Any, nothing
+        typeinf(interp, frame) || return nothing
     end
     result = inf_result.result
     # if constant inference hits a cycle, just bail out
-    isa(result, InferenceState) && return Any, nothing
+    isa(result, InferenceState) && return nothing
     add_backedge!(mi, sv)
     return result, inf_result
 end
@@ -529,32 +610,39 @@ end
 # if there's a possibility we could get a better result (hopefully without doing too much work)
 # returns `MethodInstance` with constant arguments, returns nothing otherwise
 function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::MethodCallResult,
-                                         @nospecialize(f), argtypes::Vector{Any}, match::MethodMatch,
+                                         @nospecialize(f), arginfo::ArgInfo, match::MethodMatch,
                                          sv::InferenceState)
-    const_prop_entry_heuristic(interp, result, sv) || return nothing
+    if !InferenceParams(interp).ipo_constant_propagation
+        add_remark!(interp, sv, "[constprop] Disabled by parameter")
+        return nothing
+    end
     method = match.method
+    if method.constprop == 0x02
+        add_remark!(interp, sv, "[constprop] Disabled by method parameter")
+        return nothing
+    end
+    force = force_const_prop(interp, f, method)
+    force || const_prop_entry_heuristic(interp, result, sv) || return nothing
     nargs::Int = method.nargs
     method.isva && (nargs -= 1)
-    if length(argtypes) < nargs
+    length(arginfo.argtypes) < nargs && return nothing
+    if !const_prop_argument_heuristic(interp, arginfo, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics")
         return nothing
     end
-    const_prop_argument_heuristic(interp, argtypes) || const_prop_rettype_heuristic(interp, result.rt) || return nothing
-    allconst = is_allconst(argtypes)
-    force = force_const_prop(interp, f, method)
-    if !force
-        if !const_prop_function_heuristic(interp, f, argtypes, nargs, allconst)
-            add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
-            return nothing
-        end
+    all_overridden = is_all_overridden(arginfo, sv)
+    if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
+        return nothing
     end
-    force |= allconst
-    mi = specialize_method(match, !force)
+    force |= all_overridden
+    mi = specialize_method(match; preexisting=!force)
     if mi === nothing
         add_remark!(interp, sv, "[constprop] Failed to specialize")
         return nothing
     end
     mi = mi::MethodInstance
-    if !force && !const_prop_methodinstance_heuristic(interp, method, mi)
+    if !force && !const_prop_methodinstance_heuristic(interp, match, mi, arginfo, sv)
         add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
         return nothing
     end
@@ -563,18 +651,45 @@ end
 
 function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, sv::InferenceState)
     if call_result_unused(sv) && result.edgecycle
-        add_remark!(interp, sv, "[constprop] Edgecycle with unused result")
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
+        return false
+    end
+    # check if this return type is improvable (i.e. whether it's possible that with more
+    # information, we might get a more precise type)
+    rt = result.rt
+    if isa(rt, Type)
+        # could always be improved to `Const`, `PartialStruct` or just a more precise type,
+        # unless we're already at `Bottom`
+        if rt === Bottom
+            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (erroneous result)")
+            return false
+        else
+            return true
+        end
+    elseif isa(rt, PartialStruct) || isa(rt, InterConditional)
+        # could be improved to `Const` or a more precise wrapper
+        return true
+    elseif isa(rt, LimitedAccuracy)
+        # optimizations like inlining are disabled for limited frames,
+        # thus there won't be much benefit in constant-prop' here
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (limited accuracy)")
+        return false
+    else
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable return type)")
         return false
     end
-    return is_improvable(result.rt) && InferenceParams(interp).ipo_constant_propagation
 end
 
-# see if propagating constants may be worthwhile
-function const_prop_argument_heuristic(interp::AbstractInterpreter, argtypes::Vector{Any})
-    for a in argtypes
-        a = widenconditional(a)
-        if has_nontrivial_const_info(a) && is_const_prop_profitable_arg(a)
-            return true
+# determines heuristically whether constant propagation can be worthwhile
+# by checking if any of given `argtypes` is "interesting" enough to be propagated
+function const_prop_argument_heuristic(_::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState)
+    for i in 1:length(argtypes)
+        a = argtypes[i]
+        if isa(a, Conditional) && fargs !== nothing
+            is_const_prop_profitable_conditional(a, fargs, sv) && return true
+        else
+            a = widenconditional(a)
+            has_nontrivial_const_info(a) && is_const_prop_profitable_arg(a) && return true
         end
     end
     return false
@@ -595,28 +710,51 @@ function is_const_prop_profitable_arg(@nospecialize(arg))
     return isa(val, Symbol) || isa(val, Type) || (!isa(val, String) && !ismutable(val))
 end
 
-function const_prop_rettype_heuristic(interp::AbstractInterpreter, @nospecialize(rettype))
-    return improvable_via_constant_propagation(rettype)
+function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
+    slotid = find_constrained_arg(cnd, fargs, sv)
+    if slotid !== nothing
+        return true
+    end
+    # as a minor optimization, we just check whether the result is a constant,
+    # since both `has_nontrivial_const_info`/`is_const_prop_profitable_arg` return `true`
+    # for `Const(::Bool)`
+    return isa(widenconditional(cnd), Const)
+end
+
+function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
+    slot = slot_id(cnd.var)
+    for i in 1:length(fargs)
+        arg = ssa_def_slot(fargs[i], sv)
+        if isa(arg, SlotNumber) && slot_id(arg) == slot
+            return i
+        end
+    end
+    return nothing
 end
 
-function is_allconst(argtypes::Vector{Any})
+# checks if all argtypes have additional information other than what `Type` can provide
+function is_all_overridden((; fargs, argtypes)::ArgInfo, sv::InferenceState)
     for a in argtypes
-        a = widenconditional(a)
-        if !isa(a, Const) && !isconstType(a) && !isa(a, PartialStruct) && !isa(a, PartialOpaque)
-            return false
+        if isa(a, Conditional) && fargs !== nothing
+            is_const_prop_profitable_conditional(a, fargs, sv) || return false
+        else
+            a = widenconditional(a)
+            is_forwardable_argtype(a) || return false
         end
     end
     return true
 end
 
 function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method)
-    return method.aggressive_constprop ||
+    return method.constprop == 0x01 ||
            InferenceParams(interp).aggressive_constant_propagation ||
            istopfunction(f, :getproperty) ||
            istopfunction(f, :setproperty!)
 end
 
-function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any}, nargs::Int, allconst::Bool)
+function const_prop_function_heuristic(
+    _::AbstractInterpreter, @nospecialize(f), (; argtypes)::ArgInfo,
+    nargs::Int, all_overridden::Bool, _::InferenceState)
     if nargs > 1
         if istopfunction(f, :getindex) || istopfunction(f, :setindex!)
             arrty = argtypes[2]
@@ -633,22 +771,22 @@ function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecializ
             end
         end
     end
-    if !allconst && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
-                     istopfunction(f, :(==)) || istopfunction(f, :!=) ||
-                     istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
-                     istopfunction(f, :<<) || istopfunction(f, :>>))
+    if !all_overridden && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
+                           istopfunction(f, :(==)) || istopfunction(f, :!=) ||
+                           istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
+                           istopfunction(f, :<<) || istopfunction(f, :>>))
         # it is almost useless to inline the op when all the same type,
         # but highly worthwhile to inline promote of a constant
         length(argtypes) > 2 || return false
         t1 = widenconst(argtypes[2])
-        all_same = true
         for i in 3:length(argtypes)
-            if widenconst(argtypes[i]) !== t1
-                all_same = false
-                break
+            at = argtypes[i]
+            ty = isvarargtype(at) ? unwraptv(at) : widenconst(at)
+            if ty !== t1
+                return true
             end
         end
-        return !all_same
+        return false
     end
     return true
 end
@@ -656,7 +794,10 @@ end
 # This is a heuristic to avoid trying to const prop through complicated functions
 # where we would spend a lot of time, but are probably unlikely to get an improved
 # result anyway.
-function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance)
+function const_prop_methodinstance_heuristic(
+    interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance,
+    (; argtypes)::ArgInfo, sv::InferenceState)
+    method = match.method
     if method.is_for_opaque_closure
         # Not inlining an opaque closure can be very expensive, so be generous
         # with the const-prop-ability. It is quite possible that we can't infer
@@ -668,19 +809,27 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method
     # was able to cut it down to something simple (inlineable in particular).
     # If so, there's a good chance we might be able to const prop all the way
     # through and learn something new.
-    code = get(code_cache(interp), mi, nothing)
-    declared_inline = isdefined(method, :source) && ccall(:jl_ir_flag_inlineable, Bool, (Any,), method.source)
-    cache_inlineable = declared_inline
-    if isdefined(code, :inferred) && !cache_inlineable
-        cache_inf = code.inferred
-        if !(cache_inf === nothing)
-            cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing
+    if isdefined(method, :source) && ccall(:jl_ir_flag_inlineable, Bool, (Any,), method.source)
+        return true
+    else
+        flag = get_curr_ssaflag(sv)
+        if is_stmt_inline(flag)
+            # force constant propagation for a call that is going to be inlined
+            # since the inliner will try to find this constant result
+            # if these constant arguments arrive there
+            return true
+        elseif is_stmt_noinline(flag)
+            # this call won't be inlined, thus this constant-prop' will most likely be unfruitful
+            return false
+        else
+            code = get(code_cache(interp), mi, nothing)
+            if isdefined(code, :inferred) && inlining_policy(
+                    interp, code.inferred, IR_FLAG_NULL, mi, argtypes) !== nothing
+                return true
+            end
         end
     end
-    if !cache_inlineable
-        return false
-    end
-    return true
+    return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful
 end
 
 # This is only for use with `Conditional`.
@@ -727,34 +876,32 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
     if isa(tti, DataType) && tti.name === NamedTuple_typename
         # A NamedTuple iteration is the same as the iteration of its Tuple parameter:
         # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type
-        tti = tti.parameters[2]
-        while isa(tti, TypeVar)
-            tti = tti.ub
-        end
+        tti = unwraptv(tti.parameters[2])
         tti0 = rewrap_unionall(tti, tti0)
     end
     if isa(tti, Union)
         utis = uniontypes(tti)
-        if _any(t -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
+        if _any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
             return Any[Vararg{Any}], nothing
         end
-        result = Any[rewrap_unionall(p, tti0) for p in (utis[1]::DataType).parameters]
-        for t::DataType in utis[2:end]
-            if length(t.parameters) != length(result)
+        ltp = length((utis[1]::DataType).parameters)
+        for t in utis
+            if length((t::DataType).parameters) != ltp
                 return Any[Vararg{Any}], nothing
             end
-            for j in 1:length(t.parameters)
-                result[j] = tmerge(result[j], rewrap_unionall(t.parameters[j], tti0))
+        end
+        result = Any[ Union{} for _ in 1:ltp ]
+        for t in utis
+            tps = (t::DataType).parameters
+            _all(valid_as_lattice, tps) || continue
+            for j in 1:ltp
+                result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
             end
         end
         return result, nothing
     elseif tti0 <: Tuple
         if isa(tti0, DataType)
-            if isvatuple(tti0) && length(tti0.parameters) == 1
-                return Any[Vararg{unwrapva(tti0.parameters[1])}], nothing
-            else
-                return Any[ p for p in tti0.parameters ], nothing
-            end
+            return Any[ p for p in tti0.parameters ], nothing
         elseif !isa(tti, DataType)
             return Any[Vararg{Any}], nothing
         else
@@ -784,7 +931,7 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
         return Any[Vararg{Any}], nothing
     end
     @assert !isvarargtype(itertype)
-    call = abstract_call_known(interp, iteratef, nothing, Any[itft, itertype], sv)
+    call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), sv)
     stateordonet = call.rt
     info = call.info
     # Return Bottom if this is not an iterator.
@@ -795,11 +942,11 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
     valtype = statetype = Bottom
     ret = Any[]
     calls = CallMeta[call]
+    stateordonet_widened = widenconst(stateordonet)
 
     # Try to unroll the iteration up to MAX_TUPLE_SPLAT, which covers any finite
     # length iterators, or interesting prefix
     while true
-        stateordonet_widened = widenconst(stateordonet)
         if stateordonet_widened === Nothing
             return ret, AbstractIterationInfo(calls)
         end
@@ -818,33 +965,47 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
         valtype = getfield_tfunc(stateordonet, Const(1))
         push!(ret, valtype)
         statetype = nstatetype
-        call = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv)
+        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), sv)
         stateordonet = call.rt
+        stateordonet_widened = widenconst(stateordonet)
         push!(calls, call)
     end
     # From here on, we start asking for results on the widened types, rather than
     # the precise (potentially const) state type
-    statetype = widenconst(statetype)
-    valtype = widenconst(valtype)
+    # statetype and valtype are reinitialized in the first iteration below from the
+    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+    statetype = Bottom
+    valtype = Bottom
+    may_have_terminated = Nothing <: stateordonet_widened
     while valtype !== Any
-        stateordonet = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv).rt
-        stateordonet = widenconst(stateordonet)
-        nounion = typesubtract(stateordonet, Nothing, 0)
-        if !isa(nounion, DataType) || !(nounion <: Tuple) || isvatuple(nounion) || length(nounion.parameters) != 2
+        nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
+        if nounion !== Union{} && !isa(nounion, DataType)
+            # nounion is of a type we cannot handle
             valtype = Any
             break
         end
-        if nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype
-            if typeintersect(stateordonet, Nothing) === Union{}
-                # Reached a fixpoint, but Nothing is not possible => iterator is infinite or failing
-                return Any[Bottom], nothing
+        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+            # reached a fixpoint or iterator failed/gave invalid answer
+            if !hasintersect(stateordonet_widened, Nothing)
+                # ... but cannot terminate
+                if !may_have_terminated
+                    #  ... and cannot have terminated prior to this loop
+                    return Any[Bottom], nothing
+                else
+                    # iterator may have terminated prior to this loop, but not during it
+                    valtype = Bottom
+                end
             end
             break
         end
         valtype = tmerge(valtype, nounion.parameters[1])
         statetype = tmerge(statetype, nounion.parameters[2])
+        stateordonet = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), sv).rt
+        stateordonet_widened = widenconst(stateordonet)
+    end
+    if valtype !== Union{}
+        push!(ret, Vararg{valtype})
     end
-    push!(ret, Vararg{valtype})
     return ret, nothing
 end
 
@@ -868,19 +1029,19 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::
     nargs = length(aargtypes)
     splitunions = 1 < unionsplitcost(aargtypes) <= InferenceParams(interp).MAX_APPLY_UNION_ENUM
     ctypes = [Any[aft]]
-    infos = [Union{Nothing, AbstractIterationInfo}[]]
+    infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
     for i = 1:nargs
         ctypes´ = Vector{Any}[]
-        infos′ = Vector{Union{Nothing, AbstractIterationInfo}}[]
+        infos′ = Vector{MaybeAbstractIterationInfo}[]
         for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
             if !isvarargtype(ti)
                 cti_info = precise_container_type(interp, itft, ti, sv)
                 cti = cti_info[1]::Vector{Any}
-                info = cti_info[2]::Union{Nothing,AbstractIterationInfo}
+                info = cti_info[2]::MaybeAbstractIterationInfo
             else
                 cti_info = precise_container_type(interp, itft, unwrapva(ti), sv)
                 cti = cti_info[1]::Vector{Any}
-                info = cti_info[2]::Union{Nothing,AbstractIterationInfo}
+                info = cti_info[2]::MaybeAbstractIterationInfo
                 # We can't represent a repeating sequence of the same types,
                 # so tmerge everything together to get one type that represents
                 # everything.
@@ -928,7 +1089,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::
                 break
             end
         end
-        call = abstract_call(interp, nothing, ct, sv, max_methods)
+        call = abstract_call(interp, ArgInfo(nothing, ct), sv, max_methods)
         push!(retinfos, ApplyCallInfo(call.info, arginfo))
         res = tmerge(res, call.rt)
         if bail_out_apply(interp, res, sv)
@@ -947,7 +1108,7 @@ end
 function is_method_pure(method::Method, @nospecialize(sig), sparams::SimpleVector)
     if isdefined(method, :generator)
         method.generator.expand_early || return false
-        mi = specialize_method(method, sig, sparams, false)
+        mi = specialize_method(method, sig, sparams)
         isa(mi, MethodInstance) || return false
         staged = get_staged(mi)
         (staged isa CodeInfo && (staged::CodeInfo).pure) || return false
@@ -992,11 +1153,11 @@ function argtype_tail(argtypes::Vector{Any}, i::Int)
     return argtypes[i:n]
 end
 
-function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::Union{Nothing,Vector{Any}},
-        argtypes::Vector{Any}, sv::InferenceState, max_methods::Int)
+function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo,
+                               sv::InferenceState, max_methods::Int)
     @nospecialize f
     la = length(argtypes)
-    if f === ifelse && fargs isa Vector{Any} && la == 4
+    if f === Core.ifelse && fargs isa Vector{Any} && la == 4
         cnd = argtypes[2]
         if isa(cnd, Conditional)
             newcnd = widenconditional(cnd)
@@ -1020,16 +1181,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
         end
     end
     rt = builtin_tfunction(interp, f, argtypes[2:end], sv)
-    if f === getfield && isa(fargs, Vector{Any}) && la == 3 &&
-       (a3 = argtypes[3]; isa(a3, Const)) && (idx = a3.val; isa(idx, Int)) &&
-       (a2 = argtypes[2]; a2 ⊑ Tuple)
-        # TODO: why doesn't this use the getfield_tfunc?
-        cti_info = precise_container_type(interp, iterate, a2, sv)
-        cti = cti_info[1]::Vector{Any}
-        if 1 <= idx <= length(cti)
-            rt = unwrapva(cti[idx])
-        end
-    elseif (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
+    if (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
         # perform very limited back-propagation of type information for `is` and `isa`
         if f === isa
             a = ssa_def_slot(fargs[2], sv)
@@ -1045,6 +1197,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                     tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info
                     if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub)
                         ifty = typeintersect(aty, tty_ub)
+                        valid_as_lattice(ifty) || (ifty = Union{})
                         elty = typesubtract(aty, tty_lb, InferenceParams(interp).MAX_UNION_SPLITTING)
                         return Conditional(a, ifty, elty)
                     end
@@ -1095,9 +1248,32 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                 end
                 return Conditional(aty.var, ifty, elty)
             end
+        elseif f === isdefined
+            uty = argtypes[2]
+            a = ssa_def_slot(fargs[2], sv)
+            if isa(uty, Union) && isa(a, SlotNumber)
+                fld = argtypes[3]
+                vtype = Union{}
+                elsetype = Union{}
+                for ty in uniontypes(uty)
+                    cnd = isdefined_tfunc(ty, fld)
+                    if isa(cnd, Const)
+                        if cnd.val::Bool
+                            vtype = tmerge(vtype, ty)
+                        else
+                            elsetype = tmerge(elsetype, ty)
+                        end
+                    else
+                        vtype = tmerge(vtype, ty)
+                        elsetype = tmerge(elsetype, ty)
+                    end
+                end
+                return Conditional(a, vtype, elsetype)
+            end
         end
     end
-    return isa(rt, TypeVar) ? rt.ub : rt
+    @assert !isa(rt, TypeVar) "unhandled TypeVar"
+    return rt
 end
 
 function abstract_call_unionall(argtypes::Vector{Any})
@@ -1134,7 +1310,7 @@ function abstract_call_unionall(argtypes::Vector{Any})
     return Any
 end
 
-function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState)
     ft′ = argtype_by_index(argtypes, 2)
     ft = widenconst(ft′)
     ft === Bottom && return CallMeta(Bottom, false)
@@ -1146,7 +1322,8 @@ function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:
     nargtype === Bottom && return CallMeta(Bottom, false)
     nargtype isa DataType || return CallMeta(Any, false) # other cases are not implemented below
     isdispatchelem(ft) || return CallMeta(Any, false) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
-    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)
+    ft = ft::DataType
+    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)::Type
     nargtype = Tuple{ft, nargtype.parameters...}
     argtype = Tuple{ft, argtype.parameters...}
     result = findsup(types, method_table(interp))
@@ -1157,45 +1334,60 @@ function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:
     (; rt, edge) = result = abstract_call_method(interp, method, ti, env, false, sv)
     edge !== nothing && add_backedge!(edge::MethodInstance, sv)
     match = MethodMatch(ti, env, method, argtype <: method.sig)
-    # try constant propagation with manual inlinings of some of the heuristics
-    # since some checks within `abstract_call_method_with_const_args` seem a bit costly
-    const_prop_entry_heuristic(interp, result, sv) || return CallMeta(rt, InvokeCallInfo(match, nothing))
-    argtypes′ = argtypes[4:end]
-    const_prop_argument_heuristic(interp, argtypes′) || const_prop_rettype_heuristic(interp, rt) || return CallMeta(rt, InvokeCallInfo(match, nothing))
-    pushfirst!(argtypes′, ft)
+    res = nothing
+    sig = match.spec_types
+    argtypes′ = invoke_rewrite(argtypes)
+    fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
+    arginfo = ArgInfo(fargs′, argtypes′)
     # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
     # for i in 1:length(argtypes′)
     #     t, a = ti.parameters[i], argtypes′[i]
     #     argtypes′[i] = t ⊑ a ? t : a
     # end
-    const_rt, const_result = abstract_call_method_with_const_args(interp, result, argtype_to_function(ft′), argtypes′, match, sv, false)
-    if const_rt !== rt && const_rt ⊑ rt
-        return CallMeta(const_rt, InvokeCallInfo(match, const_result))
-    else
-        return CallMeta(rt, InvokeCallInfo(match, nothing))
+    const_result = abstract_call_method_with_const_args(interp, result, singleton_type(ft′), arginfo, match, sv, false)
+    if const_result !== nothing
+        const_rt, const_result = const_result
+        if const_rt !== rt && const_rt ⊑ rt
+            rt, res = const_rt, const_result
+        end
     end
+    return CallMeta(from_interprocedural!(rt, sv, arginfo, sig), InvokeCallInfo(match, res))
+end
+
+function invoke_rewrite(xs::Vector{Any})
+    x0 = xs[2]
+    newxs = xs[3:end]
+    newxs[1] = x0
+    return newxs
 end
 
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
-        fargs::Union{Nothing,Vector{Any}}, argtypes::Vector{Any},
-        sv::InferenceState,
+        arginfo::ArgInfo, sv::InferenceState,
         max_methods::Int = InferenceParams(interp).MAX_METHODS)
-
+    (; fargs, argtypes) = arginfo
     la = length(argtypes)
 
     if isa(f, Builtin)
         if f === _apply_iterate
             return abstract_apply(interp, argtypes, sv, max_methods)
         elseif f === invoke
-            return abstract_invoke(interp, argtypes, sv)
+            return abstract_invoke(interp, arginfo, sv)
+        elseif f === modifyfield!
+            return abstract_modifyfield!(interp, argtypes, sv)
         end
-        return CallMeta(abstract_call_builtin(interp, f, fargs, argtypes, sv, max_methods), false)
+        return CallMeta(abstract_call_builtin(interp, f, arginfo, sv, max_methods), false)
+    elseif isa(f, Core.OpaqueClosure)
+        # calling an OpaqueClosure about which we have no information returns no information
+        return CallMeta(Any, false)
     elseif f === Core.kwfunc
         if la == 2
-            ft = widenconst(argtypes[2])
-            if isa(ft, DataType) && isdefined(ft.name, :mt) && isdefined(ft.name.mt, :kwsorter)
-                return CallMeta(Const(ft.name.mt.kwsorter), MethodResultPure())
+            aty = argtypes[2]
+            if !isvarargtype(aty)
+                ft = widenconst(aty)
+                if isa(ft, DataType) && isdefined(ft.name, :mt) && isdefined(ft.name.mt, :kwsorter)
+                    return CallMeta(Const(ft.name.mt.kwsorter), MethodResultPure())
+                end
             end
         end
         return CallMeta(Any, false)
@@ -1215,20 +1407,24 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         return CallMeta(typevar_tfunc(n, lb_var, ub_var), false)
     elseif f === UnionAll
         return CallMeta(abstract_call_unionall(argtypes), false)
-    elseif f === Tuple && la == 2 && !isconcretetype(widenconst(argtypes[2]))
-        return CallMeta(Tuple, false)
+    elseif f === Tuple && la == 2
+        aty = argtypes[2]
+        ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
+        if !isconcretetype(ty)
+            return CallMeta(Tuple, false)
+        end
     elseif is_return_type(f)
         return return_type_tfunc(interp, argtypes, sv)
     elseif la == 2 && istopfunction(f, :!)
         # handle Conditional propagation through !Bool
         aty = argtypes[2]
         if isa(aty, Conditional)
-            call = abstract_call_gf_by_type(interp, f, fargs, Any[Const(f), Bool], Tuple{typeof(f), Bool}, sv) # make sure we've inferred `!(::Bool)`
+            call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), Tuple{typeof(f), Bool}, sv) # make sure we've inferred `!(::Bool)`
             return CallMeta(Conditional(aty.var, aty.elsetype, aty.vtype), call.info)
         end
     elseif la == 3 && istopfunction(f, :!==)
         # mark !== as exactly a negated call to ===
-        rty = abstract_call_known(interp, (===), fargs, argtypes, sv).rt
+        rty = abstract_call_known(interp, (===), arginfo, sv).rt
         if isa(rty, Conditional)
             return CallMeta(Conditional(rty.var, rty.elsetype, rty.vtype), false) # swap if-else
         elseif isa(rty, Const)
@@ -1244,7 +1440,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             fargs = nothing
         end
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
-        return CallMeta(abstract_call_known(interp, <:, fargs, argtypes, sv).rt, false)
+        return CallMeta(abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), sv).rt, false)
     elseif la == 2 &&
            (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) &&
            istopfunction(f, :length)
@@ -1267,35 +1463,35 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         return CallMeta(val === false ? Type : val, MethodResultPure())
     end
     atype = argtypes_to_type(argtypes)
-    return abstract_call_gf_by_type(interp, f, fargs, argtypes, atype, sv, max_methods)
+    return abstract_call_gf_by_type(interp, f, arginfo, atype, sv, max_methods)
 end
 
-function abstract_call_opaque_closure(interp::AbstractInterpreter, closure::PartialOpaque, argtypes::Vector{Any}, sv::InferenceState)
-    pushfirst!(argtypes, closure.env)
-    sig = argtypes_to_type(argtypes)
-    (; rt, edge) = result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, sv)
+function abstract_call_opaque_closure(interp::AbstractInterpreter, closure::PartialOpaque, arginfo::ArgInfo, sv::InferenceState)
+    sig = argtypes_to_type(arginfo.argtypes)
+    (; rt, edge) = result = abstract_call_method(interp, closure.source, sig, Core.svec(), false, sv)
     edge !== nothing && add_backedge!(edge, sv)
     tt = closure.typ
-    sigT = unwrap_unionall(tt).parameters[1]
-    match = MethodMatch(sig, Core.svec(), closure.source::Method, sig <: rewrap_unionall(sigT, tt))
-    info = OpaqueClosureCallInfo(match)
+    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
+    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
+    res = nothing
     if !result.edgecycle
-        const_rettype, const_result = abstract_call_method_with_const_args(interp, result, closure, argtypes,
-            match, sv, closure.isva)
-        if const_rettype ⊑ rt
-           rt = const_rettype
-        end
+        const_result = abstract_call_method_with_const_args(interp, result, closure,
+            arginfo, match, sv, closure.isva)
         if const_result !== nothing
-            info = ConstCallInfo(info, Union{Nothing,InferenceResult}[const_result])
+            const_rettype, const_result = const_result
+            if const_rettype ⊑ rt
+               rt, res = const_rettype, const_result
+            end
         end
     end
-    return CallMeta(rt, info)
+    info = OpaqueClosureCallInfo(match, res)
+    return CallMeta(from_interprocedural!(rt, sv, arginfo, match.spec_types), info)
 end
 
 function most_general_argtypes(closure::PartialOpaque)
     ret = Any[]
     cc = widenconst(closure)
-    argt = unwrap_unionall(cc).parameters[1]
+    argt = (unwrap_unionall(cc)::DataType).parameters[1]
     if !isa(argt, DataType) || argt.name !== typename(Tuple)
         argt = Tuple
     end
@@ -1303,37 +1499,27 @@ function most_general_argtypes(closure::PartialOpaque)
 end
 
 # call where the function is any lattice element
-function abstract_call(interp::AbstractInterpreter, fargs::Union{Nothing,Vector{Any}}, argtypes::Vector{Any},
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo,
                        sv::InferenceState, max_methods::Int = InferenceParams(interp).MAX_METHODS)
-    #print("call ", e.args[1], argtypes, "\n\n")
+    argtypes = arginfo.argtypes
     ft = argtypes[1]
-    f = argtype_to_function(ft)
+    f = singleton_type(ft)
     if isa(ft, PartialOpaque)
-        return abstract_call_opaque_closure(interp, ft, argtypes[2:end], sv)
-    elseif isa(unwrap_unionall(ft), DataType) && unwrap_unionall(ft).name === typename(Core.OpaqueClosure)
-        return CallMeta(rewrap_unionall(unwrap_unionall(ft).parameters[2], ft), false)
+        newargtypes = copy(argtypes)
+        newargtypes[1] = ft.env
+        return abstract_call_opaque_closure(interp, ft, ArgInfo(arginfo.fargs, newargtypes), sv)
+    elseif (uft = unwrap_unionall(widenconst(ft)); isa(uft, DataType) && uft.name === typename(Core.OpaqueClosure))
+        return CallMeta(rewrap_unionall((uft::DataType).parameters[2], widenconst(ft)), false)
     elseif f === nothing
         # non-constant function, but the number of arguments is known
         # and the ft is not a Builtin or IntrinsicFunction
-        if typeintersect(widenconst(ft), Union{Builtin, Core.OpaqueClosure}) != Union{}
+        if hasintersect(widenconst(ft), Union{Builtin, Core.OpaqueClosure})
             add_remark!(interp, sv, "Could not identify method table for call")
             return CallMeta(Any, false)
         end
-        return abstract_call_gf_by_type(interp, nothing, fargs, argtypes, argtypes_to_type(argtypes), sv, max_methods)
-    end
-    return abstract_call_known(interp, f, fargs, argtypes, sv, max_methods)
-end
-
-function argtype_to_function(@nospecialize(ft))
-    if isa(ft, Const)
-        return ft.val
-    elseif isconstType(ft)
-        return ft.parameters[1]
-    elseif isa(ft, DataType) && isdefined(ft, :instance)
-        return ft.instance
-    else
-        return nothing
+        return abstract_call_gf_by_type(interp, nothing, arginfo, argtypes_to_type(argtypes), sv, max_methods)
     end
+    return abstract_call_known(interp, f, arginfo, sv, max_methods)
 end
 
 function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
@@ -1355,7 +1541,7 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
         spsig = linfo.def.sig
         if isa(spsig, UnionAll)
             if !isempty(linfo.sparam_vals)
-                sparam_vals = Any[isa(v, Core.TypeofVararg) ? TypeVar(:N, Union{}, Any) :
+                sparam_vals = Any[isvarargtype(v) ? TypeVar(:N, Union{}, Any) :
                                   v for v in  linfo.sparam_vals]
                 T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals)
                 isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any
@@ -1369,10 +1555,7 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
             end
         end
     end
-    while isa(T, TypeVar)
-        T = T.ub
-    end
-    return T
+    return unwraptv(T)
 end
 
 function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::VarTable, sv::InferenceState)
@@ -1383,7 +1566,7 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::V
     # this may be the wrong world for the call,
     # but some of the result is likely to be valid anyways
     # and that may help generate better codegen
-    abstract_call(interp, nothing, at, sv)
+    abstract_call(interp, ArgInfo(nothing, at), sv)
     nothing
 end
 
@@ -1408,7 +1591,7 @@ function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(
     elseif isa(e, SSAValue)
         return abstract_eval_ssavalue(e::SSAValue, sv.src)
     elseif isa(e, SlotNumber) || isa(e, Argument)
-        return (vtypes[slot_id(e)]::VarState).typ
+        return vtypes[slot_id(e)].typ
     elseif isa(e, GlobalRef)
         return abstract_eval_global(e.mod, e.name)
     end
@@ -1421,11 +1604,7 @@ function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtyp
         return abstract_eval_value_expr(interp, e, vtypes, sv)
     else
         typ = abstract_eval_special_value(interp, e, vtypes, sv)
-        if typ isa LimitedAccuracy
-            union!(sv.pclimitations, typ.causes)
-            typ = typ.typ
-        end
-        return typ
+        return collect_limitations!(typ, sv)
     end
 end
 
@@ -1454,67 +1633,67 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
         return abstract_eval_special_value(interp, e, vtypes, sv)
     end
     e = e::Expr
-    if e.head === :call
+    ehead = e.head
+    if ehead === :call
         ea = e.args
         argtypes = collect_argtypes(interp, ea, vtypes, sv)
         if argtypes === nothing
             t = Bottom
         else
-            callinfo = abstract_call(interp, ea, argtypes, sv)
+            callinfo = abstract_call(interp, ArgInfo(ea, argtypes), sv)
             sv.stmt_info[sv.currpc] = callinfo.info
             t = callinfo.rt
         end
-    elseif e.head === :new
+    elseif ehead === :new
         t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
         if isconcretetype(t) && !ismutabletype(t)
-            args = Vector{Any}(undef, length(e.args)-1)
-            ats = Vector{Any}(undef, length(e.args)-1)
-            anyconst = false
-            allconst = true
+            nargs = length(e.args) - 1
+            ats = Vector{Any}(undef, nargs)
+            local anyrefine = false
+            local allconst = true
             for i = 2:length(e.args)
-                at = abstract_eval_value(interp, e.args[i], vtypes, sv)
-                if !anyconst
-                    anyconst = has_nontrivial_const_info(at)
-                end
-                ats[i-1] = at
+                at = widenconditional(abstract_eval_value(interp, e.args[i], vtypes, sv))
+                ft = fieldtype(t, i-1)
+                at = tmeet(at, ft)
                 if at === Bottom
                     t = Bottom
-                    allconst = anyconst = false
-                    break
-                elseif at isa Const
-                    if !(at.val isa fieldtype(t, i - 1))
-                        t = Bottom
-                        allconst = anyconst = false
-                        break
-                    end
-                    args[i-1] = at.val
-                else
+                    @goto t_computed
+                elseif !isa(at, Const)
                     allconst = false
                 end
+                if !anyrefine
+                    anyrefine = has_nontrivial_const_info(at) || # constant information
+                                at ⋤ ft                          # just a type-level information, but more precise than the declared type
+                end
+                ats[i-1] = at
             end
             # For now, don't allow partially initialized Const/PartialStruct
-            if t !== Bottom && fieldcount(t) == length(ats)
+            if fieldcount(t) == nargs
                 if allconst
-                    t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, args, length(args)))
-                elseif anyconst
+                    argvals = Vector{Any}(undef, nargs)
+                    for j in 1:nargs
+                        argvals[j] = (ats[j]::Const).val
+                    end
+                    t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs))
+                elseif anyrefine
                     t = PartialStruct(t, ats)
                 end
             end
         end
-    elseif e.head === :splatnew
+    elseif ehead === :splatnew
         t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
         if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
             n = fieldcount(t)
-            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val) &&
-                let t = t; _all(i->getfield(at.val, i) isa fieldtype(t, i), 1:n); end
+            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+                let t = t, at = at; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
                 t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
-            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields) &&
-                let t = t, at = at; _all(i->at.fields[i] ⊑ fieldtype(t, i), 1:n); end
-                t = PartialStruct(t, at.fields)
+            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields::Vector{Any}) &&
+                let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ fieldtype(t, i), 1:n); end
+                t = PartialStruct(t, at.fields::Vector{Any})
             end
         end
-    elseif e.head === :new_opaque_closure
+    elseif ehead === :new_opaque_closure
         t = Union{}
         if length(e.args) >= 5
             ea = e.args
@@ -1527,13 +1706,15 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 if isa(t, PartialOpaque)
                     # Infer this now so that the specialization is available to
                     # optimization.
+                    argtypes = most_general_argtypes(t)
+                    pushfirst!(argtypes, t.env)
                     callinfo = abstract_call_opaque_closure(interp, t,
-                        most_general_argtypes(t), sv)
+                        ArgInfo(nothing, argtypes), sv)
                     sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
                 end
             end
         end
-    elseif e.head === :foreigncall
+    elseif ehead === :foreigncall
         abstract_eval_value(interp, e.args[1], vtypes, sv)
         t = sp_type_rewrap(e.args[2], sv.linfo, true)
         for i = 3:length(e.args)
@@ -1541,21 +1722,21 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Bottom
             end
         end
-    elseif e.head === :cfunction
+    elseif ehead === :cfunction
         t = e.args[1]
         isa(t, Type) || (t = Any)
         abstract_eval_cfunction(interp, e, vtypes, sv)
-    elseif e.head === :method
+    elseif ehead === :method
         t = (length(e.args) == 1) ? Any : Nothing
-    elseif e.head === :copyast
+    elseif ehead === :copyast
         t = abstract_eval_value(interp, e.args[1], vtypes, sv)
         if t isa Const && t.val isa Expr
             # `copyast` makes copies of Exprs
             t = Expr
         end
-    elseif e.head === :invoke
+    elseif ehead === :invoke || ehead === :invoke_modify
         error("type inference data-flow error: tried to double infer a function")
-    elseif e.head === :isdefined
+    elseif ehead === :isdefined
         sym = e.args[1]
         t = Bool
         if isa(sym, SlotNumber)
@@ -1574,7 +1755,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Const(true)
             end
         elseif isa(sym, Expr) && sym.head === :static_parameter
-            n = sym.args[1]
+            n = sym.args[1]::Int
             if 1 <= n <= length(sv.sptypes)
                 spty = sv.sptypes[n]
                 if isa(spty, Const)
@@ -1585,7 +1766,8 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
     else
         t = abstract_eval_value_expr(interp, e, vtypes, sv)
     end
-    @assert !isa(t, TypeVar)
+    @label t_computed
+    @assert !isa(t, TypeVar) "unhandled TypeVar"
     if isa(t, DataType) && isdefined(t, :instance)
         # replace singleton types with their equivalent Const object
         t = Const(t.instance)
@@ -1609,7 +1791,7 @@ function abstract_eval_global(M::Module, s::Symbol)
 end
 
 function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
-    typ = src.ssavaluetypes[s.id]
+    typ = (src.ssavaluetypes::Vector{Any})[s.id]
     if typ === NOT_FOUND
         return Bottom
     end
@@ -1625,7 +1807,7 @@ function widenreturn(@nospecialize(rt), @nospecialize(bestguess), nslots::Int, s
         if isa(rt, Conditional)
             id = slot_id(rt.var)
             if 1 ≤ id ≤ nslots
-                old_id_type = widenconditional(slottypes[id]) # same as `((s[1]::VarTable)[id]::VarState).typ`
+                old_id_type = widenconditional(slottypes[id]) # same as `(states[1]::VarTable)[id].typ`
                 if (!(rt.vtype ⊑ old_id_type) || old_id_type ⊑ rt.vtype) &&
                    (!(rt.elsetype ⊑ old_id_type) || old_id_type ⊑ rt.elsetype)
                    # discard this `Conditional` since it imposes
@@ -1668,16 +1850,18 @@ function widenreturn(@nospecialize(rt), @nospecialize(bestguess), nslots::Int, s
     isa(rt, Type) && return rt
     if isa(rt, PartialStruct)
         fields = copy(rt.fields)
-        haveconst = false
+        local anyrefine = false
         for i in 1:length(fields)
-            a = widenreturn(fields[i], bestguess, nslots, slottypes, changes)
-            if !haveconst && has_const_info(a)
+            a = fields[i]
+            a = isvarargtype(a) ? a : widenreturn(a, bestguess, nslots, slottypes, changes)
+            if !anyrefine
                 # TODO: consider adding && const_prop_profitable(a) here?
-                haveconst = true
+                anyrefine = has_const_info(a) ||
+                            a ⊏ fieldtype(rt.typ, i)
             end
             fields[i] = a
         end
-        haveconst && return PartialStruct(rt.typ, fields)
+        anyrefine && return PartialStruct(rt.typ, fields)
     end
     if isa(rt, PartialOpaque)
         return rt # XXX: this case was missed in #39512
@@ -1697,20 +1881,19 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
     isva = isa(def, Method) && def.isva
     nslots = nargs - isva
     slottypes = frame.slottypes
+    ssavaluetypes = frame.src.ssavaluetypes::Vector{Any}
     while frame.pc´´ <= n
         # make progress on the active ip set
-        local pc::Int = frame.pc´´ # current program-counter
+        local pc::Int = frame.pc´´
         while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
             #print(pc,": ",s[pc],"\n")
             local pc´::Int = pc + 1 # next program-counter (after executing instruction)
             if pc == frame.pc´´
-                # need to update pc´´ to point at the new lowest instruction in W
-                min_pc = _bits_findnext(W.bits, pc + 1)
-                frame.pc´´ = min_pc == -1 ? n + 1 : min_pc
+                # want to update pc´´ to point at the new lowest instruction in W
+                frame.pc´´ = pc´
             end
             delete!(W, pc)
             frame.currpc = pc
-            frame.cur_hand = frame.handler_at[pc]
             edges = frame.stmt_edges[pc]
             edges === nothing || empty!(edges)
             frame.stmt_info[pc] = nothing
@@ -1752,7 +1935,6 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     pc´ = l
                 else
                     # general case
-                    frame.handler_at[l] = frame.cur_hand
                     changes_else = changes
                     if isa(condt, Conditional)
                         changes_else = conditional_changes(changes_else, condt.elsetype, condt.var)
@@ -1800,7 +1982,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     for (caller, caller_pc) in frame.cycle_backedges
                         # notify backedges of updated type information
                         typeassert(caller.stmt_types[caller_pc], VarTable) # we must have visited this statement before
-                        if !(caller.src.ssavaluetypes[caller_pc] === Any)
+                        if !((caller.src.ssavaluetypes::Vector{Any})[caller_pc] === Any)
                             # no reason to revisit if that call-site doesn't affect the final result
                             if caller_pc < caller.pc´´
                                 caller.pc´´ = caller_pc
@@ -1810,8 +1992,8 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     end
                 end
             elseif hd === :enter
+                stmt = stmt::Expr
                 l = stmt.args[1]::Int
-                frame.cur_hand = Pair{Any,Any}(l, frame.cur_hand)
                 # propagate type info to exception handler
                 old = states[l]
                 newstate_catch = stupdate!(old, changes)
@@ -1823,28 +2005,28 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     states[l] = newstate_catch
                 end
                 typeassert(states[l], VarTable)
-                frame.handler_at[l] = frame.cur_hand
             elseif hd === :leave
-                for i = 1:((stmt.args[1])::Int)
-                    frame.cur_hand = (frame.cur_hand::Pair{Any,Any}).second
-                end
             else
                 if hd === :(=)
+                    stmt = stmt::Expr
                     t = abstract_eval_statement(interp, stmt.args[2], changes, frame)
                     if t === Bottom
                         break
                     end
-                    frame.src.ssavaluetypes[pc] = t
+                    ssavaluetypes[pc] = t
                     lhs = stmt.args[1]
                     if isa(lhs, SlotNumber)
                         changes = StateUpdate(lhs, VarState(t, false), changes, false)
                     end
                 elseif hd === :method
+                    stmt = stmt::Expr
                     fname = stmt.args[1]
                     if isa(fname, SlotNumber)
                         changes = StateUpdate(fname, VarState(Any, false), changes, false)
                     end
-                elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect
+                elseif hd === :code_coverage_effect ||
+                       (hd !== :boundscheck && # :boundscheck can be narrowed to Bool
+                        hd !== nothing && is_meta_expr_head(hd))
                     # these do not generate code
                 else
                     t = abstract_eval_statement(interp, stmt, changes, frame)
@@ -1854,19 +2036,25 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     if !isempty(frame.ssavalue_uses[pc])
                         record_ssa_assign(pc, t, frame)
                     else
-                        frame.src.ssavaluetypes[pc] = t
+                        ssavaluetypes[pc] = t
                     end
                 end
-                if frame.cur_hand !== nothing && isa(changes, StateUpdate)
-                    # propagate new type info to exception handler
-                    # the handling for Expr(:enter) propagates all changes from before the try/catch
-                    # so this only needs to propagate any changes
-                    l = frame.cur_hand.first::Int
-                    if stupdate1!(states[l]::VarTable, changes::StateUpdate) !== false
-                        if l < frame.pc´´
-                            frame.pc´´ = l
+                if isa(changes, StateUpdate)
+                    let cur_hand = frame.handler_at[pc], l, enter
+                        while cur_hand != 0
+                            enter = frame.src.code[cur_hand]
+                            l = (enter::Expr).args[1]::Int
+                            # propagate new type info to exception handler
+                            # the handling for Expr(:enter) propagates all changes from before the try/catch
+                            # so this only needs to propagate any changes
+                            if stupdate1!(states[l]::VarTable, changes::StateUpdate) !== false
+                                if l < frame.pc´´
+                                    frame.pc´´ = l
+                                end
+                                push!(W, l)
+                            end
+                            cur_hand = frame.handler_at[cur_hand]
                         end
-                        push!(W, l)
                     end
                 end
             end
@@ -1875,11 +2063,10 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
 
             if t === nothing
                 # mark other reached expressions as `Any` to indicate they don't throw
-                frame.src.ssavaluetypes[pc] = Any
+                ssavaluetypes[pc] = Any
             end
 
             pc´ > n && break # can't proceed with the fast-path fall-through
-            frame.handler_at[pc´] = frame.cur_hand
             newstate = stupdate!(states[pc´], changes)
             if isa(stmt, GotoNode) && frame.pc´´ < pc´
                 # if we are processing a goto node anyways,
@@ -1890,7 +2077,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     states[pc´] = newstate
                 end
                 push!(W, pc´)
-                pc = frame.pc´´
+                break
             elseif newstate !== nothing
                 states[pc´] = newstate
                 pc = pc´
@@ -1900,13 +2087,14 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 break
             end
         end
+        frame.pc´´ = _bits_findnext(W.bits, frame.pc´´)::Int # next program-counter
     end
     frame.dont_work_on_me = false
     nothing
 end
 
 function conditional_changes(changes::VarTable, @nospecialize(typ), var::SlotNumber)
-    oldtyp = (changes[slot_id(var)]::VarState).typ
+    oldtyp = changes[slot_id(var)].typ
     # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
     # since we probably formed these types with `typesubstract`, the comparison is likely simple
     if ignorelimited(typ) ⊑ ignorelimited(oldtyp)
@@ -1919,7 +2107,7 @@ end
 
 function bool_rt_to_conditional(@nospecialize(rt), slottypes::Vector{Any}, state::VarTable, slot_id::Int)
     old = slottypes[slot_id]
-    new = widenconditional((state[slot_id]::VarState).typ) # avoid nested conditional
+    new = widenconditional(state[slot_id].typ) # avoid nested conditional
     if new ⊑ old && !(old ⊑ new)
         if isa(rt, Const)
             val = rt.val
diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index f351429aff7eb..2517b181d2804 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -5,10 +5,20 @@
 # especially try to make sure any recursive and leaf functions have concrete signatures,
 # since we won't be able to specialize & infer them at runtime
 
-let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
-    world = get_world_counter(),
+time() = ccall(:jl_clock_now, Float64, ())
+
+let
+    world = get_world_counter()
     interp = NativeInterpreter(world)
 
+    fs = Any[
+        # we first create caches for the optimizer, because its passes contain many loop constructs
+        # that are better not run in the interpreter, even during bootstrapping
+        run_passes,
+        # then we create caches for inference entries
+        typeinf_ext, typeinf, typeinf_edge,
+    ]
+    # tfuncs can't be inferred from the inference entries above, so here we infer them manually
     for x in T_FFUNC_VAL
         push!(fs, x[3])
     end
@@ -20,16 +30,17 @@ let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
             println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
         end
     end
+    starttime = time()
     for f in fs
         for m in _methods_by_ftype(Tuple{typeof(f), Vararg{Any}}, 10, typemax(UInt))
             # remove any TypeVars from the intersection
             typ = Any[m.spec_types.parameters...]
             for i = 1:length(typ)
-                if isa(typ[i], TypeVar)
-                    typ[i] = typ[i].ub
-                end
+                typ[i] = unwraptv(typ[i])
             end
             typeinf_type(interp, m.method, Tuple{typ...}, m.sparams)
         end
     end
+    endtime = time()
+    println("Core.Compiler ──── ", sub_float(endtime,starttime), " seconds")
 end
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 5882c967dbf14..c265512afcbf6 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -6,7 +6,8 @@ using Core.Intrinsics, Core.IR
 
 import Core: print, println, show, write, unsafe_write, stdout, stderr,
              _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction,
-             MethodInstance, CodeInstance, MethodMatch, PartialOpaque
+             MethodInstance, CodeInstance, MethodMatch, PartialOpaque,
+             TypeofVararg
 
 const getproperty = Core.getfield
 const setproperty! = Core.setfield!
@@ -22,14 +23,10 @@ eval(m, x) = Core.eval(m, x)
 include(x) = Core.include(Compiler, x)
 include(mod, x) = Core.include(mod, x)
 
-# The real @inline macro is not available until after array.jl, so this
-# internal macro splices the meta Expr directly into the function body.
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-macro _noinline_meta()
-    Expr(:meta, :noinline)
-end
+# The @inline/@noinline macros that annotate a function declaration are not available until
+# after array.jl, so these zero-argument forms are placed inside a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
 
 # essential files and libraries
 include("essentials.jl")
@@ -88,9 +85,9 @@ using .Iterators: Flatten, Filter, product  # for generators
 include("namedtuple.jl")
 
 ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@_inline_meta; (f(1),))
-ntuple(f, ::Val{2}) = (@_inline_meta; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@_inline_meta; (f(1), f(2), f(3)))
+ntuple(f, ::Val{1}) = (@inline; (f(1),))
+ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
+ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
 ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int)
 ntuple(f, n) = (Any[f(i) for i = 1:n]...,)
 
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index 327ab85d104f3..ce806531adf42 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -3,43 +3,90 @@
 function is_argtype_match(@nospecialize(given_argtype),
                           @nospecialize(cache_argtype),
                           overridden_by_const::Bool)
-    if isa(given_argtype, Const) || isa(given_argtype, PartialStruct) || isa(given_argtype, PartialOpaque)
+    if is_forwardable_argtype(given_argtype)
         return is_lattice_equal(given_argtype, cache_argtype)
     end
     return !overridden_by_const
 end
 
+function is_forwardable_argtype(@nospecialize x)
+    # `x` is forwardable iff it is one of the four wrapper kinds checked below
+    (isa(x, Const) || isa(x, Conditional)) && return true
+    isa(x, PartialStruct) && return true
+    return isa(x, PartialOpaque)
+end
+
 # In theory, there could be a `cache` containing a matching `InferenceResult`
 # for the provided `linfo` and `given_argtypes`. The purpose of this function is
 # to return a valid value for `cache_lookup(linfo, argtypes, cache).argtypes`,
 # so that we can construct cache-correct `InferenceResult`s in the first place.
-function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector, va_override)
+function matching_cache_argtypes(
+    linfo::MethodInstance, (arginfo, sv)#=::Tuple{ArgInfo,InferenceState}=#, va_override::Bool)
+    (; fargs, argtypes) = arginfo
     @assert isa(linfo.def, Method) # ensure the next line works
     nargs::Int = linfo.def.nargs
-    @assert length(given_argtypes) >= (nargs - 1)
-    given_argtypes = anymap(widenconditional, given_argtypes)
-    if va_override || linfo.def.isva
+    cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing, va_override)
+    given_argtypes = Vector{Any}(undef, length(argtypes))
+    local condargs = nothing
+    for i in 1:length(argtypes)
+        argtype = argtypes[i]
+        # forward `Conditional` if it conveys a constraint on any other argument
+        if isa(argtype, Conditional) && fargs !== nothing
+            cnd = argtype
+            slotid = find_constrained_arg(cnd, fargs, sv)
+            if slotid !== nothing
+                # using union-split signature, we may be able to narrow down `Conditional`
+                sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid])
+                vtype = tmeet(cnd.vtype, sigt)
+                elsetype = tmeet(cnd.elsetype, sigt)
+                if vtype === Bottom && elsetype === Bottom
+                    # we accidentally proved this method match is impossible
+                    # TODO bail out here immediately rather than just propagating Bottom ?
+                    given_argtypes[i] = Bottom
+                else
+                    if condargs === nothing
+                        condargs = Tuple{Int,Int}[]
+                    end
+                    push!(condargs, (slotid, i))
+                    given_argtypes[i] = Conditional(SlotNumber(slotid), vtype, elsetype)
+                end
+                continue
+            end
+        end
+        given_argtypes[i] = widenconditional(argtype)
+    end
+    isva = va_override || linfo.def.isva
+    if isva || isvarargtype(given_argtypes[end])
         isva_given_argtypes = Vector{Any}(undef, nargs)
-        for i = 1:(nargs - 1)
+        for i = 1:(nargs - isva)
             isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
         end
-        if length(given_argtypes) >= nargs || !isvarargtype(given_argtypes[end])
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[nargs:end])
-        else
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[end:end])
+        if isva
+            if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
+                last = length(given_argtypes)
+            else
+                last = nargs
+            end
+            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[last:end])
+            # invalidate `Conditional` imposed on varargs
+            if condargs !== nothing
+                for (slotid, i) in condargs
+                    if slotid ≥ last
+                        isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i])
+                    end
+                end
+            end
         end
         given_argtypes = isva_given_argtypes
     end
-    cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing, va_override)
-    if nargs === length(given_argtypes)
-        for i in 1:nargs
-            given_argtype = given_argtypes[i]
-            cache_argtype = cache_argtypes[i]
-            if !is_argtype_match(given_argtype, cache_argtype, overridden_by_const[i])
-                # prefer the argtype we were given over the one computed from `linfo`
-                cache_argtypes[i] = given_argtype
-                overridden_by_const[i] = true
-            end
+    @assert length(given_argtypes) == nargs
+    for i in 1:nargs
+        given_argtype = given_argtypes[i]
+        cache_argtype = cache_argtypes[i]
+        if !is_argtype_match(given_argtype, cache_argtype, false)
+            # prefer the argtype we were given over the one computed from `linfo`
+            cache_argtypes[i] = given_argtype
+            overridden_by_const[i] = true
         end
     end
     return cache_argtypes, overridden_by_const
@@ -48,7 +95,7 @@ end
 function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(specTypes),
     isva::Bool, withfirst::Bool = true)
     toplevel = method === nothing
-    linfo_argtypes = Any[unwrap_unionall(specTypes).parameters...]
+    linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
     nargs::Int = toplevel ? 0 : method.nargs
     if !withfirst
         # For opaque closure, the closure environment is processed elsewhere
@@ -60,7 +107,8 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe
     if !toplevel && isva
         if specTypes == Tuple
             if nargs > 1
-                linfo_argtypes = svec(Any[Any for i = 1:(nargs - 1)]..., Tuple.parameters[1])
+                linfo_argtypes = Any[Any for i = 1:nargs]
+                linfo_argtypes[end] = Vararg{Any}
             end
             vargtype = Tuple
         else
@@ -69,17 +117,16 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe
                 va = linfo_argtypes[linfo_argtypes_length]
                 if isvarargtype(va)
                     new_va = rewrap_unionall(unconstrain_vararg_length(va), specTypes)
-                    vargtype_elements = Any[new_va]
                     vargtype = Tuple{new_va}
                 else
-                    vargtype_elements = Any[]
                     vargtype = Tuple{}
                 end
             else
                 vargtype_elements = Any[]
-                for p in linfo_argtypes[nargs:linfo_argtypes_length]
-                    p = isvarargtype(p) ? unconstrain_vararg_length(p) : p
-                    push!(vargtype_elements, rewrap(p, specTypes))
+                for i in nargs:linfo_argtypes_length
+                    p = linfo_argtypes[i]
+                    p = unwraptv(isvarargtype(p) ? unconstrain_vararg_length(p) : p)
+                    push!(vargtype_elements, elim_free_typevars(rewrap_unionall(p, specTypes)))
                 end
                 for i in 1:length(vargtype_elements)
                     atyp = vargtype_elements[i]
@@ -111,16 +158,14 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe
                 atyp = unwrapva(atyp)
                 tail_index -= 1
             end
-            while isa(atyp, TypeVar)
-                atyp = atyp.ub
-            end
+            atyp = unwraptv(atyp)
             if isa(atyp, DataType) && isdefined(atyp, :instance)
                 # replace singleton types with their equivalent Const object
                 atyp = Const(atyp.instance)
             elseif isconstType(atyp)
                 atyp = Const(atyp.parameters[1])
             else
-                atyp = rewrap(atyp, specTypes)
+                atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes))
             end
             i == n && (lastatype = atyp)
             cache_argtypes[i] = atyp
@@ -134,6 +179,19 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe
     cache_argtypes
 end
 
+# Eliminate free `TypeVar`s up front so that downstream code never has to deal with them.
+# At runtime only `Type{...}::DataType` can contain invalid type parameters; any other
+# malformed type seen here is a user-constructed type argument given at an inference entry.
+# So this function replaces a malformed `Type{...}::DataType` with `Type`,
+# and replaces every other type that has free type variables with `Any`.
+function elim_free_typevars(@nospecialize t)
+    if has_free_typevars(t)
+        return isType(t) ? Type : Any
+    else
+        return t
+    end
+end
+
 function matching_cache_argtypes(linfo::MethodInstance, ::Nothing, va_override::Bool)
     mthd = isa(linfo.def, Method) ? linfo.def::Method : nothing
     cache_argtypes = most_general_argtypes(mthd, linfo.specTypes,
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index a3f2b1ea95d27..ff9ffa5456458 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -2,6 +2,24 @@
 
 const LineNum = Int
 
+# The type of a variable load is either a value or an UndefVarError
+# (only used in abstractinterpret, doesn't appear in optimize)
+struct VarState
+    typ
+    undef::Bool
+    VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
+end
+
+"""
+    const VarTable = Vector{VarState}
+
+The extended lattice that maps each local variable to its inferred type, represented as `AbstractLattice`.
+Each index corresponds to the `id` of the `SlotNumber` that identifies that local variable.
+Note that `InferenceState` maintains a separate `VarTable` per statement (see `stmt_types`)
+to enable flow-sensitive analysis.
+"""
+const VarTable = Vector{VarState}
+
 mutable struct InferenceState
     params::InferenceParams
     result::InferenceResult # remember where to put the result
@@ -18,7 +36,7 @@ mutable struct InferenceState
     world::UInt
     valid_worlds::WorldRange
     nargs::Int
-    stmt_types::Vector{Union{Nothing, Vector{Any}}} # ::Vector{Union{Nothing, VarTable}}
+    stmt_types::Vector{Union{Nothing, VarTable}}
     stmt_edges::Vector{Union{Nothing, Vector{Any}}}
     stmt_info::Vector{Any}
     # return type
@@ -28,12 +46,9 @@ mutable struct InferenceState
     pc´´::LineNum
     nstmts::Int
     # current exception handler info
-    cur_hand #::Union{Nothing, Pair{LineNum, prev_handler}}
-    handler_at::Vector{Any}
-    n_handlers::Int
+    handler_at::Vector{LineNum}
     # ssavalue sparsity and restart info
     ssavalue_uses::Vector{BitSet}
-    throw_blocks::BitSet
 
     cycle_backedges::Vector{Tuple{InferenceState, LineNum}} # call-graph backedges connecting from callee to caller
     callers_in_cycle::Vector{InferenceState}
@@ -55,9 +70,11 @@ mutable struct InferenceState
 
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
     function InferenceState(result::InferenceResult, src::CodeInfo,
-                            cached::Bool, interp::AbstractInterpreter)
+                            cache::Symbol, interp::AbstractInterpreter)
         (; def) = linfo = result.linfo
-        code = src.code::Array{Any,1}
+        code = src.code::Vector{Any}
+
+        params = InferenceParams(interp)
 
         sp = sptypes_from_meth_instance(linfo::MethodInstance)
 
@@ -66,8 +83,8 @@ mutable struct InferenceState
         stmt_info = Any[ nothing for i = 1:length(code) ]
 
         n = length(code)
+        s_types = Union{Nothing, VarTable}[ nothing for i = 1:n ]
         s_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:n ]
-        s_types = Union{Nothing, Vector{Any}}[ nothing for i = 1:n ]
 
         # initial types
         nslots = length(src.slotflags)
@@ -83,41 +100,124 @@ mutable struct InferenceState
         s_types[1] = s_argtypes
 
         ssavalue_uses = find_ssavalue_uses(code, nssavalues)
-        throw_blocks = find_throw_blocks(code)
 
         # exception handlers
-        cur_hand = nothing
-        handler_at = Any[ nothing for i=1:n ]
-        n_handlers = 0
+        ip = BitSet()
+        handler_at = compute_trycatch(src.code, ip)
+        push!(ip, 1)
 
-        W = BitSet()
-        push!(W, 1) #initial pc to visit
+        # `throw` block deoptimization
+        params.unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
 
         mod = isa(def, Method) ? def.module : def
-
         valid_worlds = WorldRange(src.min_world,
             src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
+
+        @assert cache === :no || cache === :local || cache === :global
         frame = new(
-            InferenceParams(interp), result, linfo,
+            params, result, linfo,
             sp, slottypes, mod, 0,
             IdSet{InferenceState}(), IdSet{InferenceState}(),
             src, get_world_counter(interp), valid_worlds,
             nargs, s_types, s_edges, stmt_info,
-            Union{}, W, 1, n,
-            cur_hand, handler_at, n_handlers,
-            ssavalue_uses, throw_blocks,
+            Union{}, ip, 1, n, handler_at,
+            ssavalue_uses,
             Vector{Tuple{InferenceState,LineNum}}(), # cycle_backedges
             Vector{InferenceState}(), # callers_in_cycle
             #=parent=#nothing,
-            cached, false, false,
+            cache === :global, false, false,
             CachedMethodTable(method_table(interp)),
             interp)
         result.result = frame
-        cached && push!(get_inference_cache(interp), result)
+        cache !== :no && push!(get_inference_cache(interp), result)
         return frame
     end
 end
 
+function compute_trycatch(code::Vector{Any}, ip::BitSet)
+    # The goal initially is to record the frame like this for the state at exit:
+    # 1: (enter 3) # == 0
+    # 2: (expr)    # == 1
+    # 3: (leave 1) # == 1
+    # 4: (expr)    # == 0
+    # then we can find all trys by walking backwards from :enter statements,
+    # and all catches by looking at the statement after the :enter
+    n = length(code)
+    empty!(ip)
+    ip.offset = 0 # for _bits_findnext
+    push!(ip, n + 1)
+    handler_at = fill(0, n)
+
+    # start from all :enter statements and record the location of the try
+    for pc = 1:n
+        stmt = code[pc]
+        if isexpr(stmt, :enter)
+            l = stmt.args[1]::Int
+            handler_at[pc + 1] = pc
+            push!(ip, pc + 1)
+            handler_at[l] = pc
+            push!(ip, l)
+        end
+    end
+
+    # now forward those marks to all :leave statements
+    pc´´ = 0
+    while true
+        # make progress on the active ip set
+        pc = _bits_findnext(ip.bits, pc´´)::Int
+        pc > n && break
+        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
+            pc´ = pc + 1 # next program-counter (after executing instruction)
+            if pc == pc´´
+                pc´´ = pc´
+            end
+            delete!(ip, pc)
+            cur_hand = handler_at[pc]
+            @assert cur_hand != 0 "unbalanced try/catch"
+            stmt = code[pc]
+            if isa(stmt, GotoNode)
+                pc´ = stmt.label
+            elseif isa(stmt, GotoIfNot)
+                l = stmt.dest::Int
+                if handler_at[l] != cur_hand
+                    @assert handler_at[l] == 0 "unbalanced try/catch"
+                    handler_at[l] = cur_hand
+                    if l < pc´´
+                        pc´´ = l
+                    end
+                    push!(ip, l)
+                end
+            elseif isa(stmt, ReturnNode)
+                @assert !isdefined(stmt, :val) "unbalanced try/catch"
+                break
+            elseif isa(stmt, Expr)
+                head = stmt.head
+                if head === :enter
+                    cur_hand = pc
+                elseif head === :leave
+                    l = stmt.args[1]::Int
+                    for i = 1:l
+                        cur_hand = handler_at[cur_hand]
+                    end
+                    cur_hand == 0 && break
+                end
+            end
+
+            pc´ > n && break # can't proceed with the fast-path fall-through
+            if handler_at[pc´] != cur_hand
+                @assert handler_at[pc´] == 0 "unbalanced try/catch"
+                handler_at[pc´] = cur_hand
+            elseif !in(pc´, ip)
+                break  # already visited
+            end
+            pc = pc´
+        end
+    end
+
+    @assert first(ip) == n + 1
+    return handler_at
+end
+
 """
     Iterate through all callers of the given InferenceState in the abstract
     interpretation stack (including the given InferenceState itself), vising
@@ -143,12 +243,12 @@ end
 
 method_table(interp::AbstractInterpreter, sv::InferenceState) = sv.method_table
 
-function InferenceState(result::InferenceResult, cached::Bool, interp::AbstractInterpreter)
+function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter)
     # prepare an InferenceState object for inferring lambda
     src = retrieve_code_info(result.linfo)
     src === nothing && return nothing
     validate_code_in_debug_mode(result.linfo, src, "lowered")
-    return InferenceState(result, src, cached, interp)
+    return InferenceState(result, src, cache, interp)
 end
 
 function sptypes_from_meth_instance(linfo::MethodInstance)
@@ -179,7 +279,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
             while temp isa UnionAll
                 temp = temp.body
             end
-            sigtypes = temp.parameters
+            sigtypes = (temp::DataType).parameters
             for j = 1:length(sigtypes)
                 tj = sigtypes[j]
                 if isType(tj) && tj.parameters[1] === Pi
@@ -211,7 +311,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
                     ty = UnionAll(tv, Type{tv})
                 end
             end
-        elseif isa(v, Core.TypeofVararg)
+        elseif isvarargtype(v)
             ty = Int
         else
             ty = Const(v)
@@ -233,12 +333,13 @@ end
 update_valid_age!(edge::InferenceState, sv::InferenceState) = update_valid_age!(sv, edge.valid_worlds)
 
 function record_ssa_assign(ssa_id::Int, @nospecialize(new), frame::InferenceState)
-    old = frame.src.ssavaluetypes[ssa_id]
+    ssavaluetypes = frame.src.ssavaluetypes::Vector{Any}
+    old = ssavaluetypes[ssa_id]
     if old === NOT_FOUND || !(new ⊑ old)
         # typically, we expect that old ⊑ new (that output information only
         # gets less precise with worse input information), but to actually
         # guarantee convergence we need to use tmerge here to ensure that is true
-        frame.src.ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(old, new)
+        ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(old, new)
         W = frame.ip
         s = frame.stmt_types
         for r in frame.ssavalue_uses[ssa_id]
@@ -296,3 +397,5 @@ function print_callstack(sv::InferenceState)
         sv = sv.parent
     end
 end
+
+get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index e50bb5bcdfc98..22f31ad1f3656 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -21,21 +21,37 @@ function push!(et::EdgeTracker, ci::CodeInstance)
     push!(et, ci.def)
 end
 
-struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P}
+struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter}
     params::OptimizationParams
     et::S
     mi_cache::T
-    policy::P
+    interp::I
 end
 
-function default_inlining_policy(@nospecialize(src))
+function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8,
+                         mi::MethodInstance, argtypes::Vector{Any})
     if isa(src, CodeInfo) || isa(src, Vector{UInt8})
         src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
-        src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
+        src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
         return src_inferred && src_inlineable ? src : nothing
-    end
-    if isa(src, OptimizationState) && isdefined(src, :ir)
-        return src.src.inlineable ? src.ir : nothing
+    elseif isa(src, OptimizationState) && isdefined(src, :ir)
+        return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing
+    elseif src === nothing && is_stmt_inline(stmt_flag)
+        # if this statement is forced to be inlined, make an additional effort to find the
+        # inferred source in the local cache
+        # we still won't find a source for a recursive call, because "single-level" inlining
+        # seems to be more trouble and complexity than it's worth
+        inf_result = cache_lookup(mi, argtypes, get_inference_cache(interp))
+        inf_result === nothing && return nothing
+        src = inf_result.src
+        if isa(src, CodeInfo)
+            src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
+            return src_inferred ? src : nothing
+        elseif isa(src, OptimizationState)
+            return isdefined(src, :ir) ? src.ir : nothing
+        else
+            return nothing
+        end
     end
     return nothing
 end
@@ -48,7 +64,6 @@ mutable struct OptimizationState
     ir::Union{Nothing, IRCode}
     stmt_info::Vector{Any}
     mod::Module
-    nargs::Int
     sptypes::Vector{Any} # static parameters
     slottypes::Vector{Any}
     const_api::Bool
@@ -58,9 +73,9 @@ mutable struct OptimizationState
         inlining = InliningState(params,
             EdgeTracker(s_edges, frame.valid_worlds),
             WorldView(code_cache(interp), frame.world),
-            inlining_policy(interp))
+            interp)
         return new(frame.linfo,
-                   frame.src, nothing, frame.stmt_info, frame.mod, frame.nargs,
+                   frame.src, nothing, frame.stmt_info, frame.mod,
                    frame.sptypes, frame.slottypes, false,
                    inlining)
     end
@@ -70,6 +85,8 @@ mutable struct OptimizationState
         nssavalues = src.ssavaluetypes
         if nssavalues isa Int
             src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
+        else
+            nssavalues = length(src.ssavaluetypes::Vector{Any})
         end
         nslots = length(src.slotflags)
         slottypes = src.slottypes
@@ -79,18 +96,18 @@ mutable struct OptimizationState
         stmt_info = Any[nothing for i = 1:nssavalues]
         # cache some useful state computations
         def = linfo.def
-        mod, nargs = isa(def, Method) ? (def.module, def.nargs) : (def, 0)
+        mod = isa(def, Method) ? def.module : def
         # Allow using the global MI cache, but don't track edges.
         # This method is mostly used for unit testing the optimizer
         inlining = InliningState(params,
             nothing,
             WorldView(code_cache(interp), get_world_counter()),
-            inlining_policy(interp))
+            interp)
         return new(linfo,
-                   src, nothing, stmt_info, mod, nargs,
+                   src, nothing, stmt_info, mod,
                    sptypes_from_meth_instance(linfo), slottypes, false,
                    inlining)
-        end
+    end
 end
 
 function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter)
@@ -100,15 +117,15 @@ function OptimizationState(linfo::MethodInstance, params::OptimizationParams, in
 end
 
 function ir_to_codeinf!(opt::OptimizationState)
-    replace_code_newstyle!(opt.src, opt.ir::IRCode, opt.nargs - 1)
+    (; linfo, src) = opt
+    optdef = linfo.def
+    replace_code_newstyle!(src, opt.ir::IRCode, isa(optdef, Method) ? Int(optdef.nargs) : 0)
     opt.ir = nothing
-    let src = opt.src::CodeInfo
-        widen_all_consts!(src)
-        src.inferred = true
-        # finish updating the result struct
-        validate_code_in_debug_mode(opt.linfo, src, "optimized")
-        return src
-    end
+    widen_all_consts!(src)
+    src.inferred = true
+    # finish updating the result struct
+    validate_code_in_debug_mode(linfo, src, "optimized")
+    return src
 end
 
 #############
@@ -124,12 +141,20 @@ const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
 const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
 # const SLOT_CALLED      = 64
 
-# This statement was marked as @inbounds by the user. If replaced by inlining,
-# any contained boundschecks may be removed
-const IR_FLAG_INBOUNDS       = 0x01
+# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
+
+const IR_FLAG_NULL        = 0x00
+# This statement is marked as @inbounds by user.
+# If replaced by inlining, any contained boundschecks may be removed.
+const IR_FLAG_INBOUNDS    = 0x01 << 0
+# This statement is marked as @inline by user
+const IR_FLAG_INLINE      = 0x01 << 1
+# This statement is marked as @noinline by user
+const IR_FLAG_NOINLINE    = 0x01 << 2
+const IR_FLAG_THROW_BLOCK = 0x01 << 3
 # This statement may be removed if its result is unused. In particular it must
 # thus be both pure and effect free.
-const IR_FLAG_EFFECT_FREE    = 0x01 << 4
+const IR_FLAG_EFFECT_FREE = 0x01 << 4
 
 # known to be always effect-free (in particular nothrow)
 const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields]
@@ -138,7 +163,7 @@ const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields]
 const _PURE_OR_ERROR_BUILTINS = [
     fieldtype, apply_type, isa, UnionAll,
     getfield, arrayref, const_arrayref, isdefined, Core.sizeof,
-    Core.kwfunc, ifelse, Core._typevar, (<:)
+    Core.kwfunc, Core.ifelse, Core._typevar, (<:)
 ]
 
 const TOP_TUPLE = GlobalRef(Core, :tuple)
@@ -149,28 +174,9 @@ const TOP_TUPLE = GlobalRef(Core, :tuple)
 
 _topmod(sv::OptimizationState) = _topmod(sv.mod)
 
-function isinlineable(m::Method, me::OptimizationState, params::OptimizationParams, union_penalties::Bool, bonus::Int=0)
-    # compute the cost (size) of inlining this code
-    inlineable = false
-    cost_threshold = params.inline_cost_threshold
-    if m.module === _topmod(m.module)
-        # a few functions get special treatment
-        name = m.name
-        sig = m.sig
-        if ((name === :+ || name === :* || name === :min || name === :max) &&
-            isa(sig,DataType) &&
-            sig == Tuple{sig.parameters[1],Any,Any,Any,Vararg{Any}})
-            inlineable = true
-        elseif (name === :iterate || name === :unsafe_convert ||
-                name === :cconvert)
-            cost_threshold *= 4
-        end
-    end
-    if !inlineable
-        inlineable = inline_worthy(me.ir::IRCode, params, union_penalties, cost_threshold + bonus)
-    end
-    return inlineable
-end
+is_stmt_inline(stmt_flag::UInt8)      = stmt_flag & IR_FLAG_INLINE      ≠ 0
+is_stmt_noinline(stmt_flag::UInt8)    = stmt_flag & IR_FLAG_NOINLINE    ≠ 0
+is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
 
 # These affect control flow within the function (so may not be removed
 # if there is no usage within the function), but don't affect the purity
@@ -180,7 +186,7 @@ function stmt_affects_purity(@nospecialize(stmt), ir)
         return false
     end
     if isa(stmt, GotoIfNot)
-        t = argextype(stmt.cond, ir, ir.sptypes)
+        t = argextype(stmt.cond, ir)
         return !(t ⊑ Bool)
     end
     if isa(stmt, Expr)
@@ -189,10 +195,131 @@ function stmt_affects_purity(@nospecialize(stmt), ir)
     return true
 end
 
-# Convert IRCode back to CodeInfo and compute inlining cost and sideeffects
+"""
+    stmt_effect_free(stmt, rt, src::Union{IRCode,IncrementalCompact})
+
+Determine whether a `stmt` is "side-effect-free", i.e. may be removed if it has no uses.
+"""
+function stmt_effect_free(@nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
+    isa(stmt, PiNode) && return true
+    isa(stmt, PhiNode) && return true
+    isa(stmt, ReturnNode) && return false
+    isa(stmt, GotoNode) && return false
+    isa(stmt, GotoIfNot) && return false
+    isa(stmt, Slot) && return false # Slots shouldn't occur in the IR at this point, but let's be defensive here
+    isa(stmt, GlobalRef) && return isdefined(stmt.mod, stmt.name)
+    if isa(stmt, Expr)
+        (; head, args) = stmt
+        if head === :static_parameter
+            etyp = (isa(src, IRCode) ? src.sptypes : src.ir.sptypes)[args[1]::Int]
+            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
+            return isa(etyp, Const)
+        end
+        if head === :call
+            f = argextype(args[1], src)
+            f = singleton_type(f)
+            f === nothing && return false
+            is_return_type(f) && return true
+            if isa(f, IntrinsicFunction)
+                intrinsic_effect_free_if_nothrow(f) || return false
+                return intrinsic_nothrow(f,
+                        Any[argextype(args[i], src) for i = 2:length(args)])
+            end
+            contains_is(_PURE_BUILTINS, f) && return true
+            contains_is(_PURE_OR_ERROR_BUILTINS, f) || return false
+            rt === Bottom && return false
+            return _builtin_nothrow(f, Any[argextype(args[i], src) for i = 2:length(args)], rt)
+        elseif head === :new
+            typ = argextype(args[1], src)
+            # `Expr(:new)` of unknown type could raise arbitrary TypeError.
+            typ, isexact = instanceof_tfunc(typ)
+            isexact || return false
+            isconcretedispatch(typ) || return false
+            typ = typ::DataType
+            fieldcount(typ) >= length(args) - 1 || return false
+            for fld_idx in 1:(length(args) - 1)
+                eT = argextype(args[fld_idx + 1], src)
+                fT = fieldtype(typ, fld_idx)
+                eT ⊑ fT || return false
+            end
+            return true
+        elseif head === :new_opaque_closure
+            length(args) < 5 && return false
+            typ = argextype(args[1], src)
+            typ, isexact = instanceof_tfunc(typ)
+            isexact || return false
+            typ ⊑ Tuple || return false
+            isva = argextype(args[2], src)
+            rt_lb = argextype(args[3], src)
+            rt_ub = argextype(args[4], src)
+            src = argextype(args[5], src)
+            if !(isva ⊑ Bool && rt_lb ⊑ Type && rt_ub ⊑ Type && src ⊑ Method)
+                return false
+            end
+            return true
+        elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck
+            return true
+        else
+            # e.g. :loopinfo
+            return false
+        end
+    end
+    return true
+end
+
+"""
+    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
+    argextype(x, src::CodeInfo, sptypes::Vector{Any}) -> t
+
+Return the type of value `x` in the context of inferred source `src`.
+Note that `t` might be an extended lattice element.
+Use `widenconst(t)` to get the native Julia type of `x`.
+"""
+argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{Any} = ir.sptypes) =
+    argextype(x, ir, sptypes, ir.argtypes)
+function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{Any} = compact.ir.sptypes)
+    isa(x, AnySSAValue) && return types(compact)[x]
+    return argextype(x, compact, sptypes, compact.ir.argtypes)
+end
+argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{Any}) = argextype(x, src, sptypes, src.slottypes::Vector{Any})
+function argextype(
+    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
+    sptypes::Vector{Any}, slottypes::Vector{Any})
+    if isa(x, Expr)
+        if x.head === :static_parameter
+            return sptypes[x.args[1]::Int]
+        elseif x.head === :boundscheck
+            return Bool
+        elseif x.head === :copyast
+            return argextype(x.args[1], src, sptypes, slottypes)
+        end
+        @assert false "argextype only works on argument-position values"
+    elseif isa(x, SlotNumber)
+        return slottypes[x.id]
+    elseif isa(x, TypedSlot)
+        return x.typ
+    elseif isa(x, SSAValue)
+        return abstract_eval_ssavalue(x, src)
+    elseif isa(x, Argument)
+        return slottypes[x.n]
+    elseif isa(x, QuoteNode)
+        return Const(x.value)
+    elseif isa(x, GlobalRef)
+        return abstract_eval_global(x.mod, x.name)
+    elseif isa(x, PhiNode)
+        return Any
+    elseif isa(x, PiNode)
+        return x.typ
+    else
+        return Const(x)
+    end
+end
+abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
+
+# compute inlining cost and sideeffects
 function finish(interp::AbstractInterpreter, opt::OptimizationState, params::OptimizationParams, ir::IRCode, @nospecialize(result))
-    (; def) = linfo = opt.linfo
-    nargs = Int(opt.nargs) - 1
+    (; src, linfo) = opt
+    (; def, specTypes) = linfo
 
     force_noinline = _any(@nospecialize(x) -> isexpr(x, :meta) && x.args[1] === :noinline, ir.meta)
 
@@ -208,13 +335,13 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             for i in 1:length(ir.stmts)
                 node = ir.stmts[i]
                 stmt = node[:inst]
-                if stmt_affects_purity(stmt, ir) && !stmt_effect_free(stmt, node[:type], ir, ir.sptypes)
+                if stmt_affects_purity(stmt, ir) && !stmt_effect_free(stmt, node[:type], ir)
                     proven_pure = false
                     break
                 end
             end
             if proven_pure
-                for fl in opt.src.slotflags
+                for fl in src.slotflags
                     if (fl & SLOT_USEDUNDEF) != 0
                         proven_pure = false
                         break
@@ -223,7 +350,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             end
         end
         if proven_pure
-            opt.src.pure = true
+            src.pure = true
         end
 
         if proven_pure
@@ -236,7 +363,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             if !(isa(result, Const) && !is_inlineable_constant(result.val))
                 opt.const_api = true
             end
-            force_noinline || (opt.src.inlineable = true)
+            force_noinline || (src.inlineable = true)
         end
     end
 
@@ -245,7 +372,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
     # determine and cache inlineability
     union_penalties = false
     if !force_noinline
-        sig = unwrap_unionall(linfo.specTypes)
+        sig = unwrap_unionall(specTypes)
         if isa(sig, DataType) && sig.name === Tuple.name
             for P in sig.parameters
                 P = unwrap_unionall(P)
@@ -257,64 +384,66 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
         else
             force_noinline = true
         end
-        if !opt.src.inlineable && result === Union{}
+        if !src.inlineable && result === Union{}
             force_noinline = true
         end
     end
     if force_noinline
-        opt.src.inlineable = false
+        src.inlineable = false
     elseif isa(def, Method)
-        if opt.src.inlineable && isdispatchtuple(linfo.specTypes)
+        if src.inlineable && isdispatchtuple(specTypes)
             # obey @inline declaration if a dispatch barrier would not help
         else
-            bonus = 0
+            # compute the cost (size) of inlining this code
+            cost_threshold = default = params.inline_cost_threshold
             if result ⊑ Tuple && !isconcretetype(widenconst(result))
-                bonus = params.inline_tupleret_bonus
+                cost_threshold += params.inline_tupleret_bonus
             end
-            if opt.src.inlineable
-                # For functions declared @inline, increase the cost threshold 20x
-                bonus += params.inline_cost_threshold*19
+            # if the method is declared as `@inline`, increase the cost threshold 20x
+            if src.inlineable
+                cost_threshold += 19*default
+            end
+            # a few functions get special treatment
+            if def.module === _topmod(def.module)
+                name = def.name
+                if name === :iterate || name === :unsafe_convert || name === :cconvert
+                    cost_threshold += 4*default
+                end
             end
-            opt.src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
+            src.inlineable = inline_worthy(ir, params, union_penalties, cost_threshold)
         end
     end
 
-    nothing
+    return nothing
 end
 
 # run the optimization work
 function optimize(interp::AbstractInterpreter, opt::OptimizationState, params::OptimizationParams, @nospecialize(result))
-    nargs = Int(opt.nargs) - 1
-    @timeit "optimizer" ir = run_passes(opt.src, nargs, opt)
+    @timeit "optimizer" ir = run_passes(opt.src, opt)
     finish(interp, opt, params, ir, result)
 end
 
-function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState)
-    preserve_coverage = coverage_enabled(sv.mod)
-    ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv)
-    ir = slot2reg(ir, ci, nargs, sv)
-    #@Base.show ("after_construct", ir)
+function run_passes(ci::CodeInfo, sv::OptimizationState)
+    @timeit "convert"   ir = convert_to_ircode(ci, sv)
+    @timeit "slot2reg"  ir = slot2reg(ir, ci, sv)
     # TODO: Domsorting can produce an updated domtree - no need to recompute here
     @timeit "compact 1" ir = compact!(ir)
-    @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
-    #@timeit "verify 2" verify_ir(ir)
-    ir = compact!(ir)
-    #@Base.show ("before_sroa", ir)
-    @timeit "SROA" ir = getfield_elim_pass!(ir)
-    #@Base.show ir.new_nodes
-    #@Base.show ("after_sroa", ir)
-    ir = adce_pass!(ir)
-    #@Base.show ("after_adce", ir)
+    @timeit "Inlining"  ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
+    # @timeit "verify 2" verify_ir(ir)
+    @timeit "compact 2" ir = compact!(ir)
+    @timeit "SROA"      ir = sroa_pass!(ir)
+    @timeit "ADCE"      ir = adce_pass!(ir)
     @timeit "type lift" ir = type_lift_pass!(ir)
     @timeit "compact 3" ir = compact!(ir)
-    #@Base.show ir
     if JLOptions().debug_level == 2
         @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
     end
     return ir
 end
 
-function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, nargs::Int, sv::OptimizationState)
+function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
+    code = copy_exprargs(ci.code)
+    coverage = coverage_enabled(sv.mod)
     # Go through and add an unreachable node after every
     # Union{} call. Then reindex labels.
     idx = 1
@@ -323,15 +452,18 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, narg
     labelmap = coverage ? fill(0, length(code)) : changemap
     prevloc = zero(eltype(ci.codelocs))
     stmtinfo = sv.stmt_info
+    codelocs = ci.codelocs
     ssavaluetypes = ci.ssavaluetypes::Vector{Any}
+    ssaflags = ci.ssaflags
     while idx <= length(code)
-        codeloc = ci.codelocs[idx]
+        codeloc = codelocs[idx]
         if coverage && codeloc != prevloc && codeloc != 0
             # insert a side-effect instruction before the current instruction in the same basic block
             insert!(code, idx, Expr(:code_coverage_effect))
-            insert!(ci.codelocs, idx, codeloc)
+            insert!(codelocs, idx, codeloc)
             insert!(ssavaluetypes, idx, Nothing)
             insert!(stmtinfo, idx, nothing)
+            insert!(ssaflags, idx, IR_FLAG_NULL)
             changemap[oldidx] += 1
             if oldidx < length(labelmap)
                 labelmap[oldidx + 1] += 1
@@ -343,9 +475,10 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, narg
             if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
                 # insert unreachable in the same basic block after the current instruction (splitting it)
                 insert!(code, idx + 1, ReturnNode())
-                insert!(ci.codelocs, idx + 1, ci.codelocs[idx])
+                insert!(codelocs, idx + 1, codelocs[idx])
                 insert!(ssavaluetypes, idx + 1, Union{})
                 insert!(stmtinfo, idx + 1, nothing)
+                insert!(ssaflags, idx + 1, ssaflags[idx])
                 if oldidx < length(changemap)
                     changemap[oldidx + 1] += 1
                     coverage && (labelmap[oldidx + 1] += 1)
@@ -358,42 +491,24 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, narg
     end
     renumber_ir_elements!(code, changemap, labelmap)
 
-    inbounds_depth = 0 # Number of stacked inbounds
     meta = Any[]
-    flags = fill(0x00, length(code))
     for i = 1:length(code)
-        stmt = code[i]
-        if isexpr(stmt, :inbounds)
-            arg1 = stmt.args[1]
-            if arg1 === true # push
-                inbounds_depth += 1
-            elseif arg1 === false # clear
-                inbounds_depth = 0
-            elseif inbounds_depth > 0 # pop
-                inbounds_depth -= 1
-            end
-            stmt = nothing
-        else
-            stmt = normalize(stmt, meta)
-        end
-        code[i] = stmt
-        if !(stmt === nothing)
-            if inbounds_depth > 0
-                flags[i] |= IR_FLAG_INBOUNDS
-            end
-        end
+        code[i] = remove_meta!(code[i], meta)
     end
-    strip_trailing_junk!(ci, code, stmtinfo, flags)
+    strip_trailing_junk!(ci, code, stmtinfo)
     cfg = compute_basic_blocks(code)
     types = Any[]
-    stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags)
-    ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes)
+    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
+    linetable = ci.linetable
+    isa(linetable, Vector{LineInfoNode}) || (linetable = collect(LineInfoNode, linetable::Vector{Any}))
+    ir = IRCode(stmts, cfg, linetable, sv.slottypes, meta, sv.sptypes)
     return ir
 end
 
-function normalize(@nospecialize(stmt), meta::Vector{Any})
+function remove_meta!(@nospecialize(stmt), meta::Vector{Any})
     if isa(stmt, Expr)
-        if stmt.head === :meta
+        head = stmt.head
+        if head === :meta
             args = stmt.args
             if length(args) > 0
                 push!(meta, stmt)
@@ -404,11 +519,13 @@ function normalize(@nospecialize(stmt), meta::Vector{Any})
     return stmt
 end
 
-function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState)
+function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
     # need `ci` for the slot metadata, IR for the code
+    svdef = sv.linfo.def
+    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
     @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
     defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
-    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.slottypes) # consumes `ir`
+    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes) # consumes `ir`
     return ir
 end
 
@@ -420,6 +537,7 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction)
              f === Intrinsics.arraylen ||   # this one is volatile
              f === Intrinsics.sqrt_llvm ||  # this one may differ at runtime (by a few ulps)
              f === Intrinsics.sqrt_llvm_fast ||  # this one may differ at runtime (by a few ulps)
+             f === Intrinsics.have_fma ||  # this one depends on the runtime environment
              f === Intrinsics.cglobal)  # cglobal lookup answer changes at runtime
 end
 
@@ -435,20 +553,19 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
 isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
 
 function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any},
-                        slottypes::Vector{Any}, union_penalties::Bool,
-                        params::OptimizationParams, error_path::Bool = false)
+                        union_penalties::Bool, params::OptimizationParams, error_path::Bool = false)
     head = ex.head
     if is_meta_expr_head(head)
         return 0
     elseif head === :call
         farg = ex.args[1]
-        ftyp = argextype(farg, src, sptypes, slottypes)
+        ftyp = argextype(farg, src, sptypes)
         if ftyp === IntrinsicFunction && farg isa SSAValue
             # if this comes from code that was already inlined into another function,
             # Consts have been widened. try to recover in simple cases.
             farg = isa(src, CodeInfo) ? src.code[farg.id] : src.stmts[farg.id][:inst]
             if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
-                ftyp = argextype(farg, src, sptypes, slottypes)
+                ftyp = argextype(farg, src, sptypes)
             end
         end
         f = singleton_type(ftyp)
@@ -470,15 +587,15 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
                 # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
                 return 0
             elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3
-                atyp = argextype(ex.args[3], src, sptypes, slottypes)
+                atyp = argextype(ex.args[3], src, sptypes)
                 return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes, slottypes)))
+            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
                 return 1
             elseif f === Core.isa
                 # If we're in a union context, we penalize type computations
                 # on union types. In such cases, it is usually better to perform
                 # union splitting on the outside.
-                if union_penalties && isa(argextype(ex.args[2],  src, sptypes, slottypes), Union)
+                if union_penalties && isa(argextype(ex.args[2],  src, sptypes), Union)
                     return params.inline_nonleaf_penalty
                 end
             end
@@ -490,18 +607,18 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
             end
             return T_FFUNC_COST[fidx]
         end
-        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes, slottypes)
+        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
         if extyp === Union{}
             return 0
         end
         return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-    elseif head === :foreigncall || head === :invoke
+    elseif head === :foreigncall || head === :invoke || head === :invoke_modify
         # Calls whose "return type" is Union{} do not actually return:
         # they are errors. Since these are not part of the typical
         # run-time of the function, we omit them from
         # consideration. This way, non-inlined error branches do not
         # prevent inlining.
-        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes, slottypes)
+        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
         return extyp === Union{} ? 0 : 20
     elseif head === :(=)
         if ex.args[1] isa GlobalRef
@@ -511,7 +628,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         end
         a = ex.args[2]
         if a isa Expr
-            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, slottypes, union_penalties, params, error_path))
+            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, union_penalties, params, error_path))
         end
         return cost
     elseif head === :copyast
@@ -527,13 +644,12 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
 end
 
 function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any},
-                                  slottypes::Vector{Any}, union_penalties::Bool, params::OptimizationParams,
-                                  throw_blocks::Union{Nothing,BitSet})
+                                  union_penalties::Bool, params::OptimizationParams)
     thiscost = 0
     dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
     if stmt isa Expr
-        thiscost = statement_cost(stmt, line, src, sptypes, slottypes, union_penalties, params,
-                                  throw_blocks !== nothing && line in throw_blocks)::Int
+        thiscost = statement_cost(stmt, line, src, sptypes, union_penalties, params,
+                                  is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
     elseif stmt isa GotoNode
         # loops are generally always expensive
         # but assume that forward jumps are already counted for from
@@ -548,10 +664,9 @@ end
 function inline_worthy(ir::IRCode,
                        params::OptimizationParams, union_penalties::Bool=false, cost_threshold::Integer=params.inline_cost_threshold)
     bodycost::Int = 0
-    throw_blocks = params.unoptimize_throw_blocks ? find_throw_blocks(ir.stmts.inst, RefValue(ir)) : nothing
     for line = 1:length(ir.stmts)
         stmt = ir.stmts[line][:inst]
-        thiscost = statement_or_branch_cost(stmt, line, ir, ir.sptypes, ir.argtypes, union_penalties, params, throw_blocks)
+        thiscost = statement_or_branch_cost(stmt, line, ir, ir.sptypes, union_penalties, params)
         bodycost = plus_saturate(bodycost, thiscost)
         bodycost > cost_threshold && return false
     end
@@ -559,13 +674,11 @@ function inline_worthy(ir::IRCode,
 end
 
 function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams)
-    throw_blocks = params.unoptimize_throw_blocks ? find_throw_blocks(body) : nothing
     maxcost = 0
     for line = 1:length(body)
         stmt = body[line]
         thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
-                                            src isa CodeInfo ? src.slottypes : src.argtypes,
-                                            unionpenalties, params, throw_blocks)
+                                            unionpenalties, params)
         cost[line] = thiscost
         if thiscost > maxcost
             maxcost = thiscost
@@ -574,14 +687,6 @@ function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeI
     return maxcost
 end
 
-function is_known_call(e::Expr, @nospecialize(func), src, sptypes::Vector{Any}, slottypes::Vector{Any} = empty_slottypes)
-    if e.head !== :call
-        return false
-    end
-    f = argextype(e.args[1], src, sptypes, slottypes)
-    return isa(f, Const) && f.val === func
-end
-
 function renumber_ir_elements!(body::Vector{Any}, changemap::Vector{Int})
     return renumber_ir_elements!(body, changemap, changemap)
 end
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index 9a6071766271e..e54a09fe351b3 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -14,7 +14,6 @@ include("compiler/ssair/basicblock.jl")
 include("compiler/ssair/domtree.jl")
 include("compiler/ssair/ir.jl")
 include("compiler/ssair/slot2ssa.jl")
-include("compiler/ssair/queries.jl")
 include("compiler/ssair/passes.jl")
 include("compiler/ssair/inlining.jl")
 include("compiler/ssair/verify.jl")
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 3fea82cff7a20..55445f5c8032b 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -2,22 +2,14 @@
 
 @nospecialize
 
-struct InvokeData
-    entry::Method
-    types0
-    min_valid::UInt
-    max_valid::UInt
-end
-
 struct Signature
     f::Any
     ft::Any
-    atypes::Vector{Any}
-    atype::Type
-    Signature(f, ft, atypes) = new(f, ft, atypes)
-    Signature(f, ft, atypes, atype) = new(f, ft, atypes, atype)
+    argtypes::Vector{Any}
+    atype #::Type
+    Signature(f, ft, argtypes, atype = nothing) = new(f, ft, argtypes, atype)
 end
-with_atype(sig::Signature) = Signature(sig.f, sig.ft, sig.atypes, argtypes_to_type(sig.atypes))
+with_atype(sig::Signature) = Signature(sig.f, sig.ft, sig.argtypes, argtypes_to_type(sig.argtypes))
 
 struct ResolvedInliningSpec
     # The LineTable and IR of the inlinee
@@ -28,14 +20,13 @@ struct ResolvedInliningSpec
 end
 
 """
-    Represents a callsite that our analysis has determined is legal to inline,
-    but did not resolve during the analysis step to allow the outer inlining
-    pass to apply its own inlining policy decisions.
+Represents a callsite that our analysis has determined is legal to inline,
+but did not resolve during the analysis step to allow the outer inlining
+pass to apply its own inlining policy decisions.
 """
 struct DelayedInliningSpec
     match::Union{MethodMatch, InferenceResult}
-    atypes::Vector{Any}
-    stmttype::Any
+    argtypes::Vector{Any}
 end
 
 struct InliningTodo
@@ -44,23 +35,37 @@ struct InliningTodo
     spec::Union{ResolvedInliningSpec, DelayedInliningSpec}
 end
 
-InliningTodo(mi::MethodInstance, match::MethodMatch,
-    atypes::Vector{Any}, @nospecialize(stmttype)) = InliningTodo(mi, DelayedInliningSpec(match, atypes, stmttype))
+InliningTodo(mi::MethodInstance, match::MethodMatch, argtypes::Vector{Any}) =
+    InliningTodo(mi, DelayedInliningSpec(match, argtypes))
 
-InliningTodo(result::InferenceResult, atypes::Vector{Any}, @nospecialize(stmttype)) =
-    InliningTodo(result.linfo, DelayedInliningSpec(result, atypes, stmttype))
+InliningTodo(result::InferenceResult, argtypes::Vector{Any}) =
+    InliningTodo(result.linfo, DelayedInliningSpec(result, argtypes))
 
 struct ConstantCase
     val::Any
     ConstantCase(val) = new(val)
 end
 
+struct SomeCase
+    val::Any
+    SomeCase(val) = new(val)
+end
+
+struct InliningCase
+    sig  # ::Type
+    item # Union{InliningTodo, MethodInstance, ConstantCase}
+    function InliningCase(@nospecialize(sig), @nospecialize(item))
+        @assert isa(item, Union{InliningTodo, MethodInstance, ConstantCase}) "invalid inlining item"
+        return new(sig, item)
+    end
+end
+
 struct UnionSplit
     fully_covered::Bool
     atype # ::Type
-    cases::Vector{Pair{Any, Any}}
+    cases::Vector{InliningCase}
     bbs::Vector{Int}
-    UnionSplit(fully_covered::Bool, atype, cases::Vector{Pair{Any, Any}}) =
+    UnionSplit(fully_covered::Bool, atype, cases::Vector{InliningCase}) =
         new(fully_covered, atype, cases, Int[])
 end
 
@@ -78,7 +83,6 @@ end
 
 mutable struct CFGInliningState
     new_cfg_blocks::Vector{BasicBlock}
-    inserted_block_ranges::Vector{UnitRange{Int}}
     todo_bbs::Vector{Tuple{Int, Int}}
     first_bb::Int
     bb_rename::Vector{Int}
@@ -91,7 +95,6 @@ end
 function CFGInliningState(ir::IRCode)
     CFGInliningState(
         BasicBlock[],
-        UnitRange{Int}[],
         Tuple{Int, Int}[],
         0,
         zeros(Int, length(ir.cfg.blocks)),
@@ -109,7 +112,7 @@ function inline_into_block!(state::CFGInliningState, block::Int)
         new_range = state.first_bb+1:block
         l = length(state.new_cfg_blocks)
         state.bb_rename[new_range] = (l+1:l+length(new_range))
-        append!(state.new_cfg_blocks, map(copy, state.cfg.blocks[new_range]))
+        append!(state.new_cfg_blocks, (copy(block) for block in state.cfg.blocks[new_range]))
         push!(state.merged_orig_blocks, last(new_range))
     end
     state.first_bb = block
@@ -137,14 +140,13 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat
         need_split = true #!(idx == last_block_idx)
     end
 
-    if !need_split
-        delete!(state.merged_orig_blocks, last(new_range))
-    end
+    need_split || delete!(state.merged_orig_blocks, last(new_range))
 
     push!(state.todo_bbs, (length(state.new_cfg_blocks) - 1 + (need_split_before ? 1 : 0), post_bb_id))
 
     from_unionsplit || delete!(state.split_targets, length(state.new_cfg_blocks))
-    orig_succs = copy(state.new_cfg_blocks[end].succs)
+    local orig_succs
+    need_split && (orig_succs = copy(state.new_cfg_blocks[end].succs))
     empty!(state.new_cfg_blocks[end].succs)
     if need_split_before
         l = length(state.new_cfg_blocks)
@@ -164,7 +166,6 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat
         from_unionsplit || push!(state.split_targets, length(state.new_cfg_blocks))
     end
     new_block_range = (length(state.new_cfg_blocks)-length(inlinee_cfg.blocks)+1):length(state.new_cfg_blocks)
-    push!(state.inserted_block_ranges, new_block_range)
 
     # Fixup the edges of the newely added blocks
     for (old_block, new_block) in enumerate(bb_rename_range)
@@ -204,53 +205,51 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat
             end
         end
     end
+    any_edges || push!(state.dead_blocks, post_bb_id)
 
-    if !any_edges
-        push!(state.dead_blocks, post_bb_id)
-    end
+    return nothing
 end
 
-function cfg_inline_unionsplit!(ir::IRCode, idx::Int, item::UnionSplit, state::CFGInliningState)
-    block = block_for_inst(ir, idx)
-    inline_into_block!(state, block)
+function cfg_inline_unionsplit!(ir::IRCode, idx::Int,
+                                (; fully_covered, #=atype,=# cases, bbs)::UnionSplit,
+                                state::CFGInliningState)
+    inline_into_block!(state, block_for_inst(ir, idx))
     from_bbs = Int[]
     delete!(state.split_targets, length(state.new_cfg_blocks))
     orig_succs = copy(state.new_cfg_blocks[end].succs)
     empty!(state.new_cfg_blocks[end].succs)
-    for (i, (_, case)) in enumerate(item.cases)
+    for i in 1:length(cases)
         # The condition gets sunk into the previous block
         # Add a block for the union-split body
         push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
         cond_bb = length(state.new_cfg_blocks)-1
         push!(state.new_cfg_blocks[end].preds, cond_bb)
         push!(state.new_cfg_blocks[cond_bb].succs, cond_bb+1)
+        case = cases[i].item
         if isa(case, InliningTodo)
             spec = case.spec::ResolvedInliningSpec
             if !spec.linear_inline_eligible
                 cfg_inline_item!(ir, idx, spec, state, true)
             end
         end
-        bb = length(state.new_cfg_blocks)
-        push!(from_bbs, bb)
+        push!(from_bbs, length(state.new_cfg_blocks))
         # TODO: Right now we unconditionally generate a fallback block
         # in case of subtyping errors - This is probably unnecessary.
-        if true # i != length(item.cases) || !item.fully_covered
+        if true # i != length(cases) || !fully_covered
             # This block will have the next condition or the final else case
             push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
             push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks))
             push!(state.new_cfg_blocks[end].preds, cond_bb)
-            push!(item.bbs, length(state.new_cfg_blocks))
+            push!(bbs, length(state.new_cfg_blocks))
         end
     end
     # The edge from the fallback block.
-    if !item.fully_covered
-        push!(from_bbs, length(state.new_cfg_blocks))
-    end
+    fully_covered || push!(from_bbs, length(state.new_cfg_blocks))
     # This block will be the block everyone returns to
     push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx), from_bbs, orig_succs))
     join_bb = length(state.new_cfg_blocks)
     push!(state.split_targets, join_bb)
-    push!(item.bbs, join_bb)
+    push!(bbs, join_bb)
     for bb in from_bbs
         push!(state.new_cfg_blocks[bb].succs, join_bb)
     end
@@ -258,8 +257,10 @@ end
 
 function finish_cfg_inline!(state::CFGInliningState)
     new_range = (state.first_bb + 1):length(state.cfg.blocks)
-    l = length(state.new_cfg_blocks)
-    state.bb_rename[new_range] = (l+1:l+length(new_range))
+    state.bb_rename[new_range] = let
+        l = length(state.new_cfg_blocks)
+        l+1:l+length(new_range)
+    end
     append!(state.new_cfg_blocks, state.cfg.blocks[new_range])
 
     # Rename edges original bbs
@@ -304,28 +305,45 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                          boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     # Ok, do the inlining here
     spec = item.spec::ResolvedInliningSpec
+    sparam_vals = item.mi.sparam_vals
+    def = item.mi.def::Method
     inline_cfg = spec.ir.cfg
-    stmt = compact.result[idx][:inst]
     linetable_offset::Int32 = length(linetable)
     # Append the linetable of the inlined function to our line table
     inlined_at = Int(compact.result[idx][:line])
-    for entry in spec.ir.linetable
-        push!(linetable, LineInfoNode(entry.module, entry.method, entry.file, entry.line,
-            (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset : inlined_at)))
+    topline::Int32 = linetable_offset + Int32(1)
+    coverage = coverage_enabled(def.module)
+    push!(linetable, LineInfoNode(def.module, def.name, def.file, Int(def.line), inlined_at))
+    oldlinetable = spec.ir.linetable
+    for oldline in 1:length(oldlinetable)
+        entry = oldlinetable[oldline]
+        newentry = LineInfoNode(entry.module, entry.method, entry.file, entry.line,
+            (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at))
+        if oldline == 1
+            # check for a duplicate on the first iteration (likely true)
+            if newentry === linetable[topline]
+                continue
+            else
+                linetable_offset += 1
+            end
+        end
+        push!(linetable, newentry)
+    end
+    if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline
+        insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline))
     end
-    (; def) = mi = item.mi
     nargs_def = def.nargs::Int32
     isva = nargs_def > 0 && def.isva
+    sig = def.sig
     if isva
-        vararg = mk_tuplecall!(compact, argexprs[nargs_def:end], compact.result[idx][:line])
+        vararg = mk_tuplecall!(compact, argexprs[nargs_def:end], topline)
         argexprs = Any[argexprs[1:(nargs_def - 1)]..., vararg]
     end
-    is_opaque = isa(def, Method) && def.is_for_opaque_closure
-    if is_opaque
+    if def.is_for_opaque_closure
         # Replace the first argument by a load of the capture environment
         argexprs[1] = insert_node_here!(compact,
             NewInstruction(Expr(:call, GlobalRef(Core, :getfield), argexprs[1], QuoteNode(:captures)),
-            spec.ir.argtypes[1], compact.result[idx][:line]))
+            spec.ir.argtypes[1], topline))
     end
     flag = compact.result[idx][:flag]
     boundscheck_idx = boundscheck
@@ -339,7 +357,6 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
     local return_value
     # Special case inlining that maintains the current basic block if there's only one BB in the target
     if spec.linear_inline_eligible
-        terminator = spec.ir[SSAValue(last(inline_cfg.blocks[1].stmts))]
         #compact[idx] = nothing
         inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx)
         for ((_, idx′), stmt′) in inline_compact
@@ -347,15 +364,14 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
             # face of rename_arguments! mutating in place - should figure out
             # something better eventually.
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, def.sig, mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck_idx, compact)
             if isa(stmt′, ReturnNode)
                 val = stmt′.val
                 isa(val, SSAValue) && (compact.used_ssas[val.id] += 1)
                 return_value = SSAValue(idx′)
                 inline_compact[idx′] = val
-                inline_compact.result[idx′][:type] = (isa(val, Argument) || isa(val, Expr)) ?
-                    compact_exprtype(compact, val) :
-                    compact_exprtype(inline_compact, val)
+                inline_compact.result[idx′][:type] =
+                    argextype(val, isa(val, Argument) || isa(val, Expr) ? compact : inline_compact)
                 break
             end
             inline_compact[idx′] = stmt′
@@ -374,7 +390,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx)
         for ((_, idx′), stmt′) in inline_compact
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, def.sig, mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck_idx, compact)
             if isa(stmt′, ReturnNode)
                 if isdefined(stmt′, :val)
                     val = stmt′.val
@@ -383,9 +399,8 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                     push!(pn.edges, inline_compact.active_result_bb-1)
                     if isa(val, GlobalRef) || isa(val, Expr)
                         stmt′ = val
-                        inline_compact.result[idx′][:type] = (isa(val, Argument) || isa(val, Expr)) ?
-                            compact_exprtype(compact, val) :
-                            compact_exprtype(inline_compact, val)
+                        inline_compact.result[idx′][:type] =
+                            argextype(val, isa(val, Expr) ? compact : inline_compact)
                         insert_node_here!(inline_compact, NewInstruction(GotoNode(post_bb_id),
                                           Any, compact.result[idx′][:line]),
                                           true)
@@ -394,7 +409,6 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                         push!(pn.values, val)
                         stmt′ = GotoNode(post_bb_id)
                     end
-
                 end
             elseif isa(stmt′, GotoNode)
                 stmt′ = GotoNode(stmt′.label + bb_offset)
@@ -421,27 +435,29 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
             return_value = pn.values[1]
         else
             return_value = insert_node_here!(compact,
-                NewInstruction(pn, compact_exprtype(compact, SSAValue(idx)), compact.result[idx][:line]))
+                NewInstruction(pn, argextype(SSAValue(idx), compact), compact.result[idx][:line]))
         end
     end
     return_value
 end
 
-const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)")
+const FATAL_TYPE_BOUND_ERROR = ErrorException("fatal error in type inference (type bound)")
 
 function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
                                argexprs::Vector{Any}, linetable::Vector{LineInfoNode},
-                               item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
+                               (; fully_covered, atype, cases, bbs)::UnionSplit,
+                               boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     stmt, typ, line = compact.result[idx][:inst], compact.result[idx][:type], compact.result[idx][:line]
-    atype = item.atype
-    generic_bb = item.bbs[end-1]
-    join_bb = item.bbs[end]
-    bb = compact.active_result_bb
+    join_bb = bbs[end]
     pn = PhiNode()
-    has_generic = false
-    @assert length(item.bbs) > length(item.cases)
-    for ((metharg, case), next_cond_bb) in zip(item.cases, item.bbs)
-        @assert !isa(metharg, UnionAll)
+    local bb = compact.active_result_bb
+    @assert length(bbs) > length(cases)
+    for i in 1:length(cases)
+        ithcase = cases[i]
+        metharg = ithcase.sig
+        case = ithcase.item
+        next_cond_bb = bbs[i]
+        @assert isa(metharg, DataType)
         cond = true
         aparams, mparams = atype.parameters::SimpleVector, metharg.parameters::SimpleVector
         @assert length(aparams) == length(mparams)
@@ -496,8 +512,8 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
     end
     bb += 1
     # We're now in the fall through block, decide what to do
-    if item.fully_covered
-        e = Expr(:call, GlobalRef(Core, :throw), fatal_type_bound_error)
+    if fully_covered
+        e = Expr(:call, GlobalRef(Core, :throw), FATAL_TYPE_BOUND_ERROR)
         insert_node_here!(compact, NewInstruction(e, Union{}, line))
         insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line))
         finish_current_bb!(compact, 0)
@@ -510,9 +526,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
     end
 
     # We're now in the join block.
-    compact.ssa_rename[compact.idx-1] = insert_node_here!(compact,
-        NewInstruction(pn, typ, line))
-    nothing
+    return insert_node_here!(compact, NewInstruction(pn, typ, line))
 end
 
 function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool)
@@ -566,14 +580,14 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
                 for aidx in 1:length(argexprs)
                     aexpr = argexprs[aidx]
                     if isa(aexpr, Expr) || isa(aexpr, GlobalRef)
-                        ninst = effect_free(NewInstruction(aexpr, compact_exprtype(compact, aexpr), compact.result[idx][:line]))
+                        ninst = effect_free(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line]))
                         argexprs[aidx] = insert_node_here!(compact, ninst)
                     end
                 end
                 if isa(item, InliningTodo)
                     compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs)
                 elseif isa(item, UnionSplit)
-                    ir_inline_unionsplit!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs)
+                    compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs)
                 end
                 compact[idx] = nothing
                 refinish && finish_current_bb!(compact, 0)
@@ -599,31 +613,32 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
 end
 
 # This assumes the caller has verified that all arguments to the _apply_iterate call are Tuples.
-function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int,
-        argexprs::Vector{Any}, atypes::Vector{Any}, arginfos::Vector{Any},
-        arg_start::Int, istate::InliningState)
-
+function rewrite_apply_exprargs!(
+    ir::IRCode, idx::Int, stmt::Expr, argtypes::Vector{Any},
+    arginfos::Vector{MaybeAbstractIterationInfo}, arg_start::Int, istate::InliningState, todo::Vector{Pair{Int, Any}})
+    flag = ir.stmts[idx][:flag]
+    argexprs = stmt.args
     new_argexprs = Any[argexprs[arg_start]]
-    new_atypes = Any[atypes[arg_start]]
+    new_argtypes = Any[argtypes[arg_start]]
     # loop over original arguments and flatten any known iterators
     for i in (arg_start+1):length(argexprs)
         def = argexprs[i]
-        def_type = atypes[i]
+        def_type = argtypes[i]
         thisarginfo = arginfos[i-arg_start]
         if thisarginfo === nothing
             if def_type isa PartialStruct
                 # def_type.typ <: Tuple is assumed
-                def_atypes = def_type.fields
+                def_argtypes = def_type.fields
             else
-                def_atypes = Any[]
+                def_argtypes = Any[]
                 if isa(def_type, Const) # && isa(def_type.val, Union{Tuple, SimpleVector}) is implied
                     for p in def_type.val
-                        push!(def_atypes, Const(p))
+                        push!(def_argtypes, Const(p))
                     end
                 else
-                    ti = widenconst(def_type)
+                    ti = widenconst(def_type)::DataType # checked by `is_valid_type_for_apply_rewrite`
                     if ti.name === NamedTuple_typename
-                        ti = ti.parameters[2]
+                        ti = ti.parameters[2]::DataType # checked by `is_valid_type_for_apply_rewrite`
                     end
                     for p in ti.parameters
                         if isa(p, DataType) && isdefined(p, :instance)
@@ -632,13 +647,13 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
                         elseif isconstType(p)
                             p = Const(p.parameters[1])
                         end
-                        push!(def_atypes, p)
+                        push!(def_argtypes, p)
                     end
                 end
             end
-            # now push flattened types into new_atypes and getfield exprs into new_argexprs
-            for j in 1:length(def_atypes)
-                def_atype = def_atypes[j]
+            # now push flattened types into new_argtypes and getfield exprs into new_argexprs
+            for j in 1:length(def_argtypes)
+                def_atype = def_argtypes[j]
                 if isa(def_atype, Const) && is_inlineable_constant(def_atype.val)
                     new_argexpr = quoted(def_atype.val)
                 else
@@ -646,7 +661,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
                     new_argexpr = insert_node!(ir, idx, NewInstruction(new_call, def_atype))
                 end
                 push!(new_argexprs, new_argexpr)
-                push!(new_atypes, def_atype)
+                push!(new_argtypes, def_atype)
             end
         else
             state = Core.svec()
@@ -655,30 +670,28 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
                 new_stmt = Expr(:call, argexprs[2], def, state...)
                 state1 = insert_node!(ir, idx, NewInstruction(new_stmt, call.rt))
                 new_sig = with_atype(call_sig(ir, new_stmt)::Signature)
-                info = call.info
-                handled = false
-                if isa(info, ConstCallInfo)
-                    if maybe_handle_const_call!(ir, state1.id, new_stmt, info, new_sig,
-                        call.rt, istate, false, todo)
-                        handled = true
-                    else
-                        info = info.call
-                    end
+                new_info = call.info
+                if isa(new_info, ConstCallInfo)
+                    maybe_handle_const_call!(
+                        ir, state1.id, new_stmt, new_info, flag,
+                        new_sig, istate, todo) && @goto analyzed
+                    new_info = new_info.call # cascade to the non-constant handling
                 end
-                if !handled && (isa(info, MethodMatchInfo) || isa(info, UnionSplitInfo))
-                    info = isa(info, MethodMatchInfo) ?
-                        MethodMatchInfo[info] : info.matches
+                if isa(new_info, MethodMatchInfo) || isa(new_info, UnionSplitInfo)
+                    new_infos = isa(new_info, MethodMatchInfo) ? MethodMatchInfo[new_info] : new_info.matches
                     # See if we can inline this call to `iterate`
-                    analyze_single_call!(ir, todo, state1.id, new_stmt,
-                        new_sig, call.rt, info, istate)
+                    analyze_single_call!(
+                        ir, state1.id, new_stmt, new_infos, flag,
+                        new_sig, istate, todo)
                 end
+                @label analyzed
                 if i != length(thisarginfo.each)
                     valT = getfield_tfunc(call.rt, Const(1))
                     val_extracted = insert_node!(ir, idx, NewInstruction(
                         Expr(:call, GlobalRef(Core, :getfield), state1, 1),
                         valT))
                     push!(new_argexprs, val_extracted)
-                    push!(new_atypes, valT)
+                    push!(new_argtypes, valT)
                     state_extracted = insert_node!(ir, idx, NewInstruction(
                         Expr(:call, GlobalRef(Core, :getfield), state1, 2),
                         getfield_tfunc(call.rt, Const(2))))
@@ -687,56 +700,41 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
             end
         end
     end
-    return new_argexprs, new_atypes
-end
-
-function rewrite_invoke_exprargs!(argexprs::Vector{Any})
-    argexpr0 = argexprs[2]
-    argexprs = argexprs[4:end]
-    pushfirst!(argexprs, argexpr0)
-    return argexprs
-end
-
-function singleton_type(@nospecialize(ft))
-    if isa(ft, Const)
-        return ft.val
-    elseif ft isa DataType && isdefined(ft, :instance)
-        return ft.instance
-    end
-    return nothing
+    stmt.args = new_argexprs
+    return new_argtypes
 end
 
 function compileable_specialization(et::Union{EdgeTracker, Nothing}, match::MethodMatch)
-    mi = specialize_method(match, false, true)
+    mi = specialize_method(match; compilesig=true) # may be `nothing`; pushed onto `et` below only when both are non-`nothing`
     mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
     return mi
 end
 
-function compileable_specialization(et::Union{EdgeTracker, Nothing}, result::InferenceResult)
-    mi = specialize_method(result.linfo.def::Method, result.linfo.specTypes,
-        result.linfo.sparam_vals, false, true)
+function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo)::InferenceResult) # destructures the `linfo` field of the argument
+    mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals; compilesig=true) # may be `nothing`; see the guard below
     mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
     return mi
 end
 
-function resolve_todo(todo::InliningTodo, state::InliningState)
-    spec = todo.spec::DelayedInliningSpec
+function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
+    mi = todo.mi
+    (; match, argtypes) = todo.spec::DelayedInliningSpec
+    et = state.et
 
     #XXX: update_valid_age!(min_valid[1], max_valid[1], sv)
     isconst, src = false, nothing
-    if isa(spec.match, InferenceResult)
-        let inferred_src = spec.match.src
-            if isa(inferred_src, Const)
-                if !is_inlineable_constant(inferred_src.val)
-                    return compileable_specialization(state.et, spec.match)
-                end
-                isconst, src = true, quoted(inferred_src.val)
-            else
-                isconst, src = false, inferred_src
+    if isa(match, InferenceResult)
+        inferred_src = match.src
+        if isa(inferred_src, Const)
+            if !is_inlineable_constant(inferred_src.val)
+                return compileable_specialization(et, match)
             end
+            isconst, src = true, quoted(inferred_src.val)
+        else
+            isconst, src = false, inferred_src
         end
     else
-        linfo = get(state.mi_cache, todo.mi, nothing)
+        linfo = get(state.mi_cache, mi, nothing)
         if linfo isa CodeInstance
             if invoke_api(linfo) == 2
                 # in this case function can be inlined to a constant
@@ -749,49 +747,57 @@ function resolve_todo(todo::InliningTodo, state::InliningState)
         end
     end
 
-    et = state.et
-
     if isconst && et !== nothing
-        push!(et, todo.mi)
+        push!(et, mi)
         return ConstantCase(src)
     end
 
-    if src !== nothing
-        src = state.policy(src)
+    # the duplicated check might have been done already within `analyze_method!`, but still
+    # we need it here too since we may come here directly using a constant-prop' result
+    if !state.params.inlining || is_stmt_noinline(flag)
+        return compileable_specialization(et, match)
     end
 
+    src = inlining_policy(state.interp, src, flag, mi, argtypes)
+
     if src === nothing
-        return compileable_specialization(et, spec.match)
+        return compileable_specialization(et, match)
     end
 
     if isa(src, IRCode)
         src = copy(src)
     end
 
-    et !== nothing && push!(et, todo.mi)
-    return InliningTodo(todo.mi, src)
+    et !== nothing && push!(et, mi)
+    return InliningTodo(mi, src)
 end
 
-function resolve_todo(todo::UnionSplit, state::InliningState)
-    UnionSplit(todo.fully_covered, todo.atype,
-        Pair{Any,Any}[sig=>resolve_todo(item, state) for (sig, item) in todo.cases])
+function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8) # returns a new UnionSplit with every case item resolved
+    ncases = length(cases)
+    newcases = Vector{InliningCase}(undef, ncases) # preallocated; every slot is assigned in the loop below
+    for i in 1:ncases
+        (; sig, item) = cases[i]
+        newitem = resolve_todo(item, state, flag) # resolve each case with the same `state` and `flag`
+        newcases[i] = InliningCase(sig, newitem) # assign the slot; `push!` would append after `ncases` undefined entries
+    end
+    return UnionSplit(fully_covered, atype, newcases)
 end
 
 function validate_sparams(sparams::SimpleVector)
     for i = 1:length(sparams)
-        (isa(sparams[i], TypeVar) || isa(sparams[i], Core.TypeofVararg)) && return false
+        (isa(sparams[i], TypeVar) || isvarargtype(sparams[i])) && return false # any TypeVar or Vararg entry makes the whole vector invalid
     end
     return true
 end
 
-function analyze_method!(match::MethodMatch, atypes::Vector{Any},
-                         state::InliningState, @nospecialize(stmttyp))
+function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
+                         flag::UInt8, state::InliningState)
     method = match.method
     methsig = method.sig
 
     # Check that we habe the correct number of arguments
     na = Int(method.nargs)
-    npassedargs = length(atypes)
+    npassedargs = length(argtypes)
     if na != npassedargs && !(na > 0 && method.isva)
         # we have a method match only because an earlier
         # inference step shortened our call args list, even
@@ -805,21 +811,19 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any},
 
     et = state.et
 
-    if !state.params.inlining
+    if !state.params.inlining || is_stmt_noinline(flag)
         return compileable_specialization(et, match)
     end
 
     # See if there exists a specialization for this method signature
-    mi = specialize_method(match, true) # Union{Nothing, MethodInstance}
-    if !isa(mi, MethodInstance)
-        return compileable_specialization(et, match)
-    end
+    mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance}
+    isa(mi, MethodInstance) || return compileable_specialization(et, match)
 
-    todo = InliningTodo(mi, match, atypes, stmttyp)
+    todo = InliningTodo(mi, match, argtypes)
     # If we don't have caches here, delay resolving this MethodInstance
     # until the batch inlining step (or an external post-processing pass)
     state.mi_cache === nothing && return todo
-    return resolve_todo(todo, state)
+    return resolve_todo(todo, state, flag)
 end
 
 function InliningTodo(mi::MethodInstance, ir::IRCode)
@@ -836,71 +840,26 @@ function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Array{UInt8, 1}})
     end
 end
 
-# Neither the product iterator not CartesianIndices are available
-# here, so use this poor man's version
-struct SimpleCartesian
-    ranges::Vector{UnitRange{Int}}
-end
-function iterate(s::SimpleCartesian, state::Vector{Int}=Int[1 for _ in 1:length(s.ranges)])
-    state[end] > last(s.ranges[end]) && return nothing
-    vals = copy(state)
-    any = false
-    for i = 1:length(s.ranges)
-        if state[i] < last(s.ranges[i])
-            for j = 1:(i-1)
-                state[j] = first(s.ranges[j])
-            end
-            state[i] += 1
-            any = true
-            break
-        end
-    end
-    if !any
-        state[end] += 1
-    end
-    (vals, state)
-end
-
-# Given a signure, iterate over the signatures to union split over
-struct UnionSplitSignature
-    it::SimpleCartesian
-    typs::Vector{Any}
-end
-
-function UnionSplitSignature(atypes::Vector{Any})
-    typs = Any[uniontypes(widenconst(atypes[i])) for i = 1:length(atypes)]
-    ranges = UnitRange{Int}[1:length(typs[i]) for i = 1:length(typs)]
-    return UnionSplitSignature(SimpleCartesian(ranges), typs)
-end
-
-function iterate(split::UnionSplitSignature, state::Vector{Int}...)
-    y = iterate(split.it, state...)
-    y === nothing && return nothing
-    idxs, state = y
-    sig = Any[split.typs[i][j] for (i, j) in enumerate(idxs)]
-    return (sig, state)
-end
-
-function handle_single_case!(ir::IRCode, stmt::Expr, idx::Int, @nospecialize(case), isinvoke::Bool, todo::Vector{Pair{Int, Any}})
+function handle_single_case!( # applies one resolved `case` to the statement at `idx`; always returns `nothing`
+    ir::IRCode, idx::Int, stmt::Expr,
+    @nospecialize(case), todo::Vector{Pair{Int, Any}}, isinvoke::Bool = false)
     if isa(case, ConstantCase)
         ir[SSAValue(idx)] = case.val
     elseif isa(case, MethodInstance)
-        if isinvoke
-            stmt.args = rewrite_invoke_exprargs!(stmt.args)
-        end
+        isinvoke && rewrite_invoke_exprargs!(stmt) # rewrites `stmt.args` in place before the statement becomes an `:invoke`
         stmt.head = :invoke
         pushfirst!(stmt.args, case)
     elseif case === nothing
         # Do, well, nothing
     else
-        if isinvoke
-            stmt.args = rewrite_invoke_exprargs!(stmt.args)
-        end
+        isinvoke && rewrite_invoke_exprargs!(stmt) # same in-place argument rewrite before queueing the InliningTodo
         push!(todo, idx=>(case::InliningTodo))
     end
     nothing
 end
 
+rewrite_invoke_exprargs!(expr::Expr) = (expr.args = invoke_rewrite(expr.args); expr) # replaces `expr.args` in place and returns `expr`
+
 function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::OptimizationParams)
     if isa(typ, Const) && isa(typ.val, SimpleVector)
         length(typ.val) > params.MAX_TUPLE_SPLAT && return false
@@ -912,9 +871,7 @@ function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::Optimizatio
     typ = widenconst(typ)
     if isa(typ, DataType) && typ.name === NamedTuple_typename
         typ = typ.parameters[2]
-        while isa(typ, TypeVar)
-            typ = typ.ub
-        end
+        typ = unwraptv(typ)
     end
     isa(typ, DataType) || return false
     if typ.name === Tuple.name
@@ -924,14 +881,12 @@ function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::Optimizatio
     end
 end
 
-function inline_splatnew!(ir::IRCode, idx::Int)
-    stmt = ir.stmts[idx][:inst]::Expr
-    ty = ir.stmts[idx][:type]
-    nf = nfields_tfunc(ty)
+function inline_splatnew!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(rt))
+    nf = nfields_tfunc(rt)
     if nf isa Const
         eargs = stmt.args
         tup = eargs[2]
-        tt = argextype(tup, ir, ir.sptypes)
+        tt = argextype(tup, ir)
         tnf = nfields_tfunc(tt)
         # TODO: hoisting this tnf.val === nf.val check into codegen
         # would enable us to almost always do this transform
@@ -953,25 +908,24 @@ end
 
 function call_sig(ir::IRCode, stmt::Expr)
     isempty(stmt.args) && return nothing
-    ft = argextype(stmt.args[1], ir, ir.sptypes)
+    ft = argextype(stmt.args[1], ir) # type of the first entry of `stmt.args` (the callee position)
     has_free_typevars(ft) && return nothing
     f = singleton_type(ft)
     f === Core.Intrinsics.llvmcall && return nothing
     f === Core.Intrinsics.cglobal && return nothing
-    atypes = Vector{Any}(undef, length(stmt.args))
-    atypes[1] = ft
+    argtypes = Vector{Any}(undef, length(stmt.args)) # one slot per entry of `stmt.args`
+    argtypes[1] = ft # slot 1 holds the callee type computed above
     for i = 2:length(stmt.args)
-        a = argextype(stmt.args[i], ir, ir.sptypes)
+        a = argextype(stmt.args[i], ir) # type of the i-th argument; checked on the next line
         (a === Bottom || isvarargtype(a)) && return nothing
-        atypes[i] = a
+        argtypes[i] = a
     end
-
-    Signature(f, ft, atypes)
+    return Signature(f, ft, argtypes) # every early exit above returns `nothing` instead
 end
 
-function inline_apply!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sig::Signature,
-                       state::InliningState)
-    stmt = ir.stmts[idx][:inst]
+function inline_apply!(
+    ir::IRCode, idx::Int, stmt::Expr, sig::Signature,
+    state::InliningState, todo::Vector{Pair{Int, Any}})
     while sig.f === Core._apply_iterate
         info = ir.stmts[idx][:info]
         if isa(info, UnionSplitApplyCallInfo)
@@ -987,17 +941,17 @@ function inline_apply!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sig::
             new_info = info = false
         end
         arg_start = 3
-        atypes = sig.atypes
-        if arg_start > length(atypes)
+        argtypes = sig.argtypes
+        if arg_start > length(argtypes)
             return nothing
         end
-        ft = atypes[arg_start]
+        ft = argtypes[arg_start]
         if ft isa Const && ft.val === Core.tuple
             # if one argument is a tuple already, and the rest are empty, we can just return it
             # e.g. rewrite `((t::Tuple)...,)` to `t`
             nonempty_idx = 0
-            for i = (arg_start + 1):length(atypes)
-                ti = atypes[i]
+            for i = (arg_start + 1):length(argtypes)
+                ti = argtypes[i]
                 ti ⊑ Tuple{} && continue
                 if ti ⊑ Tuple && nonempty_idx == 0
                     nonempty_idx = i
@@ -1013,25 +967,27 @@ function inline_apply!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sig::
         end
         # Try to figure out the signature of the function being called
         # and if rewrite_apply_exprargs can deal with this form
-        infos = Any[]
-        for i = (arg_start + 1):length(atypes)
+        arginfos = MaybeAbstractIterationInfo[]
+        for i = (arg_start + 1):length(argtypes)
             thisarginfo = nothing
-            if !is_valid_type_for_apply_rewrite(atypes[i], state.params)
+            if !is_valid_type_for_apply_rewrite(argtypes[i], state.params)
                 if isa(info, ApplyCallInfo) && info.arginfo[i-arg_start] !== nothing
                     thisarginfo = info.arginfo[i-arg_start]
                 else
                     return nothing
                 end
             end
-            push!(infos, thisarginfo)
+            push!(arginfos, thisarginfo)
         end
         # Independent of whether we can inline, the above analysis allows us to rewrite
         # this apply call to a regular call
-        stmt.args, atypes = rewrite_apply_exprargs!(ir, todo, idx, stmt.args, atypes, infos, arg_start, state)
+        argtypes = rewrite_apply_exprargs!(
+            ir, idx, stmt, argtypes,
+            arginfos, arg_start, state, todo)
         ir.stmts[idx][:info] = new_info
         has_free_typevars(ft) && return nothing
         f = singleton_type(ft)
-        sig = Signature(f, ft, atypes)
+        sig = Signature(f, ft, argtypes)
     end
     sig
 end
@@ -1043,51 +999,39 @@ is_builtin(s::Signature) =
     isa(s.f, Builtin) ||
     s.ft ⊑ Builtin
 
-function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, result)::InvokeCallInfo,
-        state::InliningState, todo::Vector{Pair{Int, Any}})
-    stmt = ir.stmts[idx][:inst]
-    calltype = ir.stmts[idx][:type]
-
+function inline_invoke!( # handles a call described by an `InvokeCallInfo`; always returns `nothing`
+    ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    match = info.match
     if !match.fully_covers
         # TODO: We could union split out the signature check and continue on
         return nothing
     end
-
-    atypes = sig.atypes
-    atype0 = atypes[2]
-    atypes = atypes[4:end]
-    pushfirst!(atypes, atype0)
-
+    argtypes = invoke_rewrite(sig.argtypes) # presumably the same reshuffle as the removed lines (entry 2, then 4:end) -- confirm against `invoke_rewrite`
+    result = info.result
     if isa(result, InferenceResult)
-        item = InliningTodo(result, atypes, calltype)
-        validate_sparams(item.mi.sparam_vals) || return nothing
-        if argtypes_to_type(atypes) <: item.mi.def.sig
-            state.mi_cache !== nothing && (item = resolve_todo(item, state))
-            handle_single_case!(ir, stmt, idx, item, true, todo)
+        (; mi) = item = InliningTodo(result, argtypes) # binds both the todo item and its `mi` field
+        validate_sparams(mi.sparam_vals) || return nothing
+        if argtypes_to_type(argtypes) <: mi.def.sig
+            state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) # resolved here only when `mi_cache` is available
+            handle_single_case!(ir, idx, stmt, item, todo, true) # trailing `true` is the `isinvoke` argument
             return nothing
         end
     end
-
-    result = analyze_method!(match, argtypes, state, calltype)
-    handle_single_case!(ir, stmt, idx, result, true, todo)
+    item = analyze_method!(match, argtypes, flag, state) # reached when the InferenceResult branch above did not return
+    handle_single_case!(ir, idx, stmt, item, todo, true) # trailing `true` is the `isinvoke` argument
     return nothing
 end
 
 function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info), state::InliningState)
     if isa(info, OpaqueClosureCreateInfo)
-        unspec_call_info = info.unspec.info
-        if isa(unspec_call_info, ConstCallInfo)
-            unspec_call_info = unspec_call_info.call
-        end
-        isa(unspec_call_info, OpaqueClosureCallInfo) || return
-        lbt = argextype(stmt.args[3], ir, ir.sptypes)
+        lbt = argextype(stmt.args[3], ir)
         lb, exact = instanceof_tfunc(lbt)
         exact || return
-        ubt = argextype(stmt.args[4], ir, ir.sptypes)
+        ubt = argextype(stmt.args[4], ir)
         ub, exact = instanceof_tfunc(ubt)
         exact || return
         # Narrow opaque closure type
-
         newT = widenconst(tmeet(tmerge(lb, info.unspec.rt), ub))
         if newT != ub
             # N.B.: Narrowing the ub requires a backdge on the mi whose type
@@ -1101,8 +1045,8 @@ end
 # As a matter of convenience, this pass also computes effect-freenes.
 # For primitives, we do that right here. For proper calls, we will
 # discover this when we consult the caches.
-function check_effect_free!(ir::IRCode, @nospecialize(stmt), @nospecialize(calltype), idx::Int)
-    if stmt_effect_free(stmt, calltype, ir, ir.sptypes)
+function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt)) # may set IR_FLAG_EFFECT_FREE on statement `idx`
+    if stmt_effect_free(stmt, rt, ir) # the flag is only ever set here, never cleared
         ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE
     end
 end
@@ -1110,20 +1054,20 @@ end
 # Handles all analysis and inlining of intrinsics and builtins. In particular,
 # this method does not access the method table or otherwise process generic
 # functions.
-function process_simple!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, state::InliningState)
+function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vector{Pair{Int, Any}})
     stmt = ir.stmts[idx][:inst]
-    calltype = ir.stmts[idx][:type]
+    rt = ir.stmts[idx][:type]
     if !(stmt isa Expr)
-        check_effect_free!(ir, stmt, calltype, idx)
+        check_effect_free!(ir, idx, stmt, rt)
         return nothing
     end
     if stmt.head !== :call
         if stmt.head === :splatnew
-            inline_splatnew!(ir, idx)
+            inline_splatnew!(ir, idx, stmt, rt)
         elseif stmt.head === :new_opaque_closure
             narrow_opaque_closure!(ir, stmt, ir.stmts[idx][:info], state)
         end
-        check_effect_free!(ir, stmt, calltype, idx)
+        check_effect_free!(ir, idx, stmt, rt)
         return nothing
     end
 
@@ -1133,17 +1077,33 @@ function process_simple!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sta
     sig === nothing && return nothing
 
     # Handle _apply_iterate
-    sig = inline_apply!(ir, todo, idx, sig, state)
+    sig = inline_apply!(ir, idx, stmt, sig, state, todo)
     sig === nothing && return nothing
 
     # Check if we match any of the early inliners
-    res = early_inline_special_case(ir, sig, stmt, state.params, calltype)
-    if res !== nothing
-        ir.stmts[idx][:inst] = res
+    earlyres = early_inline_special_case(ir, stmt, rt, sig, state.params)
+    if isa(earlyres, SomeCase)
+        ir.stmts[idx][:inst] = earlyres.val
+        return nothing
+    end
+    if (sig.f === modifyfield! || sig.ft ⊑ typeof(modifyfield!)) && 5 <= length(stmt.args) <= 6
+        let info = ir.stmts[idx][:info]
+            info isa MethodResultPure && (info = info.info)
+            info isa ConstCallInfo && (info = info.call)
+            info isa MethodMatchInfo || return nothing
+            length(info.results) == 1 || return nothing
+            match = info.results[1]::MethodMatch
+            match.fully_covers || return nothing
+            case = compileable_specialization(state.et, match)
+            case === nothing && return nothing
+            stmt.head = :invoke_modify
+            pushfirst!(stmt.args, case)
+            ir.stmts[idx][:inst] = stmt
+        end
         return nothing
     end
 
-    check_effect_free!(ir, stmt, calltype, idx)
+    check_effect_free!(ir, idx, stmt, rt)
 
     if sig.f !== Core.invoke && is_builtin(sig)
         # No inlining for builtins (other invoke/apply)
@@ -1153,31 +1113,36 @@ function process_simple!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sta
     sig = with_atype(sig)
 
     # Special case inliners for regular functions
-    if late_inline_special_case!(ir, sig, idx, stmt, state.params) || is_return_type(sig.f)
-        check_effect_free!(ir, ir.stmts[idx][:inst], calltype, idx)
+    lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state.params)
+    if isa(lateres, SomeCase)
+        ir[SSAValue(idx)] = lateres.val
+        check_effect_free!(ir, idx, lateres.val, rt)
+        return nothing
+    elseif is_return_type(sig.f)
+        check_effect_free!(ir, idx, stmt, rt)
         return nothing
     end
 
-    return sig
+    return stmt, sig
 end
 
-function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, @nospecialize(stmt),
-        sig::Signature, @nospecialize(calltype), infos::Vector{MethodMatchInfo},
-        state::InliningState)
-    cases = Pair{Any, Any}[]
-    signature_union = Union{}
-    only_method = nothing  # keep track of whether there is one matching method
-    too_many = false
+# TODO inline non-`isdispatchtuple`, union-split callsites
+function analyze_single_call!(
+    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    (; argtypes, atype) = sig
+    cases = InliningCase[]
+    local signature_union = Bottom
+    local only_method = nothing  # keep track of whether there is one matching method
     local meth
     local fully_covered = true
     for i in 1:length(infos)
         info = infos[i]
         meth = info.results
-        if meth === missing || meth.ambig
+        if meth.ambig
             # Too many applicable methods
             # Or there is a (partial?) ambiguity
-            too_many = true
-            break
+            return
         elseif length(meth) == 0
             # No applicable methods; try next union split
             continue
@@ -1191,86 +1156,120 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
             only_method = false
         end
         for match in meth
-            signature_union = Union{signature_union, match.spec_types}
-            if !isdispatchtuple(match.spec_types)
+            spec_types = match.spec_types
+            signature_union = Union{signature_union, spec_types}
+            if !isdispatchtuple(spec_types)
                 fully_covered = false
                 continue
             end
-            case = analyze_method!(match, sig.atypes, state, calltype)
-            if case === nothing
+            item = analyze_method!(match, argtypes, flag, state)
+            if item === nothing
                 fully_covered = false
                 continue
-            elseif _any(p->p[1] === match.spec_types, cases)
+            elseif _any(case->case.sig === spec_types, cases)
                 continue
             end
-            push!(cases, Pair{Any,Any}(match.spec_types, case))
+            push!(cases, InliningCase(spec_types, item))
         end
     end
 
-    too_many && return
-
-    signature_fully_covered = sig.atype <: signature_union
-    # If we're fully covered and there's only one applicable method,
-    # we inline, even if the signature is not a dispatch tuple
-    if signature_fully_covered && length(cases) == 0 && only_method isa Method
+    # if the signature is fully or mostly covered and there is only one applicable method,
+    # we can try to inline it even if the signature is not a dispatch tuple
+    if length(cases) == 0 && only_method isa Method
         if length(infos) > 1
             (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any),
-                sig.atype, only_method.sig)::SimpleVector
-            match = MethodMatch(metharg, methsp, only_method, true)
+                atype, only_method.sig)::SimpleVector
+            match = MethodMatch(metharg, methsp::SimpleVector, only_method, true)
         else
             meth = meth::MethodLookupResult
             @assert length(meth) == 1
             match = meth[1]
         end
-        fully_covered = true
-        case = analyze_method!(match, sig.atypes, state, calltype)
-        case === nothing && return
-        push!(cases, Pair{Any,Any}(match.spec_types, case))
-    end
-    if !signature_fully_covered
-        fully_covered = false
+        item = analyze_method!(match, argtypes, flag, state)
+        item === nothing && return
+        push!(cases, InliningCase(match.spec_types, item))
+        fully_covered = match.fully_covers
+    else
+        fully_covered &= atype <: signature_union
     end
 
     # If we only have one case and that case is fully covered, we may either
     # be able to do the inlining now (for constant cases), or push it directly
     # onto the todo list
     if fully_covered && length(cases) == 1
-        handle_single_case!(ir, stmt, idx, cases[1][2], false, todo)
-        return
+        handle_single_case!(ir, idx, stmt, cases[1].item, todo)
+    elseif length(cases) > 0
+        push!(todo, idx=>UnionSplit(fully_covered, atype, cases))
     end
-    length(cases) == 0 && return
-    push!(todo, idx=>UnionSplit(fully_covered, sig.atype, cases))
     return nothing
 end
 
-function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr,
-        info::ConstCallInfo, sig::Signature, @nospecialize(calltype),
-        state::InliningState,
-        isinvoke::Bool, todo::Vector{Pair{Int, Any}})
-    # when multiple matches are found, bail out and later inliner will union-split this signature
-    # TODO effectively use multiple constant analysis results here
-    length(info.results) == 1 || return false
-    result = info.results[1]
-    isa(result, InferenceResult) || return false
-
-    (; mi) = item = InliningTodo(result, sig.atypes, calltype)
-    validate_sparams(mi.sparam_vals) || return true
-    mthd_sig = mi.def.sig
-    mistypes = mi.specTypes
-    state.mi_cache !== nothing && (item = resolve_todo(item, state))
-    if sig.atype <: mthd_sig
-        handle_single_case!(ir, stmt, idx, item, isinvoke, todo)
-        return true
-    else
-        item === nothing && return true
-        # Union split out the error case
-        item = UnionSplit(false, sig.atype, Pair{Any, Any}[mistypes => item])
-        if isinvoke
-            stmt.args = rewrite_invoke_exprargs!(stmt.args)
+# try to create `InliningCase`s using constant-prop'ed results
+# currently it works only when constant-prop' succeeded for all (union-split) signatures
+# TODO use any of constant-prop'ed results, and leave the other unhandled cases to later
+# TODO this function duplicates a lot of logic from `analyze_single_call!`, factor it out
+function maybe_handle_const_call!(
+    ir::IRCode, idx::Int, stmt::Expr, info::ConstCallInfo, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    (; argtypes, atype) = sig
+    results = info.results
+    cases = InliningCase[] # TODO avoid this allocation for single cases ?
+    local fully_covered = true
+    local signature_union = Bottom
+    for result in results
+        isa(result, InferenceResult) || return false
+        (; mi) = item = InliningTodo(result, argtypes)
+        spec_types = mi.specTypes
+        signature_union = Union{signature_union, spec_types}
+        if !isdispatchtuple(spec_types)
+            fully_covered = false
+            continue
         end
-        push!(todo, idx=>item)
-        return true
+        if !validate_sparams(mi.sparam_vals)
+            fully_covered = false
+            continue
+        end
+        state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+        if item === nothing
+            fully_covered = false
+            continue
+        end
+        push!(cases, InliningCase(spec_types, item))
     end
+
+    # if the signature is fully covered and there is only one applicable method,
+    # we can try to inline it even if the signature is not a dispatch tuple
+    if length(cases) == 0 && length(results) == 1
+        (; mi) = item = InliningTodo(results[1]::InferenceResult, argtypes)
+        state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+        validate_sparams(mi.sparam_vals) || return true
+        item === nothing && return true
+        push!(cases, InliningCase(mi.specTypes, item))
+        fully_covered = atype <: mi.specTypes
+    else
+        fully_covered &= atype <: signature_union
+    end
+
+    # If we only have one case and that case is fully covered, we may either
+    # be able to do the inlining now (for constant cases), or push it directly
+    # onto the todo list
+    if fully_covered && length(cases) == 1
+        handle_single_case!(ir, idx, stmt, cases[1].item, todo)
+    elseif length(cases) > 0
+        push!(todo, idx=>UnionSplit(fully_covered, atype, cases))
+    end
+    return true
+end
+
+function handle_const_opaque_closure_call!(
+    ir::IRCode, idx::Int, stmt::Expr, result::InferenceResult, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    item = InliningTodo(result, sig.argtypes)
+    isdispatchtuple(item.mi.specTypes) || return
+    validate_sparams(item.mi.sparam_vals) || return
+    state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+    handle_single_case!(ir, idx, stmt, item, todo)
+    return nothing
 end
 
 function assemble_inline_todo!(ir::IRCode, state::InliningState)
@@ -1278,53 +1277,59 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
     todo = Pair{Int, Any}[]
     et = state.et
     for idx in 1:length(ir.stmts)
-        sig = process_simple!(ir, todo, idx, state)
-        sig === nothing && continue
+        simpleres = process_simple!(ir, idx, state, todo)
+        simpleres === nothing && continue
+        stmt, sig = simpleres
 
-        stmt = ir.stmts[idx][:inst]
-        calltype = ir.stmts[idx][:type]
         info = ir.stmts[idx][:info]
 
         # Check whether this call was @pure and evaluates to a constant
         if info isa MethodResultPure
-            if calltype isa Const && is_inlineable_constant(calltype.val)
-                ir.stmts[idx][:inst] = quoted(calltype.val)
+            rt = ir.stmts[idx][:type]
+            if rt isa Const && is_inlineable_constant(rt.val)
+                ir.stmts[idx][:inst] = quoted(rt.val)
                 continue
             end
             ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE
             info = info.info
         end
-
-        # Inference determined this couldn't be analyzed. Don't question it.
         if info === false
+            # Inference determined this couldn't be analyzed. Don't question it.
             continue
         end
 
-        # If inference arrived at this result by using constant propagation,
-        # it'll have performed a specialized analysis for just this case. Use its
-        # result.
-        if isa(info, ConstCallInfo)
-            if maybe_handle_const_call!(ir, idx, stmt, info, sig, calltype, state, sig.f === Core.invoke, todo)
-                continue
-            else
-                info = info.call
-            end
-        end
+        flag = ir.stmts[idx][:flag]
 
         if isa(info, OpaqueClosureCallInfo)
-            result = analyze_method!(info.match, sig.atypes, state, calltype)
-            handle_single_case!(ir, stmt, idx, result, false, todo)
+            result = info.result
+            if isa(result, InferenceResult)
+                handle_const_opaque_closure_call!(
+                    ir, idx, stmt, result, flag,
+                    sig, state, todo)
+            else
+                item = analyze_method!(info.match, sig.argtypes, flag, state)
+                handle_single_case!(ir, idx, stmt, item, todo)
+            end
             continue
         end
 
         # Handle invoke
         if sig.f === Core.invoke
             if isa(info, InvokeCallInfo)
-                inline_invoke!(ir, idx, sig, info, state, todo)
+                inline_invoke!(ir, idx, stmt, info, flag, sig, state, todo)
             end
             continue
         end
 
+        # if inference arrived here with constant-prop'ed result(s),
+        # we can perform a specialized analysis for just this case
+        if isa(info, ConstCallInfo)
+            maybe_handle_const_call!(
+                ir, idx, stmt, info, flag,
+                sig, state, todo) && continue
+            info = info.call # cascade to the non-constant handling
+        end
+
         # Ok, now figure out what method to call
         if isa(info, MethodMatchInfo)
             infos = MethodMatchInfo[info]
@@ -1334,14 +1339,14 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
             continue
         end
 
-        analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state)
+        analyze_single_call!(ir, idx, stmt, infos, flag, sig, state, todo)
     end
     todo
 end
 
 function mk_tuplecall!(compact::IncrementalCompact, args::Vector{Any}, line_idx::Int32)
     e = Expr(:call, TOP_TUPLE, args...)
-    etyp = tuple_tfunc(Any[compact_exprtype(compact, args[i]) for i in 1:length(args)])
+    etyp = tuple_tfunc(Any[argextype(args[i], compact) for i in 1:length(args)])
     return insert_node_here!(compact, NewInstruction(e, etyp, line_idx))
 end
 
@@ -1361,33 +1366,33 @@ function ispuretopfunction(@nospecialize(f))
         istopfunction(f, :promote_type)
 end
 
-function early_inline_special_case(ir::IRCode, s::Signature, e::Expr, params::OptimizationParams,
-                                   @nospecialize(etype))
-    f, ft, atypes = s.f, s.ft, s.atypes
-    if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes) == 3
+function early_inline_special_case(
+    ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature,
+    params::OptimizationParams)
+    (; f, ft, argtypes) = sig
+    if (f === typeassert || ft ⊑ typeof(typeassert)) && length(argtypes) == 3
         # typeassert(x::S, T) => x, when S<:T
-        a3 = atypes[3]
-        if (isType(a3) && !has_free_typevars(a3) && atypes[2] ⊑ a3.parameters[1]) ||
-            (isa(a3, Const) && isa(a3.val, Type) && atypes[2] ⊑ a3.val)
-            val = e.args[2]
-            val === nothing && return QuoteNode(val)
-            return val
+        a3 = argtypes[3]
+        if (isType(a3) && !has_free_typevars(a3) && argtypes[2] ⊑ a3.parameters[1]) ||
+            (isa(a3, Const) && isa(a3.val, Type) && argtypes[2] ⊑ a3.val)
+            val = stmt.args[2]
+            return SomeCase(val === nothing ? QuoteNode(val) : val)
         end
     end
 
     if params.inlining
-        if isa(etype, Const) # || isconstType(etype)
-            val = etype.val
+        if isa(type, Const) # || isconstType(type)
+            val = type.val
             is_inlineable_constant(val) || return nothing
             if isa(f, IntrinsicFunction)
-                if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, atypes[2:end])
-                    return quoted(val)
+                if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, argtypes[2:end])
+                    return SomeCase(quoted(val))
                 end
             elseif ispuretopfunction(f) || contains_is(_PURE_BUILTINS, f)
-                return quoted(val)
+                return SomeCase(quoted(val))
             elseif contains_is(_PURE_OR_ERROR_BUILTINS, f)
-                if _builtin_nothrow(f, atypes[2:end], etype)
-                    return quoted(val)
+                if _builtin_nothrow(f, argtypes[2:end], type)
+                    return SomeCase(quoted(val))
                 end
             end
         end
@@ -1396,46 +1401,42 @@ function early_inline_special_case(ir::IRCode, s::Signature, e::Expr, params::Op
     return nothing
 end
 
-function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::Expr, params::OptimizationParams)
-    f, ft, atypes = sig.f, sig.ft, sig.atypes
-    typ = ir.stmts[idx][:type]
-    if params.inlining && length(atypes) == 3 && istopfunction(f, :!==)
+function late_inline_special_case!(
+    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature,
+    params::OptimizationParams)
+    (; f, ft, argtypes) = sig
+    isinlining = params.inlining
+    if isinlining && length(argtypes) == 3 && istopfunction(f, :!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
         # and that works, even though inference generally avoids inferring the `!==` Method
-        if isa(typ, Const)
-            ir[SSAValue(idx)] = quoted(typ.val)
-            return true
+        if isa(type, Const)
+            return SomeCase(quoted(type.val))
         end
         cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3])
         cmp_call_ssa = insert_node!(ir, idx, effect_free(NewInstruction(cmp_call, Bool)))
         not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa)
-        ir[SSAValue(idx)] = not_call
-        return true
-    elseif params.inlining && length(atypes) == 3 && istopfunction(f, :(>:))
+        return SomeCase(not_call)
+    elseif isinlining && length(argtypes) == 3 && istopfunction(f, :(>:))
         # special-case inliner for issupertype
         # that works, even though inference generally avoids inferring the `>:` Method
-        if isa(typ, Const) && _builtin_nothrow(<:, Any[atypes[3], atypes[2]], typ)
-            ir[SSAValue(idx)] = quoted(typ.val)
-            return true
+        if isa(type, Const) && _builtin_nothrow(<:, Any[argtypes[3], argtypes[2]], type)
+            return SomeCase(quoted(type.val))
         end
         subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2])
-        ir[SSAValue(idx)] = subtype_call
-        return true
-    elseif params.inlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol)
-        ir[SSAValue(idx)] = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2],
+        return SomeCase(subtype_call)
+    elseif isinlining && f === TypeVar && 2 <= length(argtypes) <= 4 && (argtypes[2] ⊑ Symbol)
+        typevar_call = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2],
             length(stmt.args) < 4 ? Bottom : stmt.args[3],
             length(stmt.args) == 2 ? Any : stmt.args[end])
-        return true
+        return SomeCase(typevar_call)
     elseif is_return_type(f)
-        if isconstType(typ)
-            ir[SSAValue(idx)] = quoted(typ.parameters[1])
-            return true
-        elseif isa(typ, Const)
-            ir[SSAValue(idx)] = quoted(typ.val)
-            return true
+        if isconstType(type)
+            return SomeCase(quoted(type.parameters[1]))
+        elseif isa(type, Const)
+            return SomeCase(quoted(type.val))
         end
     end
-    return false
+    return nothing
 end
 
 function ssa_substitute!(idx::Int, @nospecialize(val), arg_replacements::Vector{Any},
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index bc268d33b1a30..35f976756dcdd 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -172,7 +172,7 @@ end
 NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
     NewInstruction(stmt, type, nothing)
 NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
-    NewInstruction(stmt, type, nothing, line, 0x00, false)
+    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
 
 effect_free(inst::NewInstruction) =
     NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
@@ -193,7 +193,7 @@ function InstructionStream(len::Int)
     info = Array{Any}(undef, len)
     fill!(info, nothing)
     lines = fill(Int32(0), len)
-    flags = fill(0x00, len)
+    flags = fill(IR_FLAG_NULL, len)
     return InstructionStream(insts, types, info, lines, flags)
 end
 InstructionStream() = InstructionStream(0)
@@ -221,7 +221,7 @@ function resize!(stmts::InstructionStream, len)
     resize!(stmts.flag, len)
     for i in (old_length + 1):len
         stmts.line[i] = 0
-        stmts.flag[i] = 0x00
+        stmts.flag[i] = IR_FLAG_NULL
         stmts.info[i] = nothing
     end
     return stmts
@@ -351,13 +351,8 @@ getindex(it::UseRefIterator) = it.use[1].stmt
 #    use::Int
 #end
 
-struct OOBToken
-end
-
-struct UndefToken
-end
-const undef_token = UndefToken()
-
+struct OOBToken end; const OOB_TOKEN = OOBToken()
+struct UndefToken end; const UNDEF_TOKEN = UndefToken()
 
 function getindex(x::UseRef)
     stmt = x.stmt
@@ -365,45 +360,46 @@ function getindex(x::UseRef)
         rhs = stmt.args[2]
         if isa(rhs, Expr)
             if is_relevant_expr(rhs)
-                x.op > length(rhs.args) && return OOBToken()
+                x.op > length(rhs.args) && return OOB_TOKEN
                 return rhs.args[x.op]
             end
         end
-        x.op == 1 || return OOBToken()
+        x.op == 1 || return OOB_TOKEN
         return rhs
     elseif isa(stmt, Expr) # @assert is_relevant_expr(stmt)
-        x.op > length(stmt.args) && return OOBToken()
+        x.op > length(stmt.args) && return OOB_TOKEN
         return stmt.args[x.op]
     elseif isa(stmt, GotoIfNot)
-        x.op == 1 || return OOBToken()
+        x.op == 1 || return OOB_TOKEN
         return stmt.cond
     elseif isa(stmt, ReturnNode)
-        isdefined(stmt, :val) || return OOBToken()
-        x.op == 1 || return OOBToken()
+        isdefined(stmt, :val) || return OOB_TOKEN
+        x.op == 1 || return OOB_TOKEN
         return stmt.val
     elseif isa(stmt, PiNode)
-        isdefined(stmt, :val) || return OOBToken()
-        x.op == 1 || return OOBToken()
+        isdefined(stmt, :val) || return OOB_TOKEN
+        x.op == 1 || return OOB_TOKEN
         return stmt.val
     elseif isa(stmt, UpsilonNode)
-        isdefined(stmt, :val) || return OOBToken()
-        x.op == 1 || return OOBToken()
+        isdefined(stmt, :val) || return OOB_TOKEN
+        x.op == 1 || return OOB_TOKEN
         return stmt.val
     elseif isa(stmt, PhiNode)
-        x.op > length(stmt.values) && return OOBToken()
-        isassigned(stmt.values, x.op) || return UndefToken()
+        x.op > length(stmt.values) && return OOB_TOKEN
+        isassigned(stmt.values, x.op) || return UNDEF_TOKEN
         return stmt.values[x.op]
     elseif isa(stmt, PhiCNode)
-        x.op > length(stmt.values) && return OOBToken()
-        isassigned(stmt.values, x.op) || return UndefToken()
+        x.op > length(stmt.values) && return OOB_TOKEN
+        isassigned(stmt.values, x.op) || return UNDEF_TOKEN
         return stmt.values[x.op]
     else
-        return OOBToken()
+        return OOB_TOKEN
     end
 end
 
 function is_relevant_expr(e::Expr)
-    return e.head in (:call, :invoke, :new, :splatnew, :(=), :(&),
+    return e.head in (:call, :invoke, :invoke_modify,
+                      :new, :splatnew, :(=), :(&),
                       :gc_preserve_begin, :gc_preserve_end,
                       :foreigncall, :isdefined, :copyast,
                       :undefcheck, :throw_undef_if_not,
@@ -467,8 +463,8 @@ iterate(it::UseRefIterator) = (it.use[1].op = 0; iterate(it, nothing))
     while true
         use.op += 1
         y = use[]
-        y === OOBToken() && return nothing
-        y === UndefToken() || return it.use
+        y === OOB_TOKEN && return nothing
+        y === UNDEF_TOKEN || return it.use
     end
 end
 
@@ -524,7 +520,7 @@ function insert_node!(ir::IRCode, pos::Int, inst::NewInstruction, attach_after::
     node[:line] = something(inst.line, ir.stmts[pos][:line])
     flag = inst.flag
     if !inst.effect_free_computed
-        if stmt_effect_free(inst.stmt, inst.type, ir, ir.sptypes)
+        if stmt_effect_free(inst.stmt, inst.type, ir)
             flag |= IR_FLAG_EFFECT_FREE
         end
     end
@@ -723,7 +719,7 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction,
     elseif isa(before, OldSSAValue)
         pos = before.id
         if pos < compact.idx
-            renamed = compact.ssa_rename[pos]
+            renamed = compact.ssa_rename[pos]::AnySSAValue
             count_added_node!(compact, inst.stmt)
             line = something(inst.line, compact.result[renamed.id][:line])
             node = add!(compact.new_new_nodes, renamed.id, attach_after)
@@ -769,7 +765,7 @@ function insert_node_here!(compact::IncrementalCompact, inst::NewInstruction, re
         resize!(compact, result_idx)
     end
     flag = inst.flag
-    if !inst.effect_free_computed && stmt_effect_free(inst.stmt, inst.type, compact, compact.ir.sptypes)
+    if !inst.effect_free_computed && stmt_effect_free(inst.stmt, inst.type, compact)
         flag |= IR_FLAG_EFFECT_FREE
     end
     node = compact.result[result_idx]
@@ -1315,10 +1311,12 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
         compact.result[old_result_idx][:inst]), (compact.idx, active_bb)
 end
 
-function maybe_erase_unused!(extra_worklist, compact, idx, callback = x->nothing)
+function maybe_erase_unused!(
+    extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int,
+    callback = null_dce_callback)
     stmt = compact.result[idx][:inst]
     stmt === nothing && return false
-    if compact_exprtype(compact, SSAValue(idx)) === Bottom
+    if argextype(SSAValue(idx), compact) === Bottom
         effect_free = false
     else
         effect_free = compact.result[idx][:flag] & IR_FLAG_EFFECT_FREE != 0
@@ -1408,19 +1406,21 @@ function just_fixup!(compact::IncrementalCompact)
     end
 end
 
-function simple_dce!(compact::IncrementalCompact)
+function simple_dce!(compact::IncrementalCompact, callback = null_dce_callback)
     # Perform simple DCE for unused values
     extra_worklist = Int[]
     for (idx, nused) in Iterators.enumerate(compact.used_ssas)
         idx >= compact.result_idx && break
         nused == 0 || continue
-        maybe_erase_unused!(extra_worklist, compact, idx)
+        maybe_erase_unused!(extra_worklist, compact, idx, callback)
     end
     while !isempty(extra_worklist)
-        maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist))
+        maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist), callback)
     end
 end
 
+null_dce_callback(x::SSAValue) = return
+
 function non_dce_finish!(compact::IncrementalCompact)
     result_idx = compact.result_idx
     resize!(compact.result, result_idx - 1)
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 49d9aef973e29..88f529d2814de 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -29,9 +29,10 @@ function inflate_ir(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any})
             code[i] = stmt
         end
     end
-    ssavaluetypes = ci.ssavaluetypes
     nstmts = length(code)
-    ssavaluetypes = ci.ssavaluetypes isa Vector{Any} ? copy(ci.ssavaluetypes) : Any[ Any for i = 1:(ci.ssavaluetypes::Int) ]
+    ssavaluetypes = let ssavaluetypes = ci.ssavaluetypes
+        ssavaluetypes isa Vector{Any} ? copy(ssavaluetypes) : Any[ Any for i = 1:(ssavaluetypes::Int) ]
+    end
     stmts = InstructionStream(code, ssavaluetypes, Any[nothing for i = 1:nstmts], copy(ci.codelocs), copy(ci.ssaflags))
     ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable), argtypes, Any[], sptypes)
     return ir
@@ -40,15 +41,15 @@ end
 function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int)
     @assert isempty(ir.new_nodes)
     # All but the first `nargs` slots will now be unused
-    resize!(ci.slotflags, nargs + 1)
+    resize!(ci.slotflags, nargs)
     stmts = ir.stmts
     ci.code, ci.ssavaluetypes, ci.codelocs, ci.ssaflags, ci.linetable =
         stmts.inst, stmts.type, stmts.line, stmts.flag, ir.linetable
     for metanode in ir.meta
         push!(ci.code, metanode)
         push!(ci.codelocs, 1)
-        push!(ci.ssavaluetypes, Any)
-        push!(ci.ssaflags, 0x00)
+        push!(ci.ssavaluetypes::Vector{Any}, Any)
+        push!(ci.ssaflags, IR_FLAG_NULL)
     end
     # Translate BB Edges to statement edges
     # (and undo normalization for now)
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 37f5d38940e05..000bb1849edea 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1,16 +1,24 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+function is_known_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,IncrementalCompact})
+    isexpr(x, :call) || return false
+    ft = argextype(x.args[1], ir)
+    return singleton_type(ft) === func
+end
+
 """
-    This struct keeps track of all uses of some mutable struct allocated
-    in the current function. `uses` are all instances of `getfield` on the
-    struct. `defs` are all instances of `setfield!` on the struct. The terminology
-    refers to the uses/defs of the ``slot bundle'' that the mutable struct represents.
-
-    In addition we keep track of all instances of a foreigncall preserve of this mutable
-    struct. Somewhat counterintuitively, we don't actually need to make sure that the
-    struct itself is live (or even allocated) at a ccall site. If there are no other places
-    where the struct escapes (and thus e.g. where its address is taken), it need not be
-    allocated. We do however, need to make sure to preserve any elements of this struct.
+    du::SSADefUse
+
+This struct keeps track of all uses of some mutable struct allocated in the current function:
+- `du.uses::Vector{Int}` are all instances of `getfield` on the struct
+- `du.defs::Vector{Int}` are all instances of `setfield!` on the struct
+The terminology refers to the uses/defs of the "slot bundle" that the mutable struct represents.
+
+In addition, we keep track of all instances of a `:foreigncall` that preserves this mutable
+struct in `du.ccall_preserve_uses`. Somewhat counterintuitively, we don't actually need to
+make sure that the struct itself is live (or even allocated) at a `ccall` site.
+If there are no other places where the struct escapes (and thus e.g. where its address is taken),
+it need not be allocated. We do, however, need to make sure to preserve any elements of this struct.
 """
 struct SSADefUse
     uses::Vector{Int}
@@ -19,36 +27,30 @@ struct SSADefUse
 end
 SSADefUse() = SSADefUse(Int[], Int[], Int[])
 
-function try_compute_fieldidx_expr(@nospecialize(typ), @nospecialize(use_expr))
-    field = use_expr.args[3]
-    isa(field, QuoteNode) && (field = field.value)
-    isa(field, Union{Int, Symbol}) || return nothing
-    return try_compute_fieldidx(typ, field)
-end
+compute_live_ins(cfg::CFG, du::SSADefUse) = compute_live_ins(cfg, du.defs, du.uses)
 
-function lift_defuse(cfg::CFG, ssa::SSADefUse)
-    # We remove from `uses` any block where all uses are dominated
-    # by a def. This prevents insertion of dead phi nodes at the top
-    # of such a block if that block happens to be in a loop
-    ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in ssa.uses]
-    for x in ssa.defs
-        push!(ordered, (x, block_for_inst(cfg, x), false))
-    end
-    ordered = sort(ordered, by=x->x[1])
-    bb_defs = Int[]
-    bb_uses = Int[]
-    last_bb = last_def_bb = 0
-    for (_, bb, is_use) in ordered
-        if bb != last_bb && is_use
-            push!(bb_uses, bb)
-        end
-        last_bb = bb
-        if last_def_bb != bb && !is_use
-            push!(bb_defs, bb)
-            last_def_bb = bb
+function try_compute_field_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr)
+    field = stmt.args[3]
+    # fields are usually literals, handle them manually
+    if isa(field, QuoteNode)
+        field = field.value
+    elseif isa(field, Int)
+    # try to resolve other constants, e.g. global reference
+    else
+        field = argextype(field, ir)
+        if isa(field, Const)
+            field = field.val
+        else
+            return nothing
         end
     end
-    SSADefUse(bb_uses, bb_defs, Int[])
+    isa(field, Union{Int, Symbol}) || return nothing
+    return field
+end
+
+function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, typ::DataType)
+    field = try_compute_field_stmt(ir, stmt)
+    return try_compute_fieldidx(typ, field)
 end
 
 function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int)
@@ -82,34 +84,13 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector
     def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx)
 end
 
-function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int)
-    # Find the first dominating def
-    curblock = stmtblock = block_for_inst(ir.cfg, use_idx)
-    curblock = find_curblock(domtree, allblocks, curblock)
-    defblockdefs = let curblock = curblock
-        Int[stmt for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock]
-    end
-    def = 0
-    if !isempty(defblockdefs)
-        if curblock != stmtblock
-            # Find the last def in this block
-            def = 0
-            for x in defblockdefs
-                def = max(def, x)
-            end
-        else
-            # Find the last def before our use
-            def = 0
-            for x in defblockdefs
-                def = max(def, x >= use_idx ? 0 : x)
-            end
-        end
-    end
+function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use::Int)
+    def, useblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use)
     if def == 0
         if !haskey(phinodes, curblock)
             # If this happens, we need to search the predecessors for defs. Which
             # one doesn't matter - if it did, we'd have had a phinode
-            return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds))
+            return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[useblock].preds))
         end
         # The use is the phinode
         return phinodes[curblock]
@@ -118,7 +99,75 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I
     end
 end
 
-function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), pi_callback=(pi, idx)->false)
+# even when the allocation contains an uninitialized field, we make an extra effort to check
+# whether the load at `idx` has any "safe" `setfield!` call that defines the field
+function has_safe_def(
+    ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse,
+    newidx::Int, idx::Int)
+    def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx)
+    # will throw since we already checked this `:new` site doesn't define this field
+    def == newidx && return false
+    # found a "safe" definition
+    def ≠ 0 && return true
+    # we may still be able to replace this load with `PhiNode`
+    # examine if all predecessors of `block` have any "safe" definition
+    block = block_for_inst(ir, idx)
+    seen = BitSet(block)
+    worklist = BitSet(ir.cfg.blocks[block].preds)
+    isempty(worklist) && return false # no predecessor that could provide a definition
+    while !isempty(worklist)
+        pred = pop!(worklist)
+        # if this block has already been examined, bail out to avoid infinite cycles
+        pred in seen && return false
+        idx = last(ir.cfg.blocks[pred].stmts) # NOTE rebinds `idx` to the last statement of `pred`
+        # NOTE `idx` isn't a load, thus we can use the inclusive condition within `find_def_for_use`
+        def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx, true)
+        # will throw since we already checked this `:new` site doesn't define this field
+        def == newidx && return false
+        push!(seen, pred)
+        # found a "safe" definition for this predecessor
+        def ≠ 0 && continue
+        # check for the predecessors of this predecessor
+        for newpred in ir.cfg.blocks[pred].preds
+            push!(worklist, newpred)
+        end
+    end
+    return true # worklist drained without reaching `newidx` or revisiting a block
+end
+
+# find the first dominating def for the given use; returns `(def, useblock, curblock)`, where `def == 0` means none was found
+function find_def_for_use(
+    ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use::Int, inclusive::Bool=false)
+    useblock = block_for_inst(ir.cfg, use)
+    curblock = find_curblock(domtree, allblocks, useblock)
+    local def = 0 # 0 is the "no def found" sentinel
+    for idx in du.defs
+        if block_for_inst(ir.cfg, idx) == curblock
+            if curblock != useblock
+                # Find the last def in this block
+                def = max(def, idx)
+            else
+                # Find the last def before our use
+                if inclusive # a def located at `use` itself also counts
+                    def = max(def, idx ≤ use ? idx : 0)
+                else
+                    def = max(def, idx < use ? idx : 0)
+                end
+            end
+        end
+    end
+    return def, useblock, curblock
+end
+
+function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint))
+    if isa(val, Union{OldSSAValue, SSAValue}) # walk the SSA chain first, updating the type constraint
+        val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint)
+    end
+    return walk_to_defs(compact, val, typeconstraint) # the result of `walk_to_defs` is returned as is
+end
+
+function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
+                     callback = (@nospecialize(pi), @nospecialize(idx)) -> false)
     while true
         if isa(defssa, OldSSAValue)
             if already_inserted(compact, defssa)
@@ -132,7 +181,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
         end
         def = compact[defssa]
         if isa(def, PiNode)
-            if pi_callback(def, defssa)
+            if callback(def, defssa)
                 return defssa
             end
             def = def.val
@@ -143,7 +192,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
             end
             defssa = def
         elseif isa(def, AnySSAValue)
-            pi_callback(def, defssa)
+            callback(def, defssa)
             if isa(def, SSAValue)
                 is_old(compact, defssa) && (def = OldSSAValue(def.id))
             end
@@ -156,32 +205,31 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
     end
 end
 
-function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defidx), @nospecialize(typeconstraint) = types(compact)[defidx])
+function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
+                                @nospecialize(typeconstraint))
     callback = function (@nospecialize(pi), @nospecialize(idx))
-        isa(pi, PiNode) && (typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ)))
+        if isa(pi, PiNode) # narrow the constraint by the type of each `PiNode` the walk passes
+            typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ))
+        end
         return false
     end
-    def = simple_walk(compact, defidx, callback)
+    def = simple_walk(compact, defssa, callback) # callback always returns `false`, so it never stops the walk
     return Pair{Any, Any}(def, typeconstraint)
 end
 
 """
-    walk_to_defs(compact, val, intermediaries)
+    walk_to_defs(compact, val, typeconstraint)
 
-Starting at `val` walk use-def chains to get all the leaves feeding into
-this val (pruning those leaves rules out by path conditions).
+Starting at `val` walk use-def chains to get all the leaves feeding into this `val`
+(pruning those leaves ruled out by path conditions).
 """
-function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), visited_phinodes::Vector{Any}=Any[])
-    isa(defssa, AnySSAValue) || return Any[defssa]
+function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint))
+    visited_phinodes = AnySSAValue[]
+    isa(defssa, AnySSAValue) || return Any[defssa], visited_phinodes
     def = compact[defssa]
-    isa(def, PhiNode) || return Any[defssa]
-    # Step 2: Figure out what the struct is defined as
-    ## Track definitions through PiNode/PhiNode
-    found_def = false
-    ## Track which PhiNodes, SSAValue intermediaries
-    ## we forwarded through.
-    visited = IdDict{Any, Any}()
-    worklist_defs = Any[]
+    isa(def, PhiNode) || return Any[defssa], visited_phinodes
+    visited_constraints = IdDict{AnySSAValue, Any}()
+    worklist_defs = AnySSAValue[]
     worklist_constraints = Any[]
     leaves = Any[]
     push!(worklist_defs, defssa)
@@ -189,7 +237,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
     while !isempty(worklist_defs)
         defssa = pop!(worklist_defs)
         typeconstraint = pop!(worklist_constraints)
-        visited[defssa] = typeconstraint
+        visited_constraints[defssa] = typeconstraint
         def = compact[defssa]
         if isa(def, PhiNode)
             push!(visited_phinodes, defssa)
@@ -200,8 +248,8 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
                 if is_old(compact, defssa) && isa(val, SSAValue)
                     val = OldSSAValue(val.id)
                 end
-                edge_typ = widenconst(compact_exprtype(compact, val))
-                typeintersect(edge_typ, typeconstraint) === Union{} && continue
+                edge_typ = widenconst(argextype(val, compact))
+                hasintersect(edge_typ, typeconstraint) || continue
                 push!(possible_predecessors, n)
             end
             for n in possible_predecessors
@@ -213,15 +261,15 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
                 if isa(val, AnySSAValue)
                     new_def, new_constraint = simple_walk_constraint(compact, val, typeconstraint)
                     if isa(new_def, AnySSAValue)
-                        if !haskey(visited, new_def)
+                        if !haskey(visited_constraints, new_def)
                             push!(worklist_defs, new_def)
                             push!(worklist_constraints, new_constraint)
-                        elseif !(new_constraint <: visited[new_def])
+                        elseif !(new_constraint <: visited_constraints[new_def])
                             # We have reached the same definition via a different
                             # path, with a different type constraint. We may have
                             # to redo some work here with the wider typeconstraint
                             push!(worklist_defs, new_def)
-                            push!(worklist_constraints, tmerge(new_constraint, visited[new_def]))
+                            push!(worklist_constraints, tmerge(new_constraint, visited_constraints[new_def]))
                         end
                         continue
                     end
@@ -241,12 +289,12 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
             push!(leaves, defssa)
         end
     end
-    leaves
+    return leaves, visited_phinodes
 end
 
-function process_immutable_preserve(new_preserves::Vector{Any}, compact::IncrementalCompact, def::Expr)
+function record_immutable_preserve!(new_preserves::Vector{Any}, def::Expr, compact::IncrementalCompact)
     for arg in (isexpr(def, :new) ? def.args : def.args[2:end])
-        if !isbitstype(widenconst(compact_exprtype(compact, arg)))
+        if !isbitstype(widenconst(argextype(arg, compact)))
             push!(new_preserves, arg)
         end
     end
@@ -274,55 +322,32 @@ function is_getfield_captures(@nospecialize(def), compact::IncrementalCompact)
     isa(def, Expr) || return false
     length(def.args) >= 3 || return false
     is_known_call(def, getfield, compact) || return false
-    which = compact_exprtype(compact, def.args[3])
+    which = argextype(def.args[3], compact)
     isa(which, Const) || return false
     which.val === :captures || return false
-    oc = compact_exprtype(compact, def.args[2])
+    oc = argextype(def.args[2], compact)
     return oc ⊑ Core.OpaqueClosure
 end
 
-function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
-        @nospecialize(result_t), field::Int, leaves::Vector{Any})
+struct LiftedValue # box around a lifted value, so that it can be told apart from `nothing`
+    x
+    LiftedValue(@nospecialize x) = new(x)
+end
+const LiftedLeaves = IdDict{Any, Union{Nothing,LiftedValue}} # leaf => lifted value, or `nothing` when the field is undefined
+
+# try to compute lifted values that can replace `getfield(x, field)` call
+# where `x` is an immutable struct that is defined at any of `leaves`
+function lift_leaves(compact::IncrementalCompact,
+                     @nospecialize(result_t), field::Int, leaves::Vector{Any})
     # For every leaf, the lifted value
-    lifted_leaves = IdDict{Any, Any}()
+    lifted_leaves = LiftedLeaves()
     maybe_undef = false
     for leaf in leaves
-        leaf_key = leaf
+        cache_key = leaf
         if isa(leaf, AnySSAValue)
-            function lift_arg(ref::Core.Compiler.UseRef)
-                lifted = ref[]
-                if is_old(compact, leaf) && isa(lifted, SSAValue)
-                    lifted = OldSSAValue(lifted.id)
-                end
-                if isa(lifted, GlobalRef) || isa(lifted, Expr)
-                    lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, compact_exprtype(compact, lifted))))
-                    ref[] = lifted
-                    (isa(leaf, SSAValue) && (leaf.id < compact.result_idx)) && push!(compact.late_fixup, leaf.id)
-                end
-                lifted_leaves[leaf_key] = RefValue{Any}(lifted)
-                nothing
-            end
-            function walk_leaf(@nospecialize(leaf))
-                if isa(leaf, OldSSAValue) && already_inserted(compact, leaf)
-                    leaf = compact.ssa_rename[leaf.id]
-                    if isa(leaf, AnySSAValue)
-                        leaf = simple_walk(compact, leaf)
-                    end
-                    if isa(leaf, AnySSAValue)
-                        def = compact[leaf]
-                    else
-                        def = leaf
-                    end
-                elseif isa(leaf, AnySSAValue)
-                    def = compact[leaf]
-                else
-                    def = leaf
-                end
-                return Pair{Any, Any}(def, leaf)
-            end
-            (def, leaf) = walk_leaf(leaf)
-            if is_tuple_call(compact, def) && 1 <= field < length(def.args)
-                lift_arg(UseRef(def, 1 + field))
+            (def, leaf) = walk_to_def(compact, leaf)
+            if is_known_call(def, tuple, compact) && 1 ≤ field < length(def.args)
+                lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves)
                 continue
             elseif isexpr(def, :new)
                 typ = widenconst(types(compact)[leaf])
@@ -331,7 +356,7 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
                 end
                 (isa(typ, DataType) && !isabstracttype(typ)) || return nothing
                 @assert !ismutabletype(typ)
-                if length(def.args) < 1 + field
+                if length(def.args) < 1+field
                     if field > fieldcount(typ)
                         return nothing
                     end
@@ -340,7 +365,7 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
                         # On this branch, this will be a guaranteed UndefRefError.
                         # We use the regular undef mechanic to lift this to a boolean slot
                         maybe_undef = true
-                        lifted_leaves[leaf_key] = nothing
+                        lifted_leaves[cache_key] = nothing
                         continue
                     end
                     return nothing
@@ -354,16 +379,7 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
                     end
                     compact[leaf] = def
                 end
-                lifted = def.args[1+field]
-                if is_old(compact, leaf) && isa(lifted, SSAValue)
-                    lifted = OldSSAValue(lifted.id)
-                end
-                if isa(lifted, GlobalRef) || isa(lifted, Expr)
-                    lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, compact_exprtype(compact, lifted))))
-                    def.args[1+field] = lifted
-                    (isa(leaf, SSAValue) && (leaf.id < compact.result_idx)) && push!(compact.late_fixup, leaf.id)
-                end
-                lifted_leaves[leaf_key] = RefValue{Any}(lifted)
+                lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves)
                 continue
             elseif is_getfield_captures(def, compact)
                 # Walk to new_opaque_closure
@@ -371,14 +387,14 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
                 if isa(ocleaf, AnySSAValue)
                     ocleaf = simple_walk(compact, ocleaf)
                 end
-                ocdef, _ = walk_leaf(ocleaf)
-                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 <= field <= length(ocdef.args)-5
-                    lift_arg(UseRef(ocdef, 5 + field))
+                ocdef, _ = walk_to_def(compact, ocleaf)
+                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-5
+                    lift_arg!(compact, leaf, cache_key, ocdef, 5+field, lifted_leaves)
                     continue
                 end
                 return nothing
             else
-                typ = compact_exprtype(compact, leaf)
+                typ = argextype(leaf, compact)
                 if !isa(typ, Const)
                     # TODO: (disabled since #27126)
                     # If the leaf is an old ssa value, insert a getfield here
@@ -401,62 +417,115 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
             else
                 return nothing
             end
-        elseif isa(leaf, Union{Argument, Expr})
+        elseif isa(leaf, Argument) || isa(leaf, Expr)
             return nothing
         end
-        !ismutable(leaf) || return nothing
+        ismutable(leaf) && return nothing
         isdefined(leaf, field) || return nothing
         val = getfield(leaf, field)
         is_inlineable_constant(val) || return nothing
-        lifted_leaves[leaf_key] = RefValue{Any}(quoted(val))
+        lifted_leaves[cache_key] = LiftedValue(quoted(val))
     end
-    lifted_leaves, maybe_undef
+    return lifted_leaves, maybe_undef
 end
 
-make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? typ : MaybeUndef(typ)
+function lift_arg!(
+    compact::IncrementalCompact, @nospecialize(leaf), @nospecialize(cache_key),
+    stmt::Expr, argidx::Int, lifted_leaves::LiftedLeaves)
+    lifted = stmt.args[argidx] # the argument of `stmt` that gets recorded for `cache_key`
+    if is_old(compact, leaf) && isa(lifted, SSAValue)
+        lifted = OldSSAValue(lifted.id)
+    end
+    if isa(lifted, GlobalRef) || isa(lifted, Expr) # give it its own inserted node and make `stmt` refer to that
+        lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, argextype(lifted, compact))))
+        stmt.args[argidx] = lifted
+        if isa(leaf, SSAValue) && leaf.id < compact.result_idx
+            push!(compact.late_fixup, leaf.id)
+        end
+    end
+    lifted_leaves[cache_key] = LiftedValue(lifted)
+    nothing # called for its side effects on `stmt`, `compact` and `lifted_leaves`
+end
 
-function lift_comparison!(compact::IncrementalCompact, idx::Int,
-        @nospecialize(c1), @nospecialize(c2), stmt::Expr,
-        lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue})
-    if isa(c1, Const)
-        cmp = c1
-        typeconstraint = widenconst(c2)
-        val = stmt.args[3]
+function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf))
+    if isa(leaf, OldSSAValue) && already_inserted(compact, leaf)
+        leaf = compact.ssa_rename[leaf.id] # follow the rename of an already-inserted old value
+        if isa(leaf, AnySSAValue)
+            leaf = simple_walk(compact, leaf)
+        end
+        if isa(leaf, AnySSAValue)
+            def = compact[leaf]
+        else
+            def = leaf # not an SSA value: the leaf itself serves as the definition
+        end
+    elseif isa(leaf, AnySSAValue)
+        def = compact[leaf]
     else
-        cmp = c2
-        typeconstraint = widenconst(c1)
-        val = stmt.args[2]
+        def = leaf
     end
+    return Pair{Any, Any}(def, leaf) # (definition, possibly renamed/walked leaf)
+end
 
-    is_type_only = isdefined(typeof(cmp), :instance)
+make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? typ : MaybeUndef(typ) # wrap in `MaybeUndef` unless already wrapped
 
-    if isa(val, Union{OldSSAValue, SSAValue})
-        val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint)
+"""
+    lift_comparison!(compact::IncrementalCompact, idx::Int, stmt::Expr)
+
+Replaces `φ(x, y)::Union{X,Y} === constant` by `φ(x === constant, y === constant)`,
+where `x === constant` and `y === constant` can be replaced with constant `Bool`eans.
+It helps codegen avoid generating expensive code for `===` with `Union` types.
+In particular, this is supposed to improve the performance of the iteration protocol:
+```julia
+while x !== nothing
+    x = iterate(...)::Union{Nothing,Tuple{Any,Any}}
+end
+```
+"""
+function lift_comparison!(compact::IncrementalCompact,
+    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue})
+    args = stmt.args
+    length(args) == 3 || return # expects the callee plus exactly two operands
+
+    lhs, rhs = args[2], args[3]
+    vl = argextype(lhs, compact)
+    vr = argextype(rhs, compact)
+    if isa(vl, Const)
+        isa(vr, Const) && return # both operands are constant; nothing to lift
+        cmp = vl
+        typeconstraint = widenconst(vr) # NOTE(review): looks unused below, `valtyp` is what gets passed on; confirm
+        val = rhs
+    elseif isa(vr, Const)
+        cmp = vr
+        typeconstraint = widenconst(vl)
+        val = lhs
+    else
+        return # neither operand is a constant
     end
 
-    visited_phinodes = Any[]
-    leaves = walk_to_defs(compact, val, typeconstraint, visited_phinodes)
+    valtyp = widenconst(argextype(val, compact))
+    isa(valtyp, Union) || return # bail out if there won't be a good chance for lifting
+
+    leaves, visited_phinodes = collect_leaves(compact, val, valtyp)
+    length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves
 
     # Let's check if we evaluate the comparison for each one of the leaves
-    lifted_leaves = IdDict{Any, Any}()
+    lifted_leaves = nothing # allocated lazily on the first constant comparison result
     for leaf in leaves
-        r = egal_tfunc(compact_exprtype(compact, leaf), cmp)
+        r = egal_tfunc(argextype(leaf, compact), cmp)
         if isa(r, Const)
-            lifted_leaves[leaf] = RefValue{Any}(r.val)
+            if lifted_leaves === nothing
+                lifted_leaves = LiftedLeaves()
+            end
+            lifted_leaves[leaf] = LiftedValue(r.val)
         else
-            # TODO: In some cases it might be profitable to hoist the ===
-            # here.
-            return
+            return # TODO In some cases it might be profitable to hoist the === here
         end
     end
 
-    lifted_val = perform_lifting!(compact, visited_phinodes, cmp, lifting_cache, Bool, lifted_leaves, val)
-    @assert lifted_val !== nothing
+    lifted_val = perform_lifting!(compact,
+        visited_phinodes, cmp, lifting_cache, Bool,
+        lifted_leaves::LiftedLeaves, val)::LiftedValue
 
-    #global assertion_counter
-    #assertion_counter::Int += 1
-    #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), lifted_val), nothing, 0, true)
-    #return
     compact[idx] = lifted_val.x
 end
 
@@ -473,14 +542,15 @@ function is_old(compact, @nospecialize(old_node_ssa))
 end
 
 function perform_lifting!(compact::IncrementalCompact,
-        visited_phinodes::Vector{Any}, @nospecialize(cache_key),
-        lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
-        @nospecialize(result_t), lifted_leaves::IdDict{Any, Any}, @nospecialize(stmt_val))
-    reverse_mapping = IdDict{Any, Any}(ssa => id for (id, ssa) in enumerate(visited_phinodes))
+    visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key),
+    lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
+    @nospecialize(result_t), lifted_leaves::LiftedLeaves, @nospecialize(stmt_val))
+    reverse_mapping = IdDict{AnySSAValue, Int}(ssa => id for (id, ssa) in enumerate(visited_phinodes))
 
     # Insert PhiNodes
     lifted_phis = LiftedPhi[]
     for item in visited_phinodes
+        # FIXME this cache retrieval is broken: a `Tuple` never matches the `Pair` keys of `lifting_cache`
         if (item, cache_key) in keys(lifting_cache)
             ssa = lifting_cache[Pair{AnySSAValue, Any}(item, cache_key)]
             push!(lifted_phis, LiftedPhi(ssa, compact[ssa]::PhiNode, false))
@@ -501,11 +571,10 @@ function perform_lifting!(compact::IncrementalCompact,
             edge = old_node.edges[i]
             isassigned(old_node.values, i) || continue
             val = old_node.values[i]
-            orig_val = val
             if is_old(compact, old_node_ssa) && isa(val, SSAValue)
                 val = OldSSAValue(val.id)
             end
-            if isa(val, Union{NewSSAValue, SSAValue, OldSSAValue})
+            if isa(val, AnySSAValue)
                 val = simple_walk(compact, val)
             end
             if val in keys(lifted_leaves)
@@ -515,12 +584,13 @@ function perform_lifting!(compact::IncrementalCompact,
                     resize!(new_node.values, length(new_node.values)+1)
                     continue
                 end
-                lifted_val = lifted_val.x
-                if isa(lifted_val, Union{NewSSAValue, SSAValue, OldSSAValue})
-                    lifted_val = simple_walk(compact, lifted_val, (pi, idx)->true)
+                val = lifted_val.x
+                if isa(val, AnySSAValue)
+                    callback = (@nospecialize(pi), @nospecialize(idx)) -> true
+                    val = simple_walk(compact, val, callback)
                 end
-                push!(new_node.values, lifted_val)
-            elseif isa(val, Union{NewSSAValue, SSAValue, OldSSAValue}) && val in keys(reverse_mapping)
+                push!(new_node.values, val)
+            elseif isa(val, AnySSAValue) && val in keys(reverse_mapping)
                 push!(new_node.edges, edge)
                 push!(new_node.values, lifted_phis[reverse_mapping[val]].ssa)
             else
@@ -539,87 +609,68 @@ function perform_lifting!(compact::IncrementalCompact,
     end
 
     if stmt_val in keys(lifted_leaves)
-        stmt_val = lifted_leaves[stmt_val]
-    elseif isa(stmt_val, Union{NewSSAValue, SSAValue, OldSSAValue}) && stmt_val in keys(reverse_mapping)
-        stmt_val = RefValue{Any}(lifted_phis[reverse_mapping[stmt_val]].ssa)
+        return lifted_leaves[stmt_val]
+    elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping)
+        return LiftedValue(lifted_phis[reverse_mapping[stmt_val]].ssa)
     end
 
-    return stmt_val
+    return stmt_val # N.B. should never happen
 end
 
-assertion_counter = 0
-function getfield_elim_pass!(ir::IRCode)
+# NOTE we use `IdSet{Int}` instead of `BitSet` in these passes since they work on IR after inlining,
+# which can be very large sometimes, and program counters in question are often very sparse
+const SPCSet = IdSet{Int}
+
+"""
+    sroa_pass!(ir::IRCode) -> newir::IRCode
+
+`getfield` elimination pass, a.k.a. Scalar Replacement of Aggregates optimization.
+
+This pass is based on a local field analysis by def-use chain walking.
+It looks for struct allocation sites ("definitions"), and `getfield` calls as well as
+`:foreigncall`s that preserve the structs ("usages"). If "definitions" have enough information,
+then this pass will replace corresponding usages with forwarded values.
+`mutable struct`s require additional care and need to be handled separately from immutables.
+For `mutable struct`s, `setfield!` calls also count as "definitions", and the pass should
+give up the lifting conservatively when there are any "intermediate usages" that may escape
+the mutable struct (e.g. non-inlined generic function call that takes the mutable struct as
+its argument).
+
+In a case when all usages are fully eliminated, `struct` allocation may also be erased as
+a result of succeeding dead code elimination.
+"""
+function sroa_pass!(ir::IRCode)
     compact = IncrementalCompact(ir)
-    insertions = Vector{Any}()
-    defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}()
+    defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations
     lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
-    revisit_worklist = Int[]
-    #ndone, nmax = 0, 200
     for ((_, idx), stmt) in compact
+        # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
         isa(stmt, Expr) || continue
-        #ndone >= nmax && continue
-        #ndone += 1
-        result_t = compact_exprtype(compact, SSAValue(idx))
-        is_getfield = is_setfield = false
+        is_setfield = false
         field_ordering = :unspecified
-        is_ccall = false
-        # Step 1: Check whether the statement we're looking at is a getfield/setfield!
         if is_known_call(stmt, setfield!, compact)
-            is_setfield = true
             4 <= length(stmt.args) <= 5 || continue
+            is_setfield = true
             if length(stmt.args) == 5
-                field_ordering = compact_exprtype(compact, stmt.args[5])
+                field_ordering = argextype(stmt.args[5], compact)
             end
         elseif is_known_call(stmt, getfield, compact)
-            is_getfield = true
             3 <= length(stmt.args) <= 5 || continue
             if length(stmt.args) == 5
-                field_ordering = compact_exprtype(compact, stmt.args[5])
+                field_ordering = argextype(stmt.args[5], compact)
             elseif length(stmt.args) == 4
-                field_ordering = compact_exprtype(compact, stmt.args[4])
+                field_ordering = argextype(stmt.args[4], compact)
                 widenconst(field_ordering) === Bool && (field_ordering = :unspecified)
             end
-        elseif is_known_call(stmt, isa, compact)
-            # TODO
-            continue
-        elseif is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3
-            # Canonicalize
-            #   X = typeassert(Y, T)::S
-            # into
-            #   typeassert(Y, T)
-            #   X = PiNode(Y, S)
-            # N.B.: Inference may have a more precise type for `S`, than
-            #       just T, but from here on out, there's no problem with
-            #       using just using that.
-            # so subsequent analysis only has to deal with the latter
-            # form. TODO: This isn't the best place to put this.
-            # Also, we should probably have a version of typeassert
-            # that's defined not to return its value to make life easier
-            # for the backend.
-            pi = insert_node_here!(compact,
-                NewInstruction(
-                    PiNode(stmt.args[2], compact.result[idx][:type]),
-                    compact.result[idx][:type],
-                    compact.result[idx][:line]), true)
-            compact.ssa_rename[compact.idx-1] = pi
-            continue
-        elseif is_known_call(stmt, (===), compact) && length(stmt.args) == 3
-            c1 = compact_exprtype(compact, stmt.args[2])
-            c2 = compact_exprtype(compact, stmt.args[3])
-            if !(isa(c1, Const) || isa(c2, Const))
-                continue
-            end
-            (isa(c1, Const) && isa(c2, Const)) && continue
-            lift_comparison!(compact, idx, c1, c2, stmt, lifting_cache)
-            continue
         elseif isexpr(stmt, :foreigncall)
             nccallargs = length(stmt.args[3]::SimpleVector)
+            preserved = Int[]
             new_preserves = Any[]
-            old_preserves = stmt.args[(6+nccallargs):end]
-            for (pidx, preserved_arg) in enumerate(old_preserves)
+            for pidx in (6+nccallargs):length(stmt.args)
+                preserved_arg = stmt.args[pidx]
                 isa(preserved_arg, SSAValue) || continue
-                let intermediaries = IdSet()
-                    callback = function(@nospecialize(pi), ssa::AnySSAValue)
+                let intermediaries = SPCSet()
+                    callback = function (@nospecialize(pi), @nospecialize(ssa))
                         push!(intermediaries, ssa.id)
                         return false
                     end
@@ -627,45 +678,57 @@ function getfield_elim_pass!(ir::IRCode)
                     isa(def, SSAValue) || continue
                     defidx = def.id
                     def = compact[defidx]
-                    if is_tuple_call(compact, def)
-                        process_immutable_preserve(new_preserves, compact, def)
-                        old_preserves[pidx] = nothing
+                    if is_known_call(def, tuple, compact)
+                        record_immutable_preserve!(new_preserves, def, compact)
+                        push!(preserved, preserved_arg.id)
                         continue
                     elseif isexpr(def, :new)
-                        typ = widenconst(compact_exprtype(compact, SSAValue(defidx)))
+                        typ = widenconst(argextype(SSAValue(defidx), compact))
                         if isa(typ, UnionAll)
                             typ = unwrap_unionall(typ)
                         end
                         if typ isa DataType && !ismutabletype(typ)
-                            process_immutable_preserve(new_preserves, compact, def)
-                            old_preserves[pidx] = nothing
+                            record_immutable_preserve!(new_preserves, def, compact)
+                            push!(preserved, preserved_arg.id)
                             continue
                         end
                     else
                         continue
                     end
-                    mid, defuse = get!(defuses, defidx, (IdSet{Int}(), SSADefUse()))
+                    if defuses === nothing
+                        defuses = IdDict{Int, Tuple{SPCSet, SSADefUse}}()
+                    end
+                    mid, defuse = get!(defuses, defidx, (SPCSet(), SSADefUse()))
                     push!(defuse.ccall_preserve_uses, idx)
                     union!(mid, intermediaries)
                 end
                 continue
             end
             if !isempty(new_preserves)
-                old_preserves = filter(ssa->ssa !== nothing, old_preserves)
-                new_expr = Expr(:foreigncall, stmt.args[1:(6+nccallargs-1)]...,
-                    old_preserves..., new_preserves...)
-                compact[idx] = new_expr
+                compact[idx] = form_new_preserves(stmt, preserved, new_preserves)
             end
             continue
+        # TODO: This isn't the best place to put these
+        elseif is_known_call(stmt, typeassert, compact)
+            canonicalize_typeassert!(compact, idx, stmt)
+            continue
+        elseif is_known_call(stmt, (===), compact)
+            lift_comparison!(compact, idx, stmt, lifting_cache)
+            continue
+        # elseif is_known_call(stmt, isa, compact)
+            # TODO do a similar optimization as `lift_comparison!` for `===`
         else
             continue
         end
-        ## Normalize the field argument to getfield/setfield
-        field = stmt.args[3]
-        isa(field, QuoteNode) && (field = field.value)
-        isa(field, Union{Int, Symbol}) || continue
 
-        struct_typ = unwrap_unionall(widenconst(compact_exprtype(compact, stmt.args[2])))
+        # analyze this `getfield` / `setfield!` call
+
+        field = try_compute_field_stmt(compact, stmt)
+        field === nothing && continue
+
+        val = stmt.args[2]
+
+        struct_typ = unwrap_unionall(widenconst(argextype(val, compact)))
         if isa(struct_typ, Union) && struct_typ <: Tuple
             struct_typ = unswitchtupleunion(struct_typ)
         end
@@ -676,19 +739,21 @@ function getfield_elim_pass!(ir::IRCode)
             continue
         end
 
-        def, typeconstraint = stmt.args[2], struct_typ
-
+        # analyze this mutable struct here for the later pass
         if ismutabletype(struct_typ)
-            isa(def, SSAValue) || continue
-            let intermediaries = IdSet()
-                callback = function(@nospecialize(pi), ssa::AnySSAValue)
+            isa(val, SSAValue) || continue
+            let intermediaries = SPCSet()
+                callback = function (@nospecialize(pi), @nospecialize(ssa))
                     push!(intermediaries, ssa.id)
                     return false
                 end
-                def = simple_walk(compact, def, callback)
+                def = simple_walk(compact, val, callback)
                 # Mutable stuff here
                 isa(def, SSAValue) || continue
-                mid, defuse = get!(defuses, def.id, (IdSet{Int}(), SSADefUse()))
+                if defuses === nothing
+                    defuses = IdDict{Int, Tuple{SPCSet, SSADefUse}}()
+                end
+                mid, defuse = get!(defuses, def.id, (SPCSet(), SSADefUse()))
                 if is_setfield
                     push!(defuse.defs, idx)
                 else
@@ -698,41 +763,28 @@ function getfield_elim_pass!(ir::IRCode)
             end
             continue
         elseif is_setfield
-            continue
-        end
-
-        if isa(def, Union{OldSSAValue, SSAValue})
-            def, typeconstraint = simple_walk_constraint(compact, def, typeconstraint)
+            continue # invalid `setfield!` call, but just ignore here
         end
 
-        visited_phinodes = Any[]
-        leaves = walk_to_defs(compact, def, typeconstraint, visited_phinodes)
-
-        isempty(leaves) && continue
+        # perform SROA on immutable structs from here on
 
         field = try_compute_fieldidx(struct_typ, field)
         field === nothing && continue
 
-        r = lift_leaves(compact, stmt, result_t, field, leaves)
-        r === nothing && continue
-        lifted_leaves, any_undef = r
+        leaves, visited_phinodes = collect_leaves(compact, val, struct_typ)
+        isempty(leaves) && continue
+
+        result_t = argextype(SSAValue(idx), compact)
+        lifted_result = lift_leaves(compact, result_t, field, leaves)
+        lifted_result === nothing && continue
+        lifted_leaves, any_undef = lifted_result
 
         if any_undef
             result_t = make_MaybeUndef(result_t)
         end
 
-#        @Base.show result_t
-#        @Base.show stmt
-#        for (k,v) in lifted_leaves
-#            @Base.show (k, v)
-#            if isa(k, AnySSAValue)
-#                @Base.show compact[k]
-#            end
-#            if isa(v, RefValue) && isa(v.x, AnySSAValue)
-#                @Base.show compact[v.x]
-#            end
-#        end
-        val = perform_lifting!(compact, visited_phinodes, field, lifting_cache, result_t, lifted_leaves, stmt.args[2])
+        val = perform_lifting!(compact,
+            visited_phinodes, field, lifting_cache, result_t, lifted_leaves, val)
 
         # Insert the undef check if necessary
         if any_undef
@@ -746,29 +798,33 @@ function getfield_elim_pass!(ir::IRCode)
             @assert val !== nothing
         end
 
-        global assertion_counter
-        assertion_counter::Int += 1
-        #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), val), nothing, 0, true)
-        #continue
         compact[idx] = val === nothing ? nothing : val.x
     end
 
-
     non_dce_finish!(compact)
-    # Copy the use count, `simple_dce!` may modify it and for our predicate
-    # below we need it consistent with the state of the IR here (after tracking
-    # phi node arguments, but before dce).
-    used_ssas = copy(compact.used_ssas)
-    simple_dce!(compact)
-    ir = complete(compact)
-
-    # Compute domtree, needed below, now that we have finished compacting the
-    # IR. This needs to be after we iterate through the IR with
-    # `IncrementalCompact` because removing dead blocks can invalidate the
-    # domtree.
+    if defuses !== nothing
+        # now go through analyzed mutable structs and see which ones we can eliminate
+        # NOTE copy the use counts here, because `simple_dce!` may modify them and the
+        # predicate within `sroa_mutables!` needs them consistent with the state of the IR
+        # here (after tracking `PhiNode` arguments, but before the DCE); we also make an
+        # extra effort, via the callback below, to keep the copied counts updated
+        used_ssas = copy(compact.used_ssas)
+        simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1)
+        ir = complete(compact)
+        sroa_mutables!(ir, defuses, used_ssas)
+        return ir
+    else
+        simple_dce!(compact)
+        return complete(compact)
+    end
+end
+
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int})
+    # Compute domtree, needed below, now that we have finished compacting the IR.
+    # This needs to be after we iterate through the IR with `IncrementalCompact`
+    # because removing dead blocks can invalidate the domtree.
     @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks)
 
-    # Now go through any mutable structs and see which ones we can eliminate
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
         # Check if there are any uses we did not account for. If so, the variable
@@ -785,76 +841,81 @@ function getfield_elim_pass!(ir::IRCode)
         # Find the type for this allocation
         defexpr = ir[SSAValue(idx)]
         isexpr(defexpr, :new) || continue
-        typ = ir.stmts[idx][:type]
+        newidx = idx
+        typ = ir.stmts[newidx][:type]
         if isa(typ, UnionAll)
             typ = unwrap_unionall(typ)
         end
         # Could still end up here if we tried to setfield! and immutable, which would
         # error at runtime, but is not illegal to have in the IR.
         ismutabletype(typ) || continue
+        typ = typ::DataType
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
-        ok = true
         for use in defuse.uses
-            stmt = ir[SSAValue(use)]
+            stmt = ir[SSAValue(use)] # == `getfield` call
             # We may have discovered above that this use is dead
             # after the getfield elim of immutables. In that case,
             # it would have been deleted. That's fine, just ignore
             # the use in that case.
             stmt === nothing && continue
-            field = try_compute_fieldidx_expr(typ, stmt)
-            field === nothing && (ok = false; break)
+            field = try_compute_fieldidx_stmt(ir, stmt::Expr, typ)
+            field === nothing && @goto skip
             push!(fielddefuse[field].uses, use)
         end
-        ok || continue
-        for use in defuse.defs
-            field = try_compute_fieldidx_expr(typ, ir[SSAValue(use)])
-            field === nothing && (ok = false; break)
-            push!(fielddefuse[field].defs, use)
+        for def in defuse.defs
+            stmt = ir[SSAValue(def)]::Expr # == `setfield!` call
+            field = try_compute_fieldidx_stmt(ir, stmt, typ)
+            field === nothing && @goto skip
+            push!(fielddefuse[field].defs, def)
         end
-        ok || continue
         # Check that the defexpr has defined values for all the fields
         # we're accessing. In the future, we may want to relax this,
         # but we should come up with semantics for well defined semantics
         # for uninitialized fields first.
-        for (fidx, du) in pairs(fielddefuse)
+        ndefuse = length(fielddefuse)
+        blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse)
+        for fidx in 1:ndefuse
+            du = fielddefuse[fidx]
             isempty(du.uses) && continue
+            push!(du.defs, newidx)
+            ldu = compute_live_ins(ir.cfg, du)
+            phiblocks = isempty(ldu.live_in_bbs) ? Int[] : iterated_dominance_frontier(ir.cfg, ldu, domtree)
+            allblocks = sort(vcat(phiblocks, ldu.def_bbs))
+            blocks[fidx] = phiblocks, allblocks
             if fidx + 1 > length(defexpr.args)
-                ok = false
-                break
+                for use in du.uses
+                    has_safe_def(ir, domtree, allblocks, du, newidx, use) || @goto skip
+                end
             end
         end
-        ok || continue
-        preserve_uses = IdDict{Int, Vector{Any}}((idx=>Any[] for idx in IdSet{Int}(defuse.ccall_preserve_uses)))
         # Everything accounted for. Go field by field and perform idf
-        for (fidx, du) in pairs(fielddefuse)
+        preserve_uses = isempty(defuse.ccall_preserve_uses) ? nothing :
+            IdDict{Int, Vector{Any}}((idx=>Any[] for idx in SPCSet(defuse.ccall_preserve_uses)))
+        for fidx in 1:ndefuse
+            du = fielddefuse[fidx]
             ftyp = fieldtype(typ, fidx)
             if !isempty(du.uses)
-                push!(du.defs, idx)
-                ldu = compute_live_ins(ir.cfg, du)
-                phiblocks = Int[]
-                if !isempty(ldu.live_in_bbs)
-                    phiblocks = idf(ir.cfg, ldu, domtree)
-                end
+                phiblocks, allblocks = blocks[fidx]
                 phinodes = IdDict{Int, SSAValue}()
                 for b in phiblocks
-                    n = PhiNode()
                     phinodes[b] = insert_node!(ir, first(ir.cfg.blocks[b].stmts),
-                        NewInstruction(n, ftyp))
+                        NewInstruction(PhiNode(), ftyp))
                 end
                 # Now go through all uses and rewrite them
-                allblocks = sort(vcat(phiblocks, ldu.def_bbs))
                 for stmt in du.uses
                     ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt)
                 end
                 if !isbitstype(ftyp)
-                    for (use, list) in preserve_uses
-                        push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use))
+                    if preserve_uses !== nothing
+                        for (use, list) in preserve_uses
+                            push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use))
+                        end
                     end
                 end
                 for b in phiblocks
+                    n = ir[phinodes[b]]::PhiNode
                     for p in ir.cfg.blocks[b].preds
-                        n = ir[phinodes[b]]::PhiNode
                         push!(n.edges, p)
                         push!(n.values, compute_value_for_block(ir, domtree,
                             allblocks, du, phinodes, fidx, p))
@@ -862,32 +923,65 @@ function getfield_elim_pass!(ir::IRCode)
                 end
             end
             for stmt in du.defs
-                stmt == idx && continue
+                stmt == newidx && continue
                 ir[SSAValue(stmt)] = nothing
             end
-            continue
         end
-        isempty(defuse.ccall_preserve_uses) && continue
-        push!(intermediaries, idx)
+        preserve_uses === nothing && continue
+        push!(intermediaries, newidx)
         # Insert the new preserves
         for (use, new_preserves) in preserve_uses
-            useexpr = ir[SSAValue(use)]::Expr
-            nccallargs = length(useexpr.args[3]::SimpleVector)
-            old_preserves = let intermediaries = intermediaries
-                filter(ssa->!isa(ssa, SSAValue) || !(ssa.id in intermediaries), useexpr.args[(6+nccallargs):end])
-            end
-            new_expr = Expr(:foreigncall, useexpr.args[1:(6+nccallargs-1)]...,
-                old_preserves..., new_preserves...)
-            ir[SSAValue(use)] = new_expr
+            ir[SSAValue(use)] = form_new_preserves(ir[SSAValue(use)]::Expr, intermediaries, new_preserves)
         end
+
+        @label skip
     end
-    ir
+end
+
+function form_new_preserves(origex::Expr, preserved::Vector{Int}, new_preserves::Vector{Any}) # build a fresh `:foreigncall` with an updated trailing argument list
+    newex = Expr(:foreigncall)
+    nccallargs = length(origex.args[3]::SimpleVector)
+    for i in 1:(6+nccallargs-1) # copy the leading arguments (everything before index `6+nccallargs`) as-is
+        push!(newex.args, origex.args[i])
+    end
+    for i in (6+nccallargs):length(origex.args) # walk the original trailing arguments (the preserve list as the callers treat it)
+        x = origex.args[i]
+        if isa(x, SSAValue) && x.id in preserved
+            continue # drop SSA values whose id is listed in `preserved`
+        end
+        push!(newex.args, x)
+    end
+    for i in 1:length(new_preserves) # append the new preserves after the surviving old ones
+        push!(newex.args, new_preserves[i])
+    end
+    return newex # `origex` is only read, never mutated
+end
+
+"""
+    canonicalize_typeassert!(compact::IncrementalCompact, idx::Int, stmt::Expr)
+
+Canonicalizes `X = typeassert(Y, T)::S` into `typeassert(Y, T); X = π(Y, S)`
+so that subsequent analysis only has to deal with the latter form.
+
+N.B. Inference may have derived a type `S` that is more precise than just `T`, but from
+here on out, there's no problem with just using `S`.
+We should probably have a version of `typeassert` that's defined not to return its value to
+make life easier for the backend.
+"""
+function canonicalize_typeassert!(compact::IncrementalCompact, idx::Int, stmt::Expr)
+    length(stmt.args) == 3 || return
+    pi = insert_node_here!(compact,
+        NewInstruction(
+            PiNode(stmt.args[2], compact.result[idx][:type]),
+            compact.result[idx][:type],
+            compact.result[idx][:line]), true)
+    compact.ssa_rename[compact.idx-1] = pi
 end
 
 function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int)
     # return whether this made a change
     if isa(compact.result[idx][:inst], PhiNode)
-        return maybe_erase_unused!(extra_worklist, compact, idx, val -> phi_uses[val.id] -= 1)
+        return maybe_erase_unused!(extra_worklist, compact, idx, val::SSAValue -> phi_uses[val.id] -= 1)
     else
         return maybe_erase_unused!(extra_worklist, compact, idx)
     end
@@ -902,7 +996,7 @@ function count_uses(@nospecialize(stmt), uses::Vector{Int})
     end
 end
 
-function mark_phi_cycles(compact::IncrementalCompact, safe_phis::BitSet, phi::Int)
+function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::Int)
     worklist = Int[]
     push!(worklist, phi)
     while !isempty(worklist)
@@ -918,6 +1012,25 @@ function mark_phi_cycles(compact::IncrementalCompact, safe_phis::BitSet, phi::In
     end
 end
 
+"""
+    adce_pass!(ir::IRCode) -> newir::IRCode
+
+Aggressive Dead Code Elimination pass.
+
+In addition to a simple DCE for unused values and allocations,
+this pass also nullifies `typeassert` calls that can be proved to be no-ops,
+in order to allow LLVM to emit simpler code down the road.
+
+Note that this pass is more effective after SROA optimization (i.e. `sroa_pass!`),
+since SROA often allows this pass to:
+- eliminate allocations of objects whose field references have all been replaced with scalar values, and
+- nullify `typeassert` calls whose first operand has been replaced with a scalar value
+  (which may have introduced new type information that inference did not understand)
+
+Also note that currently this pass _needs_ to run after `sroa_pass!`, because
+the `typeassert` elimination depends on the transformation done by `canonicalize_typeassert!`
+within `sroa_pass!`, which redirects references to a `typeassert`ed value to the corresponding `PiNode`.
+"""
 function adce_pass!(ir::IRCode)
     phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes))
     all_phis = Int[]
@@ -925,6 +1038,12 @@ function adce_pass!(ir::IRCode)
     for ((_, idx), stmt) in compact
         if isa(stmt, PhiNode)
             push!(all_phis, idx)
+        elseif is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3
+            # nullify safe `typeassert` calls
+            ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact))
+            if isexact && argextype(stmt.args[2], compact) ⊑ ty
+                compact[idx] = nothing
+            end
         end
     end
     non_dce_finish!(compact)
@@ -945,11 +1064,11 @@ function adce_pass!(ir::IRCode)
     changed = true
     while changed
         changed = false
-        safe_phis = BitSet()
+        safe_phis = SPCSet()
         for phi in all_phis
             # Save any phi cycles that have non-phi uses
             if compact.used_ssas[phi] - phi_uses[phi] != 0
-                mark_phi_cycles(compact, safe_phis, phi)
+                mark_phi_cycles!(compact, safe_phis, phi)
             end
         end
         for phi in all_phis
@@ -967,8 +1086,6 @@ function adce_pass!(ir::IRCode)
 end
 
 function type_lift_pass!(ir::IRCode)
-    type_ctx_uses = Vector{Vector{Int}}[]
-    has_non_type_ctx_uses = IdSet{Int}()
     lifted_undef = IdDict{Int, Any}()
     insts = ir.stmts
     for idx in 1:length(insts)
@@ -1066,7 +1183,7 @@ function type_lift_pass!(ir::IRCode)
                                         if haskey(processed, id)
                                             val = processed[id]
                                         else
-                                            push!(worklist, (id, up_id, new_phi, i))
+                                            push!(worklist, (id, up_id, new_phi::SSAValue, i))
                                             continue
                                         end
                                     else
@@ -1207,12 +1324,12 @@ function cfg_simplify!(ir::IRCode)
         # Compute (renamed) successors and predecessors given (renamed) block
         function compute_succs(i)
             orig_bb = follow_merged_succ(result_bbs[i])
-            return map(i -> bb_rename_succ[i], bbs[orig_bb].succs)
+            return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs]
         end
         function compute_preds(i)
             orig_bb = result_bbs[i]
             preds = bbs[orig_bb].preds
-            return map(pred -> bb_rename_pred[pred], preds)
+            return Int[bb_rename_pred[pred] for pred in preds]
         end
 
         BasicBlock[
diff --git a/base/compiler/ssair/queries.jl b/base/compiler/ssair/queries.jl
deleted file mode 100644
index 503db9b7d8774..0000000000000
--- a/base/compiler/ssair/queries.jl
+++ /dev/null
@@ -1,103 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-Determine whether a statement is side-effect-free, i.e. may be removed if it has no uses.
-"""
-function stmt_effect_free(@nospecialize(stmt), @nospecialize(rt), src, sptypes::Vector{Any})
-    isa(stmt, PiNode) && return true
-    isa(stmt, PhiNode) && return true
-    isa(stmt, ReturnNode) && return false
-    isa(stmt, GotoNode) && return false
-    isa(stmt, GotoIfNot) && return false
-    isa(stmt, Slot) && return false # Slots shouldn't occur in the IR at this point, but let's be defensive here
-    isa(stmt, GlobalRef) && return isdefined(stmt.mod, stmt.name)
-    if isa(stmt, Expr)
-        e = stmt::Expr
-        head = e.head
-        if head === :static_parameter
-            etyp = sptypes[e.args[1]]
-            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
-            return isa(etyp, Const)
-        end
-        ea = e.args
-        if head === :call
-            f = argextype(ea[1], src, sptypes)
-            f = singleton_type(f)
-            f === nothing && return false
-            is_return_type(f) && return true
-            if isa(f, IntrinsicFunction)
-                intrinsic_effect_free_if_nothrow(f) || return false
-                return intrinsic_nothrow(f,
-                        Any[argextype(ea[i], src, sptypes) for i = 2:length(ea)])
-            end
-            contains_is(_PURE_BUILTINS, f) && return true
-            contains_is(_PURE_OR_ERROR_BUILTINS, f) || return false
-            rt === Bottom && return false
-            return _builtin_nothrow(f, Any[argextype(ea[i], src, sptypes) for i = 2:length(ea)], rt)
-        elseif head === :new
-            a = ea[1]
-            typ = argextype(a, src, sptypes)
-            # `Expr(:new)` of unknown type could raise arbitrary TypeError.
-            typ, isexact = instanceof_tfunc(typ)
-            isexact || return false
-            isconcretedispatch(typ) || return false
-            typ = typ::DataType
-            fieldcount(typ) >= length(ea) - 1 || return false
-            for fld_idx in 1:(length(ea) - 1)
-                eT = argextype(ea[fld_idx + 1], src, sptypes)
-                fT = fieldtype(typ, fld_idx)
-                eT ⊑ fT || return false
-            end
-            return true
-        elseif head === :new_opaque_closure
-            length(ea) < 5 && return false
-            a = ea[1]
-            typ = argextype(a, src, sptypes)
-            typ, isexact = instanceof_tfunc(typ)
-            isexact || return false
-            typ ⊑ Tuple || return false
-            isva = argextype(ea[2], src, sptypes)
-            rt_lb = argextype(ea[3], src, sptypes)
-            rt_ub = argextype(ea[4], src, sptypes)
-            src = argextype(ea[5], src, sptypes)
-            if !(isva ⊑ Bool && rt_lb ⊑ Type && rt_ub ⊑ Type && src ⊑ Method)
-                return false
-            end
-            return true
-        elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck
-            return true
-        else
-            # e.g. :loopinfo
-            return false
-        end
-    end
-    return true
-end
-
-function abstract_eval_ssavalue(s::SSAValue, src::IRCode)
-    return types(src)[s]
-end
-
-function abstract_eval_ssavalue(s::SSAValue, src::IncrementalCompact)
-    return types(src)[s]
-end
-
-function compact_exprtype(compact::IncrementalCompact, @nospecialize(value))
-    if isa(value, AnySSAValue)
-        return types(compact)[value]
-    elseif isa(value, Argument)
-        return compact.ir.argtypes[value.n]
-    end
-    return argextype(value, compact.ir, compact.ir.sptypes)
-end
-argextype(@nospecialize(value), compact::IncrementalCompact, sptypes::Vector{Any}) = compact_exprtype(compact, value)
-
-is_tuple_call(ir::IRCode, @nospecialize(def)) = isa(def, Expr) && is_known_call(def, tuple, ir, ir.sptypes)
-is_tuple_call(compact::IncrementalCompact, @nospecialize(def)) = isa(def, Expr) && is_known_call(def, tuple, compact)
-function is_known_call(e::Expr, @nospecialize(func), src::IncrementalCompact)
-    if e.head !== :call
-        return false
-    end
-    f = compact_exprtype(src, e.args[1])
-    return singleton_type(f) === func
-end
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index ba3a637f61e93..7977826426b12 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -47,7 +47,7 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         # XXX: this is wrong if `sig` is not a concretetype method
         # more correct would be to use `fieldtype(sig, i)`, but that would obscure / discard Varargs information in show
         sig = linfo.specTypes == Tuple ? Core.svec() : Base.unwrap_unionall(linfo.specTypes).parameters::Core.SimpleVector
-        print_arg(i) = sprint() do io
+        print_arg(i) = sprint(; context=io) do io
             show_unquoted(io, stmt.args[i], indent)
             if (i - 1) <= length(sig)
                 print(io, "::", sig[i - 1])
@@ -79,14 +79,15 @@ show_unquoted(io::IO, val::Argument, indent::Int, prec::Int) = show_unquoted(io,
 
 show_unquoted(io::IO, stmt::PhiNode, indent::Int, ::Int) = show_unquoted_phinode(io, stmt, indent, "%")
 function show_unquoted_phinode(io::IO, stmt::PhiNode, indent::Int, prefix::String)
-    args = map(1:length(stmt.edges)) do i
+    args = String[let
         e = stmt.edges[i]
         v = !isassigned(stmt.values, i) ? "#undef" :
-            sprint() do io′
+            sprint(; context=io) do io′
                 show_unquoted(io′, stmt.values[i], indent)
             end
-        return "$prefix$e => $v"
-    end
+        "$prefix$e => $v"
+        end for i in 1:length(stmt.edges)
+    ]
     print(io, "φ ", '(')
     join(io, args, ", ")
     print(io, ')')
@@ -381,7 +382,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                     # if so, drop all existing calls to it from the top of the context
                     # AND check if instead the context was previously printed that way
                     # but now has removed the recursive frames
-                    let method = method_name(context[nctx])
+                    let method = method_name(context[nctx]) # last matching frame
                         if (nctx < nframes && method_name(DI[nframes - nctx]) === method) ||
                            (nctx < length(context) && method_name(context[nctx + 1]) === method)
                             update_line_only = true
@@ -390,8 +391,15 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                             end
                         end
                     end
-                elseif length(context) > 0
-                    update_line_only = true
+                end
+                # look at the first non-matching element to see if we are only changing the line number
+                if !update_line_only && nctx < length(context) && nctx < nframes
+                    let CtxLine = context[nctx + 1],
+                        FrameLine = DI[nframes - nctx]
+                        if method_name(CtxLine) === method_name(FrameLine)
+                            update_line_only = true
+                        end
+                    end
                 end
             elseif nctx < length(context) && nctx < nframes
                 # look at the first non-matching element to see if we are only changing the line number
@@ -628,7 +636,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info
         if new_node_type === UNDEF # try to be robust against errors
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            line_info_postprinter(io, new_node_type, node_idx in used)
+            line_info_postprinter(IOContext(io, :idx => node_idx), new_node_type, node_idx in used)
         end
         println(io)
         i += 1
@@ -643,7 +651,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info
             # This is an error, but can happen if passes don't update their type information
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            line_info_postprinter(io, type, idx in used)
+            line_info_postprinter(IOContext(io, :idx => idx), type, idx in used)
         end
     end
     println(io)
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index a89b03d04ff59..0c7c9e3c72d25 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -33,21 +33,11 @@ function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt))
 end
 
 
-function lift_defuse(cfg::CFG, defuse)
-    map(defuse) do slot
-        SlotInfo(
-            Int[block_for_inst(cfg, x) for x in slot.defs],
-            Int[block_for_inst(cfg, x) for x in slot.uses],
-            slot.any_newvar
-        )
-    end
-end
-
 function scan_slot_def_use(nargs::Int, ci::CodeInfo, code::Vector{Any})
     nslots = length(ci.slotflags)
     result = SlotInfo[SlotInfo() for i = 1:nslots]
     # Set defs for arguments
-    for var in result[1:(1+nargs)]
+    for var in result[1:nargs]
         push!(var.defs, 0)
     end
     for idx in 1:length(code)
@@ -98,10 +88,10 @@ end
 function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(stmt::Union{SlotNumber, TypedSlot}), @nospecialize(ssa))
     # We don't really have the information here to get rid of these.
     # We'll do so later
-    if ssa === undef_token
+    if ssa === UNDEF_TOKEN
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any))
-        return undef_token
+        return UNDEF_TOKEN
     end
     if !isa(ssa, Argument) && !(ssa === nothing) && ((ci.slotflags[slot] & SLOT_USEDUNDEF) != 0)
         # insert a temporary node. type_lift_pass! will remove it
@@ -144,7 +134,7 @@ function fixemup!(cond, rename, ir::IRCode, ci::CodeInfo, idx::Int, @nospecializ
                 return true
             else
                 ssa = rename(val)
-                if ssa === undef_token
+                if ssa === UNDEF_TOKEN
                     return false
                 elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue)
                     return true
@@ -162,7 +152,7 @@ function fixemup!(cond, rename, ir::IRCode, ci::CodeInfo, idx::Int, @nospecializ
             x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename(val))
             # We inserted an undef error node. Delete subsequent statement
             # to avoid confusing the optimizer
-            if x === undef_token
+            if x === UNDEF_TOKEN
                 return nothing
             end
             op[] = x
@@ -183,17 +173,18 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r
     return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt)
 end
 
-function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}, flags::Vector{UInt8})
+function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any})
     # Remove `nothing`s at the end, we don't handle them well
     # (we expect the last instruction to be a terminator)
     ssavaluetypes = ci.ssavaluetypes::Vector{Any}
+    (; codelocs, ssaflags) = ci
     for i = length(code):-1:1
         if code[i] !== nothing
             resize!(code, i)
             resize!(ssavaluetypes, i)
-            resize!(ci.codelocs, i)
+            resize!(codelocs, i)
             resize!(info, i)
-            resize!(flags, i)
+            resize!(ssaflags, i)
             break
         end
     end
@@ -203,9 +194,9 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}
     if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
         push!(code, ReturnNode())
         push!(ssavaluetypes, Union{})
-        push!(ci.codelocs, 0)
+        push!(codelocs, 0)
         push!(info, nothing)
-        push!(flags, 0x00)
+        push!(ssaflags, IR_FLAG_NULL)
     end
     nothing
 end
@@ -240,41 +231,49 @@ struct BlockLiveness
     live_in_bbs::Vector{Int}
 end
 
-# Run iterated dominance frontier
-#
-# The algorithm we have here essentially follows LLVM, which itself is a
-# a cleaned up version of the linear-time algorithm described in
-#
-#  A Linear Time Algorithm for Placing phi-Nodes (by Sreedhar and Gao)
-#
-# The algorithm here, is quite straightforward. Suppose we have a CFG:
-#
-# A -> B -> D -> F
-#  \-> C -------/
-#
-# and a corresponding dominator tree:
-#
-# A
-# |- B - D
-# |- C
-# |- F
-#
-# Now, for every definition of our slot, we simply walk down the dominator
-# tree and look for any edges that leave the sub-domtree rooted by our definition.
-#
-# E.g. in our example above, if we have a definition in `B`, we look at its successors,
-#      which is only `D`, which is dominated by `B` and hence doesn't need a phi node.
-#      Then we descend down the subtree rooted at `B` and end up in `D`. `D` has a successor
-#      `F`, which is not part of the current subtree, (i.e. not dominated by `B`), so it
-#      needs a phi node.
-#
-# Now, the key insight of that algorithm is that we have two defs, in blocks `A` and `B`,
-# and `A` dominates `B`, then we do not need to recurse into `B`, because the set of
-# potential backedges from a subtree rooted at `B` (to outside the subtree) is a strict
-# subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted
-# at `A`). Note however that this does not work the other way. Thus, the algorithm
-# needs to make sure that we always visit `B` before `A`.
-function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
+"""
+    iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
+        -> phinodes::Vector{Int}
+
+Run iterated dominance frontier.
+The algorithm we have here essentially follows LLVM, which itself is
+a cleaned-up version of the linear-time algorithm described in [^SG95].
+
+The algorithm here is quite straightforward. Suppose we have a CFG:
+
+    A -> B -> D -> F
+     \\-> C ------>/
+
+and a corresponding dominator tree:
+
+    A
+    |- B - D
+    |- C
+    |- F
+
+Now, for every definition of our slot, we simply walk down the dominator
+tree and look for any edges that leave the sub-domtree rooted by our definition.
+
+In our example above, if we have a definition in `B`, we look at its successors,
+which is only `D`, which is dominated by `B` and hence doesn't need a ϕ-node.
+Then we descend down the subtree rooted at `B` and end up in `D`. `D` has a successor
+`F`, which is not part of the current subtree, (i.e. not dominated by `B`),
+so it needs a ϕ-node.
+
+Now, the key insight of that algorithm is that if we have two defs, in blocks `A` and `B`,
+and `A` dominates `B`, then we do not need to recurse into `B`, because the set of
+potential backedges from a subtree rooted at `B` (to outside the subtree) is a strict
+subset of those backedges from a subtree rooted at `A` (to outside the subtree rooted
+at `A`). Note however that this does not work the other way. Thus, the algorithm
+needs to make sure that we always visit `B` before `A`.
+
+[^SG95]: Vugranam C. Sreedhar and Guang R. Gao. 1995.
+         A linear time algorithm for placing φ-nodes.
+         In Proceedings of the 22nd ACM SIGPLAN-SIGACT symposium on Principles of programming languages (POPL '95).
+         Association for Computing Machinery, New York, NY, USA, 62–73.
+         DOI: <https://doi.org/10.1145/199448.199464>.
+"""
+function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
     # This should be a priority queue, but TODO - sorted array for now
     defs = liveness.def_bbs
     pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)]
@@ -372,11 +371,11 @@ function rename_phinode_edges(node, bb, result_order, bb_rename)
 end
 
 """
-    Sort the basic blocks in `ir` into domtree order (i.e. if bb`` is higher in
-    the domtree than bb2, it will come first in the linear order). The resulting
-    ir has the property that a linear traversal of basic blocks will also be a
-    RPO traversal and in particular, any use of an SSA value must come after (by linear
-    order) its definition.
+Sort the basic blocks in `ir` into domtree order (i.e. if `bb1` is higher in
+the domtree than `bb2`, it will come first in the linear order). The resulting
+`ir` has the property that a linear traversal of basic blocks will also be an
+RPO traversal and in particular, any use of an SSA value must come after
+(by linear order) its definition.
 """
 function domsort_ssa!(ir::IRCode, domtree::DomTree)
     # First compute the new order of basic blocks
@@ -524,12 +523,14 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
     return new_ir
 end
 
-function compute_live_ins(cfg::CFG, defuse)
+compute_live_ins(cfg::CFG, slot::SlotInfo) = compute_live_ins(cfg, slot.defs, slot.uses)
+
+function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int})
     # We remove from `uses` any block where all uses are dominated
     # by a def. This prevents insertion of dead phi nodes at the top
     # of such a block if that block happens to be in a loop
-    ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in defuse.uses]
-    for x in defuse.defs
+    ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in uses]
+    for x in defs
         push!(ordered, (x, block_for_inst(cfg, x), false))
     end
     ordered = sort(ordered, by=x->x[1])
@@ -586,11 +587,10 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode
     return new_typ
 end
 
-function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int,
-                        slottypes::Vector{Any})
+function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
+                        defuses::Vector{SlotInfo}, slottypes::Vector{Any})
     code = ir.stmts.inst
     cfg = ir.cfg
-    left = Int[]
     catch_entry_blocks = Tuple{Int, Int}[]
     for idx in 1:length(code)
         stmt = code[idx]
@@ -616,7 +616,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
     for (_, exc) in catch_entry_blocks
         phicnodes[exc] = Vector{Tuple{SlotNumber, NewSSAValue, PhiCNode}}()
     end
-    @timeit "idf" for (idx, slot) in Iterators.enumerate(defuse)
+    @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses)
         # No uses => no need for phi nodes
         isempty(slot.uses) && continue
         # TODO: Restore this optimization
@@ -659,7 +659,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 end
             end
         end
-        phiblocks = idf(cfg, live, domtree)
+        phiblocks = iterated_dominance_frontier(cfg, live, domtree)
         for block in phiblocks
             push!(phi_slots[block], idx)
             node = PhiNode()
@@ -667,14 +667,13 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts))
             push!(phi_nodes[block], ssa=>node)
         end
-        push!(left, idx)
     end
     # Perform SSA renaming
     initial_incoming_vals = Any[
-        if 0 in defuse[x].defs
+        if 0 in defuses[x].defs
             Argument(x)
-        elseif !defuse[x].any_newvar
-            undef_token
+        elseif !defuses[x].any_newvar
+            UNDEF_TOKEN
         else
             SSAValue(-2)
         end for x in 1:length(ci.slotflags)
@@ -711,7 +710,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 continue
             end
             push!(node.edges, pred)
-            if incoming_val === undef_token
+            if incoming_val === UNDEF_TOKEN
                 resize!(node.values, length(node.values)+1)
             else
                 push!(node.values, incoming_val)
@@ -721,7 +720,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             if isa(incoming_val, NewSSAValue)
                 push!(type_refine_phi, ssaval.id)
             end
-            typ = incoming_val === undef_token ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes)
+            typ = incoming_val === UNDEF_TOKEN ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes)
             old_entry = new_nodes.stmts[ssaval.id]
             if isa(typ, DelayedTyp)
                 push!(type_refine_phi, ssaval.id)
@@ -743,7 +742,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         if eidx !== nothing
             for (slot, _, node) in phicnodes[catch_entry_blocks[eidx][2]]
                 ival = incoming_vals[slot_id(slot)]
-                ivalundef = ival === undef_token
+                ivalundef = ival === UNDEF_TOKEN
                 unode = ivalundef ? UpsilonNode() : UpsilonNode(ival)
                 typ = ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)
                 push!(node.values,
@@ -756,7 +755,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             stmt = code[idx]
             (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue
             if isa(stmt, NewvarNode)
-                incoming_vals[slot_id(stmt.slot)] = undef_token
+                incoming_vals[slot_id(stmt.slot)] = UNDEF_TOKEN
                 code[idx] = nothing
             else
                 stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals)
@@ -770,13 +769,13 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                     id = slot_id(stmt.args[1])
                     val = stmt.args[2]
                     typ = typ_for_val(val, ci, ir.sptypes, idx, slottypes)
-                    # Having undef_token appear on the RHS is possible if we're on a dead branch.
+                    # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch.
                     # Do something reasonable here, by marking the LHS as undef as well.
-                    if val !== undef_token
+                    if val !== UNDEF_TOKEN
                         incoming_vals[id] = SSAValue(make_ssa!(ci, code, idx, id, typ)::Int)
                     else
                         code[idx] = nothing
-                        incoming_vals[id] = undef_token
+                        incoming_vals[id] = UNDEF_TOKEN
                     end
                     eidx = item
                     while haskey(exc_handlers, eidx)
@@ -784,7 +783,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                         cidx = findfirst(x->slot_id(x[1]) == id, phicnodes[exc])
                         if cidx !== nothing
                             node = UpsilonNode(incoming_vals[id])
-                            if incoming_vals[id] === undef_token
+                            if incoming_vals[id] === UNDEF_TOKEN
                                 node = UpsilonNode()
                                 typ = MaybeUndef(Union{})
                             end
@@ -825,7 +824,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             new_dest = block_for_inst(cfg, stmt.dest)
             if new_dest == bb+1
                 # Drop this node - it's a noop
-                new_code[idx] = stmt.cond
+                new_code[idx] = Expr(:call, GlobalRef(Core, :typeassert), stmt.cond, GlobalRef(Core, :Bool))
             else
                 new_code[idx] = GotoIfNot(stmt.cond, new_dest)
             end
@@ -873,7 +872,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         changed = false
         for new_idx in type_refine_phi
             node = new_nodes.stmts[new_idx]
-            new_typ = recompute_type(node[:inst], ci, ir, ir.sptypes, slottypes)
+            new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes)
             if !(node[:type] ⊑ new_typ) || !(new_typ ⊑ node[:type])
                 node[:type] = new_typ
                 changed = true
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index 653923ace6e8e..c7c34edd84308 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -202,6 +202,10 @@ function verify_ir(ir::IRCode, print::Bool=true)
                         @verify_error "SSAValue as assignment LHS"
                         error("")
                     end
+                    if stmt.args[2] isa GlobalRef
+                        # undefined GlobalRef as assignment RHS is OK
+                        continue
+                    end
                 elseif stmt.head === :gc_preserve_end
                     # We allow gc_preserve_end tokens to span across try/catch
                     # blocks, which isn't allowed for regular SSA values, so
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index a6ffee299c4f5..6360f1697d417 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -1,73 +1,77 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+@nospecialize
+
 """
-    struct MethodMatchInfo
+    call::CallMeta
 
-Captures the result of a `method_matches` lookup for the given call. This
-info may then be used by the optimizer to inline the matches, without having
-to re-consult the method table. This info is illegal on any statement that is
-not a call to a generic function.
+A simple struct that captures both the return type (`call.rt`)
+and any additional information (`call.info`) for a given generic call.
 """
-struct MethodMatchInfo
-    results::Union{Missing, MethodLookupResult}
+struct CallMeta
+    rt::Any
+    info::Any
 end
 
 """
-    struct MethodResultPure
+    info::MethodMatchInfo
 
-This struct represents a method result constant was proven to be
-effect-free, including being no-throw (typically because the value was computed
-by calling an `@pure` function).
+Captures the result of a `:jl_matching_methods` lookup for the given call (`info.results`).
+This info may then be used by the optimizer to inline the matches, without having
+to re-consult the method table. This info is illegal on any statement that is
+not a call to a generic function.
 """
-struct MethodResultPure
-    info::Any
-end
-let instance = MethodResultPure(false)
-    global MethodResultPure
-    MethodResultPure() = instance
+struct MethodMatchInfo
+    results::MethodLookupResult
 end
 
 """
-    struct UnionSplitInfo
+    info::UnionSplitInfo
 
 If inference decides to partition the method search space by splitting unions,
 it will issue a method lookup query for each such partition. This info indicates
-that such partitioning happened and wraps the corresponding MethodMatchInfo for
-each partition. This info is illegal on any statement that is not a call to a
-generic function.
+that such partitioning happened and wraps the corresponding `MethodMatchInfo` for
+each partition (`info.matches::Vector{MethodMatchInfo}`).
+This info is illegal on any statement that is not a call to a generic function.
 """
 struct UnionSplitInfo
     matches::Vector{MethodMatchInfo}
 end
 
 """
-    struct CallMeta
+    info::MethodResultPure
 
-A simple struct that captures both the return type (`rt`) and any additional information
-(`info`) for a given generic call.
+This struct represents a method result constant that was proven to be
+effect-free, including being no-throw (typically because the value was computed
+by calling an `@pure` function).
 """
-struct CallMeta
-    rt::Any
-    info::Any
+struct MethodResultPure
+    info::Union{MethodMatchInfo,UnionSplitInfo,Bool}
+end
+let instance = MethodResultPure(false)
+    global MethodResultPure
+    MethodResultPure() = instance
 end
 
 """
-    struct AbstractIterationInfo
+    info::AbstractIterationInfo
 
 Captures all the information for abstract iteration analysis of a single value.
-Each (abstract) call to `iterate`, corresponds to one entry in `each`.
+Each (abstract) call to `iterate`, corresponds to one entry in `info.each::Vector{CallMeta}`.
 """
 struct AbstractIterationInfo
     each::Vector{CallMeta}
 end
 
+const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo}
+
 """
-    struct ApplyCallInfo
+    info::ApplyCallInfo
 
 This info applies to any call of `_apply_iterate(...)` and captures both the
 info of the actual call being applied and the info for any implicit call
 to the `iterate` function. Note that it is possible for the call itself
-to be yet another `_apply_iterate`, in which case the `.call` field will
+to be yet another `_apply_iterate`, in which case the `info.call` field will
 be another `ApplyCallInfo`. This info is illegal on any statement that is
 not an `_apply_iterate` call.
 """
@@ -75,13 +79,13 @@ struct ApplyCallInfo
     # The info for the call itself
     call::Any
     # AbstractIterationInfo for each argument, if applicable
-    arginfo::Vector{Union{Nothing, AbstractIterationInfo}}
+    arginfo::Vector{MaybeAbstractIterationInfo}
 end
 
 """
-    struct UnionSplitApplyCallInfo
+    info::UnionSplitApplyCallInfo
 
-Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than MethodMatchInfo.
+Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`.
 This info is illegal on any statement that is not an `_apply_iterate` call.
 """
 struct UnionSplitApplyCallInfo
@@ -89,42 +93,70 @@ struct UnionSplitApplyCallInfo
 end
 
 """
-    struct ConstCallInfo
+    info::ConstCallInfo
 
-Precision for this call was improved using constant information. This info
-keeps a reference to the result that was used (or created for these)
-constant information.
+The precision of this call was improved using constant information.
+In addition to the original call information `info.call`, this info also keeps
+the inference results with constant information `info.results::Vector{Union{Nothing,InferenceResult}}`.
 """
 struct ConstCallInfo
-    call::Any
+    call::Union{MethodMatchInfo,UnionSplitInfo}
     results::Vector{Union{Nothing,InferenceResult}}
 end
 
 """
-    struct InvokeCallInfo
+    info::InvokeCallInfo
 
-Represents a resolved call to `invoke`, carrying the Method match of the
-method being processed.
+Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMatch` of
+the method that has been processed.
+Optionally keeps `info.result::InferenceResult` that keeps constant information.
 """
 struct InvokeCallInfo
     match::MethodMatch
     result::Union{Nothing,InferenceResult}
 end
 
+"""
+    info::OpaqueClosureCallInfo
+
+Represents a resolved call of an opaque closure, carrying the `info.match::MethodMatch` of
+the method that has been processed.
+Optionally keeps `info.result::InferenceResult` that keeps constant information.
+"""
 struct OpaqueClosureCallInfo
     match::MethodMatch
+    result::Union{Nothing,InferenceResult}
 end
 
+"""
+    info::OpaqueClosureCreateInfo
+
+This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta`
+carrying the inference result of an unreal, partially specialized call (i.e. specialized on
+the closure environment, but not on the argument types of the opaque closure) in order to
+allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it.
+"""
 struct OpaqueClosureCreateInfo
     unspec::CallMeta
+    function OpaqueClosureCreateInfo(unspec::CallMeta)
+        @assert isa(unspec.info, OpaqueClosureCallInfo)
+        return new(unspec)
+    end
 end
 
 # Stmt infos that are used by external consumers, but not by optimization.
 # These are not produced by default and must be explicitly opted into by
 # the AbstractInterpreter.
 
+"""
+    info::ReturnTypeCallInfo
+
+Represents a resolved call of `Core.Compiler.return_type`.
+`info.info` wraps the info corresponding to the call that this `Core.Compiler.return_type` call
+was supposed to analyze.
+"""
 struct ReturnTypeCallInfo
-    # The info corresponding to the call that return_type was supposed to
-    # analyze.
     info::Any
 end
+
+@specialize
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index e078567f9a59b..4444242bafc9c 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -10,7 +10,7 @@ const _NAMEDTUPLE_NAME = NamedTuple.body.body.name
 
 const INT_INF = typemax(Int) # integer infinity
 
-const N_IFUNC = reinterpret(Int32, arraylen) + 1
+const N_IFUNC = reinterpret(Int32, have_fma) + 1
 const T_IFUNC = Vector{Tuple{Int, Int, Any}}(undef, N_IFUNC)
 const T_IFUNC_COST = Vector{Int}(undef, N_IFUNC)
 const T_FFUNC_KEY = Vector{Any}()
@@ -67,7 +67,7 @@ add_tfunc(throw, 1, 1, (@nospecialize(x)) -> Bottom, 0)
 # if istype is true, the actual runtime value will definitely be a type (e.g. this is false for Union{Type{Int}, Int})
 function instanceof_tfunc(@nospecialize(t))
     if isa(t, Const)
-        if isa(t.val, Type)
+        if isa(t.val, Type) && valid_as_lattice(t.val)
             return t.val, true, isconcretetype(t.val), true
         end
         return Bottom, true, false, false # runtime throws on non-Type
@@ -75,10 +75,11 @@ function instanceof_tfunc(@nospecialize(t))
     t = widenconst(t)
     if t === Bottom
         return Bottom, true, true, false # runtime unreachable
-    elseif t === typeof(Bottom) || typeintersect(t, Type) === Bottom
+    elseif t === typeof(Bottom) || !hasintersect(t, Type)
         return Bottom, true, false, false # literal Bottom or non-Type
     elseif isType(t)
         tp = t.parameters[1]
+        valid_as_lattice(tp) || return Bottom, true, false, false # runtime unreachable / throws on non-Type
         return tp, !has_free_typevars(tp), isconcretetype(tp), true
     elseif isa(t, UnionAll)
         t′ = unwrap_unionall(t)
@@ -213,6 +214,7 @@ cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid}
 cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr)
 cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr)
 add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5)
+add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecialize(x)->Bool, 1)
 
 function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y))
     if isa(cnd, Const)
@@ -230,7 +232,7 @@ function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y))
     end
     return tmerge(x, y)
 end
-add_tfunc(ifelse, 3, 3, ifelse_tfunc, 1)
+add_tfunc(Core.ifelse, 3, 3, ifelse_tfunc, 1)
 
 function egal_tfunc(@nospecialize(x), @nospecialize(y))
     xx = widenconditional(x)
@@ -245,7 +247,7 @@ function egal_tfunc(@nospecialize(x), @nospecialize(y))
         return Const(false)
     elseif isa(xx, Const) && isa(yy, Const)
         return Const(xx.val === yy.val)
-    elseif typeintersect(widenconst(xx), widenconst(yy)) === Bottom
+    elseif !hasintersect(widenconst(xx), widenconst(yy))
         return Const(false)
     elseif (isa(xx, Const) && y === typeof(xx.val) && isdefined(y, :instance)) ||
            (isa(yy, Const) && x === typeof(yy.val) && isdefined(x, :instance))
@@ -257,9 +259,9 @@ add_tfunc(===, 2, 2, egal_tfunc, 1)
 
 function isdefined_nothrow(argtypes::Array{Any, 1})
     length(argtypes) == 2 || return false
-    return typeintersect(widenconst(argtypes[1]), Module) === Union{} ?
-        (argtypes[2] ⊑ Symbol || argtypes[2] ⊑ Int) :
-         argtypes[2] ⊑ Symbol
+    return hasintersect(widenconst(argtypes[1]), Module) ?
+           argtypes[2] ⊑ Symbol :
+           (argtypes[2] ⊑ Symbol || argtypes[2] ⊑ Int)
 end
 isdefined_tfunc(arg1, sym, order) = (@nospecialize; isdefined_tfunc(arg1, sym))
 function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
@@ -274,8 +276,9 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
     a1 = unwrap_unionall(a1)
     if isa(a1, DataType) && !isabstracttype(a1)
         if a1 === Module
-            Symbol <: widenconst(sym) || return Bottom
-            if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) && isdefined(arg1.val, sym.val)
+            hasintersect(widenconst(sym), Symbol) || return Bottom
+            if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) &&
+               isdefined(arg1.val::Module, sym.val::Symbol)
                 return Const(true)
             end
         elseif isa(sym, Const)
@@ -312,6 +315,9 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
                 end
             end
         end
+    elseif isa(a1, Union)
+        return tmerge(isdefined_tfunc(a1.a, sym),
+                      isdefined_tfunc(a1.b, sym))
     end
     return Bool
 end
@@ -334,7 +340,7 @@ function sizeof_nothrow(@nospecialize(x))
     if t === Bottom
         # x must be an instance (not a Type) or is the Bottom type object
         x = widenconst(x)
-        return typeintersect(x, Type) === Union{}
+        return !hasintersect(x, Type)
     end
     x = unwrap_unionall(t)
     if isconcrete
@@ -466,29 +472,54 @@ add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100)
 add_tfunc(applicable, 1, INT_INF, (@nospecialize(f), args...)->Bool, 100)
 add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecialize(x)->Int, 4)
 add_tfunc(arraysize, 2, 2, (@nospecialize(a), @nospecialize(d))->Int, 4)
+
 function pointer_eltype(@nospecialize(ptr))
     a = widenconst(ptr)
-    if a <: Ptr
-        if isa(a, DataType) && isa(a.parameters[1], Type)
-            return a.parameters[1]
-        elseif isa(a, UnionAll) && !has_free_typevars(a)
-            unw = unwrap_unionall(a)
-            if isa(unw, DataType)
-                return rewrap_unionall(unw.parameters[1], a)
-            end
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            valid_as_lattice(T) || return Bottom
+            return rewrap_unionall(T, a)
         end
     end
     return Any
 end
+function atomic_pointermodify_tfunc(ptr, op, v, order)
+    @nospecialize
+    # Return-type rule for `atomic_pointermodify`: `Pair{T, T}` when the pointer
+    # argument widens to a `Ptr{T}` without free type variables, else just `Pair`.
+    ptrtype = widenconst(ptr)
+    has_free_typevars(ptrtype) && return Pair
+    body = unwrap_unionall(ptrtype)
+    (isa(body, DataType) && body.name === Ptr.body.name) || return Pair
+    T = body.parameters[1]
+    # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T`
+    # An element type rejected by `valid_as_lattice` yields `Bottom`.
+    valid_as_lattice(T) || return Bottom
+    return rewrap_unionall(Pair{T, T}, ptrtype)
+end
+function atomic_pointerreplace_tfunc(ptr, x, v, success_order, failure_order)
+    @nospecialize
+    # Return-type rule for `atomic_pointerreplace`: apply `jl_apply_cmpswap_type` to the
+    # element type `T` of a known `Ptr{T}`, or leave `T` as a free `where` variable.
+    ptrtype = widenconst(ptr)
+    body = has_free_typevars(ptrtype) ? nothing : unwrap_unionall(ptrtype)
+    if isa(body, DataType) && body.name === Ptr.body.name
+        T = body.parameters[1]
+        valid_as_lattice(T) || return Bottom
+        return rewrap_unionall(ccall(:jl_apply_cmpswap_type, Any, (Any,), T), ptrtype)
+    end
+    return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+end
 add_tfunc(pointerref, 3, 3, (a, i, align) -> (@nospecialize; pointer_eltype(a)), 4)
 add_tfunc(pointerset, 4, 4, (a, v, i, align) -> (@nospecialize; a), 5)
-
 add_tfunc(atomic_fence, 1, 1, (order) -> (@nospecialize; Nothing), 4)
 add_tfunc(atomic_pointerref, 2, 2, (a, order) -> (@nospecialize; pointer_eltype(a)), 4)
 add_tfunc(atomic_pointerset, 3, 3, (a, v, order) -> (@nospecialize; a), 5)
 add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_eltype(a)), 5)
-add_tfunc(atomic_pointermodify, 4, 4, (a, op, v, order) -> (@nospecialize; T = pointer_eltype(a); Tuple{T, T}), 5)
-add_tfunc(atomic_pointerreplace, 5, 5, (a, x, v, success_order, failure_order) -> (@nospecialize; Tuple{pointer_eltype(a), Bool}), 5)
+add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
+add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
@@ -527,11 +558,9 @@ function typeof_tfunc(@nospecialize(t))
             return Type{<:t}
         end
     elseif isa(t, Union)
-        a = widenconst(typeof_tfunc(t.a))
-        b = widenconst(typeof_tfunc(t.b))
+        a = widenconst(_typeof_tfunc(t.a))
+        b = widenconst(_typeof_tfunc(t.b))
         return Union{a, b}
-    elseif isa(t, TypeVar) && !(Any === t.ub)
-        return typeof_tfunc(t.ub)
     elseif isa(t, UnionAll)
         u = unwrap_unionall(t)
         if isa(u, DataType) && !isabstracttype(u)
@@ -548,6 +577,13 @@ function typeof_tfunc(@nospecialize(t))
     end
     return DataType # typeof(anything)::DataType
 end
+# variant of `typeof_tfunc` that also accepts a `TypeVar`, recursing on its upper bound
+function _typeof_tfunc(@nospecialize(t))
+    isa(t, TypeVar) || return typeof_tfunc(t)
+    ub = t.ub
+    ub === Any && return DataType
+    return _typeof_tfunc(ub)
+end
 add_tfunc(typeof, 1, 1, typeof_tfunc, 1)
 
 function typeassert_tfunc(@nospecialize(v), @nospecialize(t))
@@ -562,9 +598,7 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt))
     if t === Bottom
         # check if t could be equivalent to typeof(Bottom), since that's valid in `isa`, but the set of `v` is empty
         # if `t` cannot have instances, it's also invalid on the RHS of isa
-        if typeintersect(widenconst(tt), Type) === Union{}
-            return Union{}
-        end
+        hasintersect(widenconst(tt), Type) || return Union{}
         return Const(false)
     end
     if !has_free_typevars(t)
@@ -580,7 +614,7 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt))
             end
             v = widenconst(v)
             isdispatchelem(v) && return Const(false)
-            if typeintersect(v, t) === Bottom
+            if !hasintersect(v, t)
                 # similar to `isnotbrokensubtype` check above, `typeintersect(v, t)`
                 # can't be trusted for kind types so we do an extra check here
                 if !iskindtype(v)
@@ -603,7 +637,7 @@ function subtype_tfunc(@nospecialize(a), @nospecialize(b))
                 return Const(true)
             end
         else
-            if isexact_a || (b !== Bottom && typeintersect(a, b) === Union{})
+            if isexact_a || (b !== Bottom && !hasintersect(a, b))
                 return Const(false)
             end
         end
@@ -636,7 +670,7 @@ function fieldcount_noerror(@nospecialize t)
             return nothing
         end
         t = t::DataType
-    elseif t == Union{}
+    elseif t === Union{}
         return 0
     end
     if !(t isa DataType)
@@ -727,8 +761,8 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck::
     s0 = widenconst(s00)
     s = unwrap_unionall(s0)
     if isa(s, Union)
-        return getfield_nothrow(rewrap(s.a, s00), name, boundscheck) &&
-               getfield_nothrow(rewrap(s.b, s00), name, boundscheck)
+        return getfield_nothrow(rewrap_unionall(s.a, s00), name, boundscheck) &&
+               getfield_nothrow(rewrap_unionall(s.b, s00), name, boundscheck)
     elseif isa(s, DataType)
         # Can't say anything about abstract types
         isabstracttype(s) && return false
@@ -755,8 +789,8 @@ getfield_tfunc(s00, name, order, boundscheck) = (@nospecialize; getfield_tfunc(s
 function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
     s = unwrap_unionall(s00)
     if isa(s, Union)
-        return tmerge(getfield_tfunc(rewrap(s.a,s00), name),
-                      getfield_tfunc(rewrap(s.b,s00), name))
+        return tmerge(getfield_tfunc(rewrap_unionall(s.a, s00), name),
+                      getfield_tfunc(rewrap_unionall(s.b, s00), name))
     elseif isa(s, Conditional)
         return Bottom # Bool has no fields
     elseif isa(s, Const) || isconstType(s)
@@ -804,17 +838,18 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
             end
         end
         s = typeof(sv)
-    elseif isa(s, PartialStruct)
+    elseif isa(s00, PartialStruct)
+        s = widenconst(s00)
+        sty = unwrap_unionall(s)::DataType
         if isa(name, Const)
             nv = name.val
             if isa(nv, Symbol)
-                nv = fieldindex(widenconst(s), nv, false)
+                nv = fieldindex(sty, nv, false)
             end
-            if isa(nv, Int) && 1 <= nv <= length(s.fields)
-                return unwrapva(s.fields[nv])
+            if isa(nv, Int) && 1 <= nv <= length(s00.fields)
+                return unwrapva(s00.fields[nv])
             end
         end
-        s = widenconst(s)
     end
     if isType(s) || !isa(s, DataType) || isabstracttype(s)
         return Any
@@ -829,9 +864,6 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
         end
         return Any
     end
-    # If no value has this type, then this statement should be unreachable.
-    # Bail quickly now.
-    has_concrete_subtype(s) || return Union{}
     if s.name === _NAMEDTUPLE_NAME && !isconcretetype(s)
         if isa(name, Const) && isa(name.val, Symbol)
             if isa(s.parameters[1], Tuple)
@@ -842,10 +874,7 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
         elseif Symbol ⊑ name
             name = Int
         end
-        _ts = s.parameters[2]
-        while isa(_ts, TypeVar)
-            _ts = _ts.ub
-        end
+        _ts = unwraptv(s.parameters[2])
         _ts = rewrap_unionall(_ts, s00)
         if !(_ts <: Tuple)
             return Any
@@ -853,13 +882,16 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
         return getfield_tfunc(_ts, name)
     end
     ftypes = datatype_fieldtypes(s)
-    if isempty(ftypes)
+    # If no value has this type, then this statement should be unreachable.
+    # Bail quickly now.
+    if !has_concrete_subtype(s) || isempty(ftypes)
         return Bottom
     end
     if isa(name, Conditional)
         return Bottom # can't index fields with Bool
     end
     if !isa(name, Const)
+        name = widenconst(name)
         if !(Int <: name || Symbol <: name)
             return Bottom
         end
@@ -911,11 +943,55 @@ setfield!_tfunc(o, f, v) = (@nospecialize; v)
 
 swapfield!_tfunc(o, f, v, order) = (@nospecialize; getfield_tfunc(o, f))
 swapfield!_tfunc(o, f, v) = (@nospecialize; getfield_tfunc(o, f))
-modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; T = getfield_tfunc(o, f); T === Bottom ? T : Tuple{T, T})
-modifyfield!_tfunc(o, f, op, v) = (@nospecialize; T = getfield_tfunc(o, f); T === Bottom ? T : Tuple{T, T}) # TODO: also model op(o.f, v) call
+modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; modifyfield!_tfunc(o, f, op, v)) # `order` is dropped; forwards to the 4-argument method
+function modifyfield!_tfunc(o, f, op, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f) # `isconcretetype(o)` is passed as the `exact` argument
+    T === Bottom && return Bottom # no field type could be determined: propagate `Bottom`
+    PT = Const(Pair)
+    return instanceof_tfunc(apply_type_tfunc(PT, T, T))[1] # apply `Pair` to `(T, T)` and keep the first component of the `instanceof_tfunc` result
+end
+function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) # returns `CallMeta(RT, info)`
+    nargs = length(argtypes)
+    if !isempty(argtypes) && isvarargtype(argtypes[nargs]) # trailing Vararg: the exact argument count is unknown
+        nargs - 1 <= 6 || return CallMeta(Bottom, false) # more than 6 entries before the Vararg
+        nargs > 3 || return CallMeta(Any, false) # too few entries to analyze: answer `Any`
+    else
+        5 <= nargs <= 6 || return CallMeta(Bottom, false) # without a Vararg only 5 or 6 entries are accepted
+    end
+    o = unwrapva(argtypes[2]) # NOTE(review): argtypes[1] is presumably the callee itself — confirm against callers
+    f = unwrapva(argtypes[3])
+    RT = modifyfield!_tfunc(o, f, Any, Any) # `modifyfield!_tfunc` does not inspect its `op`/`v` arguments
+    info = false # stays `false` unless the `op` call is analyzed below
+    if nargs >= 5 && RT !== Bottom
+        # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T`
+        # as well as compute the info for the method matches
+        op = unwrapva(argtypes[4])
+        v = unwrapva(argtypes[5])
+        TF = getfield_tfunc(o, f)
+        push!(sv.ssavalue_uses[sv.currpc], sv.currpc) # temporarily disable `call_result_unused` check for this call
+        callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), sv, #=max_methods=# 1)
+        pop!(sv.ssavalue_uses[sv.currpc], sv.currpc) # undo the temporary self-use pushed before the call
+        TF2 = tmeet(callinfo.rt, widenconst(TF)) # `tmeet` of the `op` call result and the widened field type
+        if TF2 === Bottom
+            RT = Bottom # `TF2` is `Bottom`, so the whole call is inferred as `Bottom`
+        elseif isconcretetype(RT) && has_nontrivial_const_info(TF2) # isconcrete condition required to form a PartialStruct
+            RT = PartialStruct(RT, Any[TF, TF2]) # fields: `TF` from `getfield_tfunc` and the refined `op` result `TF2`
+        end
+        info = callinfo.info
+    end
+    return CallMeta(RT, info)
+end
 replacefield!_tfunc(o, f, x, v, success_order, failure_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
 replacefield!_tfunc(o, f, x, v, success_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
-replacefield!_tfunc(o, f, x, v) = (@nospecialize; T = getfield_tfunc(o, f); T === Bottom ? T : Tuple{widenconst(T), Bool})
+function replacefield!_tfunc(o, f, x, v) # `x` and `v` are not inspected
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f) # `isconcretetype(o)` is passed as the `exact` argument
+    T === Bottom && return Bottom # no field type could be determined: propagate `Bottom`
+    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T) # the `T` inside the `ccall` is the `where`-bound variable, not the local `T`
+    return instanceof_tfunc(apply_type_tfunc(PT, T))[1] # apply `PT` to the field type and keep the first component of the `instanceof_tfunc` result
+end
+
 # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
 
 add_tfunc(getfield, 2, 4, getfield_tfunc, 1)
@@ -1002,8 +1078,8 @@ function fieldtype_tfunc(@nospecialize(s0), @nospecialize(name))
 
     su = unwrap_unionall(s0)
     if isa(su, Union)
-        return tmerge(fieldtype_tfunc(rewrap(su.a, s0), name),
-                      fieldtype_tfunc(rewrap(su.b, s0), name))
+        return tmerge(fieldtype_tfunc(rewrap_unionall(su.a, s0), name),
+                      fieldtype_tfunc(rewrap_unionall(su.b, s0), name))
     end
 
     s, exact = instanceof_tfunc(s0)
@@ -1015,8 +1091,19 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
     exact = exact && !has_free_typevars(s)
     u = unwrap_unionall(s)
     if isa(u, Union)
-        return tmerge(_fieldtype_tfunc(rewrap(u.a, s), exact, name),
-                      _fieldtype_tfunc(rewrap(u.b, s), exact, name))
+        ta0 = _fieldtype_tfunc(rewrap_unionall(u.a, s), exact, name)
+        tb0 = _fieldtype_tfunc(rewrap_unionall(u.b, s), exact, name)
+        ta0 ⊑ tb0 && return tb0
+        tb0 ⊑ ta0 && return ta0
+        ta, exacta, _, istypea = instanceof_tfunc(ta0)
+        tb, exactb, _, istypeb = instanceof_tfunc(tb0)
+        if exact && exacta && exactb
+            return Const(Union{ta, tb})
+        end
+        if istypea && istypeb
+            return Type{<:Union{ta, tb}}
+        end
+        return Any
     end
     u isa DataType || return Any
     if isabstracttype(u)
@@ -1130,7 +1217,7 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt))
     else
         return false
     end
-    # We know the apply_type is well formed. Oherwise our rt would have been
+    # We know the apply_type is well formed. Otherwise our rt would have been
     # Bottom (or Type).
     (headtype === Union) && return true
     isa(rt, Const) && return true
@@ -1189,7 +1276,7 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
         return Any
     end
     if !isempty(args) && isvarargtype(args[end])
-        return isvarargtype(headtype) ? Core.TypeofVararg : Type
+        return isvarargtype(headtype) ? TypeofVararg : Type
     end
     largs = length(args)
     if headtype === Union
@@ -1207,7 +1294,7 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
                 end
             else
                 if !isType(ai)
-                    if !isa(ai, Type) || typeintersect(ai, Type) !== Bottom || typeintersect(ai, TypeVar) !== Bottom
+                    if !isa(ai, Type) || hasintersect(ai, Type) || hasintersect(ai, TypeVar)
                         hasnonType = true
                     else
                         return Bottom
@@ -1215,7 +1302,11 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
                 end
             end
         end
-        largs == 1 && return isa(args[1], Type) ? typeintersect(args[1], Type) : Type
+        if largs == 1 # Union{T} --> T
+            u1 = typeintersect(widenconst(args[1]), Type)
+            valid_as_lattice(u1) || return Bottom
+            return u1
+        end
         hasnonType && return Type
         ty = Union{}
         allconst = true
@@ -1250,7 +1341,7 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
             canconst &= !has_free_typevars(aip1)
             push!(tparams, aip1)
         elseif isa(ai, Const) && (isa(ai.val, Type) || isa(ai.val, TypeVar) ||
-                                  valid_tparam(ai.val) || (istuple && isa(ai.val, Core.TypeofVararg)))
+                                  valid_tparam(ai.val) || (istuple && isvarargtype(ai.val)))
             push!(tparams, ai.val)
         elseif isa(ai, PartialTypeVar)
             canconst = false
@@ -1316,11 +1407,11 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
     catch ex
         # type instantiation might fail if one of the type parameters
         # doesn't match, which could happen if a type estimate is too coarse
-        return isvarargtype(headtype) ? Core.TypeofVararg : Type{<:headtype}
+        return isvarargtype(headtype) ? TypeofVararg : Type{<:headtype}
     end
     !uncertain && canconst && return Const(appl)
     if isvarargtype(appl)
-        return Core.TypeofVararg
+        return TypeofVararg
     end
     if istuple
         return Type{<:appl}
@@ -1341,31 +1432,34 @@ end
 
 # convert the dispatch tuple type argtype to the real (concrete) type of
 # the tuple of those values
-function tuple_tfunc(atypes::Vector{Any})
-    atypes = anymap(widenconditional, atypes)
+function tuple_tfunc(argtypes::Vector{Any})
+    argtypes = anymap(widenconditional, argtypes)
     all_are_const = true
-    for i in 1:length(atypes)
-        if !isa(atypes[i], Const)
+    for i in 1:length(argtypes)
+        if !isa(argtypes[i], Const)
             all_are_const = false
             break
         end
     end
     if all_are_const
-        return Const(ntuple(i -> atypes[i].val, length(atypes)))
+        return Const(ntuple(i -> argtypes[i].val, length(argtypes)))
     end
-    params = Vector{Any}(undef, length(atypes))
+    params = Vector{Any}(undef, length(argtypes))
     anyinfo = false
-    for i in 1:length(atypes)
-        x = atypes[i]
+    for i in 1:length(argtypes)
+        x = argtypes[i]
         if has_struct_const_info(x)
             anyinfo = true
         else
-            atypes[i] = x = widenconst(x)
+            if !isvarargtype(x)
+                x = widenconst(x)
+            end
+            argtypes[i] = x
         end
         if isa(x, Const)
             params[i] = typeof(x.val)
         else
-            x = widenconst(x)
+            x = isvarargtype(x) ? x : widenconst(x)
             if isType(x)
                 anyinfo = true
                 xparam = x.parameters[1]
@@ -1382,28 +1476,31 @@ function tuple_tfunc(atypes::Vector{Any})
     typ = Tuple{params...}
     # replace a singleton type with its equivalent Const object
     isdefined(typ, :instance) && return Const(typ.instance)
-    return anyinfo ? PartialStruct(typ, atypes) : typ
+    return anyinfo ? PartialStruct(typ, argtypes) : typ
 end
 
 function arrayref_tfunc(@nospecialize(boundscheck), @nospecialize(a), @nospecialize i...)
     a = widenconst(a)
-    if a <: Array
-        if isa(a, DataType) && (isa(a.parameters[1], Type) || isa(a.parameters[1], TypeVar))
-            # TODO: the TypeVar case should not be needed here
-            a = a.parameters[1]
-            return isa(a, TypeVar) ? a.ub : a
-        elseif isa(a, UnionAll) && !has_free_typevars(a)
-            unw = unwrap_unionall(a)
-            if isa(unw, DataType)
-                return rewrap_unionall(unw.parameters[1], a)
-            end
+    if !has_free_typevars(a) && a <: Array
+        a0 = a
+        if isa(a, UnionAll)
+            a = unwrap_unionall(a0)
+        end
+        if isa(a, DataType)
+            T = a.parameters[1]
+            valid_as_lattice(T) || return Bottom
+            return rewrap_unionall(T, a0)
         end
     end
     return Any
 end
 add_tfunc(arrayref, 3, INT_INF, arrayref_tfunc, 20)
 add_tfunc(const_arrayref, 3, INT_INF, arrayref_tfunc, 20)
-add_tfunc(arrayset, 4, INT_INF, (@nospecialize(boundscheck), @nospecialize(a), @nospecialize(v), @nospecialize i...)->a, 20)
+function arrayset_tfunc(@nospecialize(boundscheck), @nospecialize(a), @nospecialize(v), @nospecialize i...)
+    # TODO: we could check that the type-intersect of arrayref_tfunc and v is non-empty or always throws
+    return a # the result is the array argument `a` itself; `boundscheck`, `v` and `i` are unused
+end
+add_tfunc(arrayset, 4, INT_INF, arrayset_tfunc, 20)
 
 function _opaque_closure_tfunc(@nospecialize(arg), @nospecialize(isva),
         @nospecialize(lb), @nospecialize(ub), @nospecialize(source), env::Vector{Any},
@@ -1426,6 +1523,7 @@ function _opaque_closure_tfunc(@nospecialize(arg), @nospecialize(isva),
     return PartialOpaque(t, tuple_tfunc(env), isva.val, linfo, source.val)
 end
 
+# whether getindex for the elements can potentially throw UndefRef
 function array_type_undefable(@nospecialize(a))
     if isa(a, Union)
         return array_type_undefable(a.a) || array_type_undefable(a.b)
@@ -1468,7 +1566,7 @@ function _builtin_nothrow(@nospecialize(f), argtypes::Array{Any,1}, @nospecializ
         # Check that we can determine the element type
         (isa(a, DataType) && isa(a.parameters[1], Type)) || return false
         # Check that the element type is compatible with the element we're assigning
-        (argtypes[3] ⊑ a.parameters[1]::Type) || return false
+        (argtypes[3] ⊑ a.parameters[1]) || return false
         return true
     elseif f === arrayref || f === const_arrayref
         return array_builtin_common_nothrow(argtypes, 3)
@@ -1553,7 +1651,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         if length(argtypes) - 1 == tf[2]
             argtypes = argtypes[1:end-1]
         else
-            vatype = argtypes[end]
+            vatype = argtypes[end]::TypeofVararg
             argtypes = argtypes[1:end-1]
             while length(argtypes) < tf[1]
                 push!(argtypes, unwrapva(vatype))
@@ -1659,13 +1757,13 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
             aft = argtypes[2]
             if isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
                    (isconcretetype(aft) && !(aft <: Builtin))
-                af_argtype = isa(tt, Const) ? tt.val : tt.parameters[1]
+                af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
                 if isa(af_argtype, DataType) && af_argtype <: Tuple
                     argtypes_vec = Any[aft, af_argtype.parameters...]
                     if contains_is(argtypes_vec, Union{})
                         return CallMeta(Const(Union{}), false)
                     end
-                    call = abstract_call(interp, nothing, argtypes_vec, sv, -1)
+                    call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), sv, -1)
                     info = verbose_stmt_info(interp) ? ReturnTypeCallInfo(call.info) : false
                     rt = widenconditional(call.rt)
                     if isa(rt, Const)
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 4ad96ae2e72f0..411d606394309 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -1,10 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# build (and start inferring) the inference frame for the linfo
-function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::Bool)
-    frame = InferenceState(result, cached, interp)
+# build (and start inferring) the inference frame for the top-level MethodInstance
+function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
+    frame = InferenceState(result, cache, interp)
     frame === nothing && return false
-    cached && lock_mi_inference(interp, result.linfo)
+    cache === :global && lock_mi_inference(interp, result.linfo)
     return typeinf(interp, frame)
 end
 
@@ -243,7 +243,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     # collect results for the new expanded frame
     results = Tuple{InferenceResult, Vector{Any}, Bool}[
             ( frames[i].result,
-              frames[i].stmt_edges[1],
+              frames[i].stmt_edges[1]::Vector{Any},
               frames[i].cached )
         for i in 1:length(frames) ]
     empty!(frames)
@@ -291,7 +291,7 @@ function CodeInstance(result::InferenceResult, @nospecialize(inferred_result::An
     @assert !(result_type isa LimitedAccuracy)
     if inferred_result isa Const
         # use constant calling convention
-        rettype_const = (result.src::Const).val
+        rettype_const = inferred_result.val
         const_flags = 0x3
         inferred_result = nothing
     else
@@ -335,15 +335,17 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta
     if toplevel
         return ci
     end
-    cache_the_tree = !may_discard_trees(interp) || (ci.inferred &&
-        (ci.inlineable ||
-        ccall(:jl_isa_compileable_sig, Int32, (Any, Any), linfo.specTypes, def) != 0))
+    if may_discard_trees(interp)
+        cache_the_tree = ci.inferred && (ci.inlineable || isa_compileable_sig(linfo.specTypes, def))
+    else
+        cache_the_tree = true
+    end
     if cache_the_tree
         if may_compress(interp)
             nslots = length(ci.slotflags)
-            resize!(ci.slottypes, nslots)
+            resize!(ci.slottypes::Vector{Any}, nslots)
             resize!(ci.slotnames, nslots)
-            return ccall(:jl_compress_ir, Any, (Any, Any), def, ci)
+            return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci)
         else
             return ci
         end
@@ -354,16 +356,10 @@ end
 
 function transform_result_for_cache(interp::AbstractInterpreter, linfo::MethodInstance,
                                     valid_worlds::WorldRange, @nospecialize(inferred_result))
-    local const_flags::Int32
     # If we decided not to optimize, drop the OptimizationState now.
     # External interpreters can override as necessary to cache additional information
     if inferred_result isa OptimizationState
-        opt = inferred_result
-        if isa(opt.src, CodeInfo)
-            inferred_result = ir_to_codeinf!(opt)
-        else
-            inferred_result = opt.src
-        end
+        inferred_result = ir_to_codeinf!(inferred_result)
     end
     if inferred_result isa CodeInfo
         inferred_result.min_world = first(valid_worlds)
@@ -386,17 +382,18 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
     end
     # check if the existing linfo metadata is also sufficient to describe the current inference result
     # to decide if it is worth caching this
-    already_inferred = already_inferred_quick_test(interp, result.linfo)
-    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), result.linfo)
+    linfo = result.linfo
+    already_inferred = already_inferred_quick_test(interp, linfo)
+    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
         already_inferred = true
     end
 
     # TODO: also don't store inferred code if we've previously decided to interpret this function
     if !already_inferred
-        inferred_result = transform_result_for_cache(interp, result.linfo, valid_worlds, result.src)
-        code_cache(interp)[result.linfo] = CodeInstance(result, inferred_result, valid_worlds)
+        inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result.src)
+        code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
     end
-    unlock_mi_inference(interp, result.linfo)
+    unlock_mi_inference(interp, linfo)
     nothing
 end
 
@@ -437,7 +434,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         empty!(edges)
     end
     if me.src.edges !== nothing
-        append!(s_edges, me.src.edges)
+        append!(s_edges, me.src.edges::Vector)
         me.src.edges = nothing
     end
     # inspect whether our inference had a limited result accuracy,
@@ -446,7 +443,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
     limited_ret = me.bestguess isa LimitedAccuracy
     limited_src = false
     if !limited_ret
-        gt = me.src.ssavaluetypes
+        gt = me.src.ssavaluetypes::Vector{Any}
         for j = 1:length(gt)
             gt[j] = gtj = cycle_fix_limited(gt[j], me)
             if gtj isa LimitedAccuracy && me.parent !== nothing
@@ -480,6 +477,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
     end
     me.result.valid_worlds = me.valid_worlds
     me.result.result = me.bestguess
+    validate_code_in_debug_mode(me.linfo, me.src, "inferred")
     nothing
 end
 
@@ -510,8 +508,9 @@ end
 
 # widen all Const elements in type annotations
 function widen_all_consts!(src::CodeInfo)
-    for i = 1:length(src.ssavaluetypes)
-        src.ssavaluetypes[i] = widenconst(src.ssavaluetypes[i])
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for i = 1:length(ssavaluetypes)
+        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
     end
 
     for i = 1:length(src.code)
@@ -576,6 +575,7 @@ function record_slot_assign!(sv::InferenceState)
     states = sv.stmt_types
     body = sv.src.code::Vector{Any}
     slottypes = sv.slottypes::Vector{Any}
+    ssavaluetypes = sv.src.ssavaluetypes::Vector{Any}
     for i = 1:length(body)
         expr = body[i]
         st_i = states[i]
@@ -584,7 +584,7 @@ function record_slot_assign!(sv::InferenceState)
             lhs = expr.args[1]
             rhs = expr.args[2]
             if isa(lhs, SlotNumber)
-                vt = widenconst(sv.src.ssavaluetypes[i])
+                vt = widenconst(ssavaluetypes[i])
                 if vt !== Bottom
                     id = slot_id(lhs)
                     otherTy = slottypes[id]
@@ -607,12 +607,11 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
     # (otherwise, we'll perhaps run the optimization passes later, outside of inference)
 
     # remove all unused ssa values
-    gt = sv.src.ssavaluetypes
-    for j = 1:length(gt)
-        if gt[j] === NOT_FOUND
-            gt[j] = Union{}
-        end
-        gt[j] = widenconditional(gt[j])
+    src = sv.src
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for j = 1:length(ssavaluetypes)
+        t = ssavaluetypes[j]
+        ssavaluetypes[j] = t === NOT_FOUND ? Union{} : widenconditional(t)
     end
 
     # compute the required type for each slot
@@ -625,7 +624,6 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
     # annotate variables load types
     # remove dead code optimization
     # and compute which variables may be used undef
-    src = sv.src
     states = sv.stmt_types
     nargs = sv.nargs
     nslots = length(states[1]::VarTable)
@@ -638,7 +636,7 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
         expr = body[i]
         if isa(expr, GotoIfNot)
             if !isa(states[expr.dest], VarTable)
-                body[i] = expr.cond
+                body[i] = Expr(:call, GlobalRef(Core, :typeassert), expr.cond, GlobalRef(Core, :Bool))
             end
         end
     end
@@ -668,9 +666,10 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
             elseif run_optimizer
                 deleteat!(body, i)
                 deleteat!(states, i)
-                deleteat!(src.ssavaluetypes, i)
+                deleteat!(ssavaluetypes, i)
                 deleteat!(src.codelocs, i)
                 deleteat!(sv.stmt_info, i)
+                deleteat!(src.ssaflags, i)
                 nexpr -= 1
                 changemap[oldidx] = -1
                 continue
@@ -775,26 +774,36 @@ function resolve_call_cycle!(interp::AbstractInterpreter, linfo::MethodInstance,
 end
 
 # compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector, caller::InferenceState)
-    mi = specialize_method(method, atypes, sparams)::MethodInstance
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::InferenceState)
+    mi = specialize_method(method, atype, sparams)::MethodInstance
     code = get(code_cache(interp), mi, nothing)
     if code isa CodeInstance # return existing rettype if the code is already inferred
-        update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
-        rettype = code.rettype
-        if isdefined(code, :rettype_const)
-            rettype_const = code.rettype_const
-            if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
-                return PartialStruct(rettype, rettype_const), mi
-            elseif rettype <: Core.OpaqueClosure && isa(rettype_const, PartialOpaque)
-                return rettype_const, mi
-            elseif isa(rettype_const, InterConditional)
-                return rettype_const, mi
+        if code.inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller))
+            # we already inferred this edge previously and decided to discard the inferred code,
+            # but the inliner will request to use it, so we re-infer it here and keep it around in the local cache
+            cache = :local
+        else
+            update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
+            rettype = code.rettype
+            if isdefined(code, :rettype_const)
+                rettype_const = code.rettype_const
+                # the second (subtyping) condition in each branch is necessary to distinguish the usual
+                # cases from the rare ones where a `Const` wrapped one of these extended lattice objects
+                if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
+                    return PartialStruct(rettype, rettype_const), mi
+                elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure
+                    return rettype_const, mi
+                elseif isa(rettype_const, InterConditional) && !(InterConditional <: rettype)
+                    return rettype_const, mi
+                else
+                    return Const(rettype_const), mi
+                end
             else
-                return Const(rettype_const), mi
+                return rettype, mi
             end
-        else
-            return rettype, mi
         end
+    else
+        cache = :global # cache edge targets by default
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0
         return Any, nothing
@@ -810,7 +819,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         # completely new
         lock_mi_inference(interp, mi)
         result = InferenceResult(mi)
-        frame = InferenceState(result, #=cached=#true, interp) # always use the cache for edge targets
+        frame = InferenceState(result, cache, interp) # always use the cache for edge targets
         if frame === nothing
             # can't get the source for this, so we know nothing
             unlock_mi_inference(interp, mi)
@@ -835,18 +844,13 @@ end
 #### entry points for inferring a MethodInstance given a type signature ####
 
 # compute an inferred AST and return type
-function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector, run_optimizer::Bool)
-    mi = specialize_method(method, atypes, sparams)::MethodInstance
+function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
+    mi = specialize_method(method, atype, sparams)::MethodInstance
     ccall(:jl_typeinf_begin, Cvoid, ())
     result = InferenceResult(mi)
-    frame = InferenceState(result, false, interp)
+    frame = InferenceState(result, run_optimizer ? :global : :no, interp)
     frame === nothing && return (nothing, Any)
-    if typeinf(interp, frame) && run_optimizer
-        opt_params = OptimizationParams(interp)
-        result.src = src = OptimizationState(frame, opt_params, interp)
-        optimize(interp, src, opt_params, ignorelimited(result.result))
-        frame.src = finish!(interp, result)
-    end
+    typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
     frame.inferred || return (nothing, Any)
     return (frame.src, widenconst(ignorelimited(result.result)))
@@ -868,7 +872,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
                 tree.code = Any[ ReturnNode(quoted(rettype_const)) ]
                 nargs = Int(method.nargs)
                 tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
-                tree.slotflags = fill(0x00, nargs)
+                tree.slotflags = fill(IR_FLAG_NULL, nargs)
                 tree.ssavaluetypes = 1
                 tree.codelocs = Int32[1]
                 tree.linetable = [LineInfoNode(method.module, method.name, method.file, Int(method.line), 0)]
@@ -903,7 +907,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
         return retrieve_code_info(mi)
     end
     lock_mi_inference(interp, mi)
-    frame = InferenceState(InferenceResult(mi), #=cached=#true, interp)
+    frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
@@ -912,11 +916,11 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
 end
 
 # compute (and cache) an inferred AST and return the inferred return type
-function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector)
-    if contains_is(unwrap_unionall(atypes).parameters, Union{})
+function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector)
+    if contains_is(unwrap_unionall(atype).parameters, Union{})
         return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap
     end
-    mi = specialize_method(method, atypes, sparams)::MethodInstance
+    mi = specialize_method(method, atype, sparams)::MethodInstance
     for i = 1:2 # test-and-lock-and-test
         i == 2 && ccall(:jl_typeinf_begin, Cvoid, ())
         code = get(code_cache(interp), mi, nothing)
@@ -926,11 +930,11 @@ function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize
             return code.rettype
         end
     end
-    frame = InferenceResult(mi)
-    typeinf(interp, frame, true)
+    result = InferenceResult(mi)
+    typeinf(interp, result, :global)
     ccall(:jl_typeinf_end, Cvoid, ())
-    frame.result isa InferenceState && return nothing
-    return widenconst(ignorelimited(frame.result))
+    result.result isa InferenceState && return nothing
+    return widenconst(ignorelimited(result.result))
 end
 
 # This is a bridge for the C code calling `jl_typeinf_func()`
@@ -946,7 +950,7 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
             ccall(:jl_typeinf_begin, Cvoid, ())
             if !src.inferred
                 result = InferenceResult(linfo)
-                frame = InferenceState(result, src, #=cached=#true, interp)
+                frame = InferenceState(result, src, #=cache=#:global, interp)
                 typeinf(interp, frame)
                 @assert frame.inferred # TODO: deal with this better
                 src = frame.src
@@ -957,7 +961,6 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
     return src
 end
 
-
 function return_type(@nospecialize(f), @nospecialize(t))
     world = ccall(:jl_get_tls_world_age, UInt, ())
     return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), Any[_return_type, f, t, world], 4)
@@ -969,14 +972,10 @@ function _return_type(interp::AbstractInterpreter, @nospecialize(f), @nospeciali
     rt = Union{}
     if isa(f, Builtin)
         rt = builtin_tfunction(interp, f, Any[t.parameters...], nothing)
-        if isa(rt, TypeVar)
-            rt = rt.ub
-        else
-            rt = widenconst(rt)
-        end
+        rt = widenconst(rt)
     else
         for match in _methods(f, t, -1, get_world_counter(interp))::Vector
-            match = match::Core.MethodMatch
+            match = match::MethodMatch
             ty = typeinf_type(interp, match.method, match.spec_types, match.sparams)
             ty === nothing && return Any
             rt = tmerge(rt, ty)
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 6391d4029b58e..1f55ceb94a062 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -73,16 +73,6 @@ struct MaybeUndef
     MaybeUndef(@nospecialize(typ)) = new(typ)
 end
 
-# The type of a variable load is either a value or an UndefVarError
-# (only used in abstractinterpret, doesn't appear in optimize)
-struct VarState
-    typ
-    undef::Bool
-    VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
-end
-
-const VarTable = Array{Any,1}
-
 struct StateUpdate
     var::SlotNumber
     vtype::VarState
@@ -96,10 +86,21 @@ end
 struct LimitedAccuracy
     typ
     causes::IdSet{InferenceState}
-    LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState}) =
-        new(typ, causes)
+    function LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState})
+        @assert !isa(typ, LimitedAccuracy) "malformed LimitedAccuracy"
+        return new(typ, causes)
+    end
 end
 
+"""
+    struct NotFound end
+    const NOT_FOUND = NotFound()
+
+A special singleton that represents a variable that has not been analyzed yet.
+Particularly, all SSA value types are initialized as `NOT_FOUND` when creating a new `InferenceState`.
+Note that this is only used for `smerge`, which updates abstract state `VarTable`,
+and thus we don't define the lattice for this.
+"""
 struct NotFound end
 
 const NOT_FOUND = NotFound()
@@ -139,7 +140,12 @@ function maybe_extract_const_bool(c::AnyConditional)
 end
 maybe_extract_const_bool(@nospecialize c) = nothing
 
-function ⊑(@nospecialize(a), @nospecialize(b))
+"""
+    a ⊑ b -> Bool
+
+The non-strict partial order over the type inference lattice.
+"""
+@nospecialize(a) ⊑ @nospecialize(b) = begin
     if isa(b, LimitedAccuracy)
         if !isa(a, LimitedAccuracy)
             return false
@@ -231,6 +237,22 @@ function ⊑(@nospecialize(a), @nospecialize(b))
     end
 end
 
+"""
+    a ⊏ b -> Bool
+
+The strict partial order over the type inference lattice.
+This is defined as the irreflexive kernel of `⊑`.
+"""
+@nospecialize(a) ⊏ @nospecialize(b) = a ⊑ b && !⊑(b, a)
+
+"""
+    a ⋤ b -> Bool
+
+This order could be used as a slightly more efficient version of the strict order `⊏`,
+where we can safely assume `a ⊑ b` holds.
+"""
+@nospecialize(a) ⋤ @nospecialize(b) = !⊑(b, a)
+
 # Check if two lattice elements are partial order equivalent. This is basically
 # `a ⊑ b && b ⊑ a` but with extra performance optimizations.
 function is_lattice_equal(@nospecialize(a), @nospecialize(b))
@@ -268,23 +290,14 @@ function is_lattice_equal(@nospecialize(a), @nospecialize(b))
 end
 
 widenconst(c::AnyConditional) = Bool
-function widenconst(c::Const)
-    if isa(c.val, Type)
-        if isvarargtype(c.val)
-            return Type
-        end
-        return Type{c.val}
-    else
-        return typeof(c.val)
-    end
-end
+widenconst((; val)::Const) = isa(val, Type) ? Type{val} : typeof(val)
 widenconst(m::MaybeUndef) = widenconst(m.typ)
 widenconst(c::PartialTypeVar) = TypeVar
 widenconst(t::PartialStruct) = t.typ
 widenconst(t::PartialOpaque) = t.typ
 widenconst(t::Type) = t
-widenconst(t::TypeVar) = t
-widenconst(t::Core.TypeofVararg) = t
+widenconst(t::TypeVar) = error("unhandled TypeVar")
+widenconst(t::TypeofVararg) = error("unhandled Vararg")
 widenconst(t::LimitedAccuracy) = error("unhandled LimitedAccuracy")
 
 issubstate(a::VarState, b::VarState) = (a.typ ⊑ b.typ && a.undef <= b.undef)
@@ -299,7 +312,7 @@ function smerge(sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState})
 end
 
 @inline tchanged(@nospecialize(n), @nospecialize(o)) = o === NOT_FOUND || (n !== NOT_FOUND && !(n ⊑ o))
-@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n, o)))
+@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n::VarState, o::VarState)))
 
 widenconditional(@nospecialize typ) = typ
 function widenconditional(typ::AnyConditional)
@@ -396,7 +409,7 @@ function stupdate1!(state::VarTable, change::StateUpdate)
                 if isa(oldtypetyp, Conditional) && slot_id(oldtypetyp.var) == changeid
                     oldtypetyp = widenconditional(oldtypetyp)
                     if oldtype.typ isa LimitedAccuracy
-                        oldtypetyp = LimitedAccuracy(oldtypetyp, oldtype.typ.causes)
+                        oldtypetyp = LimitedAccuracy(oldtypetyp, (oldtype.typ::LimitedAccuracy).causes)
                     end
                     state[i] = VarState(oldtypetyp, oldtype.undef)
                 end
diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index 3145517630958..7641700552c42 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -39,6 +39,8 @@ function is_derived_type(@nospecialize(t), @nospecialize(c), mindepth::Int)
     if t === c
         return mindepth <= 1
     end
+    isvarargtype(t) && (t = unwrapva(t))
+    isvarargtype(c) && (c = unwrapva(c))
     if isa(c, Union)
         # see if it is one of the elements of the union
         return is_derived_type(t, c.a, mindepth) || is_derived_type(t, c.b, mindepth)
@@ -46,8 +48,6 @@ function is_derived_type(@nospecialize(t), @nospecialize(c), mindepth::Int)
         # see if it is derived from the body
         # also handle the var here, since this construct bounds the mindepth to the smallest possible value
         return is_derived_type(t, c.var.ub, mindepth) || is_derived_type(t, c.body, mindepth)
-    elseif isa(c, Core.TypeofVararg)
-        return is_derived_type(t, unwrapva(c), mindepth)
     elseif isa(c, DataType)
         if mindepth > 0
             mindepth -= 1
@@ -79,6 +79,7 @@ end
 # The goal of this function is to return a type of greater "size" and less "complexity" than
 # both `t` or `c` over the lattice defined by `sources`, `depth`, and `allowed_tuplelen`.
 function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVector, depth::Int, allowed_tuplelen::Int)
+    @assert isa(t, Type) && isa(c, Type) "unhandled TypeVar / Vararg"
     if t === c
         return t # quick egal test
     elseif t === Union{}
@@ -98,43 +99,29 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
     # first attempt to turn `c` into a type that contributes meaningful information
     # by peeling off meaningless non-matching wrappers of comparison one at a time
     # then unwrap `t`
-    if isa(c, TypeVar)
-        if isa(t, TypeVar) && t.ub === c.ub && (t.lb === Union{} || t.lb === c.lb)
-            return t # it's ok to change the name, or widen `lb` to Union{}, so we can handle this immediately here
-        end
-        return _limit_type_size(t, c.ub, sources, depth, allowed_tuplelen)
-    end
+    # NOTE that `TypeVar` / `Vararg` are handled separately to catch the logic errors
     if isa(c, UnionAll)
-        return _limit_type_size(t, c.body, sources, depth, allowed_tuplelen)
+        return __limit_type_size(t, c.body, sources, depth, allowed_tuplelen)::Type
     end
     if isa(t, UnionAll)
-        tbody = _limit_type_size(t.body, c, sources, depth, allowed_tuplelen)
+        tbody = __limit_type_size(t.body, c, sources, depth, allowed_tuplelen)
         tbody === t.body && return t
-        return UnionAll(t.var, tbody)
-    elseif isa(t, TypeVar)
-        # don't have a matching TypeVar in comparison, so we keep just the upper bound
-        return _limit_type_size(t.ub, c, sources, depth, allowed_tuplelen)
+        return UnionAll(t.var, tbody)::Type
     elseif isa(t, Union)
         if isa(c, Union)
-            a = _limit_type_size(t.a, c.a, sources, depth, allowed_tuplelen)
-            b = _limit_type_size(t.b, c.b, sources, depth, allowed_tuplelen)
+            a = __limit_type_size(t.a, c.a, sources, depth, allowed_tuplelen)
+            b = __limit_type_size(t.b, c.b, sources, depth, allowed_tuplelen)
             return Union{a, b}
         end
-    elseif isa(t, Core.TypeofVararg)
-        isa(c, Core.TypeofVararg) || return Vararg
-        VaT = _limit_type_size(unwrapva(t), unwrapva(c), sources, depth + 1, 0)
-        if isdefined(t, :N) && (isa(t.N, TypeVar) || (isdefined(c, :N) && t.N === c.N))
-            return Vararg{VaT, t.N}
-        end
-        return Vararg{VaT}
     elseif isa(t, DataType)
-        if isa(c, Core.TypeofVararg)
-            # Tuple{Vararg{T}} --> Tuple{T} is OK
-            return _limit_type_size(t, c.T, sources, depth, 0)
-        elseif isType(t) # allow taking typeof as Type{...}, but ensure it doesn't start nesting
+        if isType(t) # see equivalent case in type_more_complex
             tt = unwrap_unionall(t.parameters[1])
-            (!isa(tt, DataType) || isType(tt)) && (depth += 1)
-            is_derived_type_from_any(tt, sources, depth) && return t
+            if isa(tt, Union) || isa(tt, TypeVar) || isType(tt)
+                is_derived_type_from_any(tt, sources, depth + 1) && return t
+            else
+                isType(c) && (c = unwrap_unionall(c.parameters[1]))
+                type_more_complex(tt, c, sources, depth, 0, 0) || return t
+            end
             return Type
         elseif isa(c, DataType)
             tP = t.parameters
@@ -161,7 +148,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
                         else
                             cPi = Any
                         end
-                        Q[i] = _limit_type_size(Q[i], cPi, sources, depth + 1, 0)
+                        Q[i] = __limit_type_size(Q[i], cPi, sources, depth + 1, 0)
                     end
                     return Tuple{Q...}
                 end
@@ -182,6 +169,38 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
     return Any
 end
 
+# helper function of `_limit_type_size`, which has the right to take and return `TypeVar` / `Vararg`
+function __limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVector, depth::Int, allowed_tuplelen::Int)
+    cN = 0 # stays 0 unless `c` is a `Vararg` whose `N` field is defined
+    if isvarargtype(c) # Tuple{Vararg{T}} --> Tuple{T} is OK
+        isdefined(c, :N) && (cN = c.N) # remember `c`'s length parameter for the `tN === cN` check below
+        c = unwrapva(c) # from here on, compare against `unwrapva(c)` in place of `c`
+    end
+    if isa(c, TypeVar)
+        if isa(t, TypeVar) && t.ub === c.ub && (t.lb === Union{} || t.lb === c.lb)
+            return t # it's ok to change the name, or widen `lb` to Union{}, so we can handle this immediately here
+        end
+        return __limit_type_size(t, c.ub, sources, depth, allowed_tuplelen) # retry against `c`'s upper bound
+    elseif isa(t, TypeVar)
+        # don't have a matching TypeVar in comparison, so we keep just the upper bound
+        return __limit_type_size(t.ub, c, sources, depth, allowed_tuplelen)
+    elseif isvarargtype(t)
+        # Tuple{Vararg{T,N}} --> Tuple{Vararg{S,M}} is OK
+        # Tuple{T} --> Tuple{Vararg{T}} is OK
+        # but S must be more limited than T, and must not introduce a new number for M
+        VaT = __limit_type_size(unwrapva(t), c, sources, depth + 1, 0) # recurse on `unwrapva(t)` with depth + 1 and allowed_tuplelen = 0
+        if isdefined(t, :N)
+            tN = t.N
+            if isa(tN, TypeVar) || tN === cN # keep the length only if it is a TypeVar or `===` to `cN`
+                return Vararg{VaT, tN}
+            end
+        end
+        return Vararg{VaT} # otherwise return a `Vararg` without a length parameter
+    else
+        return _limit_type_size(t, c, sources, depth, allowed_tuplelen) # `t` is neither TypeVar nor Vararg here: defer to the main routine
+    end
+end
+
 function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVector, depth::Int, tupledepth::Int, allowed_tuplelen::Int)
     # detect cases where the comparison is trivial
     if t === c
@@ -197,6 +216,8 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         return false # t isn't something new
     end
     # peel off wrappers
+    isvarargtype(t) && (t = unwrapva(t))
+    isvarargtype(c) && (c = unwrapva(c))
     if isa(c, UnionAll)
         # allow wrapping type with fewer UnionAlls than comparison if in a covariant context
         if !isa(t, UnionAll) && tupledepth == 0
@@ -225,18 +246,19 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         return t !== 1 && !(0 <= t < c) # alternatively, could use !(abs(t) <= abs(c) || abs(t) < n) for some n
     end
     # base case for data types
-    if isa(t, Core.TypeofVararg)
-        if isa(c, Core.TypeofVararg)
-            return type_more_complex(unwrapva(t), unwrapva(c), sources, depth + 1, tupledepth, 0)
-        end
-    elseif isa(t, DataType)
+    if isa(t, DataType)
         tP = t.parameters
-        if isa(c, Core.TypeofVararg)
-            return type_more_complex(t, unwrapva(c), sources, depth, tupledepth, 0)
-        elseif isType(t) # allow taking typeof any source type anywhere as Type{...}, as long as it isn't nesting Type{Type{...}}
+        if isType(t)
+            # Treat Type{T} and T as equivalent to allow taking typeof any
+            # source type (DataType) anywhere as Type{...}, as long as it isn't
+            # nesting as Type{Type{...}}
             tt = unwrap_unionall(t.parameters[1])
-            (!isa(tt, DataType) || isType(tt)) && (depth += 1)
-            return !is_derived_type_from_any(tt, sources, depth)
+            if isa(tt, Union) || isa(tt, TypeVar) || isType(tt)
+                return !is_derived_type_from_any(tt, sources, depth + 1)
+            else
+                isType(c) && (c = unwrap_unionall(c.parameters[1]))
+                return type_more_complex(tt, c, sources, depth, 0, 0)
+            end
         elseif isa(c, DataType) && t.name === c.name
             cP = c.parameters
             length(cP) < length(tP) && return true
@@ -377,26 +399,25 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb))
         return Bool
     end
     # type-lattice for Const and PartialStruct wrappers
-    if (isa(typea, PartialStruct) || isa(typea, Const)) &&
-       (isa(typeb, PartialStruct) || isa(typeb, Const)) &&
-        widenconst(typea) === widenconst(typeb)
+    if ((isa(typea, PartialStruct) || isa(typea, Const)) &&
+        (isa(typeb, PartialStruct) || isa(typeb, Const)) &&
+        widenconst(typea) === widenconst(typeb))
 
-       typea_nfields = nfields_tfunc(typea)
-       typeb_nfields = nfields_tfunc(typeb)
-       if !isa(typea_nfields, Const) || !isa(typeb_nfields, Const) || typea_nfields.val !== typeb_nfields.val
+        typea_nfields = nfields_tfunc(typea)
+        typeb_nfields = nfields_tfunc(typeb)
+        if !isa(typea_nfields, Const) || !isa(typeb_nfields, Const) || typea_nfields.val !== typeb_nfields.val
             return widenconst(typea)
-       end
+        end
 
-       type_nfields = typea_nfields.val::Int
-       fields = Vector{Any}(undef, type_nfields)
-       anyconst = false
-       for i = 1:type_nfields
+        type_nfields = typea_nfields.val::Int
+        fields = Vector{Any}(undef, type_nfields)
+        anyconst = false
+        for i = 1:type_nfields
             fields[i] = tmerge(getfield_tfunc(typea, Const(i)),
                                getfield_tfunc(typeb, Const(i)))
             anyconst |= has_nontrivial_const_info(fields[i])
-       end
-       return anyconst ? PartialStruct(widenconst(typea), fields) :
-            widenconst(typea)
+        end
+        return anyconst ? PartialStruct(widenconst(typea), fields) : widenconst(typea)
     end
     if isa(typea, PartialOpaque) && isa(typeb, PartialOpaque) && widenconst(typea) == widenconst(typeb)
         if !(typea.source === typeb.source &&
@@ -429,7 +450,8 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb))
         # bail if everything isn't a well-formed DataType
         ti = types[i]
         uw = unwrap_unionall(ti)
-        (uw isa DataType && ti <: uw.name.wrapper) || return Any
+        uw isa DataType || return Any
+        ti <: uw.name.wrapper || return Any
         typenames[i] = uw.name
     end
     u = Union{types...}
@@ -598,16 +620,15 @@ function tmeet(@nospecialize(v), @nospecialize(t))
             return v
         end
         ti = typeintersect(widev, t)
-        if ti === Bottom
-            return Bottom
-        end
+        valid_as_lattice(ti) || return Bottom
         @assert widev <: Tuple
         new_fields = Vector{Any}(undef, length(v.fields))
         for i = 1:length(new_fields)
-            if isa(v.fields[i], Core.TypeofVararg)
-                new_fields[i] = v.fields[i]
+            vfi = v.fields[i]
+            if isvarargtype(vfi)
+                new_fields[i] = vfi
             else
-                new_fields[i] = tmeet(v.fields[i], widenconst(getfield_tfunc(t, Const(i))))
+                new_fields[i] = tmeet(vfi, widenconst(getfield_tfunc(t, Const(i))))
                 if new_fields[i] === Bottom
                     return Bottom
                 end
@@ -620,5 +641,7 @@ function tmeet(@nospecialize(v), @nospecialize(t))
         end
         return v
     end
-    return typeintersect(widenconst(v), t)
+    ti = typeintersect(widenconst(v), t)
+    valid_as_lattice(ti) || return Bottom
+    return ti
 end
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 1a89d5e994b15..accf095650e82 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -17,6 +17,11 @@ If `interp` is an `AbstractInterpreter`, it is expected to provide at least the
 """
 abstract type AbstractInterpreter end
 
+struct ArgInfo # bundles two views of a call's arguments; passed to `InferenceResult` paired with an `InferenceState`
+    fargs::Union{Nothing,Vector{Any}} # NOTE(review): presumably the argument expressions, `nothing` when unavailable — confirm
+    argtypes::Vector{Any} # NOTE(review): presumably the inferred (lattice) types of the arguments — confirm
+end
+
 """
     InferenceResult
 
@@ -29,13 +34,14 @@ mutable struct InferenceResult
     result # ::Type, or InferenceState if WIP
     src #::Union{CodeInfo, OptimizationState, Nothing} # if inferred copy is available
     valid_worlds::WorldRange # if inference and optimization is finished
-    function InferenceResult(linfo::MethodInstance, given_argtypes = nothing, va_override=false)
-        argtypes, overridden_by_const = matching_cache_argtypes(linfo, given_argtypes, va_override)
+    function InferenceResult(linfo::MethodInstance,
+                             arginfo#=::Union{Nothing,Tuple{ArgInfo,InferenceState}}=# = nothing,
+                             va_override::Bool = false)
+        argtypes, overridden_by_const = matching_cache_argtypes(linfo, arginfo, va_override)
         return new(linfo, argtypes, overridden_by_const, Any, nothing, WorldRange())
     end
 end
 
-
 """
     OptimizationParams
 
@@ -54,8 +60,6 @@ struct OptimizationParams
     MAX_TUPLE_SPLAT::Int
     MAX_UNION_SPLITTING::Int
 
-    unoptimize_throw_blocks::Bool
-
     function OptimizationParams(;
             inlining::Bool = inlining_enabled(),
             inline_cost_threshold::Int = 100,
@@ -65,7 +69,6 @@ struct OptimizationParams
             max_methods::Int = 3,
             tuple_splat::Int = 32,
             union_splitting::Int = 4,
-            unoptimize_throw_blocks::Bool = true,
         )
         return new(
             inlining,
@@ -76,7 +79,6 @@ struct OptimizationParams
             max_methods,
             tuple_splat,
             union_splitting,
-            unoptimize_throw_blocks,
         )
     end
 end
@@ -218,7 +220,6 @@ may_discard_trees(::AbstractInterpreter) = true
 verbose_stmt_info(::AbstractInterpreter) = false
 
 method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp))
-inlining_policy(::AbstractInterpreter) = default_inlining_policy
 
 """
 By default `AbstractInterpreter` implements the following inference bail out logic:
diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl
index c869759f97d9a..f7f2aaece09ed 100644
--- a/base/compiler/typeutils.jl
+++ b/base/compiler/typeutils.jl
@@ -4,13 +4,6 @@
 # lattice utilities #
 #####################
 
-function rewrap(@nospecialize(t), @nospecialize(u))
-    if isa(t, TypeVar) || isa(t, Type) || isa(t, Core.TypeofVararg)
-        return rewrap_unionall(t, u)
-    end
-    return t
-end
-
 isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME
 
 # true if Type{T} is inlineable as constant T
@@ -42,15 +35,13 @@ end
 
 has_const_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isType(x)
 
-has_concrete_subtype(d::DataType) = d.flags & 0x20 == 0x20
-
 # Subtyping currently intentionally answers certain queries incorrectly for kind types. For
 # some of these queries, this check can be used to somewhat protect against making incorrect
 # decisions based on incorrect subtyping. Note that this check, itself, is broken for
 # certain combinations of `a` and `b` where one/both isa/are `Union`/`UnionAll` type(s)s.
 isnotbrokensubtype(@nospecialize(a), @nospecialize(b)) = (!iskindtype(b) || !isType(a) || hasuniquerep(a.parameters[1]) || b <: a)
 
-argtypes_to_type(argtypes::Array{Any,1}) = Tuple{anymap(widenconst, argtypes)...}
+argtypes_to_type(argtypes::Array{Any,1}) = Tuple{anymap(@nospecialize(a) -> isvarargtype(a) ? a : widenconst(a), argtypes)...}
 
 function isknownlength(t::DataType)
     isvatuple(t) || return true
@@ -89,6 +80,30 @@ function datatype_min_ninitialized(t::DataType)
     return length(t.name.names) - t.name.n_uninitialized
 end
 
+has_concrete_subtype(d::DataType) = d.flags & 0x20 == 0x20 # n.b. often computed only after setting the type and layout fields
+
+# Determine whether `x` is a valid lattice element tag; returns a `Bool`.
+# For example, Type{v} is not valid if v is a value
+# Accepts TypeVars also, since it assumes the user will rewrap it correctly
+function valid_as_lattice(@nospecialize(x))
+    x === Bottom && return false # `Bottom` is rejected up front (the `return` was missing here)
+    x isa TypeVar && return valid_as_lattice(x.ub) # judge a TypeVar by its upper bound
+    x isa UnionAll && (x = unwrap_unionall(x)) # inspect what `unwrap_unionall` yields for a UnionAll
+    if x isa Union
+        # the Union constructor ensures this (and we'll recheck after
+        # operations that might remove the Union itself)
+        return true
+    end
+    if x isa DataType
+        if isType(x)
+            p = x.parameters[1]
+            p isa Type || p isa TypeVar || return false # Type{p} needs `p` to be a Type or TypeVar
+        end
+        return true
+    end
+    return false # anything that is neither a Union nor a DataType is rejected
+end
+
 # test if non-Type, non-TypeVar `x` can be used to parameterize a type
 function valid_tparam(@nospecialize(x))
     if isa(x, Tuple)
@@ -103,8 +118,8 @@ end
 function compatible_vatuple(a::DataType, b::DataType)
     vaa = a.parameters[end]
     vab = a.parameters[end]
-    if !(isa(vaa, Core.TypeofVararg) && isa(vab, Core.TypeofVararg))
-        return isa(vaa, Core.TypeofVararg) == isa(vab, Core.TypeofVararg)
+    if !(isvarargtype(vaa) && isvarargtype(vab))
+        return isvarargtype(vaa) == isvarargtype(vab)
     end
     (isdefined(vaa, :N) == isdefined(vab, :N)) || return false
     !isdefined(vaa, :N) && return true
@@ -119,8 +134,10 @@ function typesubtract(@nospecialize(a), @nospecialize(b), MAX_UNION_SPLITTING::I
     end
     ua = unwrap_unionall(a)
     if isa(ua, Union)
-        return Union{typesubtract(rewrap_unionall(ua.a, a), b, MAX_UNION_SPLITTING),
-                     typesubtract(rewrap_unionall(ua.b, a), b, MAX_UNION_SPLITTING)}
+        uua = typesubtract(rewrap_unionall(ua.a, a), b, MAX_UNION_SPLITTING)
+        uub = typesubtract(rewrap_unionall(ua.b, a), b, MAX_UNION_SPLITTING)
+        return Union{valid_as_lattice(uua) ? uua : Union{},
+                     valid_as_lattice(uub) ? uub : Union{}}
     elseif a isa DataType
         ub = unwrap_unionall(b)
         if ub isa DataType
@@ -146,7 +163,7 @@ function typesubtract(@nospecialize(a), @nospecialize(b), MAX_UNION_SPLITTING::I
                             ta = collect(a.parameters)
                             ap = a.parameters[i]
                             bp = b.parameters[i]
-                            (isa(ap, Core.TypeofVararg) || isa(bp, Core.TypeofVararg)) && return a
+                            (isvarargtype(ap) || isvarargtype(bp)) && return a
                             ta[i] = typesubtract(ap, bp, min(2, MAX_UNION_SPLITTING))
                             return Tuple{ta...}
                         end
@@ -158,6 +175,8 @@ function typesubtract(@nospecialize(a), @nospecialize(b), MAX_UNION_SPLITTING::I
     return a # TODO: improve this bound?
 end
 
+hasintersect(@nospecialize(a), @nospecialize(b)) = typeintersect(a, b) !== Bottom
+
 function tvar_extent(@nospecialize t)
     while t isa TypeVar
         t = t.ub
@@ -193,10 +212,10 @@ end
 # or outside of the Tuple/Union nesting, though somewhat more expensive to be
 # outside than inside because the representation is larger (because and it
 # informs the callee whether any splitting is possible).
-function unionsplitcost(atypes::Union{SimpleVector,Vector{Any}})
+function unionsplitcost(argtypes::Union{SimpleVector,Vector{Any}})
     nu = 1
     max = 2
-    for ti in atypes
+    for ti in argtypes
         if isa(ti, Union)
             nti = unionlen(ti)
             if nti > max
@@ -256,18 +275,9 @@ function unioncomplexity(t::DataType)
     return c
 end
 unioncomplexity(u::UnionAll) = max(unioncomplexity(u.body)::Int, unioncomplexity(u.var.ub)::Int)
-unioncomplexity(t::Core.TypeofVararg) = isdefined(t, :T) ? unioncomplexity(t.T)::Int : 0
+unioncomplexity(t::TypeofVararg) = isdefined(t, :T) ? unioncomplexity(t.T)::Int : 0
 unioncomplexity(@nospecialize(x)) = 0
 
-function improvable_via_constant_propagation(@nospecialize(t))
-    if isconcretetype(t) && t <: Tuple
-        for p in t.parameters
-            p === DataType && return true
-        end
-    end
-    return false
-end
-
 # convert a Union of Tuple types to a Tuple of Unions
 function unswitchtupleunion(u::Union)
     ts = uniontypes(u)
@@ -285,3 +295,10 @@ function unswitchtupleunion(u::Union)
     end
     Tuple{Any[ Union{Any[t.parameters[i] for t in ts]...} for i in 1:n ]...}
 end
+
+function unwraptv(@nospecialize t) # follow `.ub` until the value is no longer a `TypeVar`
+    while isa(t, TypeVar)
+        t = t.ub # step to the upper bound, which may itself be a `TypeVar`
+    end
+    return t # a non-`TypeVar` input is returned unchanged
+end
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index ed31e382d1152..2a3a975a4551d 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -59,7 +59,7 @@ end
 
 # Meta expression head, these generally can't be deleted even when they are
 # in a dead branch but can be ignored when analyzing uses/liveness.
-is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo)
+is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo
 
 sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0
 
@@ -137,70 +137,78 @@ function retrieve_code_info(linfo::MethodInstance)
     return nothing
 end
 
-# Get at the nonfunction_mt, which happens to be the mt of SimpleVector
-const nonfunction_mt = typename(SimpleVector).mt
-
-function get_compileable_sig(method::Method, @nospecialize(atypes), sparams::SimpleVector)
-    isa(atypes, DataType) || return nothing
-    mt = ccall(:jl_method_table_for, Any, (Any,), atypes)
+function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    isa(atype, DataType) || return nothing
+    mt = ccall(:jl_method_table_for, Any, (Any,), atype)
     mt === nothing && return nothing
     return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any),
-        mt, atypes, sparams, method)
+        mt, atype, sparams, method)
 end
 
+isa_compileable_sig(@nospecialize(atype), method::Method) =
+    !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any), atype, method))  # runtime returns an Int32; nonzero => true
+
 # eliminate UnionAll vars that might be degenerate due to having identical bounds,
 # or a concrete upper bound and appearing covariantly.
-function subst_trivial_bounds(@nospecialize(atypes))
-    if !isa(atypes, UnionAll)
-        return atypes
+function subst_trivial_bounds(@nospecialize(atype))
+    if !isa(atype, UnionAll)
+        return atype
     end
-    v = atypes.var
+    v = atype.var
     if isconcretetype(v.ub) || v.lb === v.ub
-        return subst_trivial_bounds(atypes{v.ub})
+        subst = try
+            atype{v.ub}
+        catch
+            # Note in rare cases a var bound might not be valid to substitute.
+            nothing
+        end
+        if subst !== nothing
+            return subst_trivial_bounds(subst)
+        end
     end
-    return UnionAll(v, subst_trivial_bounds(atypes.body))
+    return UnionAll(v, subst_trivial_bounds(atype.body))
 end
 
-# If removing trivial vars from atypes results in an equivalent type, use that
+# If removing trivial vars from atype results in an equivalent type, use that
 # instead. Otherwise we can get a case like issue #38888, where a signature like
 #   f(x::S) where S<:Int
 # gets cached and matches a concrete dispatch case.
-function normalize_typevars(method::Method, @nospecialize(atypes), sparams::SimpleVector)
-    at2 = subst_trivial_bounds(atypes)
-    if at2 !== atypes && at2 == atypes
-        atypes = at2
+function normalize_typevars(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    at2 = subst_trivial_bounds(atype)
+    if at2 !== atype && at2 == atype
+        atype = at2
         sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), at2, method.sig)::SimpleVector
         sparams = sp_[2]::SimpleVector
     end
-    return atypes, sparams
+    return atype, sparams
 end
 
 # get a handle to the unique specialization object representing a particular instantiation of a call
-function specialize_method(method::Method, @nospecialize(atypes), sparams::SimpleVector, preexisting::Bool=false, compilesig::Bool=false)
-    if isa(atypes, UnionAll)
-        atypes, sparams = normalize_typevars(method, atypes, sparams)
+function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false, compilesig::Bool=false)
+    if isa(atype, UnionAll)
+        atype, sparams = normalize_typevars(method, atype, sparams)
     end
     if compilesig
-        new_atypes = get_compileable_sig(method, atypes, sparams)
-        new_atypes === nothing && return nothing
-        atypes = new_atypes
+        new_atype = get_compileable_sig(method, atype, sparams)
+        new_atype === nothing && return nothing
+        atype = new_atype
     end
     if preexisting
         # check cached specializations
         # for an existing result stored there
-        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes)
+        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atype)::Union{Nothing,MethodInstance}
     end
-    return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atypes, sparams)
+    return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atype, sparams)
 end
 
-function specialize_method(match::MethodMatch, preexisting::Bool=false, compilesig::Bool=false)
-    return specialize_method(match.method, match.spec_types, match.sparams, preexisting, compilesig)
+function specialize_method(match::MethodMatch; kwargs...)
+    return specialize_method(match.method, match.spec_types, match.sparams; kwargs...)
 end
 
 # This function is used for computing alternate limit heuristics
 function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector)
     if isdefined(method, :generator) && method.generator.expand_early && may_invoke_generator(method, sig, sparams)
-        method_instance = specialize_method(method, sig, sparams, false)
+        method_instance = specialize_method(method, sig, sparams)
         if isa(method_instance, MethodInstance)
             cinfo = get_staged(method_instance)
             if isa(cinfo, CodeInfo)
@@ -214,41 +222,19 @@ function method_for_inference_heuristics(method::Method, @nospecialize(sig), spa
     return nothing
 end
 
-argextype(@nospecialize(x), state) = argextype(x, state.src, state.sptypes, state.slottypes)
-
-const empty_slottypes = Any[]
-
-function argextype(@nospecialize(x), src, sptypes::Vector{Any}, slottypes::Vector{Any} = empty_slottypes)
-    if isa(x, Expr)
-        if x.head === :static_parameter
-            return sptypes[x.args[1]::Int]
-        elseif x.head === :boundscheck
-            return Bool
-        elseif x.head === :copyast
-            return argextype(x.args[1], src, sptypes, slottypes)
-        end
-        @assert false "argextype only works on argument-position values"
-    elseif isa(x, SlotNumber)
-        return slottypes[(x::SlotNumber).id]
-    elseif isa(x, TypedSlot)
-        return (x::TypedSlot).typ
-    elseif isa(x, SSAValue)
-        return abstract_eval_ssavalue(x::SSAValue, src)
-    elseif isa(x, Argument)
-        return isa(src, IncrementalCompact) ? src.ir.argtypes[x.n] :
-            isa(src, IRCode) ? src.argtypes[x.n] :
-            slottypes[x.n]
-    elseif isa(x, QuoteNode)
-        return Const((x::QuoteNode).value)
-    elseif isa(x, GlobalRef)
-        return abstract_eval_global(x.mod, (x::GlobalRef).name)
-    elseif isa(x, PhiNode)
-        return Any
-    elseif isa(x, PiNode)
-        return x.typ
-    else
-        return Const(x)
+#########
+# types #
+#########
+
+function singleton_type(@nospecialize(ft))
+    if isa(ft, Const)
+        return ft.val
+    elseif isconstType(ft)
+        return ft.parameters[1]
+    elseif ft isa DataType && isdefined(ft, :instance)
+        return ft.instance
     end
+    return nothing
 end
 
 ###################
@@ -311,25 +297,27 @@ function is_throw_call(e::Expr)
     return false
 end
 
-function find_throw_blocks(code::Vector{Any}, ir = RefValue{IRCode}())
+function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int})
+    for stmt in find_throw_blocks(src.code, handler_at)
+        src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK  # OR the bit in, keeping any flags already set on this statement
+    end
+    return nothing  # called only for its effect on `src.ssaflags`
+end
+
+function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int})
     stmts = BitSet()
     n = length(code)
-    try_depth = 0
     for i in n:-1:1
         s = code[i]
         if isa(s, Expr)
-            if s.head === :enter
-                try_depth -= 1
-            elseif s.head === :leave
-                try_depth += (s.args[1]::Int)
-            elseif s.head === :gotoifnot
-                tgt = s.args[2]::Int
-                if i+1 in stmts && tgt in stmts
+            if s.head === :gotoifnot
+                if i+1 in stmts && s.args[2]::Int in stmts
                     push!(stmts, i)
                 end
             elseif s.head === :return
+                # see `ReturnNode` handling
             elseif is_throw_call(s)
-                if try_depth == 0
+                if handler_at[i] == 0
                     push!(stmts, i)
                 end
             elseif i+1 in stmts
@@ -340,22 +328,12 @@ function find_throw_blocks(code::Vector{Any}, ir = RefValue{IRCode}())
             # (where !isdefined(s, :val)) as `throw` points, but that can cause
             # worse codegen around the call site (issue #37558)
         elseif isa(s, GotoNode)
-            tgt = s.label
-            if isassigned(ir)
-                tgt = first(ir[].cfg.blocks[tgt].stmts)
-            end
-            if tgt in stmts
+            if s.label in stmts
                 push!(stmts, i)
             end
         elseif isa(s, GotoIfNot)
-            if i+1 in stmts
-                tgt = s.dest::Int
-                if isassigned(ir)
-                    tgt = first(ir[].cfg.blocks[tgt].stmts)
-                end
-                if tgt in stmts
-                    push!(stmts, i)
-                end
+            if i+1 in stmts && s.dest in stmts
+                push!(stmts, i)
             end
         elseif i+1 in stmts
             push!(stmts, i)
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index f6b89f8f5cd04..bcde5d894159c 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -1,9 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Expr head => argument count bounds
-const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange}(
+const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :call => 1:typemax(Int),
     :invoke => 2:typemax(Int),
+    :invoke_modify => 3:typemax(Int),
     :static_parameter => 1:1,
     :(&) => 1:1,
     :(=) => 2:2,
@@ -16,6 +17,8 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange}(
     :leave => 1:1,
     :pop_exception => 1:1,
     :inbounds => 1:1,
+    :inline => 1:1,
+    :noinline => 1:1,
     :boundscheck => 0:0,
     :copyast => 1:1,
     :meta => 0:typemax(Int),
@@ -45,6 +48,7 @@ const EMPTY_SLOTNAMES = "slotnames field is empty"
 const SLOTFLAGS_MISMATCH = "length(slotnames) < length(slotflags)"
 const SSAVALUETYPES_MISMATCH = "not all SSAValues in AST have a type in ssavaluetypes"
 const SSAVALUETYPES_MISMATCH_UNINFERRED = "uninferred CodeInfo ssavaluetypes field does not equal the number of present SSAValues"
+const SSAFLAGS_MISMATCH = "not all SSAValues have a corresponding `ssaflags`"
 const NON_TOP_LEVEL_METHOD = "encountered `Expr` head `:method` in non-top-level code (i.e. `nargs` > 0)"
 const NON_TOP_LEVEL_GLOBAL = "encountered `Expr` head `:global` in non-top-level code (i.e. `nargs` > 0)"
 const SIGNATURE_NARGS_MISMATCH = "method signature does not match number of method arguments"
@@ -76,7 +80,7 @@ end
 
 function _validate_val!(@nospecialize(x), errors, ssavals::BitSet)
     if isa(x, Expr)
-        if x.head === :call || x.head === :invoke
+        if x.head === :call || x.head === :invoke || x.head === :invoke_modify
             f = x.args[1]
             if f isa GlobalRef && (f.name === :cglobal) && x.head === :call
                 # TODO: these are not yet linearized
@@ -136,12 +140,13 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
                 end
                 validate_val!(lhs)
                 validate_val!(rhs)
-            elseif head === :call || head === :invoke || head === :gc_preserve_end || head === :meta ||
+            elseif head === :call || head === :invoke || x.head === :invoke_modify ||
+                head === :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :cfunction ||
                 head === :const || head === :enter || head === :leave || head === :pop_exception ||
                 head === :method || head === :global || head === :static_parameter ||
                 head === :new || head === :splatnew || head === :thunk || head === :loopinfo ||
-                head === :throw_undef_if_not || head === :code_coverage_effect
+                head === :throw_undef_if_not || head === :code_coverage_effect || head === :inline || head === :noinline
                 validate_val!(x)
             else
                 # TODO: nothing is actually in statement position anymore
@@ -179,12 +184,16 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
     nssavals = length(c.code)
     !is_top_level && nslotnames == 0 && push!(errors, InvalidCodeError(EMPTY_SLOTNAMES))
     nslotnames < nslotflags && push!(errors, InvalidCodeError(SLOTFLAGS_MISMATCH, (nslotnames, nslotflags)))
-    if c.inferred
-        nssavaluetypes = length(c.ssavaluetypes)
+    ssavaluetypes = c.ssavaluetypes
+    if isa(ssavaluetypes, Vector{Any})
+        nssavaluetypes = length(ssavaluetypes)
         nssavaluetypes < nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH, (nssavals, nssavaluetypes)))
     else
-        c.ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, c.ssavaluetypes)))
+        nssavaluetypes = ssavaluetypes::Int
+        nssavaluetypes ≠ nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, nssavaluetypes)))
     end
+    nssaflags = length(c.ssaflags)
+    nssavals ≠ nssaflags && push!(errors, InvalidCodeError(SSAFLAGS_MISMATCH, (nssavals, nssaflags)))
     return errors
 end
 
@@ -205,7 +214,7 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInsta
     else
         m = mi.def::Method
         mnargs = m.nargs
-        n_sig_params = length(Core.Compiler.unwrap_unionall(m.sig).parameters)
+        n_sig_params = length((unwrap_unionall(m.sig)::DataType).parameters)
         if (m.isva ? (n_sig_params < (mnargs - 1)) : (n_sig_params != mnargs))
             push!(errors, InvalidCodeError(SIGNATURE_NARGS_MISMATCH, (m.isva, n_sig_params, mnargs)))
         end
@@ -235,7 +244,7 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
         return true
     end
     return false
diff --git a/base/complex.jl b/base/complex.jl
index 4fe736a7c0465..50012274df4f1 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -91,7 +91,7 @@ imag(x::Real) = zero(x)
 """
     reim(z)
 
-Return both the real and imaginary parts of the complex number `z`.
+Return a tuple of the real and imaginary parts of the complex number `z`.
 
 # Examples
 ```jldoctest
@@ -535,18 +535,12 @@ end
 #     return Complex(abs(iz)/r/2, copysign(r,iz))
 # end
 
-# compute exp(im*theta)
-function cis(theta::Real)
-    s, c = sincos(theta)
-    Complex(c, s)
-end
-
 """
-    cis(z)
+    cis(x)
 
-Return ``\\exp(iz)``.
+More efficient method for `exp(im*x)` by using Euler's formula: ``\\cos(x) + i \\sin(x) = \\exp(i x)``.
 
-See also [`cispi`](@ref), [`angle`](@ref).
+See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref), [`angle`](@ref).
 
 # Examples
 ```jldoctest
@@ -554,23 +548,29 @@ julia> cis(π) ≈ -1
 true
 ```
 """
+function cis end
+function cis(theta::Real)
+    s, c = sincos(theta)
+    Complex(c, s)
+end
+
 function cis(z::Complex)
     v = exp(-imag(z))
     s, c = sincos(real(z))
     Complex(v * c, v * s)
 end
 
-cispi(theta::Real) = Complex(reverse(sincospi(theta))...)
-
 """
-    cispi(z)
+    cispi(x)
 
-Compute ``\\exp(i\\pi x)`` more accurately than `cis(pi*x)`, especially for large `x`.
+More accurate method for `cis(pi*x)` (especially for large `x`).
+
+See also [`cis`](@ref), [`sincospi`](@ref), [`exp`](@ref), [`angle`](@ref).
 
 # Examples
 ```jldoctest
-julia> cispi(1)
--1.0 + 0.0im
+julia> cispi(10000)
+1.0 + 0.0im
 
 julia> cispi(0.25 + 1im)
 0.030556854645952924 + 0.030556854645952924im
@@ -579,6 +579,9 @@ julia> cispi(0.25 + 1im)
 !!! compat "Julia 1.6"
     This function requires Julia 1.6 or later.
 """
+function cispi end
+cispi(theta::Real) = Complex(reverse(sincospi(theta))...)
+
 function cispi(z::Complex)
     sipi, copi = sincospi(z)
     return complex(real(copi) - imag(sipi), imag(copi) + real(sipi))
diff --git a/base/condition.jl b/base/condition.jl
index be0f618865a48..69caf4a4cec3d 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -34,7 +34,7 @@ assert_havelock(l::AbstractLock, tid::Nothing) = concurrency_violation()
 This struct does not implement a real lock, but instead
 pretends to be always locked on the original thread it was allocated on,
 and simply ignores all other interactions.
-It also does not synchronize tasks; for that use a real lock such as [`RecursiveLock`](@ref).
+It also does not synchronize tasks; for that use a real lock such as [`ReentrantLock`](@ref).
 This can be used in the place of a real lock to, instead, simply and cheaply assert
 that the operation is only occurring on a single cooperatively-scheduled thread.
 It is thus functionally equivalent to allocating a real, recursive, task-unaware lock
@@ -82,6 +82,17 @@ function _wait2(c::GenericCondition, waiter::Task)
     ct = current_task()
     assert_havelock(c)
     push!(c.waitq, waiter)
+    # since _wait2 is similar to schedule, we should observe the sticky bit now
+    if waiter.sticky && Threads.threadid(waiter) == 0
+        # Issue #41324
+        # t.sticky && tid == 0 is a task that needs to be co-scheduled with
+        # the parent task. If the parent (current_task) is not sticky we must
+        # set it to be sticky.
+        # XXX: Ideally we would be able to unset this
+        ct.sticky = true
+        tid = Threads.threadid()
+        ccall(:jl_set_task_tid, Cvoid, (Any, Cint), waiter, tid-1)
+    end
     return
 end
 
@@ -91,7 +102,8 @@ end
 Block the current task until some event occurs, depending on the type of the argument:
 
 * [`Channel`](@ref): Wait for a value to be appended to the channel.
-* [`Condition`](@ref): Wait for [`notify`](@ref) on a condition.
+* [`Condition`](@ref): Wait for [`notify`](@ref) on a condition and return the `val`
+  parameter passed to `notify`.
 * `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process
   can be used to determine success or failure.
 * [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, a
diff --git a/base/coreio.jl b/base/coreio.jl
index 9ef717383dedd..d0f8df290b41b 100644
--- a/base/coreio.jl
+++ b/base/coreio.jl
@@ -13,6 +13,7 @@ write(::DevNull, ::UInt8) = 1
 unsafe_write(::DevNull, ::Ptr{UInt8}, n::UInt)::Int = n
 close(::DevNull) = nothing
 wait_close(::DevNull) = wait()
+bytesavailable(io::DevNull) = 0
 
 let CoreIO = Union{Core.CoreSTDOUT, Core.CoreSTDERR}
     global write(io::CoreIO, x::UInt8) = Core.write(io, x)
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 77dab6d701772..1f54940c4b5de 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -263,3 +263,10 @@ end
 @deprecate catch_stack(task=current_task(); include_bt=true) current_exceptions(task; backtrace=include_bt) false
 
 # END 1.7 deprecations
+
+# BEGIN 1.8 deprecations
+
+@deprecate var"@_inline_meta"   var"@inline"   false
+@deprecate var"@_noinline_meta" var"@noinline" false
+
+# END 1.8 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index 6918677c4f0bb..c5adf4068d49b 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -367,6 +367,7 @@ end
         # > 3/4 deleted or > 2/3 full
         rehash!(h, h.count > 64000 ? h.count*2 : h.count*4)
     end
+    nothing
 end
 
 function setindex!(h::Dict{K,V}, v0, key0) where V where K
@@ -392,6 +393,22 @@ function setindex!(h::Dict{K,V}, v0, key::K) where V where K
     return h
 end
 
+# Method for `Dict{K,Any}` with a key already of type `K`; `@nospecialize` avoids specializing on the type of `v`.
+function setindex!(h::Dict{K,Any}, v, key::K) where K
+    @nospecialize v
+    index = ht_keyindex2!(h, key)
+
+    if index > 0
+        h.age += 1
+        @inbounds h.keys[index] = key
+        @inbounds h.vals[index] = v
+    else
+        @inbounds _setindex!(h, v, key, -index)
+    end
+
+    return h
+end
+
 """
     get!(collection, key, default)
 
@@ -826,6 +843,6 @@ length(t::ImmutableDict) = count(Returns(true), t)
 isempty(t::ImmutableDict) = !isdefined(t, :parent)
 empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
 
-_similar_for(c::Dict, ::Type{Pair{K,V}}, itr, isz) where {K, V} = empty(c, K, V)
-_similar_for(c::AbstractDict, ::Type{T}, itr, isz) where {T} =
+_similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V} = empty(c, K, V)
+_similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T} =
     throw(ArgumentError("for AbstractDicts, similar requires an element type of Pair;\n  if calling map, consider a comprehension instead"))
diff --git a/base/div.jl b/base/div.jl
index 226d155a7bc01..bf2b38c93537e 100644
--- a/base/div.jl
+++ b/base/div.jl
@@ -113,7 +113,7 @@ What is happening here is that the true value of the floating-point number writt
 as `0.1` is slightly larger than the numerical value 1/10 while `6.0` represents
 the number 6 precisely. Therefore the true value of `6.0 / 0.1` is slightly less
 than 60. When doing division, this is rounded to precisely `60.0`, but
-`fld(6.0, 0.1)` always takes the floor or the true value, so the result is `59.0`.
+`fld(6.0, 0.1)` always takes the floor of the true value, so the result is `59.0`.
 """
 fld(a, b) = div(a, b, RoundDown)
 
@@ -156,6 +156,8 @@ julia> divrem(7,3)
 ```
 """
 divrem(x, y) = divrem(x, y, RoundToZero)
+
+
 function divrem(a, b, r::RoundingMode)
     if r === RoundToZero
         # For compat. Remove in 2.0.
@@ -167,6 +169,25 @@ function divrem(a, b, r::RoundingMode)
         (div(a, b, r), rem(a, b, r))
     end
 end
+# Integer fast path (RoundToZero, RoundDown, RoundUp): avoids calling `rem` by computing the remainder as a - d*b.
+# a - d*b is not precise for floats (AbstractFloat, AbstractIrrational); Rationals are still slower.
+function divrem(a::Integer, b::Integer, r::Union{typeof(RoundUp),
+                                                typeof(RoundDown),
+                                                typeof(RoundToZero)})
+    if r === RoundToZero
+        # For compat. Remove in 2.0.
+        d = div(a, b)
+        (d, a - d*b)
+    elseif r === RoundDown
+        # For compat. Remove in 2.0.
+        d = fld(a, b)
+        (d, a - d*b)
+    elseif r === RoundUp
+        # Quotient via the three-argument `div` with `r === RoundUp` (no two-argument compat form here — TODO confirm).
+        d = div(a, b, r)
+        (d, a - d*b)
+    end
+end
 function divrem(x::Integer, y::Integer, rnd::typeof(RoundNearest))
     (q, r) = divrem(x, y)
     if x >= 0
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index 473070114b0a2..b84b3ee8d55f4 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -74,15 +74,15 @@ const META    = gensym(:meta)
 const METAType = IdDict{Any,Any}
 
 function meta(m::Module)
-    if !isdefined(m, META)
+    if !isdefined(m, META) || getfield(m, META) === nothing
         initmeta(m)
     end
     return getfield(m, META)::METAType
 end
 
 function initmeta(m::Module)
-    if !isdefined(m, META)
-        Core.eval(m, :(const $META = $(METAType())))
+    if !isdefined(m, META) || getfield(m, META) === nothing
+        Core.eval(m, :($META = $(METAType())))
         push!(modules, m)
     end
     nothing
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index 509c2cd93ffd1..3cbe180233d9c 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -23,6 +23,9 @@ as well as many great tutorials and learning resources:
 For help on a specific function or macro, type `?` followed
 by its name, e.g. `?cos`, or `?@time`, and press enter.
 Type `;` to enter shell mode, `]` to enter package mode.
+
+To exit the interactive session, type `CTRL-D` (press the
+control key together with the `d` key), or type `exit()`.
 """
 kw"help", kw"Julia", kw"julia", kw""
 
@@ -127,7 +130,7 @@ kw"__init__"
     baremodule
 
 `baremodule` declares a module that does not contain `using Base` or local definitions of
-[`eval`](@ref Base.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words,
+[`eval`](@ref Base.MainInclude.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words,
 
 ```julia
 module Mod
@@ -179,8 +182,8 @@ kw"primitive type"
 A macro maps a sequence of argument expressions to a returned expression, and the
 resulting expression is substituted directly into the program at the point where
 the macro is invoked.
-Macros are a way to run generated code without calling [`eval`](@ref Base.eval), since the generated
-code instead simply becomes part of the surrounding program.
+Macros are a way to run generated code without calling [`eval`](@ref Base.MainInclude.eval),
+since the generated code instead simply becomes part of the surrounding program.
 Macro arguments may include expressions, literal values, and symbols. Macros can be defined for
 variable number of arguments (varargs), but do not accept keyword arguments.
 Every macro also implicitly gets passed the arguments `__source__`, which contains the line number
@@ -974,12 +977,22 @@ kw"..."
     ;
 
 `;` has a similar role in Julia as in many C-like languages, and is used to delimit the
-end of the previous statement. `;` is not necessary after new lines, but can be used to
+end of the previous statement.
+
+`;` is not necessary at the end of a line, but can be used to
 separate statements on a single line or to join statements into a single expression.
-`;` is also used to suppress output printing in the REPL and similar interfaces.
+
+Adding `;` at the end of a line in the REPL will suppress printing the result of that expression.
+
+In function declarations, and optionally in calls, `;` separates regular arguments from keywords.
+
+While constructing arrays, if the arguments inside the square brackets are separated by `;`
+then their contents are vertically concatenated together.
+
+In the standard REPL, typing `;` on an empty line will switch to shell mode.
 
 # Examples
-```julia
+```jldoctest
 julia> function foo()
            x = "Hello, "; x *= "World!"
            return x
@@ -993,6 +1006,19 @@ julia> foo();
 
 julia> bar()
 "Hello, Mars!"
+
+julia> function plot(x, y; style="solid", width=1, color="black")
+           ###
+       end
+
+julia> [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+shell> echo hello
+hello
 ```
 """
 kw";"
@@ -1162,10 +1188,10 @@ fields of the type to be set after construction. See the manual section on
 kw"mutable struct"
 
 """
-    new
+    new, or new{A,B,...}
 
-Special function available to inner constructors which created a new object
-of the type.
+Special function available to inner constructors which creates a new object
+of the type. The form `new{A,B,...}` explicitly specifies values of parameters for parametric types.
 See the manual section on [Inner Constructor Methods](@ref man-inner-constructor-methods)
 for more information.
 """
@@ -1507,8 +1533,9 @@ DomainError
 """
     Task(func)
 
-Create a `Task` (i.e. coroutine) to execute the given function `func` (which must be
-callable with no arguments). The task exits when this function returns.
+Create a `Task` (i.e. coroutine) to execute the given function `func` (which
+must be callable with no arguments). The task exits when this function returns.
+When [`schedule`](@ref)d, the task will run in the "world age" that its parent had when the `Task` was constructed.
 
 # Examples
 ```jldoctest
@@ -1980,24 +2007,23 @@ setfield!
 
 These atomically perform the operations to simultaneously get and set a field:
 
-    y = getfield!(value, name)
+    y = getfield(value, name)
     setfield!(value, name, x)
     return y
-```
 """
 swapfield!
 
 """
-    modifyfield!(value, name::Symbol, op, x, [order::Symbol])
-    modifyfield!(value, i::Int, op, x, [order::Symbol])
+    modifyfield!(value, name::Symbol, op, x, [order::Symbol]) -> Pair
+    modifyfield!(value, i::Int, op, x, [order::Symbol]) -> Pair
 
 These atomically perform the operations to get and set a field after applying
 the function `op`.
 
-    y = getfield!(value, name)
+    y = getfield(value, name)
     z = op(y, x)
     setfield!(value, name, z)
-    return y, z
+    return y => z
 
 If supported by the hardware (for example, atomic increment), this may be
 optimized to the appropriate hardware instruction, otherwise it'll use a loop.
@@ -2006,18 +2032,19 @@ modifyfield!
 
 """
     replacefield!(value, name::Symbol, expected, desired,
-        [success_order::Symbol, [fail_order::Symbol=success_order]) =>
-        (old, Bool)
+                  [success_order::Symbol, [fail_order::Symbol=success_order]]) -> (; old, success::Bool)
+    replacefield!(value, i::Int, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]]) -> (; old, success::Bool)
 
 These atomically perform the operations to get and conditionally set a field to
 a given value.
 
-    y = getfield!(value, name, fail_order)
+    y = getfield(value, name, fail_order)
     ok = y === expected
     if ok
         setfield!(value, name, desired, success_order)
     end
-    return y, ok
+    return (; old = y, success = ok)
 
 If supported by the hardware, this may be optimized to the appropriate hardware
 instruction, otherwise it'll use a loop.
@@ -2529,10 +2556,20 @@ UnionAll
 """
     ::
 
-With the `::`-operator type annotations are attached to expressions and variables in programs.
-See the manual section on [Type Declarations](@ref).
+The `::` operator either asserts that a value has the given type, or declares that
+a local variable or function return always has the given type.
+
+Given `expression::T`, `expression` is first evaluated. If the result is of type
+`T`, the value is simply returned. Otherwise, a [`TypeError`](@ref) is thrown.
 
-Outside of declarations `::` is used to assert that expressions and variables in programs have a given type.
+In local scope, the syntax `local x::T` or `x::T = expression` declares that local variable
+`x` always has type `T`. When a value is assigned to the variable, it will be
+converted to type `T` by calling [`convert`](@ref).
+
+In a method declaration, the syntax `function f(x)::T` causes any value returned by
+the method to be converted to type `T`.
+
+See the manual section on [Type Declarations](@ref).
 
 # Examples
 ```jldoctest
@@ -2541,6 +2578,13 @@ ERROR: TypeError: typeassert: expected AbstractFloat, got a value of type Int64
 
 julia> (1+2)::Int
 3
+
+julia> let
+           local x::Int
+           x = 2.0
+           x
+       end
+2
 ```
 """
 kw"::"
@@ -2769,6 +2813,13 @@ StridedVecOrMat
     Module
 
 A `Module` is a separate global variable workspace. See [`module`](@ref) and the [manual section about modules](@ref modules) for details.
+
+    Module(name::Symbol=:anonymous, std_imports=true, default_names=true)
+
+Return a module with the specified name. A `baremodule` corresponds to `Module(:ModuleName, false)`.
+
+An empty module containing no names at all can be created with `Module(:ModuleName, false, false)`.
+This module will not import `Base` or `Core` and does not contain a reference to itself.
 """
 Module
 
diff --git a/base/error.jl b/base/error.jl
index 9116d00618900..a1a7d1817d4c6 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -38,7 +38,7 @@ error(s::AbstractString) = throw(ErrorException(s))
 Raise an `ErrorException` with the given message.
 """
 function error(s::Vararg{Any,N}) where {N}
-    @_noinline_meta
+    @noinline
     throw(ErrorException(Main.Base.string(s...)))
 end
 
@@ -105,7 +105,7 @@ end
 Get a backtrace object for the current program point.
 """
 function backtrace()
-    @_noinline_meta
+    @noinline
     # skip frame for backtrace(). Note that for this to work properly,
     # backtrace() itself must not be interpreted nor inlined.
     skip = 1
@@ -124,11 +124,11 @@ function catch_backtrace()
 end
 
 struct ExceptionStack <: AbstractArray{Any,1}
-    stack
+    stack::Array{Any,1}
 end
 
 """
-    current_exceptions(task=current_task(); [inclue_bt=true])
+    current_exceptions(task::Task=current_task(); [backtrace::Bool=true])
 
 Get the stack of exceptions currently being handled. For nested catch blocks
 there may be more than one current exception in which case the most recently
@@ -142,10 +142,10 @@ arbitrary task. This is useful for inspecting tasks which have failed due to
 uncaught exceptions.
 
 !!! compat "Julia 1.7"
-    This function went by the experiemental name `catch_stack()` in Julia
+    This function went by the experimental name `catch_stack()` in Julia
     1.1–1.6, and had a plain Vector-of-tuples as a return type.
 """
-function current_exceptions(task=current_task(); backtrace=true)
+function current_exceptions(task::Task=current_task(); backtrace::Bool=true)
     raw = ccall(:jl_get_excstack, Any, (Any,Cint,Cint), task, backtrace, typemax(Cint))::Vector{Any}
     formatted = Any[]
     stride = backtrace ? 3 : 1
@@ -159,7 +159,7 @@ end
 
 ## keyword arg lowering generates calls to this ##
 function kwerr(kw, args::Vararg{Any,N}) where {N}
-    @_noinline_meta
+    @noinline
     throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...)))
 end
 
diff --git a/base/errorshow.jl b/base/errorshow.jl
index 641cf5f2cdf4b..121fb50db91c1 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -199,19 +199,20 @@ function print_with_compare(io::IO, @nospecialize(a), @nospecialize(b), color::S
     end
 end
 
-function show_convert_error(io::IO, ex::MethodError, @nospecialize(arg_types_param))
+function show_convert_error(io::IO, ex::MethodError, arg_types_param)
     # See #13033
     T = striptype(ex.args[1])
     if T === nothing
         print(io, "First argument to `convert` must be a Type, got ", ex.args[1])
     else
-        print_one_line = isa(T, DataType) && isa(arg_types_param[2], DataType) && T.name != arg_types_param[2].name
+        p2 = arg_types_param[2]
+        print_one_line = isa(T, DataType) && isa(p2, DataType) && T.name != p2.name
         printstyled(io, "Cannot `convert` an object of type ")
         print_one_line || printstyled(io, "\n  ")
-        print_with_compare(io, arg_types_param[2], T, :light_green)
+        print_with_compare(io, p2, T, :light_green)
         printstyled(io, " to an object of type ")
         print_one_line || printstyled(io, "\n  ")
-        print_with_compare(io, T, arg_types_param[2], :light_red)
+        print_with_compare(io, T, p2, :light_red)
     end
 end
 
@@ -226,6 +227,7 @@ function showerror(io::IO, ex::MethodError)
         return showerror_ambiguous(io, meth, f, arg_types)
     end
     arg_types_param::SimpleVector = arg_types.parameters
+    show_candidates = true
     print(io, "MethodError: ")
     ft = typeof(f)
     name = ft.name.mt.name
@@ -242,6 +244,9 @@ function showerror(io::IO, ex::MethodError)
     if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types
         f_is_function = true
         show_convert_error(io, ex, arg_types_param)
+    elseif f === mapreduce_empty || f === reduce_empty
+        print(io, "reducing over an empty collection is not allowed; consider supplying `init` to the reducer")
+        show_candidates = false
     elseif isempty(methods(f)) && isa(f, DataType) && isabstracttype(f)
         print(io, "no constructors have been defined for ", f)
     elseif isempty(methods(f)) && !isa(f, Function) && !isa(f, Type)
@@ -314,7 +319,7 @@ function showerror(io::IO, ex::MethodError)
         end
     end
     Experimental.show_error_hints(io, ex, arg_types_param, kwargs)
-    try
+    show_candidates && try
         show_method_candidates(io, ex, kwargs)
     catch ex
         @error "Error showing method candidates, aborted" exception=ex,catch_backtrace()
@@ -362,6 +367,13 @@ function showerror_nostdio(err, msg::AbstractString)
     ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, "\n")
 end
 
+stacktrace_expand_basepaths()::Bool =
+    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_EXPAND_BASEPATHS", "false")) === true
+stacktrace_contract_userdir()::Bool =
+    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_CONTRACT_HOMEDIR", "true")) === true
+stacktrace_linebreaks()::Bool =
+    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_LINEBREAKS", "false")) === true
+
 function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=())
     is_arg_types = isa(ex.args, DataType)
     arg_types = is_arg_types ? ex.args : typesof(ex.args...)
@@ -414,7 +426,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 # If isvarargtype then it checks whether the rest of the input arguments matches
                 # the varargtype
                 if Base.isvarargtype(sig[i])
-                    sigstr = (unwrap_unionall(sig[i]).T, "...")
+                    sigstr = (unwrapva(unwrap_unionall(sig[i])), "...")
                     j = length(t_i)
                 else
                     sigstr = (sig[i],)
@@ -451,7 +463,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 # It ensures that methods like f(a::AbstractString...) gets the correct
                 # number of right_matches
                 for t in arg_types_param[length(sig):end]
-                    if t <: rewrap_unionall(unwrap_unionall(sig[end]).T, method.sig)
+                    if t <: rewrap_unionall(unwrapva(unwrap_unionall(sig[end])), method.sig)
                         right_matches += 1
                     end
                 end
@@ -464,7 +476,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                     for (k, sigtype) in enumerate(sig[length(t_i)+1:end])
                         sigtype = isvarargtype(sigtype) ? unwrap_unionall(sigtype) : sigtype
                         if Base.isvarargtype(sigtype)
-                            sigstr = ((sigtype::Core.TypeofVararg).T, "...")
+                            sigstr = (unwrapva(sigtype::Core.TypeofVararg), "...")
                         else
                             sigstr = (sigtype,)
                         end
@@ -489,7 +501,12 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 end
                 print(iob, ")")
                 show_method_params(iob0, tv)
-                print(iob, " at ", method.file, ":", method.line)
+                file, line = functionloc(method)
+                if file === nothing
+                    file = string(method.file)
+                end
+                stacktrace_contract_userdir() && (file = contractuser(file))
+                print(iob, " at ", file, ":", line)
                 if !isempty(kwargs)::Bool
                     unexpected = Symbol[]
                     if isempty(kwords) || !(any(endswith(string(kword), "...") for kword in kwords))
@@ -549,13 +566,6 @@ const update_stackframes_callback = Ref{Function}(identity)
 const STACKTRACE_MODULECOLORS = [:magenta, :cyan, :green, :yellow]
 const STACKTRACE_FIXEDCOLORS = IdDict(Base => :light_black, Core => :light_black)
 
-stacktrace_expand_basepaths()::Bool =
-    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_EXPAND_BASEPATHS", "false")) === true
-stacktrace_contract_userdir()::Bool =
-    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_CONTRACT_HOMEDIR", "true")) === true
-stacktrace_linebreaks()::Bool =
-    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_LINEBREAKS", "false")) === true
-
 function show_full_backtrace(io::IO, trace::Vector; print_linebreaks::Bool)
     num_frames = length(trace)
     ndigits_max = ndigits(num_frames)
@@ -684,6 +694,7 @@ end
 # Print a stack frame where the module color is set manually with `modulecolor`.
 function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, modulecolor)
     file, line = string(frame.file), frame.line
+    file = fixup_stdlib_path(file)
     stacktrace_expand_basepaths() && (file = something(find_source_file(file), file))
     stacktrace_contract_userdir() && (file = contractuser(file))
 
@@ -773,10 +784,9 @@ end
 # For improved user experience, filter out frames for include() implementation
 # - see #33065. See also #35371 for extended discussion of internal frames.
 function _simplify_include_frames(trace)
-    i = length(trace)
-    kept_frames = trues(i)
+    kept_frames = trues(length(trace))
     first_ignored = nothing
-    while i >= 1
+    for i in length(trace):-1:1
         frame::StackFrame, _ = trace[i]
         mod = parentmodule(frame)
         if first_ignored === nothing
@@ -798,10 +808,9 @@ function _simplify_include_frames(trace)
                 first_ignored = nothing
             end
         end
-        i -= 1
     end
     if first_ignored !== nothing
-        kept_frames[i:first_ignored] .= false
+        kept_frames[1:first_ignored] .= false
     end
     return trace[kept_frames]
 end
diff --git a/base/essentials.jl b/base/essentials.jl
index a01598ca4a6ca..dd410b06cc8d9 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -181,7 +181,7 @@ Stacktrace:
 [...]
 ```
 
-If `T` is a [`AbstractFloat`](@ref) or [`Rational`](@ref) type,
+If `T` is a [`AbstractFloat`](@ref) type,
 then it will return the closest value to `x` representable by `T`.
 
 ```jldoctest
@@ -191,11 +191,8 @@ julia> x = 1/3
 julia> convert(Float32, x)
 0.33333334f0
 
-julia> convert(Rational{Int32}, x)
-1//3
-
-julia> convert(Rational{Int64}, x)
-6004799503160661//18014398509481984
+julia> convert(BigFloat, x)
+0.333333333333333314829616256247390992939472198486328125
 ```
 
 If `T` is a collection type and `x` a collection, the result of
@@ -213,8 +210,8 @@ See also: [`round`](@ref), [`trunc`](@ref), [`oftype`](@ref), [`reinterpret`](@r
 """
 function convert end
 
-convert(::Type{Union{}}, x) = throw(MethodError(convert, (Union{}, x)))
-convert(::Type{Any}, x) = x
+convert(::Type{Union{}}, @nospecialize x) = throw(MethodError(convert, (Union{}, x)))
+convert(::Type{Any}, @nospecialize x) = x
 convert(::Type{T}, x::T) where {T} = x
 convert(::Type{Type}, x::Type) = x # the ssair optimizer is strongly dependent on this method existing to avoid over-specialization
                                    # in the absence of inlining-enabled
@@ -331,7 +328,7 @@ function typename(a::Union)
 end
 typename(union::UnionAll) = typename(union.body)
 
-_tuple_error(T::Type, x) = (@_noinline_meta; throw(MethodError(convert, (T, x))))
+_tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x))))
 
 convert(::Type{T}, x::T) where {T<:Tuple} = x
 function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
@@ -340,7 +337,7 @@ function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
     if typeintersect(NTuple{N,Any}, T) === Union{}
         _tuple_error(T, x)
     end
-    cvt1(n) = (@_inline_meta; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
+    cvt1(n) = (@inline; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
     return ntuple(cvt1, Val(N))::NTuple{N,Any}
 end
 
@@ -477,6 +474,22 @@ Stacktrace:
 """
 sizeof(x) = Core.sizeof(x)
 
+"""
+    ifelse(condition::Bool, x, y)
+
+Return `x` if `condition` is `true`, otherwise return `y`. This differs from `?` or `if` in
+that it is an ordinary function, so all the arguments are evaluated first. In some cases,
+using `ifelse` instead of an `if` statement can eliminate the branch in generated code and
+provide higher performance in tight loops.
+
+# Examples
+```jldoctest
+julia> ifelse(1 > 2, 1, 2)
+2
+```
+"""
+ifelse(condition::Bool, x, y) = Core.ifelse(condition, x, y)
+
 # simple Array{Any} operations needed for bootstrap
 @eval setindex!(A::Array{Any}, @nospecialize(x), i::Int) = arrayset($(Expr(:boundscheck)), A, x, i)
 
@@ -713,7 +726,7 @@ call obsolete versions of a function `f`.
 `f` directly, and the type of the result cannot be inferred by the compiler.)
 """
 function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...)
-    kwargs = Base.merge(NamedTuple(), kwargs)
+    kwargs = merge(NamedTuple(), kwargs)
     if isempty(kwargs)
         return Core._call_latest(f, args...)
     end
diff --git a/base/experimental.jl b/base/experimental.jl
index 232d2efd11d21..f5f81ee40b85e 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -29,9 +29,9 @@ Base.IndexStyle(::Type{<:Const}) = IndexLinear()
 Base.size(C::Const) = size(C.a)
 Base.axes(C::Const) = axes(C.a)
 @eval Base.getindex(A::Const, i1::Int) =
-    (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
+    (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
 @eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =
-  (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+  (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
 
 """
     @aliasscope expr
@@ -162,6 +162,30 @@ macro compiler_options(args...)
     return opts
 end
 
+"""
+    Experimental.@force_compile
+
+Force compilation of the block or function (Julia's built-in interpreter is blocked from executing it).
+
+# Examples
+
+```
+julia> occursin("interpreter", string(stacktrace(begin
+           # with forced compilation
+           Base.Experimental.@force_compile
+           backtrace()
+       end, true)))
+false
+
+julia> occursin("interpreter", string(stacktrace(begin
+           # without forced compilation
+           backtrace()
+       end, true)))
+true
+```
+"""
+macro force_compile() Expr(:meta, :force_compile) end
+
 # UI features for errors
 
 """
diff --git a/base/exports.jl b/base/exports.jl
index 88933dad882ca..8174488897cb5 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -573,11 +573,14 @@ export
     bytes2hex,
     chomp,
     chop,
+    chopprefix,
+    chopsuffix,
     codepoint,
     codeunit,
     codeunits,
     digits,
     digits!,
+    eachsplit,
     escape_string,
     hex2bytes,
     hex2bytes!,
@@ -803,6 +806,7 @@ export
 
 # I/O and events
     close,
+    closewrite,
     countlines,
     eachline,
     readeach,
@@ -891,10 +895,12 @@ export
     chown,
     cp,
     ctime,
+    diskstat,
     download,
     filemode,
     filesize,
     gperm,
+    hardlink,
     isblockdev,
     ischardev,
     isdir,
@@ -989,6 +995,7 @@ export
 
     # profiling
     @time,
+    @showtime,
     @timed,
     @timev,
     @elapsed,
diff --git a/base/expr.jl b/base/expr.jl
index 9df363714679e..1d95ae73c9cb3 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -209,11 +209,54 @@ end
 
 !!! compat "Julia 1.8"
     The usage within a function body requires at least Julia 1.8.
+
+---
+    @inline block
+
+Give a hint to the compiler that calls within `block` are worth inlining.
+
+```julia
+# The compiler will try to inline `f`
+@inline f(...)
+
+# The compiler will try to inline `f`, `g` and `+`
+@inline f(...) + g(...)
+```
+
+!!! note
+    A callsite annotation always takes precedence over the annotation applied to the
+    definition of the called function:
+    ```julia
+    @noinline function explicit_noinline(args...)
+        # body
+    end
+
+    let
+        @inline explicit_noinline(args...) # will be inlined
+    end
+    ```
+
+!!! note
+    When there are nested callsite annotations, the innermost annotation takes precedence:
+    ```julia
+    @noinline let a0, b0 = ...
+        a = @inline f(a0)  # the compiler will try to inline this call
+        b = f(b0)          # the compiler will NOT try to inline this call
+        return a, b
+    end
+    ```
+
+!!! warning
+    Although a callsite annotation will try to force inlining regardless of the cost model,
+    inlining can still fail. In particular, recursive calls cannot be inlined even if
+    they are annotated with `@inline`.
+
+!!! compat "Julia 1.8"
+    The callsite annotation requires at least Julia 1.8.
 """
-macro inline(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :inline) : ex)
+macro inline(x)
+    return annotate_meta_def_or_block(x, :inline)
 end
-macro inline() Expr(:meta, :inline) end
 
 """
     @noinline
@@ -245,13 +288,52 @@ end
 !!! compat "Julia 1.8"
     The usage within a function body requires at least Julia 1.8.
 
+---
+    @noinline block
+
+Give a hint to the compiler that it should not inline the calls within `block`.
+
+```julia
+# The compiler will try to not inline `f`
+@noinline f(...)
+
+# The compiler will try to not inline `f`, `g` and `+`
+@noinline f(...) + g(...)
+```
+
+!!! note
+    A callsite annotation always takes precedence over the annotation applied to the
+    definition of the called function:
+    ```julia
+    @inline function explicit_inline(args...)
+        # body
+    end
+
+    let
+        @noinline explicit_inline(args...) # will not be inlined
+    end
+    ```
+
+!!! note
+    When there are nested callsite annotations, the innermost annotation takes precedence:
+    ```julia
+    @inline let a0, b0 = ...
+        a = @noinline f(a0)  # the compiler will NOT try to inline this call
+        b = f(b0)            # the compiler will try to inline this call
+        return a, b
+    end
+    ```
+
+!!! compat "Julia 1.8"
+    The callsite annotation requires at least Julia 1.8.
+
+---
 !!! note
     If the function is trivial (for example returning a constant) it might get inlined anyway.
 """
-macro noinline(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :noinline) : ex)
+macro noinline(x)
+    return annotate_meta_def_or_block(x, :noinline)
 end
-macro noinline() Expr(:meta, :noinline) end
 
 """
     @pure ex
@@ -267,16 +349,26 @@ macro pure(ex)
 end
 
 """
-    @aggressive_constprop ex
-    @aggressive_constprop(ex)
+    @constprop setting ex
+    @constprop(setting, ex)
+
+`@constprop` controls the mode of interprocedural constant propagation for the
+annotated function. Two `setting`s are supported:
 
-`@aggressive_constprop` requests more aggressive interprocedural constant
-propagation for the annotated function. For a method where the return type
-depends on the value of the arguments, this can yield improved inference results
-at the cost of additional compile time.
+- `@constprop :aggressive ex`: apply constant propagation aggressively.
+  For a method where the return type depends on the value of the arguments,
+  this can yield improved inference results at the cost of additional compile time.
+- `@constprop :none ex`: disable constant propagation. This can reduce compile
+  times for functions that Julia might otherwise deem worthy of constant-propagation.
+  Common cases are for functions with `Bool`- or `Symbol`-valued arguments or keyword arguments.
 """
-macro aggressive_constprop(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :aggressive_constprop) : ex)
+macro constprop(setting, ex)
+    if isa(setting, QuoteNode)
+        setting = setting.value
+    end
+    setting === :aggressive && return esc(isa(ex, Expr) ? pushmeta!(ex, :aggressive_constprop) : ex)
+    setting === :none && return esc(isa(ex, Expr) ? pushmeta!(ex, :no_constprop) : ex)
+    throw(ArgumentError("@constprop $setting not supported"))
 end
 
 """
@@ -303,6 +395,15 @@ end
 
 ## some macro utilities ##
 
+unwrap_macrocalls(@nospecialize(x)) = x # a non-`Expr` value has nothing to unwrap
+function unwrap_macrocalls(ex::Expr)
+    inner = ex # peel nested macro calls: the last argument of each `:macrocall` is its operand
+    while inner isa Expr && inner.head === :macrocall
+        inner = inner.args[end] # may be a non-`Expr` leaf (e.g. a `Symbol`), which ends the loop
+    end
+    return inner
+end
+
 function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
     if isempty(args)
         tag = sym
@@ -310,10 +411,7 @@ function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
         tag = Expr(sym, args...)::Expr
     end
 
-    inner = ex
-    while inner.head === :macrocall
-        inner = inner.args[end]::Expr
-    end
+    inner = unwrap_macrocalls(ex)
 
     idx, exargs = findmeta(inner)
     if idx != 0
@@ -363,8 +461,23 @@ function findmetaarg(metaargs, sym)
     return 0
 end
 
-function is_short_function_def(ex)
-    ex.head === :(=) || return false
+function annotate_meta_def_or_block(@nospecialize(ex), meta::Symbol)
+    inner = unwrap_macrocalls(ex)
+    if is_function_def(inner)
+        # definition form: attach `meta` to the function definition via `pushmeta!`
+        return esc(pushmeta!(ex, meta))
+    else
+        # callsite form: evaluate `ex` between `Expr(meta, true)` / `Expr(meta, false)` markers
+        return Expr(:block,
+                    Expr(meta, true),
+                    Expr(:local, Expr(:(=), :val, esc(ex))),
+                    Expr(meta, false),
+                    :val)
+    end
+end
+
+function is_short_function_def(@nospecialize(ex))
+    isexpr(ex, :(=)) || return false
     while length(ex.args) >= 1 && isa(ex.args[1], Expr)
         (ex.args[1].head === :call) && return true
         (ex.args[1].head === :where || ex.args[1].head === :(::)) || return false
@@ -372,9 +485,11 @@ function is_short_function_def(ex)
     end
     return false
 end
+is_function_def(@nospecialize(ex)) =
+    return isexpr(ex, :function) || is_short_function_def(ex) || isexpr(ex, :->)
 
 function findmeta(ex::Expr)
-    if ex.head === :function || is_short_function_def(ex) || ex.head === :->
+    if is_function_def(ex)
         body = ex.args[2]::Expr
         body.head === :block || error(body, " is not a block expression")
         return findmeta_block(ex.args)
@@ -464,7 +579,10 @@ macro generated(f)
                          Expr(:block,
                               lno,
                               Expr(:if, Expr(:generated),
-                                   body,
+                                   # https://github.com/JuliaLang/julia/issues/25678
+                                   Expr(:block,
+                                        :(local tmp = $body),
+                                        :(if tmp isa Core.CodeInfo; return tmp; else tmp; end)),
                                    Expr(:block,
                                         Expr(:meta, :generated_only),
                                         Expr(:return, nothing))))))
@@ -505,7 +623,7 @@ result into the field in the first argument and return the values `(old, new)`.
 This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call.
 
 
-See [atomics](#man-atomics) in the manual for more details.
+See the [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
 ```jldoctest
 julia> mutable struct Atomic{T}; @atomic x::T; end
@@ -523,17 +641,20 @@ julia> @atomic a.x += 1 # increment field x of a, with sequential consistency
 3
 
 julia> @atomic a.x + 1 # increment field x of a, with sequential consistency
-(3, 4)
+3 => 4
 
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 4
 
 julia> @atomic max(a.x, 10) # change field x of a to the max value, with sequential consistency
-(4, 10)
+4 => 10
 
 julia> @atomic a.x max 5 # again change field x of a to the max value, with sequential consistency
-(10, 10)
+10 => 10
 ```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
 """
 macro atomic(ex)
     if !isa(ex, Symbol) && !is_expr(ex, :(::))
@@ -601,7 +722,7 @@ Stores `new` into `a.b.x` and returns the old value of `a.b.x`.
 
 This operation translates to a `swapproperty!(a.b, :x, new)` call.
 
-See [atomics](#man-atomics) in the manual for more details.
+See the [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
 ```jldoctest
 julia> mutable struct Atomic{T}; @atomic x::T; end
@@ -615,6 +736,9 @@ julia> @atomicswap a.x = 2+2 # replace field x of a with 4, with sequential cons
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 4
 ```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
 """
 macro atomicswap(order, ex)
     order isa QuoteNode || (order = esc(order))
@@ -644,7 +768,7 @@ replacement was completed.
 
 This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call.
 
-See [atomics](#man-atomics) in the manual for more details.
+See the [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
 ```jldoctest
 julia> mutable struct Atomic{T}; @atomic x::T; end
@@ -653,22 +777,25 @@ julia> a = Atomic(1)
 Atomic{Int64}(1)
 
 julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
-(1, true)
+(old = 1, success = true)
 
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 2
 
 julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
-(2, false)
+(old = 2, success = false)
 
 julia> xchg = 2 => 0; # replace field x of a with 0 if it was 1, with sequential consistency
 
 julia> @atomicreplace a.x xchg
-(2, true)
+(old = 2, success = true)
 
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 0
 ```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
 """
 macro atomicreplace(success_order, fail_order, ex, old_new)
     fail_order isa QuoteNode || (fail_order = esc(fail_order))
diff --git a/base/file.jl b/base/file.jl
index 3a038863107bc..74cf10f6ca8fa 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -8,6 +8,8 @@ export
     chown,
     cp,
     cptree,
+    diskstat,
+    hardlink,
     mkdir,
     mkpath,
     mktemp,
@@ -192,22 +194,19 @@ end
 """
     mkpath(path::AbstractString; mode::Unsigned = 0o777)
 
-Create all directories in the given `path`, with permissions `mode`. `mode` defaults to
-`0o777`, modified by the current file creation mask. Unlike [`mkdir`](@ref), `mkpath`
-does not error if `path` (or parts of it) already exists.
-Return `path`.
+Create all intermediate directories in the `path` as required. Directories are created with
+the permissions `mode` which defaults to `0o777` and is modified by the current file
+creation mask. Unlike [`mkdir`](@ref), `mkpath` does not error if `path` (or parts of it)
+already exists. Return `path`.
+
+If `path` includes a filename you will probably want to use `mkpath(dirname(path))` to
+avoid creating a directory using the filename.
 
 # Examples
 ```julia-repl
-julia> mkdir("testingdir")
-"testingdir"
-
-julia> cd("testingdir")
+julia> cd(mktempdir())
 
-julia> pwd()
-"/home/JuliaUser/testingdir"
-
-julia> mkpath("my/test/dir")
+julia> mkpath("my/test/dir") # creates three directories
 "my/test/dir"
 
 julia> readdir()
@@ -223,6 +222,13 @@ julia> readdir()
 julia> readdir("test")
 1-element Array{String,1}:
  "dir"
+
+julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directories
+"intermediate_dir/actually_a_directory.txt"
+
+julia> isdir("intermediate_dir/actually_a_directory.txt")
+true
+
 ```
 """
 function mkpath(path::AbstractString; mode::Integer = 0o777)
@@ -315,12 +321,12 @@ function checkfor_mv_cp_cptree(src::AbstractString, dst::AbstractString, txt::Ab
             if Base.samefile(src, dst)
                 abs_src = islink(src) ? abspath(readlink(src)) : abspath(src)
                 abs_dst = islink(dst) ? abspath(readlink(dst)) : abspath(dst)
-                throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir.",
+                throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir. ",
                                            "This is not supported.\n  ",
                                            "`src` refers to: $(abs_src)\n  ",
                                            "`dst` refers to: $(abs_dst)\n")))
             end
-            rm(dst; recursive=true)
+            rm(dst; recursive=true, force=true)
         else
             throw(ArgumentError(string("'$dst' exists. `force=true` ",
                                        "is required to remove '$dst' before $(txt).")))
@@ -358,6 +364,13 @@ If `follow_symlinks=false`, and `src` is a symbolic link, `dst` will be created
 symbolic link. If `follow_symlinks=true` and `src` is a symbolic link, `dst` will be a copy
 of the file or directory `src` refers to.
 Return `dst`.
+
+!!! note
+    The `cp` function is different from the `cp` command. The `cp` function always operates on
+    the assumption that `dst` is a file, while the command does different things depending
+    on whether `dst` is a directory or a file.
+    Using `force=true` when `dst` is a directory will result in loss of all the contents present
+    in the `dst` directory, and `dst` will become a file that has the contents of `src` instead.
 """
 function cp(src::AbstractString, dst::AbstractString; force::Bool=false,
                                                       follow_symlinks::Bool=false)
@@ -998,6 +1011,26 @@ if Sys.iswindows()
     const UV__EPERM              = -4048
 end
 
+"""
+    hardlink(src::AbstractString, dst::AbstractString)
+
+Creates a hard link to an existing source file `src` with the name `dst`. The
+destination, `dst`, must not exist.
+
+See also: [`symlink`](@ref).
+
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+"""
+function hardlink(src::AbstractString, dst::AbstractString)
+    # A negative status from `jl_fs_hardlink` is treated as a failure code.
+    status = ccall(:jl_fs_hardlink, Int32, (Cstring, Cstring), src, dst)
+    # Report the failure through `uv_error`, tagged with the attempted call.
+    status < 0 && uv_error("hardlink($(repr(src)), $(repr(dst)))", status)
+    # Success carries no value for the caller.
+    return nothing
+end
+
 """
     symlink(target::AbstractString, link::AbstractString; dir_target = false)
 
@@ -1020,6 +1053,8 @@ a junction point will be used.  Best practice for creating symlinks on Windows
 is to create them only after the files/directories they reference are already
 created.
 
+See also: [`hardlink`](@ref).
+
 !!! note
     This function raises an error under operating systems that do not support
     soft symbolic links, such as Windows XP.
@@ -1134,3 +1169,57 @@ function chown(path::AbstractString, owner::Integer, group::Integer=-1)
     err < 0 && uv_error("chown($(repr(path)), $owner, $group)", err)
     path
 end
+
+
+# - http://docs.libuv.org/en/v1.x/fs.html#c.uv_fs_statfs (libuv function docs)
+# - http://docs.libuv.org/en/v1.x/fs.html#c.uv_statfs_t (libuv docs of the returned struct)
+"""
+    DiskStat
+
+Stores information about the disk in bytes. Obtain an instance by calling `diskstat`.
+"""
+struct DiskStat
+    ftype::UInt64
+    bsize::UInt64 # multiplied by the block counts below to obtain byte counts
+    blocks::UInt64 # `total` is `bsize * blocks`
+    bfree::UInt64
+    bavail::UInt64 # `available` is `bsize * bavail`
+    files::UInt64
+    ffree::UInt64
+    fspare::NTuple{4, UInt64} # reserved
+end
+
+function Base.getproperty(stats::DiskStat, field::Symbol)
+    # Derived byte counts are computed only when asked for; raw fields fall through untouched.
+    bsize = getfield(stats, :bsize)
+    field === :total && return Int64(bsize * getfield(stats, :blocks))
+    field === :available && return Int64(bsize * getfield(stats, :bavail))
+    field === :used && return stats.total - stats.available
+    return getfield(stats, field)
+end
+
+@eval Base.propertynames(stats::DiskStat) =
+    $((fieldnames(DiskStat)[1:end-1]..., :available, :total, :used))
+
+Base.show(io::IO, x::DiskStat) =
+    print(io, "DiskStat(total=$(x.total), used=$(x.used), available=$(x.available))")
+
+"""
+    diskstat(path=pwd())
+
+Returns statistics in bytes about the disk that contains the file or directory pointed at by
+`path`. If no argument is passed, statistics about the disk that contains the current
+working directory are returned.
+
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+"""
+function diskstat(path::AbstractString=pwd())
+    req = zeros(UInt8, _sizeof_uv_fs) # zero-filled storage passed to libuv as the request
+    err = ccall(:uv_fs_statfs, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+                C_NULL, req, path, C_NULL)
+    err < 0 && uv_error("diskstat($(repr(path)))", err)
+    stats = unsafe_load(ccall(:jl_uv_fs_t_ptr, Ptr{DiskStat}, (Ptr{Cvoid},), req)) # copy out
+    ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req) # release libuv's result buffer
+    return stats
+end
diff --git a/base/filesystem.jl b/base/filesystem.jl
index 569b71995688d..dfa881068c6ab 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -58,7 +58,7 @@ import .Base:
     IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
     bytesavailable, position, read, read!, readavailable, seek, seekend, show,
     skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
-    rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
+    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
 
 import .Base.RefValue
 
@@ -92,6 +92,7 @@ if OS_HANDLE !== RawFD
 end
 
 rawhandle(file::File) = file.handle
+setup_stdio(file::File, ::Bool) = (file, false)
 
 # Filesystem.open, not Base.open
 function open(path::AbstractString, flags::Integer, mode::Integer=0)
diff --git a/base/float.jl b/base/float.jl
index 867abd30eeed6..542516021494f 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -676,17 +676,31 @@ end
 
 
 """
-    precision(num::AbstractFloat)
+    precision(num::AbstractFloat; base::Integer=2)
+    precision(T::Type; base::Integer=2)
 
 Get the precision of a floating point number, as defined by the effective number of bits in
-the significand.
+the significand, or the precision of a floating-point type `T` (its current default, if
+`T` is a variable-precision type like [`BigFloat`](@ref)).
+
+If `base` is specified, then it returns the maximum corresponding
+number of significand digits in that base.
+
+!!! compat "Julia 1.8"
+    The `base` keyword requires at least Julia 1.8.
 """
 function precision end
 
-precision(::Type{Float16}) = 11
-precision(::Type{Float32}) = 24
-precision(::Type{Float64}) = 53
-precision(::T) where {T<:AbstractFloat} = precision(T)
+_precision(::Type{Float16}) = 11
+_precision(::Type{Float32}) = 24
+_precision(::Type{Float64}) = 53
+function _precision(x, base::Integer=2)
+    base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
+    p = _precision(x)
+    return base == 2 ? Int(p) : floor(Int, p / log2(base))
+end
+precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat} = _precision(T, base)
+precision(::T; base::Integer=2) where {T<:AbstractFloat} = precision(T; base)
 
 """
     uabs(x::Integer)
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index 60134ee91919d..b22dd7c238aae 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -166,6 +166,16 @@ function _round_invstep(x, invstep, r::RoundingMode)
     return y
 end
 
+# round x to multiples of 1/(invstepsqrt^2)
+# Using square root of step prevents overflowing
+function _round_invstepsqrt(x, invstepsqrt, r::RoundingMode)
+    y = round((x * invstepsqrt) * invstepsqrt, r) / invstepsqrt / invstepsqrt
+    if !isfinite(y)
+        return x
+    end
+    return y
+end
+
 # round x to multiples of step
 function _round_step(x, step, r::RoundingMode)
     # TODO: use div with rounding mode
@@ -186,10 +196,15 @@ function _round_digits(x, r::RoundingMode, digits::Integer, base)
     fx = float(x)
     if digits >= 0
         invstep = oftype(fx, base)^digits
-        _round_invstep(fx, invstep, r)
+        if isfinite(invstep)
+            return _round_invstep(fx, invstep, r)
+        else
+            invstepsqrt = oftype(fx, base)^oftype(fx, digits/2)
+            return _round_invstepsqrt(fx, invstepsqrt, r)
+        end
     else
         step = oftype(fx, base)^-digits
-        _round_step(fx, step, r)
+        return _round_step(fx, step, r)
     end
 end
 
@@ -233,7 +248,7 @@ whether or not NaN values are considered equal (defaults to false).
 
 For real or complex floating-point values, if an `atol > 0` is not specified, `rtol` defaults to
 the square root of [`eps`](@ref) of the type of `x` or `y`, whichever is bigger (least precise).
-This corresponds to requiring equality of about half of the significand digits. Otherwise,
+This corresponds to requiring equality of about half of the significant digits. Otherwise,
 e.g. for integer arguments or if an `atol > 0` is supplied, `rtol` defaults to zero.
 
 The `norm` keyword defaults to `abs` for numeric `(x,y)` and to `LinearAlgebra.norm` for
@@ -327,30 +342,81 @@ significantly more expensive than `x*y+z`. `fma` is used to improve accuracy in
 algorithms. See [`muladd`](@ref).
 """
 function fma end
+function fma_emulated(a::Float32, b::Float32, c::Float32)::Float32
+    ab = Float64(a) * b
+    res = ab+c
+    reinterpret(UInt64, res)&0x1fff_ffff!=0x1000_0000 && return res
+    # Here the low 29 bits of res are exactly 0x1000_0000 (presumably a Float32 rounding tie), so compensate for the error of ab+c.
+    reslo = abs(c)>abs(ab) ? ab-(res - c) : c-(res - ab)
+    res = iszero(reslo) ? res : (signbit(reslo) ? prevfloat(res) : nextfloat(res))
+    return res
+end
+
+""" Splits a Float64 into a high part and a low part, where the high part has 27 trailing zero bits and the low part has 26 trailing zero bits."""
+@inline function splitbits(x::Float64)
+    hi = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000)
+    return hi, x-hi
+end
 
-fma_libm(x::Float32, y::Float32, z::Float32) =
-    ccall(("fmaf", libm_name), Float32, (Float32,Float32,Float32), x, y, z)
-fma_libm(x::Float64, y::Float64, z::Float64) =
-    ccall(("fma", libm_name), Float64, (Float64,Float64,Float64), x, y, z)
+function twomul(a::Float64, b::Float64)
+    ahi, alo = splitbits(a)
+    bhi, blo = splitbits(b)
+    abhi = a*b
+    blohi, blolo = splitbits(blo)
+    ablo = alo*blohi - (((abhi - ahi*bhi) - alo*bhi) - ahi*blo) + blolo*alo
+    return abhi, ablo
+end
+
+function fma_emulated(a::Float64, b::Float64,c::Float64)
+    abhi, ablo = @inline twomul(a,b)
+    if !isfinite(abhi+c) || isless(abs(abhi), nextfloat(0x1p-969)) || issubnormal(a) || issubnormal(b)
+        aandbfinite = isfinite(a) && isfinite(b)
+        if !(isfinite(c) && aandbfinite)
+            return aandbfinite ? c : abhi+c
+        end
+        (iszero(a) || iszero(b)) && return abhi+c
+        bias = exponent(a) + exponent(b)
+        c_denorm = ldexp(c, -bias)
+        if isfinite(c_denorm)
+            # rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a))
+            issubnormal(a) && (a *= 0x1p52)
+            issubnormal(b) && (b *= 0x1p52)
+            a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000)
+            b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000)
+            c = c_denorm
+            abhi, ablo = twomul(a,b)
+            r = abhi+c
+            s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo)
+            sumhi = r+s
+            # If result is subnormal, ldexp will cause double rounding because subnormals have fewer mantissa bits.
+            # As such, we need to check whether round to even would lead to double rounding and manually round sumhi to avoid it.
+            if issubnormal(ldexp(sumhi, bias))
+                sumlo = r-sumhi+s
+                bits_lost = -bias-exponent(sumhi)-1022
+                sumhiInt = reinterpret(UInt64, sumhi)
+                if (bits_lost != 1) ⊻ (sumhiInt&1 == 1)
+                    sumhi = nextfloat(sumhi, cmp(sumlo,0))
+                end
+            end
+            return ldexp(sumhi, bias)
+        end
+        isinf(abhi) && signbit(c) == signbit(a*b) && return abhi
+        # fall through
+    end
+    r = abhi+c
+    s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo)
+    return r+s
+end
 fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z)
 fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z)
+
 # Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back
-# onto a broken system libm; if so, use openlibm's fma instead
-# 1.0000305f0 = 1 + 1/2^15
-# 1.0000000009313226 = 1 + 1/2^30
-# If fma_llvm() clobbers the rounding mode, the result of 0.1 + 0.2 will be 0.3
-# instead of the properly-rounded 0.30000000000000004; check after calling fma
-if (Sys.ARCH !== :i686 && fma_llvm(1.0000305f0, 1.0000305f0, -1.0f0) == 6.103609f-5 &&
-    (fma_llvm(1.0000000009313226, 1.0000000009313226, -1.0) ==
-     1.8626451500983188e-9) && 0.1 + 0.2 == 0.30000000000000004)
-    fma(x::Float32, y::Float32, z::Float32) = fma_llvm(x,y,z)
-    fma(x::Float64, y::Float64, z::Float64) = fma_llvm(x,y,z)
-else
-    fma(x::Float32, y::Float32, z::Float32) = fma_libm(x,y,z)
-    fma(x::Float64, y::Float64, z::Float64) = fma_libm(x,y,z)
-end
+# onto a broken system libm; if so, use a software emulated fma
+fma(x::Float32, y::Float32, z::Float32) = Core.Intrinsics.have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
+fma(x::Float64, y::Float64, z::Float64) = Core.Intrinsics.have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
+
 function fma(a::Float16, b::Float16, c::Float16)
-    Float16(fma(Float32(a), Float32(b), Float32(c)))
+    Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it.
 end
 
 # This is necessary at least on 32-bit Intel Linux, since fma_llvm may
diff --git a/base/gcutils.jl b/base/gcutils.jl
index e74752f4f6626..b794bd32a55da 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -50,8 +50,7 @@ function finalizer(@nospecialize(f), @nospecialize(o))
     return o
 end
 
-function finalizer(f::Ptr{Cvoid}, o::T) where T
-    @_inline_meta
+function finalizer(f::Ptr{Cvoid}, o::T) where T @inline
     if !ismutable(o)
         error("objects of type ", typeof(o), " cannot be finalized")
     end
@@ -116,16 +115,14 @@ another Task or thread.
 """
 enable_finalizers(on::Bool) = on ? enable_finalizers() : disable_finalizers()
 
-function enable_finalizers()
-    Base.@_inline_meta
+function enable_finalizers() @inline
     ccall(:jl_gc_enable_finalizers_internal, Cvoid, ())
-    if unsafe_load(cglobal(:jl_gc_have_pending_finalizers, Cint)) != 0
+    if Core.Intrinsics.atomic_pointerref(cglobal(:jl_gc_have_pending_finalizers, Cint), :monotonic) != 0
         ccall(:jl_gc_run_pending_finalizers, Cvoid, (Ptr{Cvoid},), C_NULL)
     end
 end
 
-function disable_finalizers()
-    Base.@_inline_meta
+function disable_finalizers() @inline
     ccall(:jl_gc_disable_finalizers_internal, Cvoid, ())
 end
 
diff --git a/base/generator.jl b/base/generator.jl
index e5b3e46f88361..9d94996be1d4f 100644
--- a/base/generator.jl
+++ b/base/generator.jl
@@ -40,7 +40,7 @@ Generator(::Type{T}, iter::I) where {T,I} = Generator{I,Type{T}}(T, iter)
 Generator(::Type{T}, I1, I2, Is...) where {T} = Generator(a->T(a...), zip(I1, I2, Is...))
 
 function iterate(g::Generator, s...)
-    @_inline_meta
+    @inline
     y = iterate(g.iter, s...)
     y === nothing && return nothing
     y = y::Tuple{Any, Any} # try to give inference some idea of what to expect about the behavior of the next line
@@ -52,6 +52,7 @@ size(g::Generator) = size(g.iter)
 axes(g::Generator) = axes(g.iter)
 ndims(g::Generator) = ndims(g.iter)
 keys(g::Generator) = keys(g.iter)
+last(g::Generator) = g.f(last(g.iter))
 
 
 ## iterator traits
diff --git a/base/gmp.jl b/base/gmp.jl
index fcecfb4075586..435a0a0954ce9 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -6,8 +6,8 @@ export BigInt
 
 import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, nand, nor,
              binomial, cmp, convert, div, divrem, factorial, cld, fld, gcd, gcdx, lcm, mod,
-             ndigits, promote_rule, rem, show, isqrt, string, powermod,
-             sum, prod, trailing_zeros, trailing_ones, count_ones, tryparse_internal,
+             ndigits, promote_rule, rem, show, isqrt, string, powermod, sum, prod,
+             trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal,
              bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
              widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
              sign, hastypemax, isodd, iseven, digits!, hash, hash_integer
@@ -94,10 +94,10 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false)
 function __init__()
     try
         if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB
-            msg = bits_per_limb() != BITS_PER_LIMB ? error : warn
-            msg("The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))\n",
-                "does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).\n",
-                "Please rebuild Julia.")
+            msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))
+                     does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).
+                     Please rebuild Julia."""
+            bits_per_limb() != BITS_PER_LIMB ? @error(msg) : @warn(msg)
         end
 
         ccall((:__gmp_set_memory_functions, :libgmp), Cvoid,
@@ -178,7 +178,9 @@ ui_sub!(x::BigInt, a, b::BigInt) = (ccall((:__gmpz_ui_sub, :libgmp), Cvoid, (mpz
 ui_sub(a, b::BigInt) = ui_sub!(BigInt(), a, b)
 
 for op in (:scan1, :scan0)
-    @eval $op(a::BigInt, b) = Int(ccall($(gmpz(op)), Culong, (mpz_t, Culong), a, b))
+    # when there is no meaningful answer, ccall returns typemax(Culong), where Culong can
+    # be UInt32 (Windows) or UInt64; we return -1 in this case for all architectures
+    @eval $op(a::BigInt, b) = Int(signed(ccall($(gmpz(op)), Culong, (mpz_t, Culong), a, b)))
 end
 
 mul_si!(x::BigInt, a::BigInt, b) = (ccall((:__gmpz_mul_si, :libgmp), Cvoid, (mpz_t, mpz_t, Clong), x, a, b); x)
@@ -203,7 +205,7 @@ for (op, T) in ((:fac_ui, Culong), (:set_ui, Culong), (:set_si, Clong), (:set_d,
     end
 end
 
-popcount(a::BigInt) = Int(ccall((:__gmpz_popcount, :libgmp), Culong, (mpz_t,), a))
+popcount(a::BigInt) = Int(signed(ccall((:__gmpz_popcount, :libgmp), Culong, (mpz_t,), a)))
 
 mpn_popcount(d::Ptr{Limb}, s::Integer) = Int(ccall((:__gmpn_popcount, :libgmp), Culong, (Ptr{Limb}, Csize_t), d, s))
 mpn_popcount(a::BigInt) = mpn_popcount(a.d, abs(a.size))
@@ -292,14 +294,14 @@ BigInt(x::Union{Clong,Int32}) = MPZ.set_si(x)
 BigInt(x::Union{Culong,UInt32}) = MPZ.set_ui(x)
 BigInt(x::Bool) = BigInt(UInt(x))
 
-unsafe_trunc(::Type{BigInt}, x::Union{Float32,Float64}) = MPZ.set_d(x)
+unsafe_trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64}) = MPZ.set_d(x)
 
-function BigInt(x::Union{Float32,Float64})
+function BigInt(x::Float64)
     isinteger(x) || throw(InexactError(:BigInt, BigInt, x))
     unsafe_trunc(BigInt,x)
 end
 
-function trunc(::Type{BigInt}, x::Union{Float32,Float64})
+function trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64})
     isfinite(x) || throw(InexactError(:trunc, BigInt, x))
     unsafe_trunc(BigInt,x)
 end
@@ -552,10 +554,30 @@ end
 >>(x::BigInt, c::UInt) = c == 0 ? x : MPZ.fdiv_q_2exp(x, c)
 >>>(x::BigInt, c::UInt) = x >> c
 
-trailing_zeros(x::BigInt) = MPZ.scan1(x, 0)
-trailing_ones(x::BigInt) = MPZ.scan0(x, 0)
+function trailing_zeros(x::BigInt)
+    c = MPZ.scan1(x, 0)
+    c == -1 && throw(DomainError(x, "`x` must be non-zero"))
+    c
+end
+
+function trailing_ones(x::BigInt)
+    c = MPZ.scan0(x, 0)
+    c == -1 && throw(DomainError(x, "`x` must not be equal to -1"))
+    c
+end
 
-count_ones(x::BigInt) = MPZ.popcount(x)
+function count_ones(x::BigInt)
+    c = MPZ.popcount(x)
+    c == -1 && throw(DomainError(x, "`x` cannot be negative"))
+    c
+end
+
+# the generic definition is not used here, so that a better error message can be given
+function count_zeros(x::BigInt)
+    c = MPZ.popcount(~x)
+    c == -1 && throw(DomainError(x, "`x` must be negative"))
+    c
+end
 
 """
     count_ones_abs(x::BigInt)
diff --git a/base/idset.jl b/base/idset.jl
index 6812c4ff3ceb0..0a4d4275b4231 100644
--- a/base/idset.jl
+++ b/base/idset.jl
@@ -12,6 +12,7 @@ IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr)
 IdSet() = IdSet{Any}()
 
 copymutable(s::IdSet) = typeof(s)(s)
+emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}()
 copy(s::IdSet) = typeof(s)(s)
 
 isempty(s::IdSet) = isempty(s.dict)
diff --git a/base/indices.jl b/base/indices.jl
index 817d9d435522b..28028f23c72a3 100644
--- a/base/indices.jl
+++ b/base/indices.jl
@@ -321,16 +321,16 @@ which they index. To support those cases, `to_indices(A, I)` calls
 given tuple of indices and the dimensional indices of `A` in tandem. As such,
 not all index types are guaranteed to propagate to `Base.to_index`.
 """
-to_indices(A, I::Tuple) = (@_inline_meta; to_indices(A, axes(A), I))
-to_indices(A, I::Tuple{Any}) = (@_inline_meta; to_indices(A, (eachindex(IndexLinear(), A),), I))
+to_indices(A, I::Tuple) = (@inline; to_indices(A, axes(A), I))
+to_indices(A, I::Tuple{Any}) = (@inline; to_indices(A, (eachindex(IndexLinear(), A),), I))
 # In simple cases, we know that we don't need to use axes(A), optimize those.
 # Having this here avoids invalidations from multidimensional.jl: to_indices(A, I::Tuple{Vararg{Union{Integer, CartesianIndex}}})
 to_indices(A, I::Tuple{}) = ()
 to_indices(A, I::Tuple{Vararg{Int}}) = I
-to_indices(A, I::Tuple{Vararg{Integer}}) = (@_inline_meta; to_indices(A, (), I))
+to_indices(A, I::Tuple{Vararg{Integer}}) = (@inline; to_indices(A, (), I))
 to_indices(A, inds, ::Tuple{}) = ()
 to_indices(A, inds, I::Tuple{Any, Vararg{Any}}) =
-    (@_inline_meta; (to_index(A, I[1]), to_indices(A, _maybetail(inds), tail(I))...))
+    (@inline; (to_index(A, I[1]), to_indices(A, _maybetail(inds), tail(I))...))
 
 _maybetail(::Tuple{}) = ()
 _maybetail(t::Tuple) = tail(t)
@@ -360,9 +360,9 @@ first(S::Slice) = first(S.indices)
 last(S::Slice) = last(S.indices)
 size(S::Slice) = (length(S.indices),)
 length(S::Slice) = length(S.indices)
-getindex(S::Slice, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::Slice, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 show(io::IO, r::Slice) = print(io, "Base.Slice(", r.indices, ")")
 iterate(S::Slice, s...) = iterate(S.indices, s...)
 
@@ -388,15 +388,15 @@ first(S::IdentityUnitRange) = first(S.indices)
 last(S::IdentityUnitRange) = last(S.indices)
 size(S::IdentityUnitRange) = (length(S.indices),)
 length(S::IdentityUnitRange) = length(S.indices)
-getindex(S::IdentityUnitRange, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 show(io::IO, r::IdentityUnitRange) = print(io, "Base.IdentityUnitRange(", r.indices, ")")
 iterate(S::IdentityUnitRange, s...) = iterate(S.indices, s...)
 
 # For OneTo, the values and indices of the values are identical, so this may be defined in Base.
 # In general such an indexing operation would produce offset ranges
-getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@_inline_meta; @boundscheck checkbounds(S, I); I)
+getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@inline; @boundscheck checkbounds(S, I); I)
 
 """
     LinearIndices(A::AbstractArray)
@@ -475,12 +475,12 @@ IndexStyle(::Type{<:LinearIndices}) = IndexLinear()
 axes(iter::LinearIndices) = map(axes1, iter.indices)
 size(iter::LinearIndices) = map(length, iter.indices)
 function getindex(iter::LinearIndices, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(iter, i)
     i
 end
 function getindex(iter::LinearIndices, i::AbstractRange{<:Integer})
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(iter, i)
     @inbounds isa(iter, LinearIndices{1}) ? iter.indices[1][i] : (first(iter):last(iter))[i]
 end
@@ -491,6 +491,6 @@ iterate(iter::LinearIndices, i=1) = i > length(iter) ? nothing : (i, i+1)
 
 # Needed since firstindex and lastindex are defined in terms of LinearIndices
 first(iter::LinearIndices) = 1
-first(iter::LinearIndices{1}) = (@_inline_meta; first(axes1(iter.indices[1])))
-last(iter::LinearIndices) = (@_inline_meta; length(iter))
-last(iter::LinearIndices{1}) = (@_inline_meta; last(axes1(iter.indices[1])))
+first(iter::LinearIndices{1}) = (@inline; first(axes1(iter.indices[1])))
+last(iter::LinearIndices) = (@inline; length(iter))
+last(iter::LinearIndices{1}) = (@inline; last(axes1(iter.indices[1])))
diff --git a/base/initdefs.jl b/base/initdefs.jl
index 2cac786cfd194..231f57454e3cd 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -100,7 +100,7 @@ function init_depot_path()
     if haskey(ENV, "JULIA_DEPOT_PATH")
         str = ENV["JULIA_DEPOT_PATH"]
         isempty(str) && return
-        for path in split(str, Sys.iswindows() ? ';' : ':')
+        for path in eachsplit(str, Sys.iswindows() ? ';' : ':')
             if isempty(path)
                 append_default_depot_path!(DEPOT_PATH)
             else
@@ -169,7 +169,11 @@ See also
 const LOAD_PATH = copy(DEFAULT_LOAD_PATH)
 # HOME_PROJECT is no longer used, here just to avoid breaking things
 const HOME_PROJECT = Ref{Union{String,Nothing}}(nothing)
-const ACTIVE_PROJECT = Ref{Union{String,Nothing}}(nothing)
+const ACTIVE_PROJECT = Ref{Union{String,Nothing}}(nothing) # Modify this only via `Base.set_active_project(proj)`
+## Watchers for when the active project changes (e.g., Revise)
+# Each should be a thunk, i.e., `f()`. To determine the current active project,
+# the thunk can query `Base.active_project()`.
+const active_project_callbacks = []
 
 function current_project(dir::AbstractString)
     # look for project file in current dir and parents
@@ -198,7 +202,7 @@ end
 function parse_load_path(str::String)
     envs = String[]
     isempty(str) && return envs
-    for env in split(str, Sys.iswindows() ? ';' : ':')
+    for env in eachsplit(str, Sys.iswindows() ? ';' : ':')
         if isempty(env)
             for env′ in DEFAULT_LOAD_PATH
                 env′ in envs || push!(envs, env′)
@@ -231,10 +235,11 @@ function init_active_project()
     project = (JLOptions().project != C_NULL ?
         unsafe_string(Base.JLOptions().project) :
         get(ENV, "JULIA_PROJECT", nothing))
-    ACTIVE_PROJECT[] =
+    set_active_project(
         project === nothing ? nothing :
         project == "" ? nothing :
         startswith(project, "@") ? load_path_expand(project) : abspath(expanduser(project))
+    )
 end
 
 ## load path expansion: turn LOAD_PATH entries into concrete paths ##
@@ -280,7 +285,7 @@ load_path_expand(::Nothing) = nothing
 """
     active_project()
 
-Return the path of the active `Project.toml` file.
+Return the path of the active `Project.toml` file. See also [`Base.set_active_project`](@ref).
 """
 function active_project(search_load_path::Bool=true)
     for project in (ACTIVE_PROJECT[],)
@@ -306,6 +311,23 @@ function active_project(search_load_path::Bool=true)
     end
 end
 
+"""
+    set_active_project(projfile::Union{AbstractString,Nothing})
+
+Set the active `Project.toml` file to `projfile`. See also [`Base.active_project`](@ref).
+"""
+function set_active_project(projfile::Union{AbstractString,Nothing})
+    ACTIVE_PROJECT[] = projfile
+    for f in active_project_callbacks
+        try
+            Base.invokelatest(f)
+        catch err # a failing watcher must not stop the remaining ones, but do report why it failed
+            @error "active project callback $f failed" exception=(err, catch_backtrace()) maxlog=1
+        end
+    end
+end
+
+
 """
     load_path()
 
diff --git a/base/int.jl b/base/int.jl
index 17410e9eb3f99..49ec0c54721f1 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -97,6 +97,9 @@ inv(x::Integer) = float(one(x)) / float(x)
 
 Return `true` if `x` is an odd integer (that is, an integer not divisible by 2), and `false` otherwise.
 
+!!! compat "Julia 1.7"
+    Non-`Integer` arguments require Julia 1.7 or later.
+
 # Examples
 ```jldoctest
 julia> isodd(9)
@@ -114,6 +117,9 @@ isodd(n::Real) = isinteger(n) && !iszero(rem(Integer(n), 2))
 
 Return `true` if `x` is an even integer (that is, an integer divisible by 2), and `false` otherwise.
 
+!!! compat "Julia 1.7"
+    Non-`Integer` arguments require Julia 1.7 or later.
+
 # Examples
 ```jldoctest
 julia> iseven(9)
@@ -647,8 +653,8 @@ floor(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x)
     @int128_str str
     @int128_str(str)
 
-`@int128_str` parses a string into a Int128
-Throws an `ArgumentError` if the string is not a valid integer
+`@int128_str` parses a string into a Int128.
+Throws an `ArgumentError` if the string is not a valid integer.
 """
 macro int128_str(s)
     return parse(Int128, s)
@@ -658,8 +664,8 @@ end
     @uint128_str str
     @uint128_str(str)
 
-`@uint128_str` parses a string into a UInt128
-Throws an `ArgumentError` if the string is not a valid integer
+`@uint128_str` parses a string into a UInt128.
+Throws an `ArgumentError` if the string is not a valid integer.
 """
 macro uint128_str(s)
     return parse(UInt128, s)
@@ -683,25 +689,30 @@ julia> big"7891.5"
 ```
 """
 macro big_str(s)
+    message = "invalid number format $s for BigInt or BigFloat"
+    throw_error =  :(throw(ArgumentError($message)))
     if '_' in s
         # remove _ in s[2:end-1]
         bf = IOBuffer(maxsize=lastindex(s))
-        print(bf, s[1])
+        c = s[1]
+        print(bf, c)
+        is_prev_underscore = (c == '_')
+        is_prev_dot = (c == '.')
         for c in SubString(s, 2, lastindex(s)-1)
             c != '_' && print(bf, c)
+            c == '_' && is_prev_dot && return throw_error
+            c == '.' && is_prev_underscore && return throw_error
+            is_prev_underscore = (c == '_')
+            is_prev_dot = (c == '.')
         end
         print(bf, s[end])
-        seekstart(bf)
-        n = tryparse(BigInt, String(take!(bf)))
-        n === nothing || return n
-    else
-        n = tryparse(BigInt, s)
-        n === nothing || return n
-        n = tryparse(BigFloat, s)
-        n === nothing || return n
+        s = String(take!(bf))
     end
-    message = "invalid number format $s for BigInt or BigFloat"
-    return :(throw(ArgumentError($message)))
+    n = tryparse(BigInt, s)
+    n === nothing || return n
+    n = tryparse(BigFloat, s)
+    n === nothing || return n
+    return throw_error
 end
 
 ## integer promotions ##
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index fd793f94dd783..29cf041cbcf7b 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -13,25 +13,25 @@ The arguments may be integer and rational numbers.
 
 # Examples
 ```jldoctest
-julia> gcd(6,9)
+julia> gcd(6, 9)
 3
 
-julia> gcd(6,-9)
+julia> gcd(6, -9)
 3
 
-julia> gcd(6,0)
+julia> gcd(6, 0)
 6
 
-julia> gcd(0,0)
+julia> gcd(0, 0)
 0
 
-julia> gcd(1//3,2//3)
+julia> gcd(1//3, 2//3)
 1//3
 
-julia> gcd(1//3,-2//3)
+julia> gcd(1//3, -2//3)
 1//3
 
-julia> gcd(1//3,2)
+julia> gcd(1//3, 2)
 1//3
 
 julia> gcd(0, 0, 10, 15)
@@ -47,11 +47,22 @@ function gcd(a::T, b::T) where T<:Integer
     checked_abs(a)
 end
 
-# binary GCD (aka Stein's) algorithm
-# about 1.7x (2.1x) faster for random Int64s (Int128s)
 function gcd(a::T, b::T) where T<:BitInteger
     a == 0 && return checked_abs(b)
     b == 0 && return checked_abs(a)
+    r = _gcd(a, b)
+    signbit(r) && __throw_gcd_overflow(a, b)
+    return r
+end
+@noinline __throw_gcd_overflow(a, b) = throw(OverflowError("gcd($a, $b) overflows"))
+
+# binary GCD (aka Stein's) algorithm
+# about 1.7x (2.1x) faster for random Int64s (Int128s)
+# Unfortunately, we need to manually annotate this as `@pure` to work around #41694. Since
+# this is used in the Rational constructor, constant prop is something we do care about here.
+# This does call generic functions, so it might not be completely sound, but since `_gcd` is
+# restricted to BitIntegers, it is probably fine in practice.
+@pure function _gcd(a::T, b::T) where T<:BitInteger
     za = trailing_zeros(a)
     zb = trailing_zeros(b)
     k = min(za, zb)
@@ -65,11 +76,8 @@ function gcd(a::T, b::T) where T<:BitInteger
         v >>= trailing_zeros(v)
     end
     r = u << k
-    # T(r) would throw InexactError; we want OverflowError instead
-    r > typemax(T) && __throw_gcd_overflow(a, b)
-    r % T
+    return r % T
 end
-@noinline __throw_gcd_overflow(a, b) = throw(OverflowError("gcd($a, $b) overflows"))
 
 """
     lcm(x, y...)
@@ -82,33 +90,33 @@ The arguments may be integer and rational numbers.
 
 # Examples
 ```jldoctest
-julia> lcm(2,3)
+julia> lcm(2, 3)
 6
 
-julia> lcm(-2,3)
+julia> lcm(-2, 3)
 6
 
-julia> lcm(0,3)
+julia> lcm(0, 3)
 0
 
-julia> lcm(0,0)
+julia> lcm(0, 0)
 0
 
-julia> lcm(1//3,2//3)
+julia> lcm(1//3, 2//3)
 2//3
 
-julia> lcm(1//3,-2//3)
+julia> lcm(1//3, -2//3)
 2//3
 
-julia> lcm(1//3,2)
+julia> lcm(1//3, 2)
 2//1
 
-julia> lcm(1,3,5,7)
+julia> lcm(1, 3, 5, 7)
 105
 ```
 """
 function lcm(a::T, b::T) where T<:Integer
-    # explicit a==0 test is to handle case of lcm(0,0) correctly
+    # explicit a==0 test is to handle case of lcm(0, 0) correctly
     # explicit b==0 test is to handle case of lcm(typemin(T),0) correctly
     if a == 0 || b == 0
         return zero(a)
@@ -206,13 +214,13 @@ and ``div(y,m) = 0``. This will throw an error if ``m = 0``, or if
 
 # Examples
 ```jldoctest
-julia> invmod(2,5)
+julia> invmod(2, 5)
 3
 
-julia> invmod(2,3)
+julia> invmod(2, 3)
 2
 
-julia> invmod(5,6)
+julia> invmod(5, 6)
 5
 ```
 """
@@ -311,6 +319,9 @@ const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{1}) = x
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{2}) = x*x
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{3}) = x*x*x
+@inline literal_pow(::typeof(^), x::HWNumber, ::Val{-1}) = inv(x)
+@inline literal_pow(::typeof(^), x::HWNumber, ::Val{-2}) = (i=inv(x); i*i)
+@inline literal_pow(::typeof(^), x::HWNumber, ::Val{-3}) = (i=inv(x); i*i*i)
 
 # don't use the inv(x) transformation here since float^p is slightly more accurate
 @inline literal_pow(::typeof(^), x::AbstractFloat, ::Val{p}) where {p} = x^p
@@ -320,7 +331,11 @@ const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 # be computed in a type-stable way even for e.g. integers.
 @inline function literal_pow(f::typeof(^), x, ::Val{p}) where {p}
     if p < 0
-        literal_pow(^, inv(x), Val(-p))
+        if x isa BitInteger64
+            f(Float64(x), p) # inv would cause rounding, while Float64^Integer is able to compensate the inverse
+        else
+            f(inv(x), -p)
+        end
     else
         f(x, p)
     end
@@ -874,6 +889,7 @@ end
 Return true if and only if the extrema `typemax(T)` and `typemin(T)` are defined.
 """
 hastypemax(::Base.BitIntegerType) = true
+hastypemax(::Type{Bool}) = true
 hastypemax(::Type{T}) where {T} = applicable(typemax, T) && applicable(typemin, T)
 
 """
@@ -885,14 +901,14 @@ the array length. If the array length is excessive, the excess portion is filled
 
 # Examples
 ```jldoctest
-julia> digits!([2,2,2,2], 10, base = 2)
+julia> digits!([2, 2, 2, 2], 10, base = 2)
 4-element Vector{Int64}:
  0
  1
  0
  1
 
-julia> digits!([2,2,2,2,2,2], 10, base = 2)
+julia> digits!([2, 2, 2, 2, 2, 2], 10, base = 2)
 6-element Vector{Int64}:
  0
  1
@@ -1031,7 +1047,7 @@ function binomial(n::T, k::T) where T<:Integer
     k < 0 && return zero(T)
     sgn = one(T)
     if n < 0
-        n = -n + k -1
+        n = -n + k - one(T)
         if isodd(k)
             sgn = -sgn
         end
@@ -1042,15 +1058,15 @@ function binomial(n::T, k::T) where T<:Integer
     if k > (n>>1)
         k = (n - k)
     end
-    x::T = nn = n - k + 1
-    nn += 1
-    rr = 2
+    x = nn = n - k + one(T)
+    nn += one(T)
+    rr = T(2)
     while rr <= k
         xt = div(widemul(x, nn), rr)
         x = xt % T
         x == xt || throw(OverflowError("binomial($n0, $k0) overflows"))
-        rr += 1
-        nn += 1
+        rr += one(T)
+        nn += one(T)
     end
-    convert(T, copysign(x, sgn))
+    copysign(x, sgn)
 end
diff --git a/base/io.jl b/base/io.jl
index 30a87aa9e1cf3..40b38d3183be7 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -60,9 +60,50 @@ function isopen end
 Close an I/O stream. Performs a [`flush`](@ref) first.
 """
 function close end
+
+"""
+    closewrite(stream)
+
+Shut down the write half of a full-duplex I/O stream. Performs a [`flush`](@ref)
+first, then notifies the other end that no more data will be written to the
+underlying file. This is not supported by all IO types.
+
+# Examples
+```jldoctest
+julia> io = Base.BufferStream(); # this never blocks, so we can read and write on the same Task
+
+julia> write(io, "request");
+
+julia> # calling `read(io)` here would block forever
+
+julia> closewrite(io);
+
+julia> read(io, String)
+"request"
+```
+"""
+function closewrite end
+
+"""
+    flush(stream)
+
+Commit all currently buffered writes to the given stream.
+"""
 function flush end
-function wait_readnb end
-function wait_close end
+
+"""
+    bytesavailable(io)
+
+Return the number of bytes available for reading before a read from this stream or buffer will block.
+
+# Examples
+```jldoctest
+julia> io = IOBuffer("JuliaLang is a GitHub organization");
+
+julia> bytesavailable(io)
+34
+```
+"""
 function bytesavailable end
 
 """
@@ -81,7 +122,7 @@ function readavailable end
 """
     isreadable(io) -> Bool
 
-Return `true` if the specified IO object is readable (if that can be determined).
+Return `false` if the specified IO object is not readable.
 
 # Examples
 ```jldoctest
@@ -99,12 +140,12 @@ true
 julia> rm("myfile.txt")
 ```
 """
-function isreadable end
+isreadable(io::IO) = isopen(io)
 
 """
     iswritable(io) -> Bool
 
-Return `true` if the specified IO object is writable (if that can be determined).
+Return `false` if the specified IO object is not writable.
 
 # Examples
 ```jldoctest
@@ -122,10 +163,23 @@ false
 julia> rm("myfile.txt")
 ```
 """
-function iswritable end
-function copy end
+iswritable(io::IO) = isopen(io)
+
+"""
+    eof(stream) -> Bool
+
+Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
+function will block to wait for more data if necessary, and then return `false`. Therefore
+it is always safe to read one byte after seeing `eof` return `false`. `eof` will return
+`false` as long as buffered data is still available, even if the remote end of a connection
+is closed.
+"""
 function eof end
 
+function copy end
+function wait_readnb end
+function wait_close end
+
 """
     read(io::IO, T)
 
@@ -357,65 +411,37 @@ end
 function pipe_reader end
 function pipe_writer end
 
+for f in (:flush, :closewrite, :iswritable)
+    @eval $(f)(io::AbstractPipe) = $(f)(pipe_writer(io)::IO)
+end
 write(io::AbstractPipe, byte::UInt8) = write(pipe_writer(io)::IO, byte)
 write(to::IO, from::AbstractPipe) = write(to, pipe_reader(from))
 unsafe_write(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_write(pipe_writer(io)::IO, p, nb)::Union{Int,UInt}
 buffer_writes(io::AbstractPipe, args...) = buffer_writes(pipe_writer(io)::IO, args...)
-flush(io::AbstractPipe) = flush(pipe_writer(io)::IO)
 
+for f in (
+        # peek/mark interface
+        :mark, :unmark, :reset, :ismarked,
+        # Simple reader functions
+        :read, :readavailable, :bytesavailable, :reseteof, :isreadable)
+    @eval $(f)(io::AbstractPipe) = $(f)(pipe_reader(io)::IO)
+end
 read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8
 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb)
-read(io::AbstractPipe) = read(pipe_reader(io)::IO)
 readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out)
 readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n)
-
-for f in (
-        # peek/mark interface
-        :mark, :unmark, :reset, :ismarked,
-        # Simple reader functions
-        :readavailable, :isreadable)
-    @eval $(f)(io::AbstractPipe) = $(f)(pipe_reader(io)::IO)
-end
 peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T
+wait_readnb(io::AbstractPipe, nb::Int) = wait_readnb(pipe_reader(io)::IO, nb)
+eof(io::AbstractPipe) = eof(pipe_reader(io)::IO)::Bool
 
-iswritable(io::AbstractPipe) = iswritable(pipe_writer(io)::IO)
 isopen(io::AbstractPipe) = isopen(pipe_writer(io)::IO) || isopen(pipe_reader(io)::IO)
 close(io::AbstractPipe) = (close(pipe_writer(io)::IO); close(pipe_reader(io)::IO))
-wait_readnb(io::AbstractPipe, nb::Int) = wait_readnb(pipe_reader(io)::IO, nb)
 wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe_reader(io)::IO))
 
-"""
-    bytesavailable(io)
-
-Return the number of bytes available for reading before a read from this stream or buffer will block.
-
-# Examples
-```jldoctest
-julia> io = IOBuffer("JuliaLang is a GitHub organization");
-
-julia> bytesavailable(io)
-34
-```
-"""
-bytesavailable(io::AbstractPipe) = bytesavailable(pipe_reader(io)::IO)
-bytesavailable(io::DevNull) = 0
-
-"""
-    eof(stream) -> Bool
-
-Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
-function will block to wait for more data if necessary, and then return `false`. Therefore
-it is always safe to read one byte after seeing `eof` return `false`. `eof` will return
-`false` as long as buffered data is still available, even if the remote end of a connection
-is closed.
-"""
-eof(io::AbstractPipe) = eof(pipe_reader(io)::IO)::Bool
-reseteof(io::AbstractPipe) = reseteof(pipe_reader(io)::IO)
-
 
 # Exception-safe wrappers (io = open(); try f(io) finally close(io))
 
@@ -523,7 +549,8 @@ end
 
 Read all lines of an I/O stream or a file as a vector of strings. Behavior is
 equivalent to saving the result of reading [`readline`](@ref) repeatedly with the same
-arguments and saving the resulting lines as a vector of strings.
+arguments and saving the resulting lines as a vector of strings. See also
+[`eachline`](@ref) to iterate over the lines without reading them all at once.
 
 # Examples
 ```jldoctest
@@ -1119,11 +1146,6 @@ ismarked(io::IO) = io.mark >= 0
 # Make sure all IO streams support flush, even if only as a no-op,
 # to make it easier to write generic I/O code.
 
-"""
-    flush(stream)
-
-Commit all currently buffered writes to the given stream.
-"""
 flush(io::IO) = nothing
 
 """
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index e204eca906cbf..e08a019d84a2c 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -334,6 +334,12 @@ end
 
 eof(io::GenericIOBuffer) = (io.ptr-1 == io.size)
 
+function closewrite(io::GenericIOBuffer)
+    # Alternative: throw(_UVError("closewrite", UV_ENOTSOCK)) instead of accepting the call.
+    io.writable = false
+    return nothing
+end
+
 @noinline function close(io::GenericIOBuffer{T}) where T
     io.readable = false
     io.writable = false
diff --git a/base/iostream.jl b/base/iostream.jl
index 98f15fd8a7db7..0af0e244cf357 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -404,13 +404,15 @@ end
 if ENDIAN_BOM == 0x04030201
 function read(s::IOStream, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64}})
     n = sizeof(T)
-    lock(s.lock)
+    l = s._dolock
+    _lock = s.lock
+    l && lock(_lock)
     if ccall(:jl_ios_buffer_n, Cint, (Ptr{Cvoid}, Csize_t), s.ios, n) != 0
-        unlock(s.lock)
+        l && unlock(_lock)
         throw(EOFError())
     end
     x = ccall(:jl_ios_get_nbyte_int, UInt64, (Ptr{Cvoid}, Csize_t), s.ios, n) % T
-    unlock(s.lock)
+    l && unlock(_lock)
     return x
 end
 
diff --git a/base/irrationals.jl b/base/irrationals.jl
index b1ecaf8557090..f3a9817f1ee35 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -203,7 +203,7 @@ big(::Type{<:AbstractIrrational}) = BigFloat
 function alignment(io::IO, x::AbstractIrrational)
     m = match(r"^(.*?)(=.*)$", sprint(show, x, context=io, sizehint=0))
     m === nothing ? (length(sprint(show, x, context=io, sizehint=0)), 0) :
-    (length(m.captures[1]), length(m.captures[2]))
+    (length(something(m.captures[1])), length(something(m.captures[2])))
 end
 
 # inv
diff --git a/base/iterators.jl b/base/iterators.jl
index f9728cbbd0793..ec47ae8ea0435 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -113,13 +113,22 @@ first(r::Reverse) = last(r.itr) # and the last shall be first
     (A.itr[idx], (state[1], itrs))
 end
 
+# Generic fallback for `iterate(::Reverse{T})`; it assumes the collection supports `getindex(::T)` and `reverse(eachindex(::T))`.
+# `@propagate_inbounds` is deliberately not applied to this method, just in case.
+function iterate(A::Reverse, state=(reverse(eachindex(A.itr)),))
+    nxt = iterate(state...)
+    nxt === nothing && return nothing
+    idx, rest = nxt
+    return (A.itr[idx], (state[1], rest))
+end
+
 reverse(R::AbstractRange) = Base.reverse(R) # copying ranges is cheap
 reverse(G::Generator) = Generator(G.f, reverse(G.iter))
 reverse(r::Reverse) = r.itr
 reverse(x::Union{Number,AbstractChar}) = x
 reverse(p::Pair) = Base.reverse(p) # copying pairs is cheap
 
-iterate(r::Reverse{<:Tuple}, i::Int = length(r.itr)) = i < 1 ? nothing : (r.itr[i], i-1)
+iterate(r::Reverse{<:Union{Tuple, NamedTuple}}, i::Int = length(r.itr)) = i < 1 ? nothing : (r.itr[i], i-1)
 
 # enumerate
 
@@ -160,6 +169,7 @@ size(e::Enumerate) = size(e.itr)
     n === nothing && return n
     (i, n[1]), (i+1, n[2])
 end
+last(e::Enumerate) = (length(e.itr), e.itr[end])
 
 eltype(::Type{Enumerate{I}}) where {I} = Tuple{Int, eltype(I)}
 
@@ -238,19 +248,34 @@ pairs(A::AbstractVector) = pairs(IndexLinear(), A)
 length(v::Pairs) = length(getfield(v, :itr))
 axes(v::Pairs) = axes(getfield(v, :itr))
 size(v::Pairs) = size(getfield(v, :itr))
-@propagate_inbounds function iterate(v::Pairs{K, V}, state...) where {K, V}
-    x = iterate(getfield(v, :itr), state...)
+
+@propagate_inbounds function _pairs_elt(p::Pairs{K, V}, idx) where {K, V}
+    return Pair{K, V}(idx, getfield(p, :data)[idx])
+end
+
+@propagate_inbounds function iterate(p::Pairs{K, V}, state...) where {K, V}
+    x = iterate(getfield(p, :itr), state...)
     x === nothing && return x
-    indx, n = x
-    item = getfield(v, :data)[indx]
-    return (Pair{K, V}(indx, item), n)
+    idx, next = x
+    return (_pairs_elt(p, idx), next)
 end
+
+@propagate_inbounds function iterate(r::Reverse{<:Pairs}, state=(reverse(getfield(r.itr, :itr)),))
+    nxt = iterate(state...)  # `state` is (reversed key iterator, [its iteration state])
+    nxt === nothing && return nothing
+    key, rest = nxt
+    return (_pairs_elt(r.itr, key), (state[1], rest))  # carry the reversed key iterator along
+end
+
 @inline isdone(v::Pairs, state...) = isdone(getfield(v, :itr), state...)
 
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, I}}) where {I} = IteratorSize(I)
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, <:Base.AbstractUnitRange, <:Tuple}}) = HasLength()
 
-reverse(v::Pairs) = Pairs(getfield(v, :data), reverse(getfield(v, :itr)))
+function last(v::Pairs{K, V}) where {K, V}
+    key = last(getfield(v, :itr))  # last key produced by the underlying key iterator
+    return Pair{K, V}(key, v[key])
+end
 
 haskey(v::Pairs, key) = (key in getfield(v, :itr))
 keys(v::Pairs) = getfield(v, :itr)
@@ -398,7 +423,8 @@ zip_iteratoreltype() = HasEltype()
 zip_iteratoreltype(a) = a
 zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail...))
 
-reverse(z::Zip) = Zip(Base.map(reverse, z.is))
+reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length
+last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is)))
 
 # filter
 
@@ -457,6 +483,7 @@ IteratorEltype(::Type{Filter{F,I}}) where {F,I} = IteratorEltype(I)
 IteratorSize(::Type{<:Filter}) = SizeUnknown()
 
 reverse(f::Filter) = Filter(f.flt, reverse(f.itr))
+last(f::Filter) = first(reverse(f))
 
 # Accumulate -- partial reductions of a function over an iterator
 
@@ -480,20 +507,22 @@ This is effectively a lazy version of [`Base.accumulate`](@ref).
 
 # Examples
 ```jldoctest
-julia> f = Iterators.accumulate(+, [1,2,3,4]);
+julia> a = Iterators.accumulate(+, [1,2,3,4]);
 
-julia> foreach(println, f)
+julia> foreach(println, a)
 1
 3
 6
 10
 
-julia> f = Iterators.accumulate(+, [1,2,3]; init = 100);
+julia> b = Iterators.accumulate(/, (2, 5, 2, 5); init = 100);
 
-julia> foreach(println, f)
-101
-103
-106
+julia> collect(b)
+4-element Vector{Float64}:
+ 50.0
+ 10.0
+  5.0
+  1.0
 ```
 """
 accumulate(f, itr; init = Base._InitialValue()) = Accumulate(f, itr, init)
@@ -592,8 +621,8 @@ IteratorSize(::Type{<:Rest{I}}) where {I} = rest_iteratorsize(IteratorSize(I))
 
 # Count -- infinite counting
 
-struct Count{S<:Number}
-    start::S
+struct Count{T,S}
+    start::T
     step::S
 end
 
@@ -613,11 +642,13 @@ julia> for v in Iterators.countfrom(5, 2)
 9
 ```
 """
-countfrom(start::Number, step::Number) = Count(promote(start, step)...)
-countfrom(start::Number)               = Count(start, oneunit(start))
-countfrom()                            = Count(1, 1)
+countfrom(start::T, step::S) where {T,S} = Count{typeof(start+step),S}(start, step)
+countfrom(start::Number, step::Number)   = Count(promote(start, step)...)
+countfrom(start)                         = Count(start, oneunit(start))
+countfrom()                              = Count(1, 1)
 
-eltype(::Type{Count{S}}) where {S} = S
+
+eltype(::Type{<:Count{T}}) where {T} = T
 
 iterate(it::Count, state=it.start) = (state, state + it.step)
 
@@ -786,7 +817,7 @@ end
 
 IteratorSize(::Type{<:TakeWhile}) = SizeUnknown()
 eltype(::Type{TakeWhile{I,P}} where P) where {I} = eltype(I)
-IteratorEltype(::Type{TakeWhile{I}} where P) where {I} = IteratorEltype(I)
+IteratorEltype(::Type{TakeWhile{I, P}} where P) where {I} = IteratorEltype(I)
 
 
 # dropwhile
@@ -879,6 +910,7 @@ function iterate(it::Cycle, state)
 end
 
 reverse(it::Cycle) = Cycle(reverse(it.xs))
+last(it::Cycle) = last(it.xs)
 
 # Repeated - repeat an object infinitely many times
 
@@ -917,6 +949,7 @@ IteratorSize(::Type{<:Repeated}) = IsInfinite()
 IteratorEltype(::Type{<:Repeated}) = HasEltype()
 
 reverse(it::Union{Repeated,Take{<:Repeated}}) = it
+last(it::Union{Repeated,Take{<:Repeated}}) = first(it)
 
 # Product -- cartesian product of iterators
 struct ProductIterator{T<:Tuple}
@@ -1048,6 +1081,7 @@ end
 end
 
 reverse(p::ProductIterator) = ProductIterator(Base.map(reverse, p.iterators))
+last(p::ProductIterator) = Base.map(last, p.iterators)
 
 # flatten an iterator of iterators
 
@@ -1127,6 +1161,7 @@ length(f::Flatten{Tuple{}}) = 0
 end
 
 reverse(f::Flatten) = Flatten(reverse(itr) for itr in reverse(f.it))
+last(f::Flatten) = last(last(f.it))
 
 """
     partition(collection, n)
@@ -1191,11 +1226,11 @@ end
 struct IterationCutShort; end
 
 function iterate(itr::PartitionIterator, state...)
-    v = Vector{eltype(itr.c)}(undef, itr.n)
     # This is necessary to remember whether we cut the
     # last element short. In such cases, we do return that
     # element, but not the next one
     state === (IterationCutShort(),) && return nothing
+    v = Vector{eltype(itr.c)}(undef, itr.n)
     i = 0
     y = iterate(itr.c, state...)
     while y !== nothing
@@ -1340,13 +1375,32 @@ length(s::Stateful) = length(s.itr) - s.taken
 """
     only(x)
 
-Returns the one and only element of collection `x`, and throws an `ArgumentError` if the
+Return the one and only element of collection `x`, or throw an [`ArgumentError`](@ref) if the
 collection has zero or multiple elements.
 
 See also [`first`](@ref), [`last`](@ref).
 
 !!! compat "Julia 1.4"
     This method requires at least Julia 1.4.
+
+# Examples
+```jldoctest
+julia> only(["a"])
+"a"
+
+julia> only("a")
+'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
+
+julia> only(())
+ERROR: ArgumentError: Tuple contains 0 elements, must contain exactly 1 element
+Stacktrace:
+[...]
+
+julia> only(('a', 'b'))
+ERROR: ArgumentError: Tuple contains 2 elements, must contain exactly 1 element
+Stacktrace:
+[...]
+```
 """
 @propagate_inbounds function only(x)
     i = iterate(x)
diff --git a/base/libc.jl b/base/libc.jl
index 98d2910917ee4..8cce4ce2a259b 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -404,36 +404,36 @@ srand(seed=floor(Int, time()) % Cuint) = ccall(:srand, Cvoid, (Cuint,), seed)
 
 struct Cpasswd
    username::Cstring
-   uid::Clong
-   gid::Clong
+   uid::Culong
+   gid::Culong
    shell::Cstring
    homedir::Cstring
    gecos::Cstring
-   Cpasswd() = new(C_NULL, -1, -1, C_NULL, C_NULL, C_NULL)
+   Cpasswd() = new(C_NULL, typemax(Culong), typemax(Culong), C_NULL, C_NULL, C_NULL)
 end
 mutable struct Cgroup
-    groupname::Cstring     # group name
-    gid::Clong        # group ID
-    mem::Ptr{Cstring} # group members
-    Cgroup() = new(C_NULL, -1, C_NULL)
+    groupname::Cstring # group name
+    gid::Culong        # group ID
+    mem::Ptr{Cstring}  # group members
+    Cgroup() = new(C_NULL, typemax(Culong), C_NULL)
 end
 struct Passwd
     username::String
-    uid::Int
-    gid::Int
+    uid::UInt
+    gid::UInt
     shell::String
     homedir::String
     gecos::String
 end
 struct Group
     groupname::String
-    gid::Int
+    gid::UInt
     mem::Vector{String}
 end
 
 function getpwuid(uid::Unsigned, throw_error::Bool=true)
     ref_pd = Ref(Cpasswd())
-    ret = ccall(:jl_os_get_passwd, Cint, (Ref{Cpasswd}, UInt), ref_pd, uid)
+    ret = ccall(:jl_os_get_passwd, Cint, (Ref{Cpasswd}, Culong), ref_pd, uid)
     if ret != 0
         throw_error && Base.uv_error("getpwuid", ret)
         return
@@ -452,7 +452,7 @@ function getpwuid(uid::Unsigned, throw_error::Bool=true)
 end
 function getgrgid(gid::Unsigned, throw_error::Bool=true)
     ref_gp = Ref(Cgroup())
-    ret = ccall(:jl_os_get_group, Cint, (Ref{Cgroup}, UInt), ref_gp, gid)
+    ret = ccall(:jl_os_get_group, Cint, (Ref{Cgroup}, Culong), ref_gp, gid)
     if ret != 0
         throw_error && Base.uv_error("getgrgid", ret)
         return
@@ -475,6 +475,9 @@ function getgrgid(gid::Unsigned, throw_error::Bool=true)
     return gp
 end
 
+getuid() = ccall(:jl_getuid, Culong, ())
+geteuid() = ccall(:jl_geteuid, Culong, ())
+
 # Include dlopen()/dlpath() code
 include("libdl.jl")
 using .Libdl
diff --git a/base/libuv.jl b/base/libuv.jl
index c63045f4b1b68..c64cbff564b66 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -107,6 +107,7 @@ end
 function uv_alloc_buf end
 function uv_readcb end
 function uv_writecb_task end
+function uv_shutdowncb_task end
 function uv_return_spawn end
 function uv_asynccb end
 function uv_timercb end
diff --git a/base/loading.jl b/base/loading.jl
index 851ebf17cc3b9..0cbf0a52b9e68 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Base.require is the implementation for the `import` statement
+const require_lock = ReentrantLock()
 
 # Cross-platform case-sensitive path canonicalization
 
@@ -129,6 +130,7 @@ end
 const ns_dummy_uuid = UUID("fe0723d6-3a44-4c41-8065-ee0f42c8ceab")
 
 function dummy_uuid(project_file::String)
+    @lock require_lock begin
     cache = LOADING_CACHE[]
     if cache !== nothing
         uuid = get(cache.dummy_uuid, project_file, nothing)
@@ -144,6 +146,7 @@ function dummy_uuid(project_file::String)
         cache.dummy_uuid[project_file] = uuid
     end
     return uuid
+    end
 end
 
 ## package path slugs: turning UUID + SHA1 into a pair of 4-byte "slugs" ##
@@ -236,8 +239,7 @@ struct TOMLCache
 end
 const TOML_CACHE = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}())
 
-const TOML_LOCK = ReentrantLock()
-parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, TOML_LOCK)
+parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, require_lock)
 function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
     lock(toml_lock) do
         cache = LOADING_CACHE[]
@@ -337,13 +339,15 @@ Use [`dirname`](@ref) to get the directory part and [`basename`](@ref)
 to get the file name part of the path.
 """
 function pathof(m::Module)
-    pkgid = get(Base.module_keys, m, nothing)
+    @lock require_lock begin
+    pkgid = get(module_keys, m, nothing)
     pkgid === nothing && return nothing
-    origin = get(Base.pkgorigins, pkgid, nothing)
+    origin = get(pkgorigins, pkgid, nothing)
     origin === nothing && return nothing
     path = origin.path
     path === nothing && return nothing
     return fixup_stdlib_path(path)
+    end
 end
 
 """
@@ -354,7 +358,7 @@ or `nothing` if `m` was not imported from a package. Optionally further
 path component strings can be provided to construct a path within the
 package root.
 
-```julia
+```julia-repl
 julia> pkgdir(Foo)
 "/path/to/Foo.jl"
 
@@ -366,7 +370,7 @@ julia> pkgdir(Foo, "src", "file.jl")
     The optional argument `paths` requires at least Julia 1.7.
 """
 function pkgdir(m::Module, paths::String...)
-    rootmodule = Base.moduleroot(m)
+    rootmodule = moduleroot(m)
     path = pathof(rootmodule)
     path === nothing && return nothing
     return joinpath(dirname(dirname(path)), paths...)
@@ -383,6 +387,7 @@ const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml"
 #  - `true`: `env` is an implicit environment
 #  - `path`: the path of an explicit project file
 function env_project_file(env::String)::Union{Bool,String}
+    @lock require_lock begin
     cache = LOADING_CACHE[]
     if cache !== nothing
         project_file = get(cache.env_project_file, env, nothing)
@@ -406,6 +411,7 @@ function env_project_file(env::String)::Union{Bool,String}
         cache.env_project_file[env] = project_file
     end
     return project_file
+    end
 end
 
 function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
@@ -473,6 +479,7 @@ end
 
 # find project file's corresponding manifest file
 function project_file_manifest_path(project_file::String)::Union{Nothing,String}
+    @lock require_lock begin
     cache = LOADING_CACHE[]
     if cache !== nothing
         manifest_path = get(cache.project_file_manifest_path, project_file, missing)
@@ -501,6 +508,7 @@ function project_file_manifest_path(project_file::String)::Union{Nothing,String}
         cache.project_file_manifest_path[project_file] = manifest_path
     end
     return manifest_path
+    end
 end
 
 # given a directory (implicit env from LOAD_PATH) and a name,
@@ -688,7 +696,7 @@ function implicit_manifest_deps_get(dir::String, where::PkgId, name::String)::Un
     @assert where.uuid !== nothing
     project_file = entry_point_and_project_file(dir, where.name)[2]
     project_file === nothing && return nothing # a project file is mandatory for a package with a uuid
-    proj = project_file_name_uuid(project_file, where.name, )
+    proj = project_file_name_uuid(project_file, where.name)
     proj == where || return nothing # verify that this is the correct project file
     # this is the correct project, so stop searching here
     pkg_uuid = explicit_project_deps_get(project_file, name)
@@ -753,23 +761,30 @@ function _include_from_serialized(path::String, depmods::Vector{Any})
     if isa(sv, Exception)
         return sv
     end
-    restored = sv[1]
-    if !isa(restored, Exception)
-        for M in restored::Vector{Any}
-            M = M::Module
-            if isdefined(M, Base.Docs.META)
-                push!(Base.Docs.modules, M)
-            end
-            if parentmodule(M) === M
-                register_root_module(M)
-            end
+    sv = sv::SimpleVector
+    restored = sv[1]::Vector{Any}
+    for M in restored
+        M = M::Module
+        if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing
+            push!(Base.Docs.modules, M)
+        end
+        if parentmodule(M) === M
+            register_root_module(M)
+        end
+    end
+    inits = sv[2]::Vector{Any}
+    if !isempty(inits)
+        unlock(require_lock) # temporarily _unlock_ during these callbacks
+        try
+            ccall(:jl_init_restored_modules, Cvoid, (Any,), inits)
+        finally
+            lock(require_lock)
         end
     end
-    isassigned(sv, 2) && ccall(:jl_init_restored_modules, Cvoid, (Any,), sv[2])
     return restored
 end
 
-function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String})
+function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String}, depth::Int = 0)
     if root_module_exists(modkey)
         M = root_module(modkey)
         if PkgId(M) == modkey && module_build_id(M) === build_id
@@ -780,7 +795,7 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::U
             modpath = locate_package(modkey)
             modpath === nothing && return nothing
         end
-        mod = _require_search_from_serialized(modkey, String(modpath))
+        mod = _require_search_from_serialized(modkey, String(modpath), depth)
         get!(PkgOrigin, pkgorigins, modkey).path = modpath
         if !isa(mod, Bool)
             for callback in package_callbacks
@@ -821,10 +836,14 @@ function _require_from_serialized(path::String)
     return _include_from_serialized(path, depmods)
 end
 
+# use an Int counter so that nested @time_imports calls all remain open
+const TIMING_IMPORTS = Threads.Atomic{Int}(0)
+
 # returns `true` if require found a precompile cache for this sourcepath, but couldn't load it
 # returns `false` if the module isn't known to be precompilable
 # returns the set of modules restored if the cache load succeeded
-function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
+function _require_search_from_serialized(pkg::PkgId, sourcepath::String, depth::Int = 0)
+    t_before = time_ns()
     paths = find_all_in_cache_path(pkg)
     for path_to_try in paths::Vector{String}
         staledeps = stale_cachefile(sourcepath, path_to_try)
@@ -840,7 +859,7 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
             dep = staledeps[i]
             dep isa Module && continue
             modpath, modkey, build_id = dep::Tuple{String, PkgId, UInt64}
-            dep = _tryrequire_from_serialized(modkey, build_id, modpath)
+            dep = _tryrequire_from_serialized(modkey, build_id, modpath, depth + 1)
             if dep === nothing
                 @debug "Required dependency $modkey failed to load from cache file for $modpath."
                 staledeps = true
@@ -855,6 +874,13 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
         if isa(restored, Exception)
             @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
         else
+            if TIMING_IMPORTS[] > 0
+                elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
+                tree_prefix = depth == 0 ? "" : "$("  "^(depth-1))┌ "
+                print("$(lpad(elapsed, 9)) ms  ")
+                printstyled(tree_prefix, color = :light_black)
+                println(pkg.name)
+            end
             return restored
         end
     end
@@ -862,7 +888,7 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
 end
 
 # to synchronize multiple tasks trying to import/using something
-const package_locks = Dict{PkgId,Condition}()
+const package_locks = Dict{PkgId,Threads.Condition}()
 
 # to notify downstream consumers that a module was successfully loaded
 # Callbacks take the form (mod::Base.PkgId) -> nothing.
@@ -885,7 +911,9 @@ function _include_dependency(mod::Module, _path::AbstractString)
         path = normpath(joinpath(dirname(prev), _path))
     end
     if _track_dependencies[]
+        @lock require_lock begin
         push!(_require_dependencies, (mod, path, mtime(path)))
+        end
     end
     return path, prev
 end
@@ -957,6 +985,7 @@ For more details regarding code loading, see the manual sections on [modules](@r
 [parallel computing](@ref code-availability).
 """
 function require(into::Module, mod::Symbol)
+    @lock require_lock begin
     LOADING_CACHE[] = LoadingCache()
     try
         uuidkey = identify_package(into, String(mod))
@@ -964,10 +993,20 @@ function require(into::Module, mod::Symbol)
         if uuidkey === nothing
             where = PkgId(into)
             if where.uuid === nothing
+                hint, dots = begin
+                    if isdefined(into, mod) && getfield(into, mod) isa Module
+                        true, "."
+                    elseif isdefined(parentmodule(into), mod) && getfield(parentmodule(into), mod) isa Module
+                        true, ".."
+                    else
+                        false, ""
+                    end
+                end
+                hint_message = hint ? ", maybe you meant `import/using $(dots)$(mod)`" : ""
+                start_sentence = hint ? "Otherwise, run" : "Run"
                 throw(ArgumentError("""
-                    Package $mod not found in current path:
-                    - Run `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.
-                    """))
+                    Package $mod not found in current path$hint_message.
+                    - $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package."""))
             else
                 s = """
                 Package $(where.name) does not have $mod in its dependencies:
@@ -998,6 +1037,7 @@ function require(into::Module, mod::Symbol)
     finally
         LOADING_CACHE[] = nothing
     end
+    end
 end
 
 mutable struct PkgOrigin
@@ -1009,6 +1049,8 @@ PkgOrigin() = PkgOrigin(nothing, nothing)
 const pkgorigins = Dict{PkgId,PkgOrigin}()
 
 function require(uuidkey::PkgId)
+    @lock require_lock begin
+    just_loaded_pkg = false
     if !root_module_exists(uuidkey)
         cachefile = _require(uuidkey)
         if cachefile !== nothing
@@ -1018,17 +1060,26 @@ function require(uuidkey::PkgId)
         for callback in package_callbacks
             invokelatest(callback, uuidkey)
         end
+        just_loaded_pkg = true
+    end
+    if just_loaded_pkg && !root_module_exists(uuidkey)
+        error("package `$(uuidkey.name)` did not define the expected \
+              module `$(uuidkey.name)`, check for typos in package module name")
     end
     return root_module(uuidkey)
+    end
 end
 
 const loaded_modules = Dict{PkgId,Module}()
 const module_keys = IdDict{Module,PkgId}() # the reverse
 
-is_root_module(m::Module) = haskey(module_keys, m)
-root_module_key(m::Module) = module_keys[m]
+is_root_module(m::Module) = @lock require_lock haskey(module_keys, m)
+root_module_key(m::Module) = @lock require_lock module_keys[m]
 
 function register_root_module(m::Module)
+    # n.b. This is called from C after creating a new module in `Base.__toplevel__`,
+    # instead of adding the module to the binding table there.
+    @lock require_lock begin
     key = PkgId(m, String(nameof(m)))
     if haskey(loaded_modules, key)
         oldm = loaded_modules[key]
@@ -1038,6 +1089,7 @@ function register_root_module(m::Module)
     end
     loaded_modules[key] = m
     module_keys[m] = key
+    end
     nothing
 end
 
@@ -1053,12 +1105,13 @@ using Base
 end
 
 # get a top-level Module from the given key
-root_module(key::PkgId) = loaded_modules[key]
+root_module(key::PkgId) = @lock require_lock loaded_modules[key]
 root_module(where::Module, name::Symbol) =
     root_module(identify_package(where, String(name)))
+maybe_root_module(key::PkgId) = @lock require_lock get(loaded_modules, key, nothing)
 
-root_module_exists(key::PkgId) = haskey(loaded_modules, key)
-loaded_modules_array() = collect(values(loaded_modules))
+root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key)
+loaded_modules_array() = @lock require_lock collect(values(loaded_modules))
 
 function unreference_module(key::PkgId)
     if haskey(loaded_modules, key)
@@ -1077,7 +1130,7 @@ function _require(pkg::PkgId)
         wait(loading)
         return
     end
-    package_locks[pkg] = Condition()
+    package_locks[pkg] = Threads.Condition(require_lock)
 
     last = toplevel_load[]
     try
@@ -1145,10 +1198,12 @@ function _require(pkg::PkgId)
         if uuid !== old_uuid
             ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid)
         end
+        unlock(require_lock)
         try
             include(__toplevel__, path)
             return
         finally
+            lock(require_lock)
             if uuid !== old_uuid
                 ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid)
             end
@@ -1241,7 +1296,7 @@ Base.include # defined in Base.jl
 
 # Full include() implementation which is used after bootstrap
 function _include(mapexpr::Function, mod::Module, _path::AbstractString)
-    @_noinline_meta # Workaround for module availability in _simplify_include_frames
+    @noinline # Workaround for module availability in _simplify_include_frames
     path, prev = _include_dependency(mod, _path)
     for callback in include_callbacks # to preserve order, must come before eval in include_string
         invokelatest(callback, mod, path)
@@ -1290,7 +1345,7 @@ function load_path_setup_code(load_path::Bool=true)
         code *= """
         append!(empty!(Base.LOAD_PATH), $(repr(load_path)))
         ENV["JULIA_LOAD_PATH"] = $(repr(join(load_path, Sys.iswindows() ? ';' : ':')))
-        Base.ACTIVE_PROJECT[] = nothing
+        Base.set_active_project(nothing)
         """
     end
     return code
@@ -1303,7 +1358,7 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
     append!(empty!(Base.DL_LOAD_PATH), dl_load_path)
     append!(empty!(Base.LOAD_PATH), load_path)
     ENV["JULIA_LOAD_PATH"] = join(load_path, Sys.iswindows() ? ';' : ':')
-    Base.ACTIVE_PROJECT[] = nothing
+    set_active_project(nothing)
     Base._track_dependencies[] = true
     get!(Base.PkgOrigin, Base.pkgorigins, pkg).path = input
     append!(empty!(Base._concrete_dependencies), concrete_deps)
@@ -1345,8 +1400,8 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_d
     for (pkg, build_id) in concrete_deps
         push!(deps_strs, "$(pkg_str(pkg)) => $(repr(build_id))")
     end
-    deps = repr(eltype(concrete_deps)) * "[" * join(deps_strs, ",") * "]"
-
+    deps_eltype = sprint(show, eltype(concrete_deps); context = :module=>nothing)
+    deps = deps_eltype * "[" * join(deps_strs, ",") * "]"
     trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : ``
     io = open(pipeline(`$(julia_cmd()::Cmd) -O0
                        --output-ji $output --output-incremental=yes
@@ -1435,8 +1490,8 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             open(tmppath, "a+") do f
                 write(f, _crc32c(seekstart(f)))
             end
-            # inherit permission from the source file
-            chmod(tmppath, filemode(path) & 0o777)
+            # inherit permissions from the source file (and make them owner-writable)
+            chmod(tmppath, filemode(path) & 0o777 | 0o200)
 
             # Read preferences hash back from .ji file (we can't precompute because
             # we don't actually know what the list of compile-time preferences are without compiling)
@@ -1652,7 +1707,8 @@ function srctext_files(f::IO, srctextpos::Int64)
 end
 
 # Test to see if this UUID is mentioned in this `Project.toml`; either as
-# the top-level UUID (e.g. that of the project itself) or as a dependency.
+# the top-level UUID (e.g. that of the project itself), as a dependency,
+# or as an extra for Preferences.
 function get_uuid_name(project::Dict{String, Any}, uuid::UUID)
     uuid_p = get(project, "uuid", nothing)::Union{Nothing, String}
     name = get(project, "name", nothing)::Union{Nothing, String}
@@ -1667,6 +1723,16 @@ function get_uuid_name(project::Dict{String, Any}, uuid::UUID)
             end
         end
     end
+    for subkey in ("deps", "extras")
+        subsection = get(project, subkey, nothing)::Union{Nothing, Dict{String, Any}}
+        if subsection !== nothing
+            for (k, v) in subsection
+                if uuid == UUID(v::String)
+                    return k
+                end
+            end
+        end
+    end
     return nothing
 end
 
@@ -1793,7 +1859,7 @@ get_compiletime_preferences(::Nothing) = String[]
 
 # returns true if it "cachefile.ji" is stale relative to "modpath.jl"
 # otherwise returns the list of dependencies to also check
-function stale_cachefile(modpath::String, cachefile::String)
+function stale_cachefile(modpath::String, cachefile::String; ignore_loaded = false)
     io = open(cachefile, "r")
     try
         if !isvalid_cache_header(io)
@@ -1814,11 +1880,15 @@ function stale_cachefile(modpath::String, cachefile::String)
                 M = root_module(req_key)
                 if PkgId(M) == req_key && module_build_id(M) === req_build_id
                     depmods[i] = M
+                elseif ignore_loaded
+                    # Used by Pkg.precompile given that there it's ok to precompile different versions of loaded packages
+                    @goto locate_branch
                 else
                     @debug "Rejecting cache file $cachefile because module $req_key is already loaded and incompatible."
                     return true # Won't be able to fulfill dependency
                 end
             else
+                @label locate_branch
                 path = locate_package(req_key)
                 get!(PkgOrigin, pkgorigins, req_key).path = path
                 if path === nothing
@@ -1862,8 +1932,9 @@ function stale_cachefile(modpath::String, cachefile::String)
                 f, ftime_req = chi.filename, chi.mtime
                 # Issue #13606: compensate for Docker images rounding mtimes
                 # Issue #20837: compensate for GlusterFS truncating mtimes to microseconds
+                # The `ftime != 1.0` condition below provides compatibility with Nix mtime.
                 ftime = mtime(f)
-                if ftime != ftime_req && ftime != floor(ftime_req) && ftime != trunc(ftime_req, digits=6)
+                if ftime != ftime_req && ftime != floor(ftime_req) && ftime != trunc(ftime_req, digits=6) && ftime != 1.0
                     @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed"
                     return true
                 end
@@ -1926,11 +1997,13 @@ function precompile(@nospecialize(f), args::Tuple)
     precompile(Tuple{Core.Typeof(f), args...})
 end
 
+const ENABLE_PRECOMPILE_WARNINGS = Ref(false) # while this holds false (the initial value), a failed hint is not warned about
 function precompile(argt::Type)
-    if ccall(:jl_compile_hint, Int32, (Any,), argt) == 0
+    ret = ccall(:jl_compile_hint, Int32, (Any,), argt) != 0 # true when jl_compile_hint returned a nonzero value
+    if !ret && ENABLE_PRECOMPILE_WARNINGS[] # warn only on failure and only when warnings are switched on
         @warn "Inactive precompile statement" maxlog=100 form=argt _module=nothing _file=nothing _line=0
     end
-    true
+    return ret # the hint's outcome is returned to the caller rather than a constant `true`
 end
 
 precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing))
diff --git a/base/lock.jl b/base/lock.jl
index 07253211984fc..f3fdd8822dae2 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -26,11 +26,22 @@ end
 ```
 """
 mutable struct ReentrantLock <: AbstractLock
-    locked_by::Union{Task, Nothing}
-    cond_wait::ThreadSynchronizer
-    reentrancy_cnt::Int
-
-    ReentrantLock() = new(nothing, ThreadSynchronizer(), 0)
+    # offset = 16
+    @atomic locked_by::Union{Task, Nothing}
+    # offset32 = 20, offset64 = 24
+    reentrancy_cnt::UInt32
+    # offset32 = 24, offset64 = 28
+    @atomic havelock::UInt8 # 0x0 = none, 0x1 = lock, 0x2 = conflict
+    # offset32 = 28, offset64 = 32
+    cond_wait::ThreadSynchronizer # 2 words
+    # offset32 = 36, offset64 = 48
+    # sizeof32 = 20, sizeof64 = 32
+    # now add padding to make this a full cache line to minimize false sharing between objects
+    _::NTuple{Int === Int32 ? 2 : 3, Int}
+    # offset32 = 44, offset64 = 72 == sizeof+offset
+    # sizeof32 = 28, sizeof64 = 56
+
+    ReentrantLock() = new(nothing, 0x0000_0000, 0x00, ThreadSynchronizer())
 end
 
 assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by)
@@ -42,7 +53,7 @@ Check whether the `lock` is held by any task/thread.
 This should not be used for synchronization (see instead [`trylock`](@ref)).
 """
 function islocked(rl::ReentrantLock)
-    return rl.reentrancy_cnt != 0
+    return rl.havelock != 0
 end
 
 """
@@ -55,23 +66,26 @@ return `false`.
 
 Each successful `trylock` must be matched by an [`unlock`](@ref).
 """
-function trylock(rl::ReentrantLock)
-    t = current_task()
-    if t === rl.locked_by
-        rl.reentrancy_cnt += 1
+@inline function trylock(rl::ReentrantLock)
+    ct = current_task()
+    if rl.locked_by === ct
+        #@assert rl.havelock !== 0x00
+        rl.reentrancy_cnt += 0x0000_0001
         return true
     end
-    lock(rl.cond_wait)
-    if rl.reentrancy_cnt == 0
-        rl.locked_by = t
-        rl.reentrancy_cnt = 1
-        GC.disable_finalizers()
-        got = true
-    else
-        got = false
+    return _trylock(rl, ct)
+end
+@noinline function _trylock(rl::ReentrantLock, ct::Task)
+    GC.disable_finalizers()
+    if (@atomicreplace :acquire rl.havelock 0x00 => 0x01).success
+        #@assert rl.locked_by === nothing
+        #@assert rl.reentrancy_cnt === 0
+        rl.reentrancy_cnt = 0x0000_0001
+        @atomic :release rl.locked_by = ct
+        return true
     end
-    unlock(rl.cond_wait)
-    return got
+    GC.enable_finalizers()
+    return false
 end
 
 """
@@ -83,28 +97,23 @@ wait for it to become available.
 
 Each `lock` must be matched by an [`unlock`](@ref).
 """
-function lock(rl::ReentrantLock)
-    t = current_task()
-    if t === rl.locked_by
-        rl.reentrancy_cnt += 1
-    else
-        lock(rl.cond_wait)
-        while true
-            if rl.reentrancy_cnt == 0
-                rl.locked_by = t
-                rl.reentrancy_cnt = 1
-                GC.disable_finalizers()
-                break
-            end
-            try
-                wait(rl.cond_wait)
-            catch
-                unlock(rl.cond_wait)
-                rethrow()
+@inline function lock(rl::ReentrantLock)
+    trylock(rl) || (@noinline function slowlock(rl::ReentrantLock)
+        c = rl.cond_wait
+        lock(c.lock)
+        try
+            while true
+                if (@atomicreplace rl.havelock 0x01 => 0x02).old == 0x00 # :sequentially_consistent ? # now either 0x00 or 0x02
+                    # it was unlocked, so try to lock it ourself
+                    _trylock(rl, current_task()) && break
+                else # it was locked, so now wait for the release to notify us
+                    wait(c)
+                end
             end
+        finally
+            unlock(c.lock)
         end
-        unlock(rl.cond_wait)
-    end
+    end)(rl)
     return
 end
 
@@ -116,58 +125,42 @@ Releases ownership of the `lock`.
 If this is a recursive lock which has been acquired before, decrement an
 internal counter and return immediately.
 """
-function unlock(rl::ReentrantLock)
-    t = current_task()
-    n = rl.reentrancy_cnt
-    n == 0 && error("unlock count must match lock count")
-    rl.locked_by === t || error("unlock from wrong thread")
-    if n > 1
-        rl.reentrancy_cnt = n - 1
-    else
-        lock(rl.cond_wait)
-        rl.reentrancy_cnt = 0
-        rl.locked_by = nothing
-        if !isempty(rl.cond_wait.waitq)
-            try
-                notify(rl.cond_wait)
-            catch
-                unlock(rl.cond_wait)
-                rethrow()
+@inline function unlock(rl::ReentrantLock) # errors unless the current task is the one recorded in locked_by
+    rl.locked_by === current_task() ||
+        error(rl.reentrancy_cnt == 0x0000_0000 ? "unlock count must match lock count" : "unlock from wrong thread")
+    (@noinline function _unlock(rl::ReentrantLock) # out-of-line body; returns true only when the lock was fully released
+        n = rl.reentrancy_cnt - 0x0000_0001 # drop one level of re-entrancy
+        rl.reentrancy_cnt = n
+        if n == 0x0000_0000 # outermost unlock; 8-digit literal keeps the comparison in UInt32 like reentrancy_cnt
+            @atomic :monotonic rl.locked_by = nothing
+            if (@atomicswap :release rl.havelock = 0x00) == 0x02 # previous state 0x02 = conflict: waiters must be notified
+                (@noinline function notifywaiters(rl)
+                    cond_wait = rl.cond_wait
+                    lock(cond_wait)
+                    try
+                        notify(cond_wait)
+                    finally
+                        unlock(cond_wait) # always release the condition's lock, even if notify throws
+                    end
+                end)(rl)
             end
+            return true
         end
-        GC.enable_finalizers()
-        unlock(rl.cond_wait)
-    end
-    return
+        return false # still held re-entrantly by this task
+    end)(rl) && GC.enable_finalizers() # finalizers are re-enabled only on the full release
+    nothing
 end
 
 function unlockall(rl::ReentrantLock)
-    t = current_task()
-    n = rl.reentrancy_cnt
-    rl.locked_by === t || error("unlock from wrong thread")
-    n == 0 && error("unlock count must match lock count")
-    lock(rl.cond_wait)
-    rl.reentrancy_cnt = 0
-    rl.locked_by = nothing
-    if !isempty(rl.cond_wait.waitq)
-        try
-            notify(rl.cond_wait)
-        catch
-            unlock(rl.cond_wait)
-            rethrow()
-        end
-    end
-    GC.enable_finalizers()
-    unlock(rl.cond_wait)
+    n = @atomicswap :not_atomic rl.reentrancy_cnt = 0x0000_0001
+    unlock(rl)
     return n
 end
 
-function relockall(rl::ReentrantLock, n::Int)
-    t = current_task()
+function relockall(rl::ReentrantLock, n::UInt32)
     lock(rl)
-    n1 = rl.reentrancy_cnt
-    rl.reentrancy_cnt = n
-    n1 == 1 || concurrency_violation()
+    old = @atomicswap :not_atomic rl.reentrancy_cnt = n
+    old == 0x0000_0001 || concurrency_violation()
     return
 end
 
@@ -180,6 +173,9 @@ available.
 
 When this function returns, the `lock` has been released, so the caller should
 not attempt to `unlock` it.
+
+!!! compat "Julia 1.7"
+    Using a [`Channel`](@ref) as the second argument requires Julia 1.7 or later.
 """
 function lock(f, l::AbstractLock)
     lock(l)
@@ -342,7 +338,7 @@ end
     Event()
 
 Create a level-triggered event source. Tasks that call [`wait`](@ref) on an
-`Event` are suspended and queued until `notify` is called on the `Event`.
+`Event` are suspended and queued until [`notify`](@ref) is called on the `Event`.
 After `notify` is called, the `Event` remains in a signaled state and
 tasks will no longer block when waiting for it.
 
diff --git a/base/locks-mt.jl b/base/locks-mt.jl
index 41e1ef33c574c..7ede8704ec498 100644
--- a/base/locks-mt.jl
+++ b/base/locks-mt.jl
@@ -27,46 +27,19 @@ contending threads. If you have more contention than that, different
 synchronization approaches should be considered.
 """
 mutable struct SpinLock <: AbstractLock
-    owned::Int
+    # we make this much larger than necessary to minimize false-sharing
+    @atomic owned::Int
     SpinLock() = new(0)
 end
 
-import Base.Sys.WORD_SIZE
-
-@eval _xchg!(x::SpinLock, v::Int) =
-    llvmcall($"""
-             %ptr = inttoptr i$WORD_SIZE %0 to i$WORD_SIZE*
-             %rv = atomicrmw xchg i$WORD_SIZE* %ptr, i$WORD_SIZE %1 acq_rel
-             ret i$WORD_SIZE %rv
-             """, Int, Tuple{Ptr{Int}, Int}, unsafe_convert(Ptr{Int}, pointer_from_objref(x)), v)
-
-@eval _get(x::SpinLock) =
-    llvmcall($"""
-             %ptr = inttoptr i$WORD_SIZE %0 to i$WORD_SIZE*
-             %rv = load atomic i$WORD_SIZE, i$WORD_SIZE* %ptr monotonic, align $(gc_alignment(Int))
-             ret i$WORD_SIZE %rv
-             """, Int, Tuple{Ptr{Int}}, unsafe_convert(Ptr{Int}, pointer_from_objref(x)))
-
-@eval _set!(x::SpinLock, v::Int) =
-    llvmcall($"""
-             %ptr = inttoptr i$WORD_SIZE %0 to i$WORD_SIZE*
-             store atomic i$WORD_SIZE %1, i$WORD_SIZE* %ptr release, align $(gc_alignment(Int))
-             ret void
-             """, Cvoid, Tuple{Ptr{Int}, Int}, unsafe_convert(Ptr{Int}, pointer_from_objref(x)), v)
-
 # Note: this cannot assert that the lock is held by the correct thread, because we do not
 # track which thread locked it. Users beware.
 Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_violation()
 
 function lock(l::SpinLock)
     while true
-        if _get(l) == 0
-            GC.disable_finalizers()
-            p = _xchg!(l, 1)
-            if p == 0
-                return
-            end
-            GC.enable_finalizers()
+        if @inline trylock(l)
+            return
         end
         ccall(:jl_cpu_pause, Cvoid, ())
         # Temporary solution before we have gc transition support in codegen.
@@ -75,9 +48,9 @@ function lock(l::SpinLock)
 end
 
 function trylock(l::SpinLock)
-    if _get(l) == 0
+    if l.owned == 0
         GC.disable_finalizers()
-        p = _xchg!(l, 1)
+        p = @atomicswap :acquire l.owned = 1
         if p == 0
             return true
         end
@@ -87,13 +60,14 @@ function trylock(l::SpinLock)
 end
 
 function unlock(l::SpinLock)
-    _get(l) == 0 && error("unlock count must match lock count")
-    _set!(l, 0)
+    if (@atomicswap :release l.owned = 0) == 0
+        error("unlock count must match lock count")
+    end
     GC.enable_finalizers()
     ccall(:jl_cpu_wake, Cvoid, ())
     return
 end
 
 function islocked(l::SpinLock)
-    return _get(l) != 0
+    return l.owned != 0
 end
diff --git a/base/logging.jl b/base/logging.jl
index fd35f547c82a6..731b203a950ba 100644
--- a/base/logging.jl
+++ b/base/logging.jl
@@ -79,15 +79,15 @@ function _invoked_shouldlog(logger, level, _module, group, id)
         shouldlog,
         Tuple{typeof(logger), typeof(level), typeof(_module), typeof(group), typeof(id)},
         logger, level, _module, group, id
-    )
+    )::Bool
 end
 
 function _invoked_min_enabled_level(@nospecialize(logger))
-    return invoke(min_enabled_level, Tuple{typeof(logger)}, logger)
+    return invoke(min_enabled_level, Tuple{typeof(logger)}, logger)::LogLevel
 end
 
 function _invoked_catch_exceptions(@nospecialize(logger))
-    return invoke(catch_exceptions, Tuple{typeof(logger)}, logger)
+    return invoke(catch_exceptions, Tuple{typeof(logger)}, logger)::Bool
 end
 
 """
@@ -670,14 +670,15 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
     buf = IOBuffer()
     stream = logger.stream
     if !isopen(stream)
-        stream = level < Warn ? stdout : stderr
+        stream = stderr
     end
     iob = IOContext(buf, stream)
     levelstr = level == Warn ? "Warning" : string(level)
-    msglines = split(chomp(string(message)::String), '\n')
-    println(iob, "┌ ", levelstr, ": ", msglines[1])
-    for i in 2:length(msglines)
-        println(iob, "│ ", msglines[i])
+    msglines = eachsplit(chomp(string(message)::String), '\n')
+    msg1, rest = Iterators.peel(msglines)
+    println(iob, "┌ ", levelstr, ": ", msg1)
+    for msg in rest
+        println(iob, "│ ", msg)
     end
     for (key, val) in kwargs
         key === :maxlog && continue
diff --git a/base/math.jl b/base/math.jl
index 003d4e0c8f831..820c728a86165 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -41,6 +41,24 @@ end
 
 # non-type specific math functions
 
+@inline function two_mul(x::Float64, y::Float64) # returns the rounded product x*y together with an error term
+    if Core.Intrinsics.have_fma(Float64) # take the fma path only when have_fma reports support for Float64
+        xy = x*y
+        return xy, fma(x, y, -xy) # fma evaluates x*y - xy with a single rounding, i.e. the rounding error of xy
+    end
+    return Base.twomul(x,y) # fallback defined outside this hunk; presumably yields the same (product, error) pair
+end
+
+@inline function two_mul(x::T, y::T) where T<: Union{Float16, Float32} # returns (product rounded to T, error term in T)
+    if Core.Intrinsics.have_fma(T) # fma path, taken when have_fma reports support for T
+        xy = x*y
+        return xy, fma(x, y, -xy) # x*y - xy evaluated with a single rounding
+    end
+    xy = widen(x)*y # otherwise form the product in the wider type given by widen
+    Txy = T(xy) # product rounded back to T
+    return Txy, T(xy-Txy) # what that rounding discarded, itself converted to T
+end
+
 """
     clamp(x, lo, hi)
 
@@ -49,6 +67,9 @@ are promoted to a common type.
 
 See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
 
+!!! compat "Julia 1.3"
+    `missing` as the first argument requires at least Julia 1.3.
+
 # Examples
 ```jldoctest
 julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
@@ -98,6 +119,9 @@ clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
 Restrict values in `array` to the specified range, in-place.
 See also [`clamp`](@ref).
 
+!!! compat "Julia 1.3"
+    `missing` entries in `array` require at least Julia 1.3.
+
 # Examples
 ```jldoctest
 julia> row = collect(-4:4)';
@@ -272,8 +296,7 @@ end
     hi, lo = p[end], zero(x)
     for i in length(p)-1:-1:1
         pi = p[i]
-        prod = hi*x
-        err = fma(hi, x, -prod)
+        prod, err = two_mul(hi,x)
         hi = pi+prod
         lo = fma(lo, x, prod - (hi - pi) + err)
     end
@@ -680,7 +703,7 @@ function _hypot(x, y)
     end
     h = sqrt(muladd(ax, ax, ay*ay))
     # This branch is correctly rounded but requires a native hardware fma.
-    if Base.Math.FMA_NATIVE
+    if Core.Intrinsics.have_fma(typeof(h))
         hsquared = h*h
         axsquared = ax*ax
         h -= (fma(-ay, ay, hsquared-axsquared) + fma(h, h,-hsquared) - fma(ax, ax, -axsquared))/(2*h)
@@ -696,7 +719,20 @@ function _hypot(x, y)
     end
     return h*scale*oneunit(axu)
 end
-_hypot(x::Float16, y::Float16) = Float16(_hypot(Float32(x), Float32(y)))
+@inline function _hypot(x::Float32, y::Float32) # Float32 hypot evaluated in Float64
+    if isinf(x) || isinf(y) # checked first, so an infinite argument yields Inf32 whatever the other argument is
+        return Inf32
+    end
+    _x, _y = Float64(x), Float64(y)
+    return Float32(sqrt(muladd(_x, _x, _y*_y))) # sqrt(x^2 + y^2) computed in Float64, then converted to Float32
+end
+@inline function _hypot(x::Float16, y::Float16) # Float16 hypot evaluated in Float32
+    if isinf(x) || isinf(y) # checked first, so an infinite argument yields Inf16 whatever the other argument is
+        return Inf16
+    end
+    _x, _y = Float32(x), Float32(y)
+    return Float16(sqrt(muladd(_x, _x, _y*_y))) # sqrt(x^2 + y^2) computed in Float32, then converted to Float16
+end
 _hypot(x::ComplexF16, y::ComplexF16) = Float16(_hypot(ComplexF32(x), ComplexF32(y)))
 
 function _hypot(x...)
@@ -812,17 +848,24 @@ end
 """
     significand(x)
 
-Extract the `significand(s)` (a.k.a. mantissa), in binary representation, of a
-floating-point number. If `x` is a non-zero finite number, then the result will be
-a number of the same type on the interval ``[1,2)``. Otherwise `x` is returned.
+Extract the significand (a.k.a. mantissa) of a floating-point number. If `x` is
+a non-zero finite number, then the result will be a number of the same type and
+sign as `x`, and whose absolute value is on the interval ``[1,2)``. Otherwise
+`x` is returned.
 
 # Examples
 ```jldoctest
-julia> significand(15.2)/15.2
-0.125
+julia> significand(15.2)
+1.9
+
+julia> significand(-15.2)
+-1.9
 
-julia> significand(15.2)*8
-15.2
+julia> significand(-15.2) * 2^3
+-15.2
+
+julia> significand(-Inf), significand(Inf), significand(NaN)
+(-Inf, Inf, NaN)
 ```
 """
 function significand(x::T) where T<:IEEEFloat
@@ -867,11 +910,39 @@ function frexp(x::T) where T<:IEEEFloat
     return reinterpret(T, xu), k
 end
 
-rem(x::Float64, y::Float64, ::RoundingMode{:Nearest}) =
-    ccall((:remainder, libm),Float64,(Float64,Float64),x,y)
-rem(x::Float32, y::Float32, ::RoundingMode{:Nearest}) =
-    ccall((:remainderf, libm),Float32,(Float32,Float32),x,y)
-rem(x::Float16, y::Float16, r::RoundingMode{:Nearest}) = Float16(rem(Float32(x), Float32(y), r))
+# NOTE: This `rem` method is adapted from the msun `remainder` and `remainderf`
+# functions, which are under the following license:
+#
+# Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+#
+# Developed at SunSoft, a Sun Microsystems, Inc. business.
+# Permission to use, copy, modify, and distribute this
+# software is freely granted, provided that this notice
+# is preserved.
+function rem(x::T, p::T, ::RoundingMode{:Nearest}) where T<:IEEEFloat
+    (iszero(p) || !isfinite(x) || isnan(p)) && return T(NaN) # zero or NaN divisor, or non-finite x: result is NaN
+    x == p && return copysign(zero(T), x) # equal operands: a zero carrying the sign of x
+    oldx = x # keep the original x; flipsign uses its sign at the end
+    x = abs(rem(x, 2p))  # 2p may overflow but that's okay
+    p = abs(p)
+    if p < 2 * floatmin(T)  # Check whether dividing p by 2 will underflow
+        if 2x > p # tiny p: compare 2x against p instead of x against p/2
+            x -= p
+            if 2x >= p
+                x -= p
+            end
+        end
+    else
+        p_half = p / 2
+        if x > p_half # more than half of p away from zero: step down by p
+            x -= p
+            if x >= p_half # x started anywhere below 2|p|, so one step may not be enough
+                x -= p
+            end
+        end
+    end
+    return flipsign(x, oldx) # negate the reduced value when the original x was negative
+end
 
 
 """
@@ -891,49 +962,67 @@ julia> modf(-3.5)
 """
 modf(x) = isinf(x) ? (flipsign(zero(x), x), x) : (rem(x, one(x)), trunc(x))
 
-function modf(x::Float32)
-    temp = Ref{Float32}()
-    f = ccall((:modff, libm), Float32, (Float32, Ptr{Float32}), x, temp)
-    f, temp[]
+function modf(x::T) where T<:IEEEFloat
+    isinf(x) && return (copysign(zero(T), x), x)
+    ix = trunc(x)
+    rx = copysign(x - ix, x)
+    return (rx, ix)
 end
 
-function modf(x::Float64)
-    temp = Ref{Float64}()
-    f = ccall((:modf, libm), Float64, (Float64, Ptr{Float64}), x, temp)
-    f, temp[]
+function ^(x::Float64, y::Float64)
+    yint = unsafe_trunc(Int, y) # Note, this is actually safe since julia freezes the result
+    y == yint && return x^yint
+    x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
+    x == 1 && return 1.0
+    !isfinite(x) && return x*(y>0 || isnan(x))
+    x==0 && return abs(y)*Inf*(!(y>0))
+    logxhi,logxlo = Base.Math._log_ext(x)
+    xyhi = logxhi*y
+    xylo = logxlo*y
+    hi = xyhi+xylo
+    return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
+end
+function ^(x::T, y::T) where T <: Union{Float16, Float32}
+    yint = unsafe_trunc(Int64, y) # Note, this is actually safe since julia freezes the result
+    y == yint && return x^yint
+    x < 0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
+    x == 1 && return one(T)
+    !isfinite(x) && return x*(y>0 || isnan(x))
+    x==0 && return abs(y)*T(Inf)*(!(y>0))
+    return T(exp2(log2(abs(widen(x))) * y))
 end
 
-@inline function ^(x::Float64, y::Float64)
-    z = ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y)
-    if isnan(z) & !isnan(x+y)
-        throw_exp_domainerror(x)
+# compensated power by squaring
+function ^(x::Float64, n::Integer)
+    n == 0 && return one(x)
+    y = 1.0
+    xnlo = ynlo = 0.0
+    if n < 0
+        rx = inv(x)
+        n==-2 && return rx*rx #keep compatibility with literal_pow
+        isfinite(x) && (xnlo = -fma(x, rx, -1.) * rx)
+        x = rx
+        n = -n
     end
-    z
-end
-@inline function ^(x::Float32, y::Float32)
-    z = ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, y)
-    if isnan(z) & !isnan(x+y)
-        throw_exp_domainerror(x)
+    n == 3 && return x*x*x # keep compatibility with literal_pow
+    while n > 1
+        if n&1 > 0
+            err = muladd(y, xnlo, x*ynlo)
+            y, ynlo = two_mul(x,y)
+            ynlo += err
+        end
+        err = x*2*xnlo
+        x, xnlo = two_mul(x, x)
+        xnlo += err
+        n >>>= 1
     end
-    z
-end
-@inline ^(x::Float16, y::Float16) = Float16(Float32(x)^Float32(y))  # TODO: optimize
-
-@inline function ^(x::Float64, y::Integer)
-    y == -1 && return inv(x)
-    y == 0 && return one(x)
-    y == 1 && return x
-    y == 2 && return x*x
-    y == 3 && return x*x*x
-    ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, Float64(y))
+    !isfinite(x) && return x*y
+    return muladd(x, y, muladd(y, xnlo, x*ynlo))
 end
-@inline function ^(x::Float32, y::Integer)
-    y == -1 && return inv(x)
-    y == 0 && return one(x)
-    y == 1 && return x
-    y == 2 && return x*x
-    y == 3 && return x*x*x
-    ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, Float32(y))
+function ^(x::Float32, n::Integer)
+    n < 0 && return inv(x)^(-n)
+    n == 3 && return x*x*x #keep compatibility with literal_pow
+    Float32(Base.power_by_squaring(Float64(x),n))
 end
 @inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ y)
 @inline literal_pow(::typeof(^), x::Float16, ::Val{p}) where {p} = Float16(literal_pow(^,Float32(x),Val(p)))
diff --git a/base/mathconstants.jl b/base/mathconstants.jl
index 156dc9e1ce39a..3bb4bb52ad07f 100644
--- a/base/mathconstants.jl
+++ b/base/mathconstants.jl
@@ -76,7 +76,7 @@ julia> Base.MathConstants.eulergamma
 julia> dx = 10^-6;
 
 julia> sum(-exp(-x) * log(x) for x in dx:dx:100) * dx
-0.5772078382499134
+0.5772078382499133
 ```
 """
 γ, const eulergamma = γ
@@ -123,4 +123,10 @@ Base.literal_pow(::typeof(^), ::Irrational{:ℯ}, ::Val{p}) where {p} = exp(p)
 Base.log(::Irrational{:ℯ}) = 1 # use 1 to correctly promote expressions like log(x)/log(ℯ)
 Base.log(::Irrational{:ℯ}, x::Number) = log(x)
 
+Base.sin(::Irrational{:π}) = 0.0 # methods for the constant π itself return fixed Float64 literals
+Base.cos(::Irrational{:π}) = -1.0
+Base.sincos(::Irrational{:π}) = (0.0, -1.0) # (sin, cos) pair, matching the two methods above
+Base.tan(::Irrational{:π}) = 0.0
+Base.cot(::Irrational{:π}) = -1/0 # -1/0 evaluates to -Inf
+
 end # module
diff --git a/base/meta.jl b/base/meta.jl
index b483630a92f8f..649ffe9d1a19c 100644
--- a/base/meta.jl
+++ b/base/meta.jl
@@ -440,7 +440,7 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
                 @assert isa(arg, Union{GlobalRef, Symbol})
                 return x
             end
-        elseif !is_meta_expr_head(head)
+        elseif !Core.Compiler.is_meta_expr_head(head)
             partially_inline!(x.args, slot_replacements, type_signature, static_param_values,
                               slot_offset, statement_offset, boundscheck)
         end
@@ -450,6 +450,4 @@ end
 
 _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals)
 
-is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo)
-
 end # module
diff --git a/base/mpfr.jl b/base/mpfr.jl
index a53852626f42c..e85f281619ac0 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -11,7 +11,7 @@ import
         inv, exp, exp2, exponent, factorial, floor, fma, hypot, isinteger,
         isfinite, isinf, isnan, ldexp, log, log2, log10, max, min, mod, modf,
         nextfloat, prevfloat, promote_rule, rem, rem2pi, round, show, float,
-        sum, sqrt, string, print, trunc, precision, exp10, expm1, log1p,
+        sum, sqrt, string, print, trunc, precision, _precision, exp10, expm1, log1p,
         eps, signbit, sign, sin, cos, sincos, tan, sec, csc, cot, acos, asin, atan,
         cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, lerpi,
         cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
@@ -181,7 +181,7 @@ widen(::Type{Float64}) = BigFloat
 widen(::Type{BigFloat}) = BigFloat
 
 function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
-    if precision == MPFR.precision(x)
+    if precision == _precision(x)
         return x
     else
         z = BigFloat(;precision=precision)
@@ -192,7 +192,7 @@ function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::I
 end
 
 function _duplicate(x::BigFloat)
-    z = BigFloat(;precision=precision(x))
+    z = BigFloat(;precision=_precision(x))
     ccall((:mpfr_set, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Int32), z, x, 0)
     return z
 end
@@ -338,8 +338,23 @@ Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
     _cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
 Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
 
-# TODO: avoid double rounding
-Float16(x::BigFloat) = Float16(Float64(x))
+function Float16(x::BigFloat) :: Float16
+    res = Float32(x)
+    resi = reinterpret(UInt32, res)
+    if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
+        # shift so that the mantissa lines up where it would for a normal Float16
+        shift = 113-((resi & 0x7f800000)>>23)
+        if shift<23
+            resi |= 0x0080_0000 # set implicit bit
+            resi >>= shift
+        end
+    end
+    if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
+        # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
+        res = nextfloat(res, cmp(x, res))
+    end
+    return res
+end
 
 promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
 promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat
@@ -792,37 +807,37 @@ function sign(x::BigFloat)
     return c < 0 ? -one(x) : one(x)
 end
 
-function precision(x::BigFloat)  # precision of an object of type BigFloat
+function _precision(x::BigFloat)  # precision of an object of type BigFloat
     return ccall((:mpfr_get_prec, :libmpfr), Clong, (Ref{BigFloat},), x)
 end
+precision(x::BigFloat; base::Integer=2) = _precision(x, base)
 
-"""
-    precision(BigFloat)
-
-Get the precision (in bits) currently used for [`BigFloat`](@ref) arithmetic.
-"""
-precision(::Type{BigFloat}) = Int(DEFAULT_PRECISION[]) # precision of the type BigFloat itself
+_precision(::Type{BigFloat}) = Int(DEFAULT_PRECISION[]) # default precision of the type BigFloat itself
 
 """
-    setprecision([T=BigFloat,] precision::Int)
+    setprecision([T=BigFloat,] precision::Int; base=2)
 
-Set the precision (in bits) to be used for `T` arithmetic.
+Set the precision (in bits, by default) to be used for `T` arithmetic.
+If `base` is specified, then the precision is the minimum required to give
+at least `precision` digits in the given `base`.
 
 !!! warning
 
     This function is not thread-safe. It will affect code running on all threads, but
     its behavior is undefined if called concurrently with computations that use the
     setting.
+
+!!! compat "Julia 1.8"
+    The `base` keyword requires at least Julia 1.8.
 """
-function setprecision(::Type{BigFloat}, precision::Integer)
-    if precision < 2
-        throw(DomainError(precision, "`precision` cannot be less than 2."))
-    end
-    DEFAULT_PRECISION[] = precision
+function setprecision(::Type{BigFloat}, precision::Integer; base::Integer=2)
+    base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
+    precision > 0 || throw(DomainError(precision, "`precision` cannot be less than 1."))
+    DEFAULT_PRECISION[] = base == 2 ? precision : ceil(Int, precision * log2(base))
     return precision
 end
 
-setprecision(precision::Integer) = setprecision(BigFloat, precision)
+setprecision(precision::Integer; base::Integer=2) = setprecision(BigFloat, precision; base)
 
 maxintfloat(x::BigFloat) = BigFloat(2)^precision(x)
 maxintfloat(::Type{BigFloat}) = BigFloat(2)^precision(BigFloat)
@@ -916,9 +931,9 @@ floatmin(::Type{BigFloat}) = nextfloat(zero(BigFloat))
 floatmax(::Type{BigFloat}) = prevfloat(BigFloat(Inf))
 
 """
-    setprecision(f::Function, [T=BigFloat,] precision::Integer)
+    setprecision(f::Function, [T=BigFloat,] precision::Integer; base=2)
 
-Change the `T` arithmetic precision (in bits) for the duration of `f`.
+Change the `T` arithmetic precision (in the given `base`) for the duration of `f`.
 It is logically equivalent to:
 
     old = precision(BigFloat)
@@ -929,11 +944,14 @@ It is logically equivalent to:
 Often used as `setprecision(T, precision) do ... end`
 
 Note: `nextfloat()`, `prevfloat()` do not use the precision mentioned by
-`setprecision`
+`setprecision`.
+
+!!! compat "Julia 1.8"
+    The `base` keyword requires at least Julia 1.8.
 """
-function setprecision(f::Function, ::Type{T}, prec::Integer) where T
+function setprecision(f::Function, ::Type{T}, prec::Integer; kws...) where T
     old_prec = precision(T)
-    setprecision(T, prec)
+    setprecision(T, prec; kws...)
     try
         return f()
     finally
@@ -941,7 +959,7 @@ function setprecision(f::Function, ::Type{T}, prec::Integer) where T
     end
 end
 
-setprecision(f::Function, prec::Integer) = setprecision(f, BigFloat, prec)
+setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigFloat, prec; base)
 
 function string_mpfr(x::BigFloat, fmt::String)
     pc = Ref{Ptr{UInt8}}()
@@ -962,7 +980,7 @@ function string_mpfr(x::BigFloat, fmt::String)
 end
 
 function _prettify_bigfloat(s::String)::String
-    mantissa, exponent = split(s, 'e')
+    mantissa, exponent = eachsplit(s, 'e')
     if !occursin('.', mantissa)
         mantissa = string(mantissa, '.')
     end
@@ -973,7 +991,7 @@ function _prettify_bigfloat(s::String)::String
     expo = parse(Int, exponent)
     if -5 < expo < 6
         expo == 0 && return mantissa
-        int, frac = split(mantissa, '.')
+        int, frac = eachsplit(mantissa, '.')
         if expo > 0
             expo < length(frac) ?
                 string(int, frac[1:expo], '.', frac[expo+1:end]) :
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 8104cddb34387..11bfcd418905f 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -125,11 +125,9 @@ module IteratorsMD
 
     # comparison
     @inline isless(I1::CartesianIndex{N}, I2::CartesianIndex{N}) where {N} = _isless(0, I1.I, I2.I)
-    @inline function _isless(ret, I1::Tuple{Int,Vararg{Int,N}}, I2::Tuple{Int,Vararg{Int,N}}) where {N}
+    @inline function _isless(ret, I1::Tuple{Int,Vararg{Int}}, I2::Tuple{Int,Vararg{Int}})
         newret = ifelse(ret==0, icmp(last(I1), last(I2)), ret)
-        t1, t2 = Base.front(I1), Base.front(I2)
-        # avoid dynamic dispatch by telling the compiler relational invariants
-        return isa(t1, Tuple{}) ? _isless(newret, (), ()) : _isless(newret, t1, t2::Tuple{Int,Vararg{Int}})
+        return _isless(newret, Base.front(I1), Base.front(I2))
     end
     _isless(ret, ::Tuple{}, ::Tuple{}) = ifelse(ret==1, true, false)
     icmp(a, b) = ifelse(isless(a,b), 1, ifelse(a==b, 0, -1))
@@ -353,8 +351,38 @@ module IteratorsMD
     # AbstractArray implementation
     Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices)
     Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian()
-    @propagate_inbounds function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R}
-        CartesianIndex(getindex.(iter.indices, I))
+    # getindex for a 0D CartesianIndices is necessary for disambiguation
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R}
+        CartesianIndex()
+    end
+    @inline function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R}
+        # Eagerly do the bounds check before computing each element of the CartesianIndex so that
+        # we can pass the `@inbounds` hint inside the map and generate more efficient SIMD code (#42115)
+        @boundscheck checkbounds(iter, I...)
+        index = map(iter.indices, I) do r, i
+            @inbounds getindex(r, i)
+        end
+        CartesianIndex(index)
+    end
+
+    # CartesianIndices act as a multidimensional range, so cartesian indexing of CartesianIndices
+    # with compatible dimensions may be seen as indexing into the component ranges.
+    # This may use the special indexing behavior implemented for ranges to return another CartesianIndices
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{N,R},
+        I::Vararg{Union{OrdinalRange{<:Integer, <:Integer}, Colon}, N}) where {N,R}
+        CartesianIndices(getindex.(iter.indices, I))
+    end
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{N},
+        C::CartesianIndices{N}) where {N}
+        CartesianIndices(getindex.(iter.indices, C.indices))
+    end
+
+    # If dimensions permit, we may index into a CartesianIndices directly instead of constructing a SubArray wrapper
+    @propagate_inbounds function Base.view(c::CartesianIndices{N}, r::Vararg{Union{OrdinalRange{<:Integer, <:Integer}, Colon},N}) where {N}
+        getindex(c, r...)
+    end
+    @propagate_inbounds function Base.view(c::CartesianIndices{N}, C::CartesianIndices{N}) where {N}
+        getindex(c, C)
     end
 
     ndims(R::CartesianIndices) = ndims(typeof(R))
@@ -403,15 +431,13 @@ module IteratorsMD
         valid = __is_valid_range(I, rng) && state[1] != last(rng)
         return valid, (I, )
     end
-    @inline function __inc(state::Tuple{Int,Int,Vararg{Int,N}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt,N}}) where {N}
+    @inline function __inc(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
         rng = indices[1]
         I = state[1] + step(rng)
         if __is_valid_range(I, rng) && state[1] != last(rng)
             return true, (I, tail(state)...)
         end
-        t1, t2 = tail(state), tail(indices)
-        # avoid dynamic dispatch by telling the compiler relational invariants
-        valid, I = isa(t1, Tuple{Int}) ? __inc(t1, t2::Tuple{OrdinalRangeInt}) : __inc(t1, t2::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
+        valid, I = __inc(tail(state), tail(indices))
         return valid, (first(rng), I...)
     end
 
@@ -516,15 +542,13 @@ module IteratorsMD
         valid = __is_valid_range(I, rng) && state[1] != first(rng)
         return valid, (I,)
     end
-    @inline function __dec(state::Tuple{Int,Int,Vararg{Int,N}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt,N}}) where {N}
+    @inline function __dec(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
         rng = indices[1]
         I = state[1] - step(rng)
         if __is_valid_range(I, rng) && state[1] != first(rng)
             return true, (I, tail(state)...)
         end
-        t1, t2 = tail(state), tail(indices)
-        # avoid dynamic dispatch by telling the compiler relational invariants
-        valid, I = isa(t1, Tuple{Int}) ? __dec(t1, t2::Tuple{OrdinalRangeInt}) : __dec(t1, t2::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
+        valid, I = __dec(tail(state), tail(indices))
         return valid, (last(rng), I...)
     end
 
@@ -627,7 +651,7 @@ using .IteratorsMD
 ## Bounds-checking with CartesianIndex
 # Disallow linear indexing with CartesianIndex
 function checkbounds(::Type{Bool}, A::AbstractArray, i::Union{CartesianIndex, AbstractArray{<:CartesianIndex}})
-    @_inline_meta
+    @inline
     checkbounds_indices(Bool, axes(A), (i,))
 end
 
@@ -856,7 +880,7 @@ end
 
 function _generate_unsafe_getindex!_body(N::Int)
     quote
-        @_inline_meta
+        @inline
         D = eachindex(dest)
         Dy = iterate(D)
         @inbounds @nloops $N j d->I[d] begin
@@ -889,7 +913,7 @@ end
 
 ## setindex! ##
 function _setindex!(l::IndexStyle, A::AbstractArray, x, I::Union{Real, AbstractArray}...)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     _unsafe_setindex!(l, _maybe_reshape(l, A, I...), x, I...)
     A
@@ -970,7 +994,7 @@ function diff(a::AbstractArray{T,N}; dims::Integer) where {T,N}
 end
 function diff(r::AbstractRange{T}; dims::Integer=1) where {T}
     dims == 1 || throw(ArgumentError("dimension $dims out of range (1:1)"))
-    return T[@inbounds r[i+1] - r[i] for i in firstindex(r):lastindex(r)-1]
+    return [@inbounds r[i+1] - r[i] for i in firstindex(r):lastindex(r)-1]
 end
 
 ### from abstractarray.jl
@@ -1097,6 +1121,25 @@ end
 
 Copy the block of `src` in the range of `Rsrc` to the block of `dest`
 in the range of `Rdest`. The sizes of the two regions must match.
+
+# Examples
+```jldoctest
+julia> A = zeros(5, 5);
+
+julia> B = [1 2; 3 4];
+
+julia> Ainds = CartesianIndices((2:3, 2:3));
+
+julia> Binds = CartesianIndices(B);
+
+julia> copyto!(A, Ainds, B, Binds)
+5×5 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  1.0  2.0  0.0  0.0
+ 0.0  3.0  4.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+```
 """
 copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices)
 
@@ -1117,6 +1160,7 @@ See also [`circshift`](@ref).
     dest === src && throw(ArgumentError("dest and src must be separate arrays"))
     inds = axes(src)
     axes(dest) == inds || throw(ArgumentError("indices of src and dest must match (got $inds and $(axes(dest)))"))
+    isempty(src) && return dest
     _circshift!(dest, (), src, (), inds, fill_to_length(shiftamt, 0, Val(N)))
 end
 
@@ -1227,14 +1271,14 @@ end
 
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
-@inline function _unsafe_getindex!(X::BitArray, B::BitArray, I0::Union{UnitRange{Int},Slice})
+@inline function _unsafe_getindex!(X::BitArray, B::BitArray, I0::Union{AbstractUnitRange{Int},Slice})
     copy_chunks!(X.chunks, 1, B.chunks, indexoffset(I0)+1, length(I0))
     return X
 end
 
 # Optimization where the inner dimension is contiguous improves perf dramatically
 @generated function _unsafe_getindex!(X::BitArray, B::BitArray,
-        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Slice}...)
+        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Slice}...)
     N = length(I)
     quote
         $(Expr(:meta, :inline))
@@ -1369,7 +1413,7 @@ end
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
 
-@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,UnitRange{Int}})
+@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,AbstractUnitRange{Int}})
     I0 = to_indices(B, (J0,))[1]
     @boundscheck checkbounds(B, I0)
     l0 = length(I0)
@@ -1381,13 +1425,13 @@ end
 end
 
 @inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...)
+        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
     J = to_indices(B, (I0, I...))
     @boundscheck checkbounds(B, J...)
     _unsafe_setindex!(B, X, J...)
 end
 @generated function _unsafe_setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Slice}...)
+        I0::Union{Slice,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Slice}...)
     N = length(I)
     quote
         idxlens = @ncall $N index_lengths I0 d->I[d]
@@ -1422,12 +1466,12 @@ end
 end
 
 @propagate_inbounds function setindex!(B::BitArray, X::AbstractArray,
-        I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...)
+        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
     _setindex!(IndexStyle(B), B, X, to_indices(B, (I0, I...))...)
 end
 
 ## fill! contiguous views of BitArrays with a single value
-function fill!(V::SubArray{Bool, <:Any, <:BitArray, Tuple{AbstractUnitRange{Int}}}, x)
+function fill!(V::SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}}}, x)
     B = V.parent
     I0 = V.indices[1]
     l0 = length(I0)
@@ -1436,7 +1480,7 @@ function fill!(V::SubArray{Bool, <:Any, <:BitArray, Tuple{AbstractUnitRange{Int}
     return V
 end
 
-fill!(V::SubArray{Bool, <:Any, <:BitArray, Tuple{AbstractUnitRange{Int}, Vararg{Union{Int,AbstractUnitRange{Int}}}}}, x) =
+fill!(V::SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}, Vararg{Union{Int,AbstractUnitRange{Int}}}}}, x) =
     _unsafe_fill_indices!(V.parent, x, V.indices...)
 
 @generated function _unsafe_fill_indices!(B::BitArray, x,
@@ -1527,13 +1571,12 @@ for (V, PT, BT) in Any[((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArra
             #Creates offset, because indexing starts at 1
             offset = 1 - sum(@ntuple $N d->strides_{d+1})
 
+            sumc = 0
             ind = 1
-            @nexprs 1 d->(counts_{$N+1} = strides_{$N+1}) # a trick to set counts_($N+1)
             @nloops($N, i, P,
-                    d->(counts_d = strides_d), # PRE
-                    d->(counts_{d+1} += strides_{d+1}), # POST
+                    d->(sumc += i_d*strides_{d+1}), # PRE
+                    d->(sumc -= i_d*strides_{d+1}), # POST
                     begin # BODY
-                        sumc = sum(@ntuple $N d->counts_{d+1})
                         @inbounds P[ind] = B[sumc+offset]
                         ind += 1
                     end)
@@ -1623,7 +1666,7 @@ _unique_dims(A::AbstractArray, dims::Colon) = invoke(unique, Tuple{Any}, A)
             else
                 j_d = i_d
             end) begin
-                if (@nref $N A j) != (@nref $N A i)
+                if !isequal((@nref $N A j), (@nref $N A i))
                     collided[k] = true
                 end
             end
@@ -1653,7 +1696,7 @@ _unique_dims(A::AbstractArray, dims::Colon) = invoke(unique, Tuple{Any}, A)
                         j_d = i_d
                     end
                 end begin
-                    if (@nref $N A j) != (@nref $N A i)
+                    if !isequal((@nref $N A j), (@nref $N A i))
                         nowcollided[k] = true
                     end
                 end
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index 9361fde52619b..d05ad6e10b544 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -165,7 +165,8 @@ function show(io::IO, t::NamedTuple)
         typeinfo = get(io, :typeinfo, Any)
         print(io, "(")
         for i = 1:n
-            print(io, fieldname(typeof(t),i), " = ")
+            show_sym(io, fieldname(typeof(t), i))
+            print(io, " = ")
             show(IOContext(io, :typeinfo =>
                            t isa typeinfo <: NamedTuple ? fieldtype(typeinfo, i) : Any),
                  getfield(t, i))
diff --git a/base/ntuple.jl b/base/ntuple.jl
index a5608dfa927c3..6f70b49481223 100644
--- a/base/ntuple.jl
+++ b/base/ntuple.jl
@@ -32,22 +32,22 @@ julia> ntuple(i -> 2*i, 4)
 end
 
 function _ntuple(f::F, n) where F
-    @_noinline_meta
+    @noinline
     (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n)))
     ([f(i) for i = 1:n]...,)
 end
 
 function ntupleany(f, n)
-    @_noinline_meta
+    @noinline
     (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n)))
     (Any[f(i) for i = 1:n]...,)
 end
 
 # inferrable ntuple (enough for bootstrapping)
 ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@_inline_meta; (f(1),))
-ntuple(f, ::Val{2}) = (@_inline_meta; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@_inline_meta; (f(1), f(2), f(3)))
+ntuple(f, ::Val{1}) = (@inline; (f(1),))
+ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
+ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
 
 """
     ntuple(f, ::Val{N})
diff --git a/base/number.jl b/base/number.jl
index 852c8b715af1d..d3bf14d566250 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -94,12 +94,12 @@ keys(::Number) = OneTo(1)
 
 getindex(x::Number) = x
 function getindex(x::Number, i::Integer)
-    @_inline_meta
+    @inline
     @boundscheck i == 1 || throw(BoundsError())
     x
 end
 function getindex(x::Number, I::Integer...)
-    @_inline_meta
+    @inline
     @boundscheck all(isone, I) || throw(BoundsError())
     x
 end
diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl
index d14e72db57213..3720a1ef2043c 100644
--- a/base/opaque_closure.jl
+++ b/base/opaque_closure.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 """
     @opaque (args...) -> body
 
diff --git a/base/operators.jl b/base/operators.jl
index 8c1a024dc48e0..5bfa7387ec9b7 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -137,7 +137,7 @@ julia> isequal(missing, missing)
 true
 ```
 """
-isequal(x, y) = x == y
+isequal(x, y) = (x == y)::Bool # all `missing` cases are handled in missing.jl
 
 signequal(x, y) = signbit(x)::Bool == signbit(y)::Bool
 signless(x, y) = signbit(x)::Bool & !signbit(y)::Bool
@@ -429,22 +429,6 @@ const ≥ = >=
 # which is more idiomatic:
 isless(x::Real, y::Real) = x<y
 
-"""
-    ifelse(condition::Bool, x, y)
-
-Return `x` if `condition` is `true`, otherwise return `y`. This differs from `?` or `if` in
-that it is an ordinary function, so all the arguments are evaluated first. In some cases,
-using `ifelse` instead of an `if` statement can eliminate the branch in generated code and
-provide higher performance in tight loops.
-
-# Examples
-```jldoctest
-julia> ifelse(1 > 2, 1, 2)
-2
-```
-"""
-ifelse
-
 """
     cmp(x,y)
 
@@ -652,7 +636,7 @@ for op in (:+, :*, :&, :|, :xor, :min, :max, :kron)
         # note: these definitions must not cause a dispatch loop when +(a,b) is
         # not defined, and must only try to call 2-argument definitions, so
         # that defining +(a,b) is sufficient for full functionality.
-        ($op)(a, b, c, xs...) = afoldl($op, ($op)(($op)(a,b),c), xs...)
+        ($op)(a, b, c, xs...) = (@inline; afoldl($op, ($op)(($op)(a,b),c), xs...))
         # a further concern is that it's easy for a type like (Int,Int...)
         # to match many definitions, so we need to keep the number of
         # definitions down to avoid losing type information.
@@ -717,13 +701,13 @@ julia> bitstring(Int8(12))
 See also [`>>`](@ref), [`>>>`](@ref), [`exp2`](@ref), [`ldexp`](@ref).
 """
 function <<(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     typemin(Int) <= c <= typemax(Int) && return x << (c % Int)
     (x >= 0 || c >= 0) && return zero(x) << 0  # for type stability
     oftype(x, -1)
 end
 function <<(x::Integer, c::Unsigned)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(<<, (x, c)))
     end
@@ -762,7 +746,7 @@ julia> bitstring(Int8(-4))
 See also [`>>>`](@ref), [`<<`](@ref).
 """
 function >>(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(>>, (x, c)))
     end
@@ -800,11 +784,11 @@ is equivalent to [`>>`](@ref).
 See also [`>>`](@ref), [`<<`](@ref).
 """
 function >>>(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     typemin(Int) <= c <= typemax(Int) ? x >>> (c % Int) : zero(x) >>> 0
 end
 function >>>(x::Integer, c::Unsigned)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(>>>, (x, c)))
     end
@@ -995,7 +979,7 @@ julia> f.value
 ```
 
 !!! compat "Julia 1.7"
-    Returns requires at least Julia 1.7.
+    `Returns` requires at least Julia 1.7.
 """
 struct Returns{V} <: Function
     value::V
diff --git a/base/options.jl b/base/options.jl
index 16dc884e8651e..2af8337673b93 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -47,6 +47,8 @@ struct JLOptions
     warn_scope::Int8
     image_codegen::Int8
     rr_detach::Int8
+    strip_metadata::Int8
+    strip_ir::Int8
 end
 
 # This runs early in the sysimage != is not defined yet
diff --git a/base/ordering.jl b/base/ordering.jl
index 65ea7566cac55..e49102159c962 100644
--- a/base/ordering.jl
+++ b/base/ordering.jl
@@ -6,7 +6,7 @@ module Order
 import ..@__MODULE__, ..parentmodule
 const Base = parentmodule(@__MODULE__)
 import .Base:
-    AbstractVector, @propagate_inbounds, isless, identity, getindex,
+    AbstractVector, @propagate_inbounds, isless, identity, getindex, reverse,
     +, -, !, &, <, |
 
 ## notions of element ordering ##
@@ -46,6 +46,14 @@ ReverseOrdering(rev::ReverseOrdering) = rev.fwd
 ReverseOrdering(fwd::Fwd) where {Fwd} = ReverseOrdering{Fwd}(fwd)
 ReverseOrdering() = ReverseOrdering(ForwardOrdering())
 
+"""
+    reverse(o::Base.Ordering)
+
+Reverse the ordering specified by `o`.
+
+"""
+reverse(o::Ordering) = ReverseOrdering(o)
+
 const DirectOrdering = Union{ForwardOrdering,ReverseOrdering{ForwardOrdering}}
 
 """
diff --git a/base/pair.jl b/base/pair.jl
index 7481d50b7458b..b5dffbb4e7e86 100644
--- a/base/pair.jl
+++ b/base/pair.jl
@@ -1,18 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct Pair{A, B}
-    first::A
-    second::B
-    function Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B}
-        @_inline_meta
-        # if we didn't inline this, it's probably because the callsite was actually dynamic
-        # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
-        # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
-        # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
-        return new(a, b)
-    end
-end
-Pair(a, b) = Pair{typeof(a), typeof(b)}(a, b)
 const => = Pair
 
 """
diff --git a/base/parse.jl b/base/parse.jl
index 1097e8a19b804..1c911c96e1479 100644
--- a/base/parse.jl
+++ b/base/parse.jl
@@ -194,10 +194,10 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}},
     orig_end   = endpos
 
     # Ignore leading and trailing whitespace
-    while isspace(sbuff[startpos]) && startpos <= endpos
+    while startpos <= endpos && isspace(sbuff[startpos])
         startpos = nextind(sbuff, startpos)
     end
-    while isspace(sbuff[endpos]) && endpos >= startpos
+    while endpos >= startpos && isspace(sbuff[endpos])
         endpos = prevind(sbuff, endpos)
     end
 
diff --git a/base/path.jl b/base/path.jl
index 013ed5db26a5a..454fe5bd65d32 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -36,7 +36,7 @@ elseif Sys.iswindows()
 
     function splitdrive(path::String)
         m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"s, path)
-        String(m.captures[1]), String(m.captures[2])
+        String(something(m.captures[1])), String(something(m.captures[2]))
     end
 else
     error("path primitives for this OS need to be defined")
@@ -208,7 +208,7 @@ function splitext(path::String)
     a, b = splitdrive(path)
     m = match(path_ext_splitter, b)
     m === nothing && return (path,"")
-    a*m.captures[1], String(m.captures[2])
+    (a*something(m.captures[1])), String(something(m.captures[2]))
 end
 
 # NOTE: deprecated in 1.4
@@ -356,20 +356,24 @@ joinpath
 """
     normpath(path::AbstractString) -> String
 
-Normalize a path, removing "." and ".." entries.
+Normalize a path, removing "." and ".." entries and changing "/" to the canonical path separator
+for the system.
 
 # Examples
 ```jldoctest
 julia> normpath("/home/myuser/../example.jl")
 "/home/example.jl"
+
+julia> normpath("Documents/Julia") == joinpath("Documents", "Julia")
+true
 ```
 """
 function normpath(path::String)
     isabs = isabspath(path)
     isdir = isdirpath(path)
     drive, path = splitdrive(path)
-    parts = split(path, path_separator_re)
-    filter!(x->!isempty(x) && x!=".", parts)
+    parts = split(path, path_separator_re; keepempty=false)
+    filter!(!=("."), parts)
     while true
         clean = true
         for j = 1:length(parts)-1
diff --git a/base/pointer.jl b/base/pointer.jl
index b315e589ffd9a..b9475724f7637 100644
--- a/base/pointer.jl
+++ b/base/pointer.jl
@@ -142,7 +142,7 @@ stable memory addresses.
 See also [`unsafe_pointer_to_objref`](@ref).
 """
 function pointer_from_objref(@nospecialize(x))
-    @_inline_meta
+    @inline
     ismutable(x) || error("pointer_from_objref cannot be used on immutable objects")
     ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), x)
 end
diff --git a/base/process.jl b/base/process.jl
index b3ec79fa1ab4e..10c173e82b34c 100644
--- a/base/process.jl
+++ b/base/process.jl
@@ -74,27 +74,29 @@ const SpawnIOs = Vector{Any} # convenience name for readability
 # handle marshalling of `Cmd` arguments from Julia to C
 @noinline function _spawn_primitive(file, cmd::Cmd, stdio::SpawnIOs)
     loop = eventloop()
-    iohandles = Tuple{Cint, UInt}[ # assuming little-endian layout
-        let h = rawhandle(io)
-            h === C_NULL     ? (0x00, UInt(0)) :
-            h isa OS_HANDLE  ? (0x02, UInt(cconvert(@static(Sys.iswindows() ? Ptr{Cvoid} : Cint), h))) :
-            h isa Ptr{Cvoid} ? (0x04, UInt(h)) :
-            error("invalid spawn handle $h from $io")
-        end
-        for io in stdio]
-    handle = Libc.malloc(_sizeof_uv_process)
-    disassociate_julia_struct(handle) # ensure that data field is set to C_NULL
-    (; exec, flags, env, dir) = cmd
-    err = ccall(:jl_spawn, Int32,
-              (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
-               Ptr{Tuple{Cint, UInt}}, Int,
-               UInt32, Ptr{Cstring}, Cstring, Ptr{Cvoid}),
-        file, exec, loop, handle,
-        iohandles, length(iohandles),
-        flags,
-        env === nothing ? C_NULL : env,
-        isempty(dir) ? C_NULL : dir,
-        @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
+    GC.@preserve stdio begin
+        iohandles = Tuple{Cint, UInt}[ # assuming little-endian layout
+            let h = rawhandle(io)
+                h === C_NULL     ? (0x00, UInt(0)) :
+                h isa OS_HANDLE  ? (0x02, UInt(cconvert(@static(Sys.iswindows() ? Ptr{Cvoid} : Cint), h))) :
+                h isa Ptr{Cvoid} ? (0x04, UInt(h)) :
+                error("invalid spawn handle $h from $io")
+            end
+            for io in stdio]
+        handle = Libc.malloc(_sizeof_uv_process)
+        disassociate_julia_struct(handle) # ensure that data field is set to C_NULL
+        (; exec, flags, env, dir) = cmd
+        err = ccall(:jl_spawn, Int32,
+                  (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
+                   Ptr{Tuple{Cint, UInt}}, Int,
+                   UInt32, Ptr{Cstring}, Cstring, Ptr{Cvoid}),
+            file, exec, loop, handle,
+            iohandles, length(iohandles),
+            flags,
+            env === nothing ? C_NULL : env,
+            isempty(dir) ? C_NULL : dir,
+            @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
+    end
     if err != 0
         ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually
         throw(_UVError("could not spawn " * repr(cmd), err))
@@ -210,10 +212,10 @@ function setup_stdio(stdio::PipeEndpoint, child_readable::Bool)
         rd, wr = link_pipe(!child_readable, child_readable)
         try
             open_pipe!(stdio, child_readable ? wr : rd)
-        catch ex
+        catch
             close_pipe_sync(rd)
             close_pipe_sync(wr)
-            rethrow(ex)
+            rethrow()
         end
         child = child_readable ? rd : wr
         return (child, true)
@@ -252,18 +254,19 @@ function setup_stdio(stdio::FileRedirect, child_readable::Bool)
     return (io, true)
 end
 
-# incrementally move data between an IOBuffer and a system Pipe
+# incrementally move data between an arbitrary IO and a system Pipe,
+# including copying the EOF (shutdown) when finished
 # TODO: probably more efficient (when valid) to use `stdio` directly as the
 #       PipeEndpoint buffer field in some cases
-function setup_stdio(stdio::Union{IOBuffer, BufferStream}, child_readable::Bool)
+function setup_stdio(stdio::IO, child_readable::Bool)
     parent = PipeEndpoint()
     rd, wr = link_pipe(!child_readable, child_readable)
     try
         open_pipe!(parent, child_readable ? wr : rd)
-    catch ex
+    catch
         close_pipe_sync(rd)
         close_pipe_sync(wr)
-        rethrow(ex)
+        rethrow()
     end
     child = child_readable ? rd : wr
     try
@@ -272,24 +275,19 @@ function setup_stdio(stdio::Union{IOBuffer, BufferStream}, child_readable::Bool)
             @async try
                 write(in, out)
             catch ex
-                @warn "Process error" exception=(ex, catch_backtrace())
+                @warn "Process I/O error" exception=(ex, catch_backtrace())
             finally
                 close(parent)
+                child_readable || closewrite(stdio)
             end
         end
-    catch ex
+    catch
         close_pipe_sync(child)
-        rethrow(ex)
+        rethrow()
     end
     return (child, true)
 end
 
-function setup_stdio(io, child_readable::Bool)
-    # if there is no specialization,
-    # assume that rawhandle is defined for it
-    return (io, false)
-end
-
 close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio)
 close_stdio(stdio) = close(stdio)
 
diff --git a/base/promotion.jl b/base/promotion.jl
index 6d216876ce047..845e16ca499d3 100644
--- a/base/promotion.jl
+++ b/base/promotion.jl
@@ -161,6 +161,50 @@ function promote_typejoin(@nospecialize(a), @nospecialize(b))
 end
 _promote_typesubtract(@nospecialize(a)) = typesplit(a, Union{Nothing, Missing})
 
+# Apply `promote_typejoin` across the components of a `Union`, recursing into
+# nested unions and into `Tuple` parameters; other `DataType`s come back as-is.
+function promote_typejoin_union(::Type{T}) where T
+    T === Union{} && return Union{}
+    T isa UnionAll && return Any # TODO: compute more precise bounds
+    if T isa Union
+        # join the recursively processed halves of the union
+        lhs = promote_typejoin_union(T.a)
+        rhs = promote_typejoin_union(T.b)
+        return promote_typejoin(lhs, rhs)
+    end
+    T isa DataType || error("unreachable") # not a type??
+    return T <: Tuple ? typejoin_union_tuple(T) : T
+end
+
+# Rebuild the tuple type `T` parameter by parameter: a `Union` parameter is
+# replaced by the `typejoin` of its two halves, anything else is passed through
+# `promote_typejoin_union`. Bails out with `Any` on a `UnionAll` parameter.
+function typejoin_union_tuple(T::DataType)
+    @_pure_meta
+    params = (Base.unwrap_unionall(T)::DataType).parameters
+    n = length(params)::Int
+    n == 0 && return Tuple{}
+    fields = Vector{Any}(undef, n)
+    for i = 1:n
+        param = params[i]
+        elt = Core.Compiler.unwrapva(param)
+        if elt === Union{}
+            joined = Union{}
+        elseif elt isa Union
+            joined = typejoin(elt.a, elt.b)
+        elseif elt isa UnionAll
+            return Any # TODO: compute more precise bounds
+        else
+            joined = promote_typejoin_union(elt)
+        end
+        if i == n && Core.Compiler.isvarargtype(param)
+            fields[i] = isdefined(param, :N) ? Vararg{joined, param.N} : Vararg{joined}
+        else
+            fields[i] = joined
+        end
+    end
+    return Base.rewrap_unionall(Tuple{fields...}, T)
+end
 
 # Returns length, isfixed
 function full_va_len(p)
@@ -233,7 +277,7 @@ function promote_type end
 
 promote_type()  = Bottom
 promote_type(T) = T
-promote_type(T, S, U, V...) = (@_inline_meta; promote_type(T, promote_type(S, U, V...)))
+promote_type(T, S, U, V...) = (@inline; promote_type(T, promote_type(S, U, V...)))
 
 promote_type(::Type{Bottom}, ::Type{Bottom}) = Bottom
 promote_type(::Type{T}, ::Type{T}) where {T} = T
@@ -241,7 +285,7 @@ promote_type(::Type{T}, ::Type{Bottom}) where {T} = T
 promote_type(::Type{Bottom}, ::Type{T}) where {T} = T
 
 function promote_type(::Type{T}, ::Type{S}) where {T,S}
-    @_inline_meta
+    @inline
     # Try promote_rule in both orders. Typically only one is defined,
     # and there is a fallback returning Bottom below, so the common case is
     #   promote_type(T, S) =>
@@ -261,10 +305,10 @@ function promote_rule end
 
 promote_rule(::Type{<:Any}, ::Type{<:Any}) = Bottom
 
-promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@_inline_meta; promote_type(T,S))
+promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@inline; promote_type(T,S))
 # If no promote_rule is defined, both directions give Bottom. In that
 # case use typejoin on the original types instead.
-promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = (@_inline_meta; typejoin(T, S))
+promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = (@inline; typejoin(T, S))
 
 """
     promote(xs...)
@@ -283,19 +327,19 @@ julia> promote(Int8(1), Float16(4.5), Float32(4.1))
 function promote end
 
 function _promote(x::T, y::S) where {T,S}
-    @_inline_meta
+    @inline
     R = promote_type(T, S)
     return (convert(R, x), convert(R, y))
 end
 promote_typeof(x) = typeof(x)
-promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
+promote_typeof(x, xs...) = (@inline; promote_type(typeof(x), promote_typeof(xs...)))
 function _promote(x, y, z)
-    @_inline_meta
+    @inline
     R = promote_typeof(x, y, z)
     return (convert(R, x), convert(R, y), convert(R, z))
 end
 function _promote(x, y, zs...)
-    @_inline_meta
+    @inline
     R = promote_typeof(x, y, zs...)
     return (convert(R, x), convert(R, y), convert(Tuple{Vararg{R}}, zs)...)
 end
@@ -307,13 +351,13 @@ promote() = ()
 promote(x) = (x,)
 
 function promote(x, y)
-    @_inline_meta
+    @inline
     px, py = _promote(x, y)
     not_sametype((x,y), (px,py))
     px, py
 end
 function promote(x, y, z)
-    @_inline_meta
+    @inline
     px, py, pz = _promote(x, y, z)
     not_sametype((x,y,z), (px,py,pz))
     px, py, pz
@@ -331,7 +375,7 @@ not_sametype(x::T, y::T) where {T} = sametype_error(x)
 not_sametype(x, y) = nothing
 
 function sametype_error(input)
-    @_noinline_meta
+    @noinline
     error("promotion of types ",
           join(map(x->string(typeof(x)), input), ", ", " and "),
           " failed to change any arguments")
diff --git a/base/range.jl b/base/range.jl
index 355ee2d80bce2..46261755098cb 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -24,9 +24,9 @@
 _colon(::Ordered, ::Any, start::T, step, stop::T) where {T} = StepRange(start, step, stop)
 # for T<:Union{Float16,Float32,Float64} see twiceprecision.jl
 _colon(::Ordered, ::ArithmeticRounds, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Int, (stop-start)/step)+1)
+    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
 _colon(::Any, ::Any, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Int, (stop-start)/step)+1)
+    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
 
 """
     (:)(start, [step], stop)
@@ -58,6 +58,9 @@ Valid invocations of range are:
 * Call `range` with any three of `start`, `step`, `stop`, `length`.
 * Call `range` with two of `start`, `stop`, `length`. In this case `step` will be assumed
   to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
+* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be one.
+
+See Extended Help for additional details on the returned type.
 
 # Examples
 ```jldoctest
@@ -87,6 +90,15 @@ julia> range(stop=10, step=1, length=5)
 
 julia> range(start=1, step=1, stop=10)
 1:1:10
+
+julia> range(; length = 10)
+Base.OneTo(10)
+
+julia> range(; stop = 6)
+Base.OneTo(6)
+
+julia> range(; stop = 6.5)
+1.0:1.0:6.0
 ```
 If `length` is not specified and `stop - start` is not an integer multiple of `step`, a range that ends before `stop` will be produced.
 ```jldoctest
@@ -103,6 +115,23 @@ To avoid this induced overhead, see the [`LinRange`](@ref) constructor.
 !!! compat "Julia 1.7"
     The versions without keyword arguments and `start` as a keyword argument
     require at least Julia 1.7.
+
+!!! compat "Julia 1.8"
+    The versions with `stop` as a sole keyword argument,
+    or `length` as a sole keyword argument require at least Julia 1.8.
+
+
+# Extended Help
+
+`range` will produce a `Base.OneTo` when the arguments are Integers and
+* Only `length` is provided
+* Only `stop` is provided
+
+`range` will produce a `UnitRange` when the arguments are Integers and
+* Only `start`  and `stop` are provided
+* Only `length` and `stop` are provided
+
+A `UnitRange` is not produced if `step` is provided, even if it is specified as one.
 """
 function range end
 
@@ -115,8 +144,8 @@ range(;start=nothing, stop=nothing, length::Union{Integer, Nothing}=nothing, ste
     _range(start, step, stop, length)
 
 _range(start::Nothing, step::Nothing, stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
-_range(start::Nothing, step::Nothing, stop::Nothing, len::Any    ) = range_error(start, step, stop, len)
-_range(start::Nothing, step::Nothing, stop::Any    , len::Nothing) = range_error(start, step, stop, len)
+_range(start::Nothing, step::Nothing, stop::Nothing, len::Any    ) = range_length(len)
+_range(start::Nothing, step::Nothing, stop::Any    , len::Nothing) = range_stop(stop)
 _range(start::Nothing, step::Nothing, stop::Any    , len::Any    ) = range_stop_length(stop, len)
 _range(start::Nothing, step::Any    , stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
 _range(start::Nothing, step::Any    , stop::Nothing, len::Any    ) = range_error(start, step, stop, len)
@@ -131,6 +160,14 @@ _range(start::Any    , step::Any    , stop::Nothing, len::Any    ) = range_start
 _range(start::Any    , step::Any    , stop::Any    , len::Nothing) = range_start_step_stop(start, step, stop)
 _range(start::Any    , step::Any    , stop::Any    , len::Any    ) = range_error(start, step, stop, len)
 
+# Length as the only argument: the range is `OneTo(len)`
+range_length(len::Integer) = OneTo(len)
+
+# Stop as the only argument: start at `oneunit(stop)`; an Integer stop yields a `OneTo`
+range_stop(stop) = range_start_stop(oneunit(stop), stop)
+range_stop(stop::Integer) = range_length(stop)
+
+# Stop and length as the only arguments
 range_stop_length(a::Real,          len::Integer) = UnitRange{typeof(a)}(oftype(a, a-len+1), a)
 range_stop_length(a::AbstractFloat, len::Integer) = range_step_stop_length(oftype(a, 1), a, len)
 range_stop_length(a,                len::Integer) = range_step_stop_length(oftype(a-a, 1), a, len)
@@ -323,8 +360,8 @@ function steprange_last_empty(start::Integer, step, stop)
     end
     last
 end
-# For types where x+oneunit(x) may not be well-defined
-steprange_last_empty(start, step, stop) = start - step
+# For types where x+oneunit(x) may not be well-defined use the user-given value for stop
+steprange_last_empty(start, step, stop) = stop
 
 StepRange{T}(start, step::S, stop) where {T,S} = StepRange{T,S}(start, step, stop)
 StepRange(start::T, step::S, stop::T) where {T,S} = StepRange{T,S}(start, step, stop)
@@ -357,17 +394,17 @@ UnitRange(start::T, stop::T) where {T<:Real} = UnitRange{T}(start, stop)
 
 unitrange_last(::Bool, stop::Bool) = stop
 unitrange_last(start::T, stop::T) where {T<:Integer} =
-    ifelse(stop >= start, stop, convert(T,start-oneunit(stop-start)))
+    stop >= start ? stop : convert(T,start-oneunit(start-stop))
 unitrange_last(start::T, stop::T) where {T} =
-    ifelse(stop >= start, convert(T,start+floor(stop-start)),
-                          convert(T,start-oneunit(stop-start)))
+    stop >= start ? convert(T,start+floor(stop-start)) :
+                    convert(T,start-oneunit(stop-start))
 
 unitrange(x) = UnitRange(x)
 
 if isdefined(Main, :Base)
     # Constant-fold-able indexing into tuples to functionally expose Base.tail and Base.front
     function getindex(@nospecialize(t::Tuple), r::AbstractUnitRange)
-        @_inline_meta
+        @inline
         require_one_based_indexing(r)
         if length(r) <= 10
             return ntuple(i -> t[i + first(r) - 1], length(r))
@@ -393,15 +430,15 @@ be 1.
 struct OneTo{T<:Integer} <: AbstractUnitRange{T}
     stop::T
     function OneTo{T}(stop) where {T<:Integer}
-        throwbool(r)  = (@_noinline_meta; throw(ArgumentError("invalid index: $r of type Bool")))
+        throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
         T === Bool && throwbool(stop)
         return new(max(zero(T), stop))
     end
 
     function OneTo{T}(r::AbstractRange) where {T<:Integer}
-        throwstart(r) = (@_noinline_meta; throw(ArgumentError("first element must be 1, got $(first(r))")))
-        throwstep(r)  = (@_noinline_meta; throw(ArgumentError("step must be 1, got $(step(r))")))
-        throwbool(r)  = (@_noinline_meta; throw(ArgumentError("invalid index: $r of type Bool")))
+        throwstart(r) = (@noinline; throw(ArgumentError("first element must be 1, got $(first(r))")))
+        throwstep(r)  = (@noinline; throw(ArgumentError("step must be 1, got $(step(r))")))
+        throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
         first(r) == 1 || throwstart(r)
         step(r)  == 1 || throwstep(r)
         T === Bool && throwbool(r)
@@ -415,10 +452,11 @@ oneto(r) = OneTo(r)
 ## Step ranges parameterized by length
 
 """
-    StepRangeLen{T,R,S}(ref::R, step::S, len, [offset=1]) where {T,R,S}
-    StepRangeLen(       ref::R, step::S, len, [offset=1]) where {  R,S}
+    StepRangeLen(         ref::R, step::S, len, [offset=1]) where {  R,S}
+    StepRangeLen{T,R,S}(  ref::R, step::S, len, [offset=1]) where {T,R,S}
+    StepRangeLen{T,R,S,L}(ref::R, step::S, len, [offset=1]) where {T,R,S,L}
 
-A range `r` where `r[i]` produces values of type `T` (in the second
+A range `r` where `r[i]` produces values of type `T` (in the first
 form, `T` is deduced automatically), parameterized by a `ref`erence
 value, a `step`, and the `len`gth. By default `ref` is the starting
 value `r[1]`, but alternatively you can supply it as the value of
@@ -426,40 +464,45 @@ value `r[1]`, but alternatively you can supply it as the value of
 with `TwicePrecision` this can be used to implement ranges that are
 free of roundoff error.
 """
-struct StepRangeLen{T,R,S} <: AbstractRange{T}
+struct StepRangeLen{T,R,S,L<:Integer} <: AbstractRange{T}
     ref::R       # reference value (might be smallest-magnitude value in the range)
     step::S      # step value
-    len::Int     # length of the range
-    offset::Int  # the index of ref
+    len::L       # length of the range
+    offset::L    # the index of ref
 
-    function StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S}
+    function StepRangeLen{T,R,S,L}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S,L}
         if T <: Integer && !isinteger(ref + step)
             throw(ArgumentError("StepRangeLen{<:Integer} cannot have non-integer step"))
         end
-        len >= 0 || throw(ArgumentError("length cannot be negative, got $len"))
-        1 <= offset <= max(1,len) || throw(ArgumentError("StepRangeLen: offset must be in [1,$len], got $offset"))
-        new(ref, step, len, offset)
+        len = convert(L, len)
+        len >= zero(len) || throw(ArgumentError("length cannot be negative, got $len"))
+        offset = convert(L, offset)
+        L1 = oneunit(typeof(len))
+        L1 <= offset <= max(L1, len) || throw(ArgumentError("StepRangeLen: offset must be in [1,$len], got $offset"))
+        return new(ref, step, len, offset)
     end
 end
 
+StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
+    StepRangeLen{T,R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 StepRangeLen(ref::R, step::S, len::Integer, offset::Integer = 1) where {R,S} =
-    StepRangeLen{typeof(ref+zero(step)),R,S}(ref, step, len, offset)
+    StepRangeLen{typeof(ref+zero(step)),R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 StepRangeLen{T}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
-    StepRangeLen{T,R,S}(ref, step, len, offset)
+    StepRangeLen{T,R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 
 ## range with computed step
 
 """
-    LinRange{T}
+    LinRange{T,L}
 
 A range with `len` linearly spaced elements between its `start` and `stop`.
 The size of the spacing is controlled by `len`, which must
-be an `Int`.
+be an `Integer`.
 
 # Examples
 ```jldoctest
 julia> LinRange(1.5, 5.5, 9)
-9-element LinRange{Float64}:
+9-element LinRange{Float64, Int64}:
  1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5
 ```
 
@@ -483,26 +526,35 @@ julia> collect(LinRange(-0.1, 0.3, 5))
   0.3
 ```
 """
-struct LinRange{T} <: AbstractRange{T}
+struct LinRange{T,L<:Integer} <: AbstractRange{T}
     start::T
     stop::T
-    len::Int
-    lendiv::Int
+    len::L
+    lendiv::L
 
-    function LinRange{T}(start,stop,len) where T
+    function LinRange{T,L}(start::T, stop::T, len::L) where {T,L<:Integer}
         len >= 0 || throw(ArgumentError("range($start, stop=$stop, length=$len): negative length"))
-        if len == 1
+        onelen = oneunit(typeof(len))
+        if len == onelen
             start == stop || throw(ArgumentError("range($start, stop=$stop, length=$len): endpoints differ"))
-            return new(start, stop, 1, 1)
+            return new(start, stop, len, len)
         end
-        lendiv = max(len-1, 1)
+        lendiv = max(len - onelen, onelen)
         if T <: Integer && !iszero(mod(stop-start, lendiv))
             throw(ArgumentError("LinRange{<:Integer} cannot have non-integer step"))
         end
-        new(start,stop,len,lendiv)
+        return new(start, stop, len, lendiv)
     end
 end
 
+# Convert the endpoints to `T` and the length to `L`, then defer to the inner constructor.
+LinRange{T,L}(start, stop, len::Integer) where {T,L} =
+    LinRange{T,L}(convert(T, start), convert(T, stop), convert(L, len))
+
+# Without an explicit `L`, the length type is the promotion of `Int` and `typeof(len)`.
+LinRange{T}(start, stop, len::Integer) where T =
+    LinRange{T,promote_type(Int,typeof(len))}(start, stop, len)
+
 function LinRange(start, stop, len::Integer)
     T = typeof((stop-start)/len)
     LinRange{T}(start, stop, len)
@@ -510,7 +562,7 @@ end
 
 range_start_stop_length(start, stop, len::Integer) =
     range_start_stop_length(promote(start, stop)..., len)
-range_start_stop_length(start::T, stop::T, len::Integer) where {T} = LinRange{T}(start, stop, len)
+range_start_stop_length(start::T, stop::T, len::Integer) where {T} = LinRange(start, stop, len)
 range_start_stop_length(start::T, stop::T, len::Integer) where {T<:Integer} =
     _linspace(float(T), start, stop, len)
 ## for Float16, Float32, and Float64 we hit twiceprecision.jl to lift to higher precision StepRangeLen
@@ -563,22 +615,23 @@ function print_range(io::IO, r::AbstractRange,
     maxpossiblecols = div(screenwidth, 1+sepsize) # assume each element is at least 1 char + 1 separator
     colsr = n <= maxpossiblecols ? (1:n) : [1:div(maxpossiblecols,2)+1; (n-div(maxpossiblecols,2)):n]
     rowmatrix = reshape(r[colsr], 1, length(colsr)) # treat the range as a one-row matrix for print_matrix_row
-    A = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), screenwidth, screenwidth, sepsize) # how much space range takes
+    nrow, idxlast = size(rowmatrix, 2), last(axes(rowmatrix, 2))
+    A = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), screenwidth, screenwidth, sepsize, nrow) # how much space range takes
     if n <= length(A) # cols fit screen, so print out all elements
         print(io, pre) # put in pre chars
-        print_matrix_row(io,rowmatrix,A,1,1:n,sep) # the entire range
+        print_matrix_row(io,rowmatrix,A,1,1:n,sep,idxlast) # the entire range
         print(io, post) # add the post characters
     else # cols don't fit so put horiz ellipsis in the middle
         # how many chars left after dividing width of screen in half
         # and accounting for the horiz ellipsis
         c = div(screenwidth-length(hdots)+1,2)+1 # chars remaining for each side of rowmatrix
-        alignR = reverse(alignment(io, rowmatrix, 1:m, length(rowmatrix):-1:1, c, c, sepsize)) # which cols of rowmatrix to put on the right
+        alignR = reverse(alignment(io, rowmatrix, 1:m, length(rowmatrix):-1:1, c, c, sepsize, nrow)) # which cols of rowmatrix to put on the right
         c = screenwidth - sum(map(sum,alignR)) - (length(alignR)-1)*sepsize - length(hdots)
-        alignL = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), c, c, sepsize) # which cols of rowmatrix to put on the left
+        alignL = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), c, c, sepsize, nrow) # which cols of rowmatrix to put on the left
         print(io, pre)   # put in pre chars
-        print_matrix_row(io, rowmatrix,alignL,1,1:length(alignL),sep) # left part of range
+        print_matrix_row(io, rowmatrix,alignL,1,1:length(alignL),sep,idxlast) # left part of range
         print(io, hdots) # horizontal ellipsis
-        print_matrix_row(io, rowmatrix,alignR,1,length(rowmatrix)-length(alignR)+1:length(rowmatrix),sep) # right part of range
+        print_matrix_row(io, rowmatrix,alignR,1,length(rowmatrix)-length(alignR)+1:length(rowmatrix),sep,idxlast) # right part of range
         print(io, post)  # post chars
     end
 end
@@ -621,6 +674,7 @@ step(r::StepRangeLen) = r.step
 step(r::StepRangeLen{T}) where {T<:AbstractFloat} = T(r.step)
 step(r::LinRange) = (last(r)-first(r))/r.lendiv
 
+# high-precision step
 step_hp(r::StepRangeLen) = r.step
 step_hp(r::AbstractRange) = step(r)
 
@@ -638,7 +692,7 @@ function checked_length(r::OrdinalRange{T}) where T
     # s != 0, by construction, but avoids the division error later
     start = first(r)
     if s == zero(s) || isempty(r)
-        return Integer(start - start + zero(s))
+        return Integer(div(start - start, oneunit(s)))
     end
     stop = last(r)
     if isless(s, zero(s))
@@ -647,8 +701,8 @@ function checked_length(r::OrdinalRange{T}) where T
     else
         diff = checked_sub(stop, start)
     end
-    a = Integer(div(diff, s))
-    return checked_add(a, one(a))
+    a = div(diff, s)
+    return Integer(checked_add(a, oneunit(a)))
 end
 
 function checked_length(r::AbstractUnitRange{T}) where T
@@ -656,8 +710,8 @@ function checked_length(r::AbstractUnitRange{T}) where T
     if isempty(r)
         return Integer(first(r) - first(r))
     end
-    a = Integer(checked_add(checked_sub(last(r), first(r))))
-    return checked_add(a, one(a))
+    a = checked_sub(last(r), first(r))
+    return Integer(checked_add(a, oneunit(a)))
 end
 
 function length(r::OrdinalRange{T}) where T
@@ -665,7 +719,7 @@ function length(r::OrdinalRange{T}) where T
     # s != 0, by construction, but avoids the division error later
     start = first(r)
     if s == zero(s) || isempty(r)
-        return Integer(start - start + zero(s))
+        return Integer(div(start - start, oneunit(s)))
     end
     stop = last(r)
     if isless(s, zero(s))
@@ -674,15 +728,20 @@ function length(r::OrdinalRange{T}) where T
     else
         diff = stop - start
     end
-    a = Integer(div(diff, s))
-    return a + one(a)
+    a = div(diff, s)
+    return Integer(a + oneunit(a))
 end
 
-
 function length(r::AbstractUnitRange{T}) where T
-    @_inline_meta
-    a = Integer(last(r) - first(r)) # even when isempty, by construction (with overflow)
-    return a + one(a)
+    @inline
+    start, stop = first(r), last(r)
+    a = oneunit(zero(stop) - zero(start))
+    if a isa Signed || stop >= start
+        a += stop - start # Signed are allowed to go negative
+    else
+        a = zero(a) # Unsigned don't necessarily underflow
+    end
+    return Integer(a)
 end
 
 length(r::OneTo) = Integer(r.stop - zero(r.stop))
@@ -710,7 +769,7 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}
         else
             a = div(unsigned(diff), s) % typeof(diff)
         end
-        return Integer(a) + one(a)
+        return Integer(a) + oneunit(a)
     end
     function checked_length(r::OrdinalRange{T}) where T<:bigints
         s = step(r)
@@ -729,7 +788,7 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}
         else
             a = div(checked_sub(start, stop), -s)
         end
-        return checked_add(a, one(a))
+        return checked_add(a, oneunit(a))
     end
 end
 
@@ -739,7 +798,12 @@ let smallints = (Int === Int64 ?
                 Union{Int8, UInt8, Int16, UInt16})
     global length, checked_length
     # n.b. !(step isa T)
-    length(r::OrdinalRange{<:smallints}) = div(Int(last(r)) - Int(first(r)), step(r)) + 1
+    function length(r::OrdinalRange{<:smallints})
+        stp = step(r)
+        # a zero step is unreachable by construction; the guard only avoids a division error below
+        (stp == zero(stp) || isempty(r)) && return 0
+        return div(Int(last(r)) - Int(first(r)), stp) + 1
+    end
     length(r::AbstractUnitRange{<:smallints}) = Int(last(r)) - Int(first(r)) + 1
     length(r::OneTo{<:smallints}) = Int(r.stop)
     checked_length(r::OrdinalRange{<:smallints}) = length(r)
@@ -752,7 +816,7 @@ first(r::OneTo{T}) where {T} = oneunit(T)
 first(r::StepRangeLen) = unsafe_getindex(r, 1)
 first(r::LinRange) = r.start
 
-last(r::OrdinalRange{T}) where {T} = convert(T, r.stop)
+last(r::OrdinalRange{T}) where {T} = convert(T, r.stop) # via steprange_last
 last(r::StepRangeLen) = unsafe_getindex(r, length(r))
 last(r::LinRange) = r.stop
 
@@ -803,16 +867,17 @@ copy(r::AbstractRange) = r
 
 ## iteration
 
-function iterate(r::Union{LinRange,StepRangeLen}, i::Int=1)
-    @_inline_meta
+function iterate(r::Union{StepRangeLen,LinRange}, i::Integer=zero(length(r)))
+    @inline
+    i += oneunit(i) # the state is the index of the element returned last (zero before the first)
     length(r) < i && return nothing
-    unsafe_getindex(r, i), i + 1
+    unsafe_getindex(r, i), i # hand back the index just used as the next state
 end
 
 iterate(r::OrdinalRange) = isempty(r) ? nothing : (first(r), first(r))
 
 function iterate(r::OrdinalRange{T}, i) where {T}
-    @_inline_meta
+    @inline
     i == last(r) && return nothing
     next = convert(T, i + step(r))
     (next, next)
@@ -823,7 +888,7 @@ end
 _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start
 
 function getindex(v::UnitRange{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     val = convert(T, v.start + (i - 1))
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
@@ -834,7 +899,7 @@ const OverflowSafe = Union{Bool,Int8,Int16,Int32,Int64,Int128,
                            UInt8,UInt16,UInt32,UInt64,UInt128}
 
 function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     val = v.start + (i - 1)
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
@@ -842,14 +907,14 @@ function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
 end
 
 function getindex(v::OneTo{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     @boundscheck ((i > 0) & (i <= v.stop)) || throw_boundserror(v, i)
     convert(T, i)
 end
 
 function getindex(v::AbstractRange{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     ret = convert(T, first(v) + (i - 1)*step_hp(v))
     ok = ifelse(step(v) > zero(step(v)),
@@ -860,7 +925,7 @@ function getindex(v::AbstractRange{T}, i::Integer) where T
 end
 
 function getindex(r::Union{StepRangeLen,LinRange}, i::Integer)
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     @boundscheck checkbounds(r, i)
     unsafe_getindex(r, i)
@@ -885,7 +950,7 @@ function unsafe_getindex(r::LinRange, i::Integer)
 end
 
 function lerpi(j::Integer, d::Integer, a::T, b::T) where T
-    @_inline_meta
+    @inline
     t = j/d
     T((1-t)*a + t*b)
 end
@@ -893,38 +958,38 @@ end
 getindex(r::AbstractRange, ::Colon) = copy(r)
 
 function getindex(r::AbstractUnitRange, s::AbstractUnitRange{T}) where {T<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
     if T === Bool
-        range(first(s) ? first(r) : last(r), length = Int(last(s)))
+        range(first(s) ? first(r) : last(r), length = Integer(last(s)))
     else
         f = first(r)
-        st = oftype(f, f + first(s)-1)
+        st = oftype(f, f + first(s)-firstindex(r))
         return range(st, length=length(s))
     end
 end
 
 function getindex(r::OneTo{T}, s::OneTo) where T
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
     OneTo(T(s.stop))
 end
 
 function getindex(r::AbstractUnitRange, s::StepRange{T}) where {T<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
     if T === Bool
-        range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length = Int(last(s)))
+        range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length = Integer(last(s)))
     else
-        st = oftype(first(r), first(r) + s.start-1)
+        st = oftype(first(r), first(r) + s.start-firstindex(r))
         return range(st, step=step(s), length=length(s))
     end
 end
 
 function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
     if T === Bool
@@ -946,50 +1011,57 @@ function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
 end
 
 function getindex(r::StepRangeLen{T}, s::OrdinalRange{S}) where {T, S<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
+    len = length(s)
+    sstep = step_hp(s)
+    rstep = step_hp(r)
+    L = typeof(len)
     if S === Bool
-        if length(s) == 0
-            return StepRangeLen{T}(first(r), step(r), 0, 1)
-        elseif length(s) == 1
+        rstep *= one(sstep)
+        if len == 0
+            return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+        elseif len == 1
             if first(s)
-                return StepRangeLen{T}(first(r), step(r), 1, 1)
+                return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
             else
-                return StepRangeLen{T}(first(r), step(r), 0, 1)
+                return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
             end
-        else # length(s) == 2
-            return StepRangeLen{T}(last(r), step(r), 1, 1)
+        else # len == 2
+            return StepRangeLen{T}(last(r), rstep, oneunit(L), oneunit(L))
         end
     else
         # Find closest approach to offset by s
         ind = LinearIndices(s)
-        offset = max(min(1 + round(Int, (r.offset - first(s))/step(s)), last(ind)), first(ind))
-        ref = _getindex_hiprec(r, first(s) + (offset-1)*step(s))
-        return StepRangeLen{T}(ref, r.step*step(s), length(s), offset)
+        offset = L(max(min(1 + round(L, (r.offset - first(s))/sstep), last(ind)), first(ind)))
+        ref = _getindex_hiprec(r, first(s) + (offset-1)*sstep)
+        return StepRangeLen{T}(ref, rstep*sstep, len, offset)
     end
 end
 
 function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
+    len = length(s)
+    L = typeof(len)
     if S === Bool
-        if length(s) == 0
-            return LinRange(first(r), first(r), 0)
-        elseif length(s) == 1
+        if len == 0
+            return LinRange{T}(first(r), first(r), len)
+        elseif len == 1
             if first(s)
-                return LinRange(first(r), first(r), 1)
+                return LinRange{T}(first(r), first(r), len)
             else
-                return LinRange(first(r), first(r), 0)
+                return LinRange{T}(first(r), first(r), zero(L))
             end
         else # length(s) == 2
-            return LinRange(last(r), last(r), 1)
+            return LinRange{T}(last(r), last(r), oneunit(L))
         end
     else
         vfirst = unsafe_getindex(r, first(s))
         vlast  = unsafe_getindex(r, last(s))
-        return LinRange{T}(vfirst, vlast, length(s))
+        return LinRange{T}(vfirst, vlast, len)
     end
 end
 
@@ -1017,6 +1089,11 @@ function ==(r::OrdinalRange, s::OrdinalRange)
     (first(r) == first(s)) & (step(r) == step(s)) & (last(r) == last(s))
 end
 
+==(r::AbstractUnitRange, s::AbstractUnitRange) =
+    (isempty(r) & isempty(s)) | ((first(r) == first(s)) & (last(r) == last(s))) # two empty ranges are equal whatever their endpoints
+
+==(r::OneTo, s::OneTo) = last(r) == last(s) # `first` of a OneTo is always `oneunit(T)`, so only `last` can differ
+
 ==(r::T, s::T) where {T<:Union{StepRangeLen,LinRange}} =
     (isempty(r) & isempty(s)) | ((first(r) == first(s)) & (length(r) == length(s)) & (last(r) == last(s)))
 
@@ -1115,6 +1192,16 @@ function intersect(r::StepRange, s::StepRange)
     step(r) < zero(step(r)) ? StepRange{T,S}(n, -a, m) : StepRange{T,S}(m, a, n)
 end
 
+function intersect(r1::AbstractRange, r2::AbstractRange)
+    # To iterate over the shorter range
+    length(r1) > length(r2) && return intersect(r2, r1)
+
+    r1 = unique(r1)
+    T = promote_eltype(r1, r2)
+
+    return T[x for x in r1 if x ∈ r2]
+end
+
 function intersect(r1::AbstractRange, r2::AbstractRange, r3::AbstractRange, r::AbstractRange...)
     i = intersect(intersect(r1, r2), r3)
     for t in r
@@ -1153,8 +1240,8 @@ issubset(r::AbstractUnitRange{<:Integer}, s::AbstractUnitRange{<:Integer}) =
 ## linear operations on ranges ##
 
 -(r::OrdinalRange) = range(-first(r), step=-step(r), length=length(r))
--(r::StepRangeLen{T,R,S}) where {T,R,S} =
-    StepRangeLen{T,R,S}(-r.ref, -r.step, length(r), r.offset)
+-(r::StepRangeLen{T,R,S,L}) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(-r.ref, -r.step, r.len, r.offset)
 function -(r::LinRange)
     start = -r.start
     LinRange{typeof(start)}(start, -r.stop, length(r))
@@ -1168,12 +1255,12 @@ el_same(::Type{T}, a::Type{<:AbstractArray{S,n}}, b::Type{<:AbstractArray{T,n}})
 el_same(::Type, a, b) = promote_typejoin(a, b)
 
 promote_rule(a::Type{UnitRange{T1}}, b::Type{UnitRange{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
+    el_same(promote_type(T1, T2), a, b)
 UnitRange{T}(r::UnitRange{T}) where {T<:Real} = r
 UnitRange{T}(r::UnitRange) where {T<:Real} = UnitRange{T}(r.start, r.stop)
 
 promote_rule(a::Type{OneTo{T1}}, b::Type{OneTo{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
+    el_same(promote_type(T1, T2), a, b)
 OneTo{T}(r::OneTo{T}) where {T<:Integer} = r
 OneTo{T}(r::OneTo) where {T<:Integer} = OneTo{T}(r.stop)
 
@@ -1191,11 +1278,11 @@ OrdinalRange{T1, T2}(r::AbstractUnitRange{T1}) where {T1, T2<:Integer} = r
 OrdinalRange{T1, T2}(r::UnitRange) where {T1, T2<:Integer} = UnitRange{T1}(r)
 OrdinalRange{T1, T2}(r::OneTo) where {T1, T2<:Integer} = OneTo{T1}(r)
 
-promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) where {T1a,T1b,T2a,T2b} =
-    el_same(promote_type(T1a,T2a),
-            # el_same only operates on array element type, so just promote second type parameter
-            StepRange{T1a, promote_type(T1b,T2b)},
-            StepRange{T2a, promote_type(T1b,T2b)})
+function promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) where {T1a,T1b,T2a,T2b}
+    Tb = promote_type(T1b, T2b)
+    # el_same only operates on array element type, so just promote second type parameter
+    el_same(promote_type(T1a, T2a), StepRange{T1a,Tb}, StepRange{T2a,Tb})
+end
 StepRange{T1,T2}(r::StepRange{T1,T2}) where {T1,T2} = r
 
 promote_rule(a::Type{StepRange{T1a,T1b}}, ::Type{UR}) where {T1a,T1b,UR<:AbstractUnitRange} =
@@ -1206,35 +1293,38 @@ StepRange(r::AbstractUnitRange{T}) where {T} =
     StepRange{T,T}(first(r), step(r), last(r))
 (StepRange{T1,T2} where T1)(r::AbstractRange) where {T2} = StepRange{eltype(r),T2}(r)
 
-promote_rule(::Type{StepRangeLen{T1,R1,S1}},::Type{StepRangeLen{T2,R2,S2}}) where {T1,T2,R1,R2,S1,S2} =
-    el_same(promote_type(T1,T2),
-            StepRangeLen{T1,promote_type(R1,R2),promote_type(S1,S2)},
-            StepRangeLen{T2,promote_type(R1,R2),promote_type(S1,S2)})
-StepRangeLen{T,R,S}(r::StepRangeLen{T,R,S}) where {T,R,S} = r
-StepRangeLen{T,R,S}(r::StepRangeLen) where {T,R,S} =
-    StepRangeLen{T,R,S}(convert(R, r.ref), convert(S, r.step), length(r), r.offset)
+function promote_rule(::Type{StepRangeLen{T1,R1,S1,L1}},::Type{StepRangeLen{T2,R2,S2,L2}}) where {T1,T2,R1,R2,S1,S2,L1,L2}
+    R, S, L = promote_type(R1, R2), promote_type(S1, S2), promote_type(L1, L2)
+    el_same(promote_type(T1, T2), StepRangeLen{T1,R,S,L}, StepRangeLen{T2,R,S,L})
+end
+StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T,R,S,L} = r
+StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(convert(R, r.ref), convert(S, r.step), convert(L, r.len), convert(L, r.offset))
 StepRangeLen{T}(r::StepRangeLen) where {T} =
-    StepRangeLen(convert(T, r.ref), convert(T, r.step), length(r), r.offset)
+    StepRangeLen(convert(T, r.ref), convert(T, r.step), r.len, r.offset)
 
-promote_rule(a::Type{StepRangeLen{T,R,S}}, ::Type{OR}) where {T,R,S,OR<:AbstractRange} =
-    promote_rule(a, StepRangeLen{eltype(OR), eltype(OR), eltype(OR)})
-StepRangeLen{T,R,S}(r::AbstractRange) where {T,R,S} =
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+promote_rule(a::Type{StepRangeLen{T,R,S,L}}, ::Type{OR}) where {T,R,S,L,OR<:AbstractRange} =
+    promote_rule(a, StepRangeLen{eltype(OR), eltype(OR), eltype(OR), Int})
+StepRangeLen{T,R,S,L}(r::AbstractRange) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), length(r))
 StepRangeLen{T}(r::AbstractRange) where {T} =
     StepRangeLen(T(first(r)), T(step(r)), length(r))
 StepRangeLen(r::AbstractRange) = StepRangeLen{eltype(r)}(r)
 
-promote_rule(a::Type{LinRange{T1}}, b::Type{LinRange{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
-LinRange{T}(r::LinRange{T}) where {T} = r
-LinRange{T}(r::AbstractRange) where {T} = LinRange{T}(first(r), last(r), length(r))
+function promote_rule(a::Type{LinRange{T1,L1}}, b::Type{LinRange{T2,L2}}) where {T1,T2,L1,L2}
+    L = promote_type(L1, L2)
+    el_same(promote_type(T1, T2), LinRange{T1,L}, LinRange{T2,L})
+end
+LinRange{T,L}(r::LinRange{T,L}) where {T,L} = r
+LinRange{T,L}(r::AbstractRange) where {T,L} = LinRange{T,L}(first(r), last(r), length(r))
+LinRange{T}(r::AbstractRange) where {T} = LinRange{T,typeof(length(r))}(first(r), last(r), length(r))
 LinRange(r::AbstractRange{T}) where {T} = LinRange{T}(r)
 
-promote_rule(a::Type{LinRange{T}}, ::Type{OR}) where {T,OR<:OrdinalRange} =
-    promote_rule(a, LinRange{eltype(OR)})
+promote_rule(a::Type{LinRange{T,L}}, ::Type{OR}) where {T,L,OR<:OrdinalRange} =
+    promote_rule(a, LinRange{eltype(OR),L})
 
-promote_rule(::Type{LinRange{L}}, b::Type{StepRangeLen{T,R,S}}) where {L,T,R,S} =
-    promote_rule(StepRangeLen{L,L,L}, b)
+promote_rule(::Type{LinRange{A,L}}, b::Type{StepRangeLen{T2,R2,S2,L2}}) where {A,L,T2,R2,S2,L2} =
+    promote_rule(StepRangeLen{A,A,A,L}, b)
 
 ## concatenation ##
 
@@ -1261,9 +1351,9 @@ function _reverse(r::StepRangeLen, ::Colon)
     # invalid. As `reverse(r)` is also empty, any offset would work so we keep
     # `r.offset`
     offset = isempty(r) ? r.offset : length(r)-r.offset+1
-    StepRangeLen(r.ref, -r.step, length(r), offset)
+    return typeof(r)(r.ref, -r.step, length(r), offset)
 end
-_reverse(r::LinRange{T}, ::Colon) where {T} = LinRange{T}(r.stop, r.start, length(r))
+_reverse(r::LinRange{T}, ::Colon) where {T} = typeof(r)(r.stop, r.start, length(r))
 
 ## sorting ##
 
@@ -1303,11 +1393,13 @@ in(x::T, r::AbstractRange{T}) where {T} = _in_range(x, r)
 in(x::Integer, r::AbstractUnitRange{<:Integer}) = (first(r) <= x) & (x <= last(r))
 
 in(x::Real, r::AbstractRange{T}) where {T<:Integer} =
-    isinteger(x) && !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
-        (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)
+    isinteger(x) && !isempty(r) &&
+    (iszero(step(r)) ? x == first(r) : (x >= minimum(r) && x <= maximum(r) &&
+        (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)))
 in(x::AbstractChar, r::AbstractRange{<:AbstractChar}) =
-    !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
-        (mod(Int(x) - Int(first(r)), step(r)) == 0)
+    !isempty(r) &&
+    (iszero(step(r)) ? x == first(r) : (x >= minimum(r) && x <= maximum(r) &&
+        (mod(Int(x) - Int(first(r)), step(r)) == 0)))
 
 # Addition/subtraction of ranges
 
diff --git a/base/rational.jl b/base/rational.jl
index 23c9298962f53..9e887bdaefa91 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -216,6 +216,8 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer
 end
 rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol)::Rational{T}
 rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...)
+rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re; kvs...)::Rational{T}, rationalize(T, x.im; kvs...)::Rational{T})
+rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re; kvs...), rationalize(Int, x.im; kvs...)) # forward `kvs` (e.g. `tol`) as keywords, not positional pairs
 
 """
     numerator(x)
@@ -533,3 +535,21 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
     h = hash_integer(num, h)
     return h
 end
+
+# These methods are only needed for performance. Since `first(r)` and `last(r)` have the
+# same denominator (because their difference is an integer), `length(r)` can be calculated
+# without calling `gcd`.
+function length(r::AbstractUnitRange{T}) where T<:Rational
+    @inline
+    f = first(r)
+    l = last(r)
+    return div(l.num - f.num + f.den, f.den)
+end
+function checked_length(r::AbstractUnitRange{T}) where T<:Rational
+    f = first(r)
+    l = last(r)
+    if isempty(r)
+        return f.num - f.num
+    end
+    return div(checked_add(checked_sub(l.num, f.num), f.den), f.den)
+end
diff --git a/base/reduce.jl b/base/reduce.jl
index aed7a546e144e..0581a72fb2862 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -168,6 +168,8 @@ Like [`reduce`](@ref), but with guaranteed left associativity. If provided, the
 argument `init` will be used exactly once. In general, it will be necessary to provide
 `init` to work with empty collections.
 
+See also [`mapfoldl`](@ref), [`foldr`](@ref), [`accumulate`](@ref).
+
 # Examples
 ```jldoctest
 julia> foldl(=>, 1:4)
@@ -175,6 +177,9 @@ julia> foldl(=>, 1:4)
 
 julia> foldl(=>, 1:4; init=0)
 (((0 => 1) => 2) => 3) => 4
+
+julia> accumulate(=>, (1,2,3,4))
+(1, 1 => 2, (1 => 2) => 3, ((1 => 2) => 3) => 4)
 ```
 """
 foldl(op, itr; kw...) = mapfoldl(identity, op, itr; kw...)
@@ -299,6 +304,9 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096
 
 # handling empty arrays
 _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))
+_empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError("""
+    reducing with $f over an empty collection of element type $T is not allowed.
+    You may be able to prevent this error by supplying an `init` value to the reducer."""))
 
 """
     Base.reduce_empty(op, T)
@@ -306,23 +314,32 @@ _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection i
 The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or [`foldr`](@ref)
 with reduction `op` over an empty array with element type of `T`.
 
-If not defined, this will throw an `ArgumentError`.
+This should only be defined in unambiguous cases; for example,
+
+```julia
+Base.reduce_empty(::typeof(+), ::Type{T}) where T = zero(T)
+```
+
+is justified (the sum of zero elements is zero), whereas
+`reduce_empty(::typeof(max), ::Type{Any})` is not (the maximum value of an empty collection
+is generally ambiguous, and especially so when the element type is unknown).
+
+As an alternative, consider supplying an `init` value to the reducer.
 """
-reduce_empty(op, ::Type{T}) where {T} = _empty_reduce_error()
-reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error(+, Union{})
 reduce_empty(::typeof(+), ::Type{T}) where {T} = zero(T)
 reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
-reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error(*, Union{})
 reduce_empty(::typeof(*), ::Type{T}) where {T} = one(T)
 reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = ""
 reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
 
-reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error(add_sum, Union{})
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T)
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned}  = zero(Int)
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
-reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error(mul_prod, Union{})
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned}  = one(Int)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)
@@ -337,11 +354,8 @@ reduce_empty(op::FlipArgs, ::Type{T}) where {T} = reduce_empty(op.f, T)
 
 The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
 [`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
-of `T`.
-
-If not defined, this will throw an `ArgumentError`.
+of `T`. See [`Base.reduce_empty`](@ref) for more information.
 """
-mapreduce_empty(f, op, T) = _empty_reduce_error()
 mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
 mapreduce_empty(::typeof(abs), op, T)      = abs(reduce_empty(op, T))
 mapreduce_empty(::typeof(abs2), op, T)     = abs2(reduce_empty(op, T))
@@ -355,7 +369,10 @@ mapreduce_empty_iter(f, op, itr, ItrEltype) =
 
 @inline reduce_empty_iter(op, itr) = reduce_empty_iter(op, itr, IteratorEltype(itr))
 @inline reduce_empty_iter(op, itr, ::HasEltype) = reduce_empty(op, eltype(itr))
-reduce_empty_iter(op, itr, ::EltypeUnknown) = _empty_reduce_error()
+reduce_empty_iter(op, itr, ::EltypeUnknown) = throw(ArgumentError("""
+    reducing over an empty collection of unknown element type is not allowed.
+    You may be able to prevent this error by supplying an `init` value to the reducer."""))
+
 
 # handling of single-element iterators
 """
@@ -726,7 +743,7 @@ julia> maximum([1,2,3])
 3
 
 julia> maximum(())
-ERROR: ArgumentError: reducing over an empty collection is not allowed
+ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -758,7 +775,7 @@ julia> minimum([1,2,3])
 1
 
 julia> minimum([])
-ERROR: ArgumentError: reducing over an empty collection is not allowed
+ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -1204,10 +1221,12 @@ count(itr; init=0) = count(identity, itr; init)
 
 count(f, itr; init=0) = _simple_count(f, itr, init)
 
-function _simple_count(pred, itr, init::T) where {T}
+_simple_count(pred, itr, init) = _simple_count_helper(Generator(pred, itr), init)
+
+function _simple_count_helper(g, init::T) where {T}
     n::T = init
-    for x in itr
-        n += pred(x)::Bool
+    for x in g
+        n += x::Bool
     end
     return n
 end
diff --git a/base/reflection.jl b/base/reflection.jl
index 29a4b5f4a66ca..1565eb15bcda7 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -303,7 +303,7 @@ objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)
 
 # concrete datatype predicates
 
-datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Any, (Any,), x)
+datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x)
 
 struct DataTypeLayout
     nfields::UInt32
@@ -453,7 +453,7 @@ end
 """
     ismutable(v) -> Bool
 
-Return `true` iff value `v` is mutable.  See [Mutable Composite Types](@ref)
+Return `true` if and only if value `v` is mutable.  See [Mutable Composite Types](@ref)
 for a discussion of immutability. Note that this function works on values, so if you give it
 a type, it will tell you that a value of `DataType` is mutable.
 
@@ -483,7 +483,7 @@ Determine whether type `T` was declared as a mutable type
 !!! compat "Julia 1.7"
     This function requires at least Julia 1.7.
 """
-function ismutabletype(@nospecialize(t::Type))
+function ismutabletype(@nospecialize t)
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
     return isa(t, DataType) && t.name.flags & 0x2 == 0x2
@@ -496,7 +496,7 @@ end
 Determine whether type `T` was declared as a struct type
 (i.e. using the `struct` or `mutable struct` keyword).
 """
-function isstructtype(@nospecialize(t::Type))
+function isstructtype(@nospecialize t)
     @_pure_meta
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
@@ -511,7 +511,7 @@ end
 Determine whether type `T` was declared as a primitive type
 (i.e. using the `primitive` keyword).
 """
-function isprimitivetype(@nospecialize(t::Type))
+function isprimitivetype(@nospecialize t)
     @_pure_meta
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
@@ -543,7 +543,7 @@ julia> isbitstype(Complex)
 false
 ```
 """
-isbitstype(@nospecialize(t::Type)) = (@_pure_meta; isa(t, DataType) && (t.flags & 0x8) == 0x8)
+isbitstype(@nospecialize t) = (@_pure_meta; isa(t, DataType) && (t.flags & 0x8) == 0x8)
 
 """
     isbits(x)
@@ -1085,10 +1085,10 @@ const SLOT_USED = 0x8
 ast_slotflag(@nospecialize(code), i) = ccall(:jl_ir_slotflag, UInt8, (Any, Csize_t), code, i - 1)
 
 """
-    may_invoke_generator(method, atypes, sparams)
+    may_invoke_generator(method, atype, sparams)
 
 Computes whether or not we may invoke the generator for the given `method` on
-the given atypes and sparams. For correctness, all generated function are
+the given atype and sparams. For correctness, all generated functions are
 required to return monotonic answers. However, since we don't expect users to
 be able to successfully implement this criterion, we only call generated
 functions on concrete types. The one exception to this is that we allow calling
@@ -1102,9 +1102,9 @@ in some cases, but this may still allow inference not to fall over in some limit
 function may_invoke_generator(method::MethodInstance)
     return may_invoke_generator(method.def::Method, method.specTypes, method.sparam_vals)
 end
-function may_invoke_generator(method::Method, @nospecialize(atypes), sparams::SimpleVector)
+function may_invoke_generator(method::Method, @nospecialize(atype), sparams::SimpleVector)
     # If we have complete information, we may always call the generator
-    isdispatchtuple(atypes) && return true
+    isdispatchtuple(atype) && return true
 
     # We don't have complete information, but it is possible that the generator
     # syntactically doesn't make use of the information we don't have. Check
@@ -1122,7 +1122,7 @@ function may_invoke_generator(method::Method, @nospecialize(atypes), sparams::Si
     isdefined(generator_method, :source) || return false
     code = generator_method.source
     nslots = ccall(:jl_ir_nslots, Int, (Any,), code)
-    at = unwrap_unionall(atypes)::DataType
+    at = unwrap_unionall(atype)::DataType
     (nslots >= 1 + length(sparams) + length(at.parameters)) || return false
 
     for i = 1:nsparams
@@ -1160,7 +1160,7 @@ additional optimizations, such as inlining, are also applied.
 The keyword `debuginfo` controls the amount of code metadata present in the output,
 possible options are `:source` or `:none`.
 """
-function code_typed(@nospecialize(f), @nospecialize(types=Tuple);
+function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f));
                     optimize=true,
                     debuginfo::Symbol=:default,
                     world = get_world_counter(),
@@ -1181,13 +1181,25 @@ function code_typed(@nospecialize(f), @nospecialize(types=Tuple);
     return code_typed_by_type(tt; optimize, debuginfo, world, interp)
 end
 
+# returns argument tuple type which is supposed to be used for `code_typed` and its family;
+# if there is a single method this function returns the method argument signature,
+# otherwise returns `Tuple` that doesn't match with any signature
+function default_tt(@nospecialize(f))
+    ms = methods(f)
+    if length(ms) == 1
+        return tuple_type_tail(only(ms).sig)
+    else
+        return Tuple
+    end
+end
+
 """
     code_typed_by_type(types::Type{<:Tuple}; ...)
 
 Similar to [`code_typed`](@ref), except the argument is a tuple type describing
 a full signature to query.
 """
-function code_typed_by_type(@nospecialize(tt::Type);
+function code_typed_by_type(@nospecialize(tt#=::Type=#);
                             optimize=true,
                             debuginfo::Symbol=:default,
                             world = get_world_counter(),
@@ -1218,7 +1230,7 @@ function code_typed_by_type(@nospecialize(tt::Type);
     return asts
 end
 
-function code_typed_opaque_closure(@nospecialize(closure::Core.OpaqueClosure), @nospecialize(types=Tuple);
+function code_typed_opaque_closure(@nospecialize(closure::Core.OpaqueClosure);
         optimize=true,
         debuginfo::Symbol=:default,
         interp = Core.Compiler.NativeInterpreter(closure.world))
@@ -1232,7 +1244,7 @@ function code_typed_opaque_closure(@nospecialize(closure::Core.OpaqueClosure), @
     end
 end
 
-function return_types(@nospecialize(f), @nospecialize(types=Tuple), interp=Core.Compiler.NativeInterpreter())
+function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)), interp=Core.Compiler.NativeInterpreter())
     ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
     if isa(f, Core.Builtin)
         throw(ArgumentError("argument is not a generic function"))
@@ -1264,7 +1276,7 @@ function print_statement_costs(io::IO, @nospecialize(f), @nospecialize(t); kwarg
     print_statement_costs(io, tt; kwargs...)
 end
 
-function print_statement_costs(io::IO, @nospecialize(tt::Type);
+function print_statement_costs(io::IO, @nospecialize(tt#=::Type=#);
                                world = get_world_counter(),
                                interp = Core.Compiler.NativeInterpreter(world))
     matches = _methods_by_ftype(tt, -1, world)
@@ -1294,7 +1306,7 @@ end
 
 print_statement_costs(args...; kwargs...) = print_statement_costs(stdout, args...; kwargs...)
 
-function _which(@nospecialize(tt::Type), world=get_world_counter())
+function _which(@nospecialize(tt#=::Type=#), world=get_world_counter())
     min_valid = RefValue{UInt}(typemin(UInt))
     max_valid = RefValue{UInt}(typemax(UInt))
     match = ccall(:jl_gf_invoke_lookup_worlds, Any,
@@ -1329,7 +1341,7 @@ end
 
 Returns the method that would be called by the given type signature (as a tuple type).
 """
-function which(@nospecialize(tt::Type))
+function which(@nospecialize(tt#=::Type=#))
     return _which(tt).method
 end
 
@@ -1645,7 +1657,6 @@ min_world(m::Core.CodeInfo) = m.min_world
 max_world(m::Core.CodeInfo) = m.max_world
 get_world_counter() = ccall(:jl_get_world_counter, UInt, ())
 
-
 """
     propertynames(x, private=false)
 
@@ -1676,3 +1687,57 @@ Return a boolean indicating whether the object `x` has `s` as one of its own pro
 See also: [`propertynames`](@ref), [`hasfield`](@ref).
 """
 hasproperty(x, s::Symbol) = s in propertynames(x)
+
+"""
+    @invoke f(arg::T, ...; kwargs...)
+
+Provides a convenient way to call [`invoke`](@ref);
+`@invoke f(arg1::T1, arg2::T2; kwargs...)` will be expanded into `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)`.
+When an argument's type annotation is omitted, it's specified as `Any` argument, e.g.
+`@invoke f(arg1::T, arg2)` will be expanded into `invoke(f, Tuple{T,Any}, arg1, arg2)`.
+
+!!! compat "Julia 1.7"
+    This macro requires Julia 1.7 or later.
+"""
+macro invoke(ex)
+    f, args, kwargs = destructure_callex(ex)
+    arg2typs = map(args) do x
+        isexpr(x, :(::)) ? (x.args...,) : (x, GlobalRef(Core, :Any))
+    end
+    args, argtypes = first.(arg2typs), last.(arg2typs)
+    return esc(:($(GlobalRef(Core, :invoke))($(f), Tuple{$(argtypes...)}, $(args...); $(kwargs...))))
+end
+
+"""
+    @invokelatest f(args...; kwargs...)
+
+Provides a convenient way to call [`Base.invokelatest`](@ref).
+`@invokelatest f(args...; kwargs...)` will simply be expanded into
+`Base.invokelatest(f, args...; kwargs...)`.
+
+!!! compat "Julia 1.7"
+    This macro requires Julia 1.7 or later.
+"""
+macro invokelatest(ex)
+    f, args, kwargs = destructure_callex(ex)
+    return esc(:($(GlobalRef(@__MODULE__, :invokelatest))($(f), $(args...); $(kwargs...))))
+end
+
+function destructure_callex(ex)
+    isexpr(ex, :call) || throw(ArgumentError("a call expression f(args...; kwargs...) should be given"))
+
+    f = first(ex.args)
+    args = []
+    kwargs = []
+    for x in ex.args[2:end]
+        if isexpr(x, :parameters)
+            append!(kwargs, x.args)
+        elseif isexpr(x, :kw)
+            push!(kwargs, x)
+        else
+            push!(args, x)
+        end
+    end
+
+    return f, args, kwargs
+end
diff --git a/base/regex.jl b/base/regex.jl
index 15744fe14ce47..ad26c18d4c581 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -335,6 +335,20 @@ function endswith(s::SubString, r::Regex)
     return PCRE.exec_r(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED)
 end
 
+function chopprefix(s::AbstractString, prefix::Regex)
+    m = match(prefix, s, firstindex(s), PCRE.ANCHORED)
+    m === nothing && return SubString(s)
+    return SubString(s, ncodeunits(m.match) + 1)
+end
+
+function chopsuffix(s::AbstractString, suffix::Regex)
+    m = match(suffix, s, firstindex(s), PCRE.ENDANCHORED)
+    m === nothing && return SubString(s)
+    isempty(m.match) && return SubString(s)
+    return SubString(s, firstindex(s), prevind(s, m.offset))
+end
+
+
 """
     match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
 
@@ -528,6 +542,7 @@ end
 Stores the given string `substr` as a `SubstitutionString`, for use in regular expression
 substitutions. Most commonly constructed using the [`@s_str`](@ref) macro.
 
+# Examples
 ```jldoctest
 julia> SubstitutionString("Hello \\\\g<name>, it's \\\\1")
 s"Hello \\g<name>, it's \\1"
@@ -564,6 +579,7 @@ Construct a substitution string, used for regular expression substitutions.  Wit
 string, sequences of the form `\\N` refer to the Nth capture group in the regex, and
 `\\g<groupname>` refers to a named capture group with name `groupname`.
 
+# Examples
 ```jldoctest
 julia> msg = "#Hello# from Julia";
 
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index 5ce0acf97ff5b..ad1e8b26c4461 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -12,25 +12,25 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     writable::Bool
 
     function throwbits(S::Type, T::Type, U::Type)
-        @_noinline_meta
+        @noinline
         throw(ArgumentError("cannot reinterpret `$(S)` as `$(T)`, type `$(U)` is not a bits type"))
     end
     function throwsize0(S::Type, T::Type, msg)
-        @_noinline_meta
+        @noinline
         throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a $msg size"))
     end
 
     global reinterpret
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
         function thrownonint(S::Type, T::Type, dim)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("""
                 cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
                 The resulting array would have non-integral first dimension.
                 """))
         end
         function throwaxes1(S::Type, T::Type, ax1)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first."))
         end
         isbitstype(T) || throwbits(S, T, T)
@@ -51,11 +51,11 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     # With reshaping
     function reinterpret(::typeof(reshape), ::Type{T}, a::A) where {T,S,A<:AbstractArray{S}}
         function throwintmult(S::Type, T::Type)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got $(sizeof(T))) and `sizeof(eltype(a))` (got $(sizeof(S))) be an integer multiple of the other"))
         end
         function throwsize1(a::AbstractArray, T::Type)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $(eltype(a)) requires that `axes(a, 1)` (got $(axes(a, 1))) be equal to 1:$(sizeof(T) ÷ sizeof(eltype(a))) (from the ratio of element sizes)"))
         end
         isbitstype(T) || throwbits(S, T, T)
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index 1d40a00810e99..cabe3c9d10a58 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -287,7 +287,7 @@ viewindexing(I::Tuple{Slice, ReshapedUnitRange, Vararg{ScalarIndex}}) = IndexLin
 viewindexing(I::Tuple{ReshapedRange, Vararg{ScalarIndex}}) = IndexLinear()
 compute_stride1(s, inds, I::Tuple{ReshapedRange, Vararg{Any}}) = s*step(I[1].parent)
 compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{ReshapedRange}) =
-    (@_inline_meta; first(I[1]) - first(axes1(I[1]))*stride1)
+    (@inline; first(I[1]) - first(axes1(I[1]))*stride1)
 substrides(strds::NTuple{N,Int}, I::Tuple{ReshapedUnitRange, Vararg{Any}}) where N =
     (size_to_strides(strds[1], size(I[1])...)..., substrides(tail(strds), tail(I))...)
 unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} =
diff --git a/base/set.jl b/base/set.jl
index 5a744c556432c..dd1400d11dba1 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -3,13 +3,21 @@
 struct Set{T} <: AbstractSet{T}
     dict::Dict{T,Nothing}
 
-    Set{T}() where {T} = new(Dict{T,Nothing}())
-    Set{T}(s::Set{T}) where {T} = new(Dict{T,Nothing}(s.dict))
+    global _Set(dict::Dict{T,Nothing}) where {T} = new{T}(dict)
 end
 
+Set{T}() where {T} = _Set(Dict{T,Nothing}())
+Set{T}(s::Set{T}) where {T} = _Set(Dict{T,Nothing}(s.dict))
 Set{T}(itr) where {T} = union!(Set{T}(), itr)
 Set() = Set{Any}()
 
+function Set{T}(s::KeySet{T, <:Dict{T}}) where {T}
+    d = s.dict
+    slots = copy(d.slots)
+    keys = copy(d.keys)
+    vals = similar(d.vals, Nothing)
+    _Set(Dict{T,Nothing}(slots, keys, vals, d.ndel, d.count, d.age, d.idxfloor, d.maxprobe))
+end
 
 """
     Set([itr])
@@ -36,7 +44,7 @@ empty(s::AbstractSet{T}, ::Type{U}=T) where {T,U} = Set{U}()
 # by default, a Set is returned
 emptymutable(s::AbstractSet{T}, ::Type{U}=T) where {T,U} = Set{U}()
 
-_similar_for(c::AbstractSet, ::Type{T}, itr, isz) where {T} = empty(c, T)
+_similar_for(c::AbstractSet, ::Type{T}, itr, isz, len) where {T} = empty(c, T)
 
 function show(io::IO, s::Set)
     if isempty(s)
@@ -548,6 +556,9 @@ replaced.
 
 See also [`replace!`](@ref), [`splice!`](@ref), [`delete!`](@ref), [`insert!`](@ref).
 
+!!! compat "Julia 1.7"
+    Version 1.7 is required to replace elements of a `Tuple`.
+
 # Examples
 ```jldoctest
 julia> replace([1, 2, 1, 3], 1=>0, 2=>4, count=2)
@@ -596,6 +607,9 @@ Return a copy of `A` where each value `x` in `A` is replaced by `new(x)`.
 If `count` is specified, then replace at most `count` values in total
 (replacements being defined as `new(x) !== x`).
 
+!!! compat "Julia 1.7"
+    Version 1.7 is required to replace elements of a `Tuple`.
+
 # Examples
 ```jldoctest
 julia> replace(x -> isodd(x) ? 2x : x, [1, 2, 3, 4])
@@ -755,7 +769,7 @@ replace(f::Callable, t::Tuple; count::Integer=typemax(Int)) =
 
 function _replace(t::Tuple, count::Int, old_new::Tuple{Vararg{Pair}})
     _replace(t, count) do x
-        @_inline_meta
+        @inline
         for o_n in old_new
             isequal(first(o_n), x) && return last(o_n)
         end
diff --git a/base/shell.jl b/base/shell.jl
index bcece48681e5c..c0537821638f3 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -49,22 +49,24 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
         empty!(innerlist)
     end
 
+    C = eltype(str)
+    P = Pair{Int,C}
     for (j, c) in st
-        j, c = j::Int, c::eltype(str)
+        j, c = j::Int, c::C
         if !in_single_quotes && !in_double_quotes && isspace(c)
             i = consume_upto!(arg, s, i, j)
             append_2to1!(args, arg)
             while !isempty(st)
                 # We've made sure above that we don't end in whitespace,
                 # so updating `i` here is ok
-                (i, c) = peek(st)::Pair{Int,eltype(str)}
+                (i, c) = peek(st)::P
                 isspace(c) || break
                 popfirst!(st)
             end
         elseif interpolate && !in_single_quotes && c == '$'
             i = consume_upto!(arg, s, i, j)
             isempty(st) && error("\$ right before end of command")
-            stpos, c = popfirst!(st)::Pair{Int,eltype(str)}
+            stpos, c = popfirst!(st)::P
             isspace(c) && error("space not allowed right after \$")
             if startswith(SubString(s, stpos), "var\"")
                 # Disallow var"#" syntax in cmd interpolations.
@@ -88,19 +90,19 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
                 in_double_quotes = !in_double_quotes
                 i = consume_upto!(arg, s, i, j)
             elseif !in_single_quotes && c == '\\'
-                if !isempty(st) && peek(st)[2] in ('\n', '\r')
+                if !isempty(st) && (peek(st)::P)[2] in ('\n', '\r')
                     i = consume_upto!(arg, s, i, j) + 1
-                    if popfirst!(st)[2] == '\r' && peek(st)[2] == '\n'
+                    if popfirst!(st)[2] == '\r' && (peek(st)::P)[2] == '\n'
                         i += 1
                         popfirst!(st)
                     end
-                    while !isempty(st) && peek(st)[2] in (' ', '\t')
+                    while !isempty(st) && (peek(st)::P)[2] in (' ', '\t')
                         i = nextind(str, i)
                         _ = popfirst!(st)
                     end
                 elseif in_double_quotes
                     isempty(st) && error("unterminated double quote")
-                    k, c′ = peek(st)
+                    k, c′ = peek(st)::P
                     if c′ == '"' || c′ == '$' || c′ == '\\'
                         i = consume_upto!(arg, s, i, j)
                         _ = popfirst!(st)
diff --git a/base/show.jl b/base/show.jl
index b2a1da33ddceb..463b6379c1f4f 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -513,23 +513,21 @@ end
 # we're attempting to represent.
 # Union{T} where T is a degenerate case and is equal to T.ub, but we don't want
 # to print them that way, so filter those out from our aliases completely.
-function makeproper(io::IO, x::Type)
-    properx = x
-    x = unwrap_unionall(x)
+function makeproper(io::IO, @nospecialize(x::Type))
     if io isa IOContext
         for (key, val) in io.dict
             if key === :unionall_env && val isa TypeVar
-                properx = UnionAll(val, properx)
+                x = UnionAll(val, x)
             end
         end
     end
-    has_free_typevars(properx) && return Any
-    return properx
+    has_free_typevars(x) && return Any
+    return x
 end
 
 function make_typealias(@nospecialize(x::Type))
-    Any === x && return
-    x <: Tuple && return
+    Any === x && return nothing
+    x <: Tuple && return nothing
     mods = modulesof!(Set{Module}(), x)
     Core in mods && push!(mods, Base)
     aliases = Tuple{GlobalRef,SimpleVector}[]
@@ -704,12 +702,12 @@ function make_wheres(io::IO, env::SimpleVector, @nospecialize(x::Type))
     return wheres
 end
 
-function show_wheres(io::IO, wheres::Vector)
+function show_wheres(io::IO, wheres::Vector{TypeVar})
     isempty(wheres) && return
     io = IOContext(io)
     n = length(wheres)
     for i = 1:n
-        p = wheres[i]::TypeVar
+        p = wheres[i]
         print(io, n == 1 ? " where " : i == 1 ? " where {" : ", ")
         show(io, p)
         io = IOContext(io, :unionall_env => p)
@@ -718,7 +716,7 @@ function show_wheres(io::IO, wheres::Vector)
     nothing
 end
 
-function show_typealias(io::IO, x::Type)
+function show_typealias(io::IO, @nospecialize(x::Type))
     properx = makeproper(io, x)
     alias = make_typealias(properx)
     alias === nothing && return false
@@ -985,7 +983,7 @@ function show_type_name(io::IO, tn::Core.TypeName)
     nothing
 end
 
-function show_datatype(io::IO, @nospecialize(x::DataType), wheres::Vector=TypeVar[])
+function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[])
     parameters = x.parameters::SimpleVector
     istuple = x.name === Tuple.name
     n = length(parameters)
@@ -993,10 +991,17 @@ function show_datatype(io::IO, @nospecialize(x::DataType), wheres::Vector=TypeVa
     # Print homogeneous tuples with more than 3 elements compactly as NTuple{N, T}
     if istuple
         if n > 3 && all(@nospecialize(i) -> (parameters[1] === i), parameters)
-            print(io, "NTuple{", n, ", ", parameters[1], "}")
+            print(io, "NTuple{", n, ", ")
+            show(io, parameters[1])
+            print(io, "}")
         else
             print(io, "Tuple{")
-            join(io, parameters, ", ")
+            # equivalent to join(io, parameters, ", "), but `show` each parameter instead of `print`ing it
+            first = true
+            for param in parameters
+                first ? (first = false) : print(io, ", ")
+                show(io, param)
+            end
             print(io, "}")
         end
     else
@@ -1096,7 +1101,20 @@ function show(io::IO, m::Module)
     if is_root_module(m)
         print(io, nameof(m))
     else
-        print(io, join(fullname(m),"."))
+        print_fullname(io, m)
+    end
+end
+# The call to print_fullname above was originally `print(io, join(fullname(m),"."))`,
+# which allocates. The method below provides the same behavior without allocating.
+# See https://github.com/JuliaLang/julia/pull/42773 for perf information.
+function print_fullname(io::IO, m::Module)
+    mp = parentmodule(m)
+    if m === Main || m === Base || m === Core || mp === m
+        print(io, nameof(m))
+    else
+        print_fullname(io, mp)
+        print(io, '.')
+        print(io, nameof(m))
     end
 end
 
@@ -1292,17 +1310,19 @@ show_unquoted(io::IO, ex, indent::Int, prec::Int, ::Int) = show_unquoted(io, ex,
 const indent_width = 4
 const quoted_syms = Set{Symbol}([:(:),:(::),:(:=),:(=),:(==),:(===),:(=>)])
 const uni_syms = Set{Symbol}([:(::), :(<:), :(>:)])
-const uni_ops = Set{Symbol}([:(+), :(-), :(!), :(¬), :(~), :(<:), :(>:), :(√), :(∛), :(∜)])
+const uni_ops = Set{Symbol}([:(+), :(-), :(!), :(¬), :(~), :(<:), :(>:), :(√), :(∛), :(∜), :(∓), :(±)])
 const expr_infix_wide = Set{Symbol}([
     :(=), :(+=), :(-=), :(*=), :(/=), :(\=), :(^=), :(&=), :(|=), :(÷=), :(%=), :(>>>=), :(>>=), :(<<=),
     :(.=), :(.+=), :(.-=), :(.*=), :(./=), :(.\=), :(.^=), :(.&=), :(.|=), :(.÷=), :(.%=), :(.>>>=), :(.>>=), :(.<<=),
-    :(&&), :(||), :(<:), :($=), :(⊻=), :(>:), :(-->)])
+    :(&&), :(||), :(<:), :($=), :(⊻=), :(>:), :(-->),
+    :(:=), :(≔), :(⩴), :(≕)])
 const expr_infix = Set{Symbol}([:(:), :(->), :(::)])
 const expr_infix_any = union(expr_infix, expr_infix_wide)
 const expr_calls  = Dict(:call => ('(',')'), :calldecl => ('(',')'),
                          :ref => ('[',']'), :curly => ('{','}'), :(.) => ('(',')'))
 const expr_parens = Dict(:tuple=>('(',')'), :vcat=>('[',']'),
                          :hcat =>('[',']'), :row =>('[',']'), :vect=>('[',']'),
+                         :ncat =>('[',']'), :nrow =>('[',']'),
                          :braces=>('{','}'), :bracescat=>('{','}'))
 
 ## AST decoding helpers ##
@@ -1314,7 +1334,8 @@ is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
      isidentifier(s) -> Bool
 
 Return whether the symbol or string `s` contains characters that are parsed as
-a valid identifier in Julia code.
+a valid ordinary identifier (not a binary/unary operator) in Julia code;
+see also [`Base.isoperator`](@ref).
 
 Internally Julia allows any sequence of characters in a `Symbol` (except `\\0`s),
 and macros automatically use variable names containing `#` in order to avoid
@@ -1811,14 +1832,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
 
     # list-like forms, e.g. "[1, 2, 3]"
     elseif haskey(expr_parens, head) ||                          # :vcat etc.
-        head === :typed_vcat || head === :typed_hcat
+        head === :typed_vcat || head === :typed_hcat || head === :typed_ncat
         # print the type and defer to the untyped case
-        if head === :typed_vcat || head === :typed_hcat
+        if head === :typed_vcat || head === :typed_hcat || head === :typed_ncat
             show_unquoted(io, args[1], indent, prec, quote_level)
             if head === :typed_vcat
                 head = :vcat
-            else
+            elseif head === :typed_hcat
                 head = :hcat
+            else
+                head = :ncat
             end
             args = args[2:end]
             nargs = nargs - 1
@@ -1828,15 +1851,19 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
             sep = "; "
         elseif head === :hcat || head === :row
             sep = " "
+        elseif head === :ncat || head === :nrow
+            sep = ";"^args[1]::Int * " "
+            args = args[2:end]
+            nargs = nargs - 1
         else
             sep = ", "
         end
-        head !== :row && print(io, op)
+        head !== :row && head !== :nrow && print(io, op)
         show_list(io, args, sep, indent, 0, quote_level)
-        if nargs == 1 && head === :vcat
-            print(io, ';')
+        if nargs <= 1 && (head === :vcat || head === :ncat)
+            print(io, sep[1:end-1])
         end
-        head !== :row && print(io, cl)
+        head !== :row && head !== :nrow && print(io, cl)
 
     # transpose
     elseif (head === Symbol("'") && nargs == 1) || (
@@ -2124,12 +2151,15 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :line && 1 <= nargs <= 2
         show_linenumber(io, args...)
 
-    elseif head === :try && 3 <= nargs <= 4
+    elseif head === :try && 3 <= nargs <= 5
         iob = IOContext(io, beginsym=>false)
         show_block(iob, "try", args[1], indent, quote_level)
         if is_expr(args[3], :block)
             show_block(iob, "catch", args[2] === false ? Any[] : args[2], args[3]::Expr, indent, quote_level)
         end
+        if nargs >= 5 && is_expr(args[5], :block)
+            show_block(iob, "else", Any[], args[5]::Expr, indent, quote_level)
+        end
         if nargs >= 4 && is_expr(args[4], :block)
             show_block(iob, "finally", Any[], args[4]::Expr, indent, quote_level)
         end
@@ -2371,7 +2401,9 @@ end
 function print_type_stacktrace(io, type; color=:normal)
     str = sprint(show, type, context=io)
     i = findfirst('{', str)
-    if i === nothing || !get(io, :backtrace, false)::Bool
+    if !get(io, :backtrace, false)::Bool
+        print(io, str)
+    elseif i === nothing
         printstyled(io, str; color=color)
     else
         printstyled(io, str[1:prevind(str,i)]; color=color)
diff --git a/base/some.jl b/base/some.jl
index 58280fe727352..8be58739a4df4 100644
--- a/base/some.jl
+++ b/base/some.jl
@@ -138,8 +138,8 @@ true
 macro something(args...)
     expr = :(nothing)
     for arg in reverse(args)
-        expr = :((val = $arg) !== nothing ? val : $expr)
+        expr = :(val = $(esc(arg)); val !== nothing ? val : ($expr))
     end
-    return esc(:(something(let val; $expr; end)))
+    something = GlobalRef(Base, :something)
+    return :($something($expr))
 end
-
diff --git a/base/sort.jl b/base/sort.jl
index afde46232cec1..bfa3e1d0dc0e2 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -332,7 +332,7 @@ julia> searchsorted([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
     searchsortedfirst(a, x; by=<transform>, lt=<comparison>, rev=false)
 
 Return the index of the first value in `a` greater than or equal to `x`, according to the
-specified order. Return `length(a) + 1` if `x` is greater than all values in `a`.
+specified order. Return `lastindex(a) + 1` if `x` is greater than all values in `a`.
 `a` is assumed to be sorted.
 
 See also: [`searchsortedlast`](@ref), [`searchsorted`](@ref), [`findfirst`](@ref).
@@ -360,8 +360,8 @@ julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
     searchsortedlast(a, x; by=<transform>, lt=<comparison>, rev=false)
 
 Return the index of the last value in `a` less than or equal to `x`, according to the
-specified order. Return `0` if `x` is less than all values in `a`. `a` is assumed to
-be sorted.
+specified order. Return `firstindex(a) - 1` if `x` is less than all values in `a`. `a` is
+assumed to be sorted.
 
 # Examples
 ```jldoctest
@@ -727,14 +727,14 @@ end
 function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse)
     offs = 1 - minval
 
-    where = fill(0, rangelen)
+    counts = fill(0, rangelen)
     @inbounds for i = eachindex(x)
-        where[x[i] + offs] += 1
+        counts[x[i] + offs] += 1
     end
 
     idx = firstindex(x)
     @inbounds for i = maybereverse(1:rangelen)
-        lastidx = idx + where[i] - 1
+        lastidx = idx + counts[i] - 1
         val = i-offs
         for j = idx:lastidx
             x[j] = val
@@ -975,22 +975,22 @@ function sortperm_int_range(x::Vector{<:Integer}, rangelen, minval)
     offs = 1 - minval
     n = length(x)
 
-    where = fill(0, rangelen+1)
-    where[1] = 1
+    counts = fill(0, rangelen+1)
+    counts[1] = 1
     @inbounds for i = 1:n
-        where[x[i] + offs + 1] += 1
+        counts[x[i] + offs + 1] += 1
     end
 
-    #cumsum!(where, where)
-    @inbounds for i = 2:length(where)
-        where[i] += where[i-1]
+    #cumsum!(counts, counts)
+    @inbounds for i = 2:length(counts)
+        counts[i] += counts[i-1]
     end
 
     P = Vector{Int}(undef, n)
     @inbounds for i = 1:n
         label = x[i] + offs
-        P[where[label]] = i
-        where[label] += 1
+        P[counts[label]] = i
+        counts[label] += 1
     end
 
     return P
@@ -1098,7 +1098,7 @@ function sort!(A::AbstractArray;
 
     1 <= k <= nd || throw(ArgumentError("dimension out of range"))
 
-    remdims = ntuple(i -> i == k ? 1 : size(A, i), nd)
+    remdims = ntuple(i -> i == k ? 1 : axes(A, i), nd)
     for idx in CartesianIndices(remdims)
         Av = view(A, ntuple(i -> i == k ? Colon() : idx[i], nd)...)
         sort!(Av, alg, ordr)
@@ -1118,6 +1118,12 @@ import ..Sort: sort!
 import ...Order: lt, DirectOrdering
 
 const Floats = Union{Float32,Float64}
+const FPSortable = Union{ # Mixed Float32 and Float64 are not allowed.
+    AbstractVector{Union{Float32, Missing}},
+    AbstractVector{Union{Float64, Missing}},
+    AbstractVector{Float32},
+    AbstractVector{Float64},
+    AbstractVector{Missing}}
 
 struct Left <: Ordering end
 struct Right <: Ordering end
@@ -1229,10 +1235,10 @@ end
 fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) =
     sort!(v, first(axes(v,1)), last(axes(v,1)), a, o)
 
-sort!(v::AbstractVector{<:Union{Floats, Missing}}, a::Algorithm, o::DirectOrdering) =
-    fpsort!(v,a,o)
-sort!(v::Vector{Int}, a::Algorithm, o::Perm{<:DirectOrdering,<:Vector{<:Union{Floats, Missing}}}) =
-    fpsort!(v,a,o)
+sort!(v::FPSortable, a::Algorithm, o::DirectOrdering) =
+    fpsort!(v, a, o)
+sort!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}) =
+    fpsort!(v, a, o)
 
 end # module Sort.Float
 
diff --git a/base/special/exp.jl b/base/special/exp.jl
index fd88eee3fc638..c2bbb47902360 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int.
 # This works because eps(MAGIC_ROUND_CONST(T)) == one(T), so adding it to a smaller number aligns the lsb to the 1s place.
 # Values for which this trick doesn't work are going to have outputs of 0 or Inf.
@@ -225,8 +227,35 @@ end
     twopk = Int64(k) << 52
     return reinterpret(T, twopk + reinterpret(Int64, small_part))
 end
+# Computes base^(x+xlo). Used for pow.
+@inline function exp_impl(x::Float64, xlo::Float64, base)
+    T = Float64
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU, jL = table_unpack(N&255 + 1)
+    very_small = muladd(jU, expm1b_kernel(base, r), jL)
+    small_part =  muladd(jU,xlo,very_small) + jU
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x >= MAX_EXP(base, T) && return Inf
+        x <= MIN_EXP(base, T) && return 0.0
+        if k <= -53
+            # The UInt64 forces promotion. (Only matters for 32 bit systems.)
+            twopk = (k + UInt64(53)) << 52
+            return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
+        end
+        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+    end
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
 @inline function exp_impl_fast(x::Float64, base)
     T = Float64
+    x >= MAX_EXP(base, T) && return Inf
+    x <= -SUBNORM_EXP(base, T) && return 0.0
     N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
     N = reinterpret(UInt64, N_float) % Int32
     N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
@@ -261,6 +290,8 @@ end
 
 @inline function exp_impl_fast(x::Float32, base)
     T = Float32
+    x >= MAX_EXP(base, T) && return Inf32
+    x <= -SUBNORM_EXP(base, T) && return 0f0
     N_float = round(x*LogBINV(base, T))
     N = unsafe_trunc(Int32, N_float)
     r = muladd(N_float, LogBU(base, T), x)
@@ -277,9 +308,9 @@ end
     N = unsafe_trunc(Int32, N_float)
     r = muladd(N_float, LogB(base, Float16), x)
     small_part = expb_kernel(base, r)
-    if !(abs(x) <= SUBNORM_EXP(base, T))
-        x > MAX_EXP(base, T) && return Inf16
-        N<=Int32(-24) && return zero(Float16)
+    if !(abs(x) <= 25)
+        x > 16 && return Inf16
+        x < 25 && return zero(Float16)
     end
     twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
     return Float16(twopk*small_part)
@@ -306,7 +337,7 @@ See also [`exp2`](@ref), [`exp10`](@ref) and [`cis`](@ref).
 julia> exp(1.0)
 2.718281828459045
 
-julia> exp(im * pi) == cis(pi)
+julia> exp(im * pi) ≈ cis(pi)
 true
 ```
 """ exp(x::Real)
diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl
index d84cadcb2b6f2..74f750064c7c2 100644
--- a/base/special/hyperbolic.jl
+++ b/base/special/hyperbolic.jl
@@ -32,8 +32,7 @@ SINH_SMALL_X(::Type{Float32}) = 3.0f0
 
 # For Float64, use DoubleFloat scheme for extra accuracy
 function sinh_kernel(x::Float64)
-    x2 = x*x
-    x2lo = fma(x,x,-x2)
+    x2, x2lo = two_mul(x,x)
     hi_order = evalpoly(x2, (8.333333333336817e-3, 1.9841269840165435e-4,
                              2.7557319381151335e-6, 2.5052096530035283e-8,
                              1.6059550718903307e-10, 7.634842144412119e-13,
diff --git a/base/special/log.jl b/base/special/log.jl
index 95bc9e32b5719..bca0d7143db48 100644
--- a/base/special/log.jl
+++ b/base/special/log.jl
@@ -139,10 +139,6 @@ const t_log_Float32 = (0.0,0.007782140442054949,0.015504186535965254,0.023167059
     0.6773988235918061,0.6813592248079031,0.6853040030989194,0.689233281238809,
     0.6931471805599453)
 
-# determine if hardware FMA is available
-# should probably check with LLVM, see #9855.
-const FMA_NATIVE = muladd(nextfloat(1.0),nextfloat(1.0),-nextfloat(1.0,2)) != 0
-
 # truncate lower order bits (up to 26)
 # ideally, this should be able to use ANDPD instructions, see #9868.
 @inline function truncbits(x::Float64)
@@ -209,18 +205,10 @@ end
     #   2(f-u1-u2) - f*(u1+u2) = 0
     #   2(f-u1) - f*u1 = (2+f)u2
     #   u2 = (2(f-u1) - f*u1)/(2+f)
-    if FMA_NATIVE
-        return u + fma(fma(-u,f,2(f-u)), g, q)
-    else
-        u1 = truncbits(u) # round to 24 bits
-        f1 = truncbits(f)
-        f2 = f-f1
-        u2 = ((2.0*(f-u1)-u1*f1)-u1*f2)*g
-        ## Step 4
-        m_hi = logbU(Float64, base)
-        m_lo = logbL(Float64, base)
-        return fma(m_hi, u1, fma(m_hi, (u2 + q), m_lo*u1))
-    end
+
+    m_hi = logbU(Float64, base)
+    m_lo = logbL(Float64, base)
+    return fma(m_hi, u, fma(m_lo, u, m_hi*fma(fma(-u,f,2(f-u)), g, q)))
 end
 
 
@@ -409,3 +397,51 @@ function log1p(x::Float32)
     end
 end
 
+
+@inline function log_ext_kernel(x_hi::Float64, x_lo::Float64)
+    c1hi = 0.666666666666666629659233
+    hi_order =  evalpoly(x_hi, (0.400000000000000077715612, 0.285714285714249172087875,
+                                0.222222222230083560345903, 0.181818180850050775676507,
+                                0.153846227114512262845736, 0.13332981086846273921509,
+                                0.117754809412463995466069, 0.103239680901072952701192,
+                                0.116255524079935043668677))
+    res_hi, res_lo = two_mul(hi_order, x_hi)
+    res_lo = fma(x_lo, hi_order, res_lo)
+    ans_hi = c1hi + res_hi
+    ans_lo = ((c1hi - ans_hi) + res_hi) + (res_lo + 3.80554962542412056336616e-17)
+    return ans_hi, ans_lo
+end
+
+# Log implementation that returns 2 numbers whose sum gives the true value with about 68 bits of precision.
+# Implementation adapted from SLEEFPirates.jl.
+# Does not normalize results.
+# Must be called with positive finite arguments.
+function _log_ext(d::Float64)
+    m, e = significand(d), exponent(d)
+    if m > 1.5
+        m *= 0.5
+        e += 1.0
+    end
+    # x = (m-1)/(m+1)
+    mp1hi = m + 1.0
+    mp1lo = m + (1.0 - mp1hi)
+    invy = inv(mp1hi)
+    xhi = (m - 1.0) * invy
+    xlo = fma(-xhi, mp1lo, fma(-xhi, mp1hi, m - 1.0)) * invy
+    x2hi, x2lo = two_mul(xhi, xhi)
+    x2lo = muladd(xhi, xlo * 2.0, x2lo)
+    thi, tlo  = log_ext_kernel(x2hi, x2lo)
+
+    shi = 0.6931471805582987 * e
+    xhi2 = xhi * 2.0
+    shinew = muladd(xhi, 2.0, shi)
+    slo = muladd(1.6465949582897082e-12, e, muladd(xlo, 2.0, (((shi - shinew) + xhi2))))
+    shi = shinew
+    x3hi, x3lo = two_mul(x2hi, xhi)
+    x3lo = muladd(x2hi, xlo, muladd(xhi, x2lo,x3lo))
+    x3thi, x3tlo = two_mul(x3hi, thi)
+    x3tlo = muladd(x3hi, tlo, muladd(x3lo, thi, x3tlo))
+    anshi = x3thi + shi
+    anslo = slo + x3tlo - ((anshi - shi) - x3thi)
+    return anshi, anslo
+end
diff --git a/base/special/trig.jl b/base/special/trig.jl
index f735ea43f2425..e3033aab6c272 100644
--- a/base/special/trig.jl
+++ b/base/special/trig.jl
@@ -1268,7 +1268,11 @@ for (fd, f, fn) in ((:sind, :sin, "sine"), (:cosd, :cos, "cosine"), (:tand, :tan
                 $($name)(x)
 
             Compute $($fn) of `x`, where `x` is in $($un).
-            If `x` is a matrix, `x` needs to be a square matrix. """ ($fd)(x) = ($f)(($fu).(x))
+            If `x` is a matrix, `x` needs to be a square matrix.
+
+            !!! compat "Julia 1.7"
+                Matrix arguments require Julia 1.7 or later.
+            """ ($fd)(x) = ($f)(($fu).(x))
         end
     end
 end
@@ -1283,7 +1287,11 @@ for (fd, f, fn) in ((:asind, :asin, "sine"), (:acosd, :acos, "cosine"),
                 $($name)(x)
 
             Compute the inverse $($fn) of `x`, where the output is in $($un).
-            If `x` is a matrix, `x` needs to be a square matrix. """ ($fd)(x) = ($fu).(($f)(x))
+            If `x` is a matrix, `x` needs to be a square matrix.
+
+            !!! compat "Julia 1.7"
+                Matrix arguments require Julia 1.7 or later.
+            """ ($fd)(x) = ($fu).(($f)(x))
         end
     end
 end
@@ -1293,6 +1301,9 @@ end
     atand(y,x)
 
 Compute the inverse tangent of `y` or `y/x`, respectively, where the output is in degrees.
+
+!!! compat "Julia 1.7"
+    The one-argument method supports square matrix arguments as of Julia 1.7.
 """
 atand(y)    = rad2deg.(atan(y))
 atand(y, x) = rad2deg.(atan(y,x))
diff --git a/base/stacktraces.jl b/base/stacktraces.jl
index 99ee5c57db89f..8483aec55cbff 100644
--- a/base/stacktraces.jl
+++ b/base/stacktraces.jl
@@ -133,7 +133,7 @@ function lookup(ip::Union{Base.InterpreterIP,Core.Compiler.InterpreterIP})
     else
         func = top_level_scope_sym
         file = empty_sym
-        line = 0
+        line = Int32(0)
     end
     i = max(ip.stmt+1, 1)  # ip.stmt is 0-indexed
     if i > length(codeinfo.codelocs) || codeinfo.codelocs[i] == 0
diff --git a/base/stream.jl b/base/stream.jl
index 6cbd1d3b86a28..cee4894b28c3c 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -109,7 +109,7 @@ function eof(s::LibuvStream)
     # and that we won't return true if there's a readerror pending (it'll instead get thrown).
     # This requires some careful ordering here (TODO: atomic loads)
     bytesavailable(s) > 0 && return false
-    open = isopen(s) # must precede readerror check
+    open = isreadable(s) # must precede readerror check
     s.readerror === nothing || throw(s.readerror)
     return !open
 end
@@ -270,6 +270,7 @@ show(io::IO, stream::LibuvStream) = print(io, typeof(stream), "(",
 function isreadable(io::LibuvStream)
     bytesavailable(io) > 0 && return true
     isopen(io) || return false
+    io.status == StatusEOF && return false
     return ccall(:uv_is_readable, Cint, (Ptr{Cvoid},), io.handle) != 0
 end
 
@@ -282,6 +283,7 @@ end
 lock(s::LibuvStream) = lock(s.lock)
 unlock(s::LibuvStream) = unlock(s.lock)
 
+setup_stdio(stream::LibuvStream, ::Bool) = (stream, false)
 rawhandle(stream::LibuvStream) = stream.handle
 unsafe_convert(::Type{Ptr{Cvoid}}, s::Union{LibuvStream, LibuvServer}) = s.handle
 
@@ -378,7 +380,7 @@ function isopen(x::Union{LibuvStream, LibuvServer})
     if x.status == StatusUninit || x.status == StatusInit
         throw(ArgumentError("$x is not initialized"))
     end
-    return x.status != StatusClosed && x.status != StatusEOF
+    return x.status != StatusClosed
 end
 
 function check_open(x::Union{LibuvStream, LibuvServer})
@@ -390,13 +392,13 @@ end
 function wait_readnb(x::LibuvStream, nb::Int)
     # fast path before iolock acquire
     bytesavailable(x.buffer) >= nb && return
-    open = isopen(x) # must precede readerror check
+    open = isopen(x) && x.status != StatusEOF # must precede readerror check
     x.readerror === nothing || throw(x.readerror)
     open || return
     iolock_begin()
     # repeat fast path after iolock acquire, before other expensive work
     bytesavailable(x.buffer) >= nb && (iolock_end(); return)
-    open = isopen(x)
+    open = isopen(x) && x.status != StatusEOF
     x.readerror === nothing || throw(x.readerror)
     open || (iolock_end(); return)
     # now do the "real" work
@@ -407,6 +409,7 @@ function wait_readnb(x::LibuvStream, nb::Int)
         while bytesavailable(x.buffer) < nb
             x.readerror === nothing || throw(x.readerror)
             isopen(x) || break
+            x.status != StatusEOF || break
             x.throttle = max(nb, x.throttle)
             start_reading(x) # ensure we are reading
             iolock_end()
@@ -431,6 +434,52 @@ function wait_readnb(x::LibuvStream, nb::Int)
     nothing
 end
 
+function closewrite(s::LibuvStream)
+    iolock_begin()
+    check_open(s)
+    req = Libc.malloc(_sizeof_uv_shutdown)
+    uv_req_set_data(req, C_NULL) # in case we get interrupted before arriving at the wait call
+    err = ccall(:uv_shutdown, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}),
+                req, s, @cfunction(uv_shutdowncb_task, Cvoid, (Ptr{Cvoid}, Cint)))
+    if err < 0
+        Libc.free(req)
+        uv_error("shutdown", err)
+    end
+    ct = current_task()
+    preserve_handle(ct)
+    sigatomic_begin()
+    uv_req_set_data(req, ct)
+    iolock_end()
+    status = try
+        sigatomic_end()
+        wait()::Cint
+    finally
+        # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
+        sigatomic_end()
+        iolock_begin()
+        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        if uv_req_data(req) != C_NULL
+            # req is still alive,
+            # so make sure we won't get spurious notifications later
+            uv_req_set_data(req, C_NULL)
+        else
+            # done with req
+            Libc.free(req)
+        end
+        iolock_end()
+        unpreserve_handle(ct)
+    end
+    if isopen(s)
+        if status < 0 || ccall(:uv_is_readable, Cint, (Ptr{Cvoid},), s.handle) == 0
+            close(s)
+        end
+    end
+    if status < 0
+        throw(_UVError("shutdown", status))
+    end
+    nothing
+end
+
 function wait_close(x::Union{LibuvStream, LibuvServer})
     preserve_handle(x)
     lock(x.cond)
@@ -451,7 +500,7 @@ function close(stream::Union{LibuvStream, LibuvServer})
     if stream.status == StatusInit
         ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
         stream.status = StatusClosing
-    elseif isopen(stream) || stream.status == StatusEOF
+    elseif isopen(stream)
         should_wait = uv_handle_data(stream) != C_NULL
         if stream.status != StatusClosing
             ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
@@ -503,7 +552,7 @@ julia> withenv("LINES" => 30, "COLUMNS" => 100) do
 
 To get your TTY size,
 
-```julia
+```julia-repl
 julia> displaysize(stdout)
 (34, 147)
 ```
@@ -606,35 +655,33 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid})
     nrequested = ccall(:jl_uv_buf_len, Csize_t, (Ptr{Cvoid},), buf)
     function readcb_specialized(stream::LibuvStream, nread::Int, nrequested::UInt)
         lock(stream.cond)
-        try
-            if nread < 0
-                if nread == UV_ENOBUFS && nrequested == 0
-                    # remind the client that stream.buffer is full
-                    notify(stream.cond)
-                elseif nread == UV_EOF
-                    if isa(stream, TTY)
-                        stream.status = StatusEOF # libuv called uv_stop_reading already
+        if nread < 0
+            if nread == UV_ENOBUFS && nrequested == 0
+                # remind the client that stream.buffer is full
+                notify(stream.cond)
+            elseif nread == UV_EOF # libuv called uv_stop_reading already
+                if stream.status != StatusClosing
+                    stream.status = StatusEOF
+                    if stream isa TTY # TODO: || ccall(:uv_is_writable, Cint, (Ptr{Cvoid},), stream.handle) != 0
+                        # stream can still be used either by reseteof # TODO: or write
                         notify(stream.cond)
-                    elseif stream.status != StatusClosing
-                        # begin shutdown of the stream
+                    else
+                        # underlying stream is no longer useful: begin finalization
                         ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
                         stream.status = StatusClosing
                     end
-                else
-                    stream.readerror = _UVError("read", nread)
-                    # This is a fatal connection error. Shutdown requests as per the usual
-                    # close function won't work and libuv will fail with an assertion failure
-                    ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), stream)
-                    stream.status = StatusClosing
-                    notify(stream.cond)
                 end
             else
-                notify_filled(stream.buffer, nread)
-                notify(stream.cond)
+                stream.readerror = _UVError("read", nread)
+                # This is a fatal connection error
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
+                stream.status = StatusClosing
             end
-        finally
-            unlock(stream.cond)
+        else
+            notify_filled(stream.buffer, nread)
+            notify(stream.cond)
         end
+        unlock(stream.cond)
 
         # Stop background reading when
         # 1) there's nobody paying attention to the data we are reading
@@ -651,6 +698,7 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid})
         nothing
     end
     readcb_specialized(stream_unknown_type, Int(nread), UInt(nrequested))
+    nothing
 end
 
 function reseteof(x::TTY)
@@ -844,6 +892,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
         while bytesavailable(buf) < nb
             s.readerror === nothing || throw(s.readerror)
             isopen(s) || break
+            s.status != StatusEOF || break
             iolock_end()
             wait_readnb(s, nb)
             iolock_begin()
@@ -890,6 +939,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt)
         while bytesavailable(buf) < nb
             s.readerror === nothing || throw(s.readerror)
             isopen(s) || throw(EOFError())
+            s.status != StatusEOF || throw(EOFError())
             iolock_end()
             wait_readnb(s, nb)
             iolock_begin()
@@ -946,13 +996,14 @@ function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false)
     @assert buf.seekable == false
     if !occursin(c, buf) # fast path checks first
         x.readerror === nothing || throw(x.readerror)
-        if isopen(x)
+        if isopen(x) && x.status != StatusEOF
             preserve_handle(x)
             lock(x.cond)
             try
                 while !occursin(c, x.buffer)
                     x.readerror === nothing || throw(x.readerror)
                     isopen(x) || break
+                    x.status != StatusEOF || break
                     start_reading(x) # ensure we are reading
                     iolock_end()
                     wait(x.cond)
@@ -1115,6 +1166,20 @@ function uv_writecb_task(req::Ptr{Cvoid}, status::Cint)
     nothing
 end
 
+function uv_shutdowncb_task(req::Ptr{Cvoid}, status::Cint)
+    owner = uv_req_data(req)
+    if owner == C_NULL
+        # nobody is waiting on this request any more, so release it here
+        Libc.free(req)
+    else
+        uv_req_set_data(req, C_NULL) # signals the Task that the shutdown callback ran
+        waiter = unsafe_pointer_to_objref(owner)::Task
+        schedule(waiter, status)
+    end
+    nothing
+end
+
+
 _fd(x::IOStream) = RawFD(fd(x))
 _fd(x::Union{OS_HANDLE, RawFD}) = x
 
@@ -1263,7 +1328,7 @@ Possible values for each stream are:
 * `io` an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
 
 # Examples
-```julia
+```julia-repl
 julia> redirect_stdio(stdout="stdout.txt", stderr="stderr.txt") do
            print("hello stdout")
            print(stderr, "hello stderr")
@@ -1279,14 +1344,14 @@ julia> read("stderr.txt", String)
 # Edge cases
 
 It is possible to pass the same argument to `stdout` and `stderr`:
-```julia
+```julia-repl
 julia> redirect_stdio(stdout="log.txt", stderr="log.txt", stdin=devnull) do
     ...
 end
 ```
 
 However it is not supported to pass two distinct descriptors of the same file.
-```julia
+```julia-repl
 julia> io1 = open("same/path", "w")
 
 julia> io2 = open("same/path", "w")
@@ -1294,7 +1359,7 @@ julia> io2 = open("same/path", "w")
 julia> redirect_stdio(f, stdout=io1, stderr=io2) # not suppored
 ```
 Also the `stdin` argument may not be the same descriptor as `stdout` or `stderr`.
-```julia
+```julia-repl
 julia> io = open(...)
 
 julia> redirect_stdio(f, stdout=io, stdin=io) # not supported
@@ -1405,23 +1470,26 @@ mutable struct BufferStream <: LibuvStream
     buffer::IOBuffer
     cond::Threads.Condition
     readerror::Any
-    is_open::Bool
     buffer_writes::Bool
     lock::ReentrantLock # advisory lock
+    status::Int
 
-    BufferStream() = new(PipeBuffer(), Threads.Condition(), nothing, true, false, ReentrantLock())
+    BufferStream() = new(PipeBuffer(), Threads.Condition(), nothing, false, ReentrantLock(), StatusActive)
 end
 
-isopen(s::BufferStream) = s.is_open
+isopen(s::BufferStream) = s.status != StatusClosed
+
+closewrite(s::BufferStream) = close(s)
 
 function close(s::BufferStream)
     lock(s.cond) do
-        s.is_open = false
+        s.status = StatusClosed
         notify(s.cond)
         nothing
     end
 end
 uvfinalize(s::BufferStream) = nothing
+setup_stdio(stream::BufferStream, child_readable::Bool) = invoke(setup_stdio, Tuple{IO, Bool}, stream, child_readable)
 
 function read(s::BufferStream, ::Type{UInt8})
     nread = lock(s.cond) do
@@ -1439,8 +1507,8 @@ function unsafe_read(s::BufferStream, a::Ptr{UInt8}, nb::UInt)
 end
 bytesavailable(s::BufferStream) = bytesavailable(s.buffer)
 
-isreadable(s::BufferStream) = s.buffer.readable
-iswritable(s::BufferStream) = s.buffer.writable
+isreadable(s::BufferStream) = (isopen(s) || bytesavailable(s) > 0) && s.buffer.readable
+iswritable(s::BufferStream) = isopen(s) && s.buffer.writable
 
 function wait_readnb(s::BufferStream, nb::Int)
     lock(s.cond) do
@@ -1450,7 +1518,7 @@ function wait_readnb(s::BufferStream, nb::Int)
     end
 end
 
-show(io::IO, s::BufferStream) = print(io, "BufferStream() bytes waiting:", bytesavailable(s.buffer), ", isopen:", s.is_open)
+show(io::IO, s::BufferStream) = print(io, "BufferStream(bytes waiting=", bytesavailable(s.buffer), ", isopen=", isopen(s), ")")
 
 function readuntil(s::BufferStream, c::UInt8; keep::Bool=false)
     bytes = lock(s.cond) do
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 52c8f6591874f..515b836311698 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -596,6 +596,15 @@ true
 julia> isascii("αβγ")
 false
 ```
+For example, `isascii` can be used as a predicate function for [`filter`](@ref) or [`replace`](@ref)
+to remove or replace non-ASCII characters, respectively:
+```jldoctest
+julia> filter(isascii, "abcdeγfgh") # discard non-ASCII chars
+"abcdefgh"
+
+julia> replace("abcdeγfgh", !isascii=>' ') # replace non-ASCII chars with spaces
+"abcde fgh"
+```
 """
 isascii(c::Char) = bswap(reinterpret(UInt32, c)) < 0x80
 isascii(s::AbstractString) = all(isascii, s)
diff --git a/base/strings/io.jl b/base/strings/io.jl
index d54ec3aa43ccb..fffe7904ebf92 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -15,7 +15,7 @@ avoid Julia-specific details.
 For example, `show` displays strings with quotes, and `print` displays strings
 without quotes.
 
-[`string`](@ref) returns the output of `print` as a string.
+See also [`println`](@ref), [`string`](@ref), [`printstyled`](@ref).
 
 # Examples
 ```jldoctest
@@ -54,8 +54,10 @@ end
 """
     println([io::IO], xs...)
 
-Print (using [`print`](@ref)) `xs` followed by a newline.
-If `io` is not supplied, prints to [`stdout`](@ref).
+Print (using [`print`](@ref)) `xs` to `io` followed by a newline.
+If `io` is not supplied, prints to the default output stream [`stdout`](@ref).
+
+See also [`printstyled`](@ref) to add colors etc.
 
 # Examples
 ```jldoctest
diff --git a/base/strings/search.jl b/base/strings/search.jl
index 0a08ef7f60c90..938ed8d527d99 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -641,4 +641,4 @@ The returned function is of type `Base.Fix2{typeof(occursin)}`.
 """
 occursin(haystack) = Base.Fix2(occursin, haystack)
 
-in(::AbstractString, ::AbstractString) = error("use occursin(x, y) for string containment")
+in(::AbstractString, ::AbstractString) = error("use occursin(needle, haystack) for string containment")
diff --git a/base/strings/string.jl b/base/strings/string.jl
index e3a9cd56df201..c818e2e1844fb 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -252,7 +252,7 @@ function getindex_continued(s::String, i::Int, u::UInt32)
     return reinterpret(Char, u)
 end
 
-getindex(s::String, r::UnitRange{<:Integer}) = s[Int(first(r)):Int(last(r))]
+getindex(s::String, r::AbstractUnitRange{<:Integer}) = s[Int(first(r)):Int(last(r))]
 
 @inline function getindex(s::String, r::UnitRange{Int})
     isempty(r) && return ""
diff --git a/base/strings/substring.jl b/base/strings/substring.jl
index 3e99cc7477446..7cc4c53a6b661 100644
--- a/base/strings/substring.jl
+++ b/base/strings/substring.jl
@@ -205,7 +205,13 @@ end
     return n
 end
 
-function string(a::Union{Char, String, SubString{String}}...)
+@inline function __unsafe_string!(out, s::Symbol, offs::Integer) # copy the bytes of `s` into `out` at offset `offs`
+    n = sizeof(s) # number of bytes to copy
+    GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n) # keep both objects rooted while raw pointers are in use
+    return n # number of bytes copied
+end
+
+function string(a::Union{Char, String, SubString{String}, Symbol}...)
     n = 0
     for v in a
         if v isa Char
@@ -252,4 +258,4 @@ function filter(f, s::Union{String, SubString{String}})
     return String(out)
 end
 
-getindex(s::AbstractString, r::UnitRange{<:Integer}) = SubString(s, r)
+getindex(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, r)
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index cf215849ab08c..e687d94365c4a 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -145,20 +145,43 @@ const UTF8PROC_STRIPMARK = (1<<13)
 
 utf8proc_error(result) = error(unsafe_string(ccall(:utf8proc_errmsg, Cstring, (Cssize_t,), result)))
 
-function utf8proc_map(str::Union{String,SubString{String}}, options::Integer)
-    nwords = ccall(:utf8proc_decompose, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint),
-                   str, sizeof(str), C_NULL, 0, options)
-    nwords < 0 && utf8proc_error(nwords)
+# static wrapper handed to C as the callback: applies the user's `callback` to a single codepoint
+utf8proc_custom_func(codepoint::UInt32, callback::Any) =
+    UInt32(callback(codepoint))::UInt32 # the callback's result is converted to UInt32
+
+function utf8proc_decompose(str, options, buffer, nwords, chartransform::typeof(identity)) # `identity`: no custom mapping, call the plain C routine
+    ret = ccall(:utf8proc_decompose, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint),
+                str, sizeof(str), buffer, nwords, options)
+    ret < 0 && utf8proc_error(ret) # a negative return value is reported through `utf8proc_error`
+    return ret
+end
+function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T # any other transform goes through the `_custom` C routine
+    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}),
+                str, sizeof(str), buffer, nwords, options,
+                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform) # wrapper pointer + the transform as its data argument
+    ret < 0 && utf8proc_error(ret) # a negative return value is reported through `utf8proc_error`
+    return ret
+end
+
+function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity)
+    nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
     buffer = Base.StringVector(nwords*4)
-    nwords = ccall(:utf8proc_decompose, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint),
-                   str, sizeof(str), buffer, nwords, options)
-    nwords < 0 && utf8proc_error(nwords)
+    nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
     nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options)
     nbytes < 0 && utf8proc_error(nbytes)
     return String(resize!(buffer, nbytes))
 end
 
-utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags)
+# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib
+const _julia_charmap = Dict{UInt32,UInt32}(
+    0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon
+    0x00B5 => 0x03BC, # micro sign -> greek small letter mu
+    0x00B7 => 0x22C5, # middle dot -> dot operator
+    0x0387 => 0x22C5, # greek ano teleia -> dot operator
+    0x2212 => 0x002D, # minus sign -> hyphen-minus
+)
+
+utf8proc_map(s::AbstractString, flags::Integer, chartransform=identity) = utf8proc_map(String(s), flags, chartransform)
 
 # Documented in Unicode module
 function normalize(
@@ -176,6 +199,7 @@ function normalize(
     casefold::Bool=false,
     lump::Bool=false,
     stripmark::Bool=false,
+    chartransform=identity,
 )
     flags = 0
     stable && (flags = flags | UTF8PROC_STABLE)
@@ -198,7 +222,7 @@ function normalize(
     casefold && (flags = flags | UTF8PROC_CASEFOLD)
     lump && (flags = flags | UTF8PROC_LUMP)
     stripmark && (flags = flags | UTF8PROC_STRIPMARK)
-    utf8proc_map(s, flags)
+    utf8proc_map(s, flags, chartransform)
 end
 
 function normalize(s::AbstractString, nf::Symbol)
@@ -681,7 +705,7 @@ function iterate(g::GraphemeIterator, i_=(Int32(0),firstindex(g.s)))
     y === nothing && return nothing
     c0, k = y
     while k <= ncodeunits(s) # loop until next grapheme is s[i:j]
-        c, ℓ = iterate(s, k)
+        c, ℓ = iterate(s, k)::NTuple{2,Any}
         isgraphemebreak!(state, c0, c) && break
         j = k
         k = ℓ
diff --git a/base/strings/util.jl b/base/strings/util.jl
index c6dad5f34bafb..d3e83837653aa 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -19,8 +19,13 @@ true
 ```
 """
 function startswith(a::AbstractString, b::AbstractString)
-    a, b = Iterators.Stateful(a), Iterators.Stateful(b)
-    all(splat(==), zip(a, b)) && isempty(b)
+    i, j = iterate(a), iterate(b)
+    while true
+        j === nothing && return true # ran out of prefix: success!
+        i === nothing && return false # ran out of source: failure
+        i[1] == j[1] || return false # mismatch: failure
+        i, j = iterate(a, i[2]), iterate(b, j[2])
+    end
 end
 startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str)::AbstractChar in chars
 
@@ -39,9 +44,14 @@ true
 ```
 """
 function endswith(a::AbstractString, b::AbstractString)
-    a = Iterators.Stateful(Iterators.reverse(a))
-    b = Iterators.Stateful(Iterators.reverse(b))
-    all(splat(==), zip(a, b)) && isempty(b)
+    a, b = Iterators.Reverse(a), Iterators.Reverse(b)
+    i, j = iterate(a), iterate(b)
+    while true
+        j === nothing && return true # ran out of suffix: success!
+        i === nothing && return false # ran out of source: failure
+        i[1] == j[1] || return false # mismatch: failure
+        i, j = iterate(a, i[2]), iterate(b, j[2])
+    end
 end
 endswith(str::AbstractString, chars::Chars) = !isempty(str) && last(str) in chars
 
@@ -51,7 +61,7 @@ function startswith(a::Union{String, SubString{String}},
     if ncodeunits(a) < cub
         false
     elseif _memcmp(a, b, sizeof(b)) == 0
-        nextind(a, cub) == cub + 1
+        nextind(a, cub) == cub + 1 # check that end of `b` doesn't match a partial character in `a`
     else
         false
     end
@@ -64,7 +74,7 @@ function endswith(a::Union{String, SubString{String}},
     if astart < 1
         false
     elseif GC.@preserve(a, _memcmp(pointer(a, astart), b, sizeof(b))) == 0
-        thisind(a, astart) == astart
+        thisind(a, astart) == astart # check that start of `b` doesn't match a partial character in `a`
     else
         false
     end
@@ -195,6 +205,91 @@ end
 # TODO: optimization for the default case based on
 # chop(s::AbstractString) = SubString(s, firstindex(s), prevind(s, lastindex(s)))
 
+"""
+    chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex}) -> SubString
+
+Remove the prefix `prefix` from `s`. If `s` does not start with `prefix`, a string equal to `s` is returned.
+
+See also [`chopsuffix`](@ref).
+
+!!! compat "Julia 1.8"
+    This function is available as of Julia 1.8.
+
+# Examples
+```jldoctest
+julia> chopprefix("Hamburger", "Ham")
+"burger"
+
+julia> chopprefix("Hamburger", "hotdog")
+"Hamburger"
+```
+"""
+function chopprefix(s::AbstractString, prefix::AbstractString)
+    rest = firstindex(s) # index in `s` right after the characters matched so far
+    sc, pc = iterate(s), iterate(prefix)
+    while pc !== nothing # walk both strings in lockstep until the prefix is used up
+        sc === nothing && return SubString(s) # ran out of source: failure
+        sc[1] == pc[1] || return SubString(s) # mismatch: failure
+        rest = sc[2]
+        sc, pc = iterate(s, rest), iterate(prefix, pc[2])
+    end
+    sc === nothing && return SubString(s, 1, 0) # s == prefix: empty result
+    return @inbounds SubString(s, rest) # ran out of prefix: success!
+end
+
+function chopprefix(s::Union{String, SubString{String}},
+                    prefix::Union{String, SubString{String}})
+    # Fast path for UTF-8 strings: the match is decided by `startswith`, and on
+    # success the remainder begins right after the code units of `prefix`.
+    startswith(s, prefix) || return SubString(s)
+    skipped = ncodeunits(prefix)
+    return SubString(s, 1 + skipped)
+end
+
+"""
+    chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex}) -> SubString
+
+Remove the suffix `suffix` from `s`. If `s` does not end with `suffix`, a string equal to `s` is returned.
+
+See also [`chopprefix`](@ref).
+
+!!! compat "Julia 1.8"
+    This function is available as of Julia 1.8.
+
+# Examples
+```jldoctest
+julia> chopsuffix("Hamburger", "er")
+"Hamburg"
+
+julia> chopsuffix("Hamburger", "hotdog")
+"Hamburger"
+```
+"""
+function chopsuffix(s::AbstractString, suffix::AbstractString)
+    rs, rsuf = Iterators.Reverse(s), Iterators.Reverse(suffix)
+    stop = lastindex(s) # last index of `s` that is kept in the result
+    sc, fc = iterate(rs), iterate(rsuf)
+    while fc !== nothing # walk both strings backwards until the suffix is used up
+        sc === nothing && return SubString(s) # ran out of source: failure
+        sc[1] == fc[1] || return SubString(s) # mismatch: failure
+        stop = sc[2]
+        sc, fc = iterate(rs, stop), iterate(rsuf, fc[2])
+    end
+    sc === nothing && return SubString(s, 1, 0) # s == suffix: empty result
+    return @inbounds SubString(s, firstindex(s), stop) # ran out of suffix: success!
+end
+
+function chopsuffix(s::Union{String, SubString{String}},
+                    suffix::Union{String, SubString{String}})
+    # An empty suffix removes nothing; otherwise `endswith` decides whether there is a match.
+    (isempty(suffix) || !endswith(s, suffix)) && return SubString(s)
+    # Code unit index in `s` where the matched suffix begins;
+    # the result ends at the character just before it.
+    suffix_start = ncodeunits(s) - ncodeunits(suffix) + 1
+    return @inbounds SubString(s, firstindex(s), prevind(s, suffix_start))
+end
+
+
 """
     chomp(s::AbstractString) -> SubString
 
@@ -383,6 +478,86 @@ function rpad(
     r == 0 ? string(s, p^q) : string(s, p^q, first(p, r))
 end
 
+"""
+    eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
+    eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
+
+Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
+substrings.  `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
+(i.e. as a string, regular expression or a function), or as a single character or collection
+of characters.
+
+If `dlm` is omitted, it defaults to [`isspace`](@ref).
+
+The iterator returns at most `limit` results; the default `limit=0` implies no maximum.
+If `keepempty` is `false`, empty substrings are skipped (the default when `dlm` is omitted).
+
+See also [`split`](@ref).
+
+!!! compat "Julia 1.8"
+    The `eachsplit` function requires at least Julia 1.8.
+
+# Examples
+```jldoctest
+julia> a = "Ma.rch"
+"Ma.rch"
+
+julia> collect(eachsplit(a, "."))
+2-element Vector{SubString}:
+ "Ma"
+ "rch"
+```
+"""
+function eachsplit end
+
+# Lazy iterator returned by `eachsplit`. Forcing specialization on `splitter` improves performance
+# (roughly 30% decrease in runtime) and prevents a major invalidation risk (1550 MethodInstances)
+struct SplitIterator{S<:AbstractString,F}
+    str::S # the string being split
+    splitter::F # delimiter specification handed to `findnext`
+    limit::Int # maximum number of substrings to yield; 0 means no maximum
+    keepempty::Bool # whether empty substrings are yielded
+end
+
+eltype(::Type{<:SplitIterator}) = SubString # the iterator yields `SubString`s of `str`
+
+IteratorSize(::Type{<:SplitIterator}) = SizeUnknown() # the number of pieces is not known up front
+
+# State is (i, k, n) -- i: the starting index of the substring to be extracted
+# k: the index from which the search for the next delimiter resumes
+# n: the number of substrings returned so far; splitting stops once n == iter.limit - 1 (one slot is left for the rest)
+function iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstindex(iter.str), 0))
+    i - 1 > ncodeunits(iter.str)::Int && return nothing # i == ncodeunits + 2 is the "done" marker set by the final return below
+    r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
+    while r !== nothing && n != iter.limit - 1 && first(r) <= ncodeunits(iter.str)
+        j, k = first(r), nextind(iter.str, last(r))::Int # j: start of the delimiter, k: first index after it
+        k_ = k <= j ? nextind(iter.str, j) : k # k <= j happens for an empty match: step one character ahead so the search advances
+        if i < k
+            substr = @inbounds SubString(iter.str, i, prevind(iter.str, j)::Int)
+            (iter.keepempty || i < j) && return (substr, (k, k_, n + 1)) # next piece starts at k, next search at k_
+            i = k # empty piece and keepempty == false: skip it
+        end
+        k = k_
+        r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
+    end
+    iter.keepempty || i <= ncodeunits(iter.str) || return nothing # no trailing empty piece unless keepempty
+    @inbounds SubString(iter.str, i), (ncodeunits(iter.str) + 2, k, n + 1) # remainder of the string; the state marks the iterator as done
+end
+
+eachsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+    SplitIterator(str, splitter, limit, keepempty) # generic entry point: `splitter` is used as-is by `findnext`
+
+eachsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
+          limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+    eachsplit(str, in(splitter); limit, keepempty) # collection of characters: split on membership
+
+eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+    eachsplit(str, isequal(splitter); limit, keepempty) # single character: split on equality
+
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
+eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) =
+    eachsplit(str, isspace; limit, keepempty) # no delimiter: split on whitespace and drop empty pieces by default
+
 """
     split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
     split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -412,52 +587,16 @@ julia> split(a, ".")
  "rch"
 ```
 """
-function split end
-
 function split(str::T, splitter;
                limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _split(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function split(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
-               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _split(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function split(str::T, splitter::AbstractChar;
-               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _split(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-
-function _split(str::AbstractString, splitter::F, limit::Integer, keepempty::Bool, strs::Vector) where F
-    # Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
-    # and prevents a major invalidation risk (1550 MethodInstances)
-    i = 1 # firstindex(str)
-    n = lastindex(str)::Int
-    r = findfirst(splitter,str)::Union{Nothing,Int,UnitRange{Int}}
-    if r !== nothing
-        j, k = first(r), nextind(str,last(r))::Int
-        while 0 < j <= n && length(strs) != limit-1
-            if i < k
-                if keepempty || i < j
-                    push!(strs, @inbounds SubString(str,i,prevind(str,j)::Int))
-                end
-                i = k
-            end
-            (k <= j) && (k = nextind(str,j)::Int)
-            r = findnext(splitter,str,k)::Union{Nothing,Int,UnitRange{Int}}
-            r === nothing && break
-            j, k = first(r), nextind(str,last(r))::Int
-        end
-    end
-    if keepempty || i <= ncodeunits(str)::Int
-        push!(strs, @inbounds SubString(str,i))
-    end
-    return strs
+    itr = eachsplit(str, splitter; limit, keepempty)
+    collect(T <: SubString ? T : SubString{T}, itr)
 end
 
 # a bit oddball, but standard behavior in Perl, Ruby & Python:
 split(str::AbstractString;
       limit::Integer=0, keepempty::Bool=false) =
-    split(str, isspace; limit=limit, keepempty=keepempty)
+    split(str, isspace; limit, keepempty)
 
 """
     rsplit(s::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -649,9 +788,9 @@ The length of `itr` must be even, and the returned array has half of the length
 See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.
 
 !!! compat "Julia 1.7"
-    Calling hex2bytes with iterables producing UInt8 requires
-    version 1.7. In earlier versions, you can collect the iterable
-    before calling instead.
+    Calling `hex2bytes` with iterators producing `UInt8` values requires
+    Julia 1.7 or later. In earlier versions, you can `collect` the iterator
+    before calling `hex2bytes`.
 
 # Examples
 ```jldoctest
@@ -736,9 +875,9 @@ returning a `String` via `bytes2hex(itr)` or writing the string to an `io` strea
 via `bytes2hex(io, itr)`.  The hexadecimal characters are all lowercase.
 
 !!! compat "Julia 1.7"
-    Calling bytes2hex with iterators producing UInt8 requires
-    version 1.7. In earlier versions, you can collect the iterable
-    before calling instead.
+    Calling `bytes2hex` with arbitrary iterators producing `UInt8` values requires
+    Julia 1.7 or later. In earlier versions, you can `collect` the iterator
+    before calling `bytes2hex`.
 
 # Examples
 ```jldoctest
@@ -788,6 +927,8 @@ end
 Convert a string to `String` type and check that it contains only ASCII data, otherwise
 throwing an `ArgumentError` indicating the position of the first non-ASCII byte.
 
+See also the [`isascii`](@ref) predicate to filter or replace non-ASCII characters.
+
 # Examples
 ```jldoctest
 julia> ascii("abcdeγfgh")
diff --git a/base/subarray.jl b/base/subarray.jl
index 89a4db4d65790..ff2408bb48534 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -17,22 +17,22 @@ struct SubArray{T,N,P,I,L} <: AbstractArray{T,N}
     offset1::Int       # for linear indexing and pointer, only valid when L==true
     stride1::Int       # used only for linear indexing
     function SubArray{T,N,P,I,L}(parent, indices, offset1, stride1) where {T,N,P,I,L}
-        @_inline_meta
+        @inline
         check_parent_index_match(parent, indices)
         new(parent, indices, offset1, stride1)
     end
 end
 # Compute the linear indexability of the indices, and combine it with the linear indexing of the parent
 function SubArray(parent::AbstractArray, indices::Tuple)
-    @_inline_meta
+    @inline
     SubArray(IndexStyle(viewindexing(indices), IndexStyle(parent)), parent, ensure_indexable(indices), index_dimsum(indices...))
 end
 function SubArray(::IndexCartesian, parent::P, indices::I, ::NTuple{N,Any}) where {P,I,N}
-    @_inline_meta
+    @inline
     SubArray{eltype(P), N, P, I, false}(parent, indices, 0, 0)
 end
 function SubArray(::IndexLinear, parent::P, indices::I, ::NTuple{N,Any}) where {P,I,N}
-    @_inline_meta
+    @inline
     # Compute the stride and offset
     stride1 = compute_stride1(parent, indices)
     SubArray{eltype(P), N, P, I, true}(parent, indices, compute_offset1(parent, stride1, indices), stride1)
@@ -46,9 +46,9 @@ check_parent_index_match(parent, ::NTuple{N, Bool}) where {N} =
 # This computes the linear indexing compatibility for a given tuple of indices
 viewindexing(I::Tuple{}) = IndexLinear()
 # Leading scalar indices simply increase the stride
-viewindexing(I::Tuple{ScalarIndex, Vararg{Any}}) = (@_inline_meta; viewindexing(tail(I)))
+viewindexing(I::Tuple{ScalarIndex, Vararg{Any}}) = (@inline; viewindexing(tail(I)))
 # Slices may begin a section which may be followed by any number of Slices
-viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@_inline_meta; viewindexing(tail(I)))
+viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@inline; viewindexing(tail(I)))
 # A UnitRange can follow Slices, but only if all other indices are scalar
 viewindexing(I::Tuple{Slice, AbstractUnitRange, Vararg{ScalarIndex}}) = IndexLinear()
 viewindexing(I::Tuple{Slice, Slice, Vararg{ScalarIndex}}) = IndexLinear() # disambiguate
@@ -60,7 +60,7 @@ viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian()
 viewindexing(I::Tuple{AbstractArray, Vararg{Any}}) = IndexCartesian()
 
 # Simple utilities
-size(V::SubArray) = (@_inline_meta; map(length, axes(V)))
+size(V::SubArray) = (@inline; map(length, axes(V)))
 
 similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims)
 
@@ -172,7 +172,7 @@ julia> view(2:5, 2:3) # returns a range as type is immutable
 ```
 """
 function view(A::AbstractArray, I::Vararg{Any,N}) where {N}
-    @_inline_meta
+    @inline
     J = map(i->unalias(A,i), to_indices(A, I))
     @boundscheck checkbounds(A, J...)
     unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...)
@@ -204,8 +204,14 @@ function view(r1::LinRange, r2::OrdinalRange{<:Integer})
     getindex(r1, r2)
 end
 
+# `getindex(r::AbstractRange, ::Colon)` returns a copy of the range, so `view(r, :)` simply returns that copy as well
+function view(r1::AbstractRange, c::Colon)
+    @_propagate_inbounds_meta # forward the caller's `@inbounds` state to `getindex`
+    getindex(r1, c)
+end
+
 function unsafe_view(A::AbstractArray, I::Vararg{ViewIndex,N}) where {N}
-    @_inline_meta
+    @inline
     SubArray(A, I)
 end
 # When we take the view of a view, it's often possible to "reindex" the parent
@@ -215,16 +221,16 @@ end
 # So we use _maybe_reindex to figure out if there are any arrays of
 # `CartesianIndex`, and if so, we punt and keep two layers of indirection.
 unsafe_view(V::SubArray, I::Vararg{ViewIndex,N}) where {N} =
-    (@_inline_meta; _maybe_reindex(V, I))
-_maybe_reindex(V, I) = (@_inline_meta; _maybe_reindex(V, I, I))
+    (@inline; _maybe_reindex(V, I))
+_maybe_reindex(V, I) = (@inline; _maybe_reindex(V, I, I))
 _maybe_reindex(V, I, ::Tuple{AbstractArray{<:AbstractCartesianIndex}, Vararg{Any}}) =
-    (@_inline_meta; SubArray(V, I))
+    (@inline; SubArray(V, I))
 # But allow arrays of CartesianIndex{1}; they behave just like arrays of Ints
 _maybe_reindex(V, I, A::Tuple{AbstractArray{<:AbstractCartesianIndex{1}}, Vararg{Any}}) =
-    (@_inline_meta; _maybe_reindex(V, I, tail(A)))
-_maybe_reindex(V, I, A::Tuple{Any, Vararg{Any}}) = (@_inline_meta; _maybe_reindex(V, I, tail(A)))
+    (@inline; _maybe_reindex(V, I, tail(A)))
+_maybe_reindex(V, I, A::Tuple{Any, Vararg{Any}}) = (@inline; _maybe_reindex(V, I, tail(A)))
 function _maybe_reindex(V, I, ::Tuple{})
-    @_inline_meta
+    @inline
     @inbounds idxs = to_indices(V.parent, reindex(V.indices, I))
     SubArray(V.parent, idxs)
 end
@@ -271,7 +277,7 @@ end
 # In general, we simply re-index the parent indices by the provided ones
 SlowSubArray{T,N,P,I} = SubArray{T,N,P,I,false}
 function getindex(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, I...)
     @inbounds r = V.parent[reindex(V.indices, I)...]
     r
@@ -280,7 +286,7 @@ end
 # But SubArrays with fast linear indexing pre-compute a stride and offset
 FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true}
 function getindex(V::FastSubArray, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + V.stride1*i]
     r
@@ -290,7 +296,7 @@ end
 FastContiguousSubArray{T,N,P,I<:Union{Tuple{Union{Slice, AbstractUnitRange}, Vararg{Any}},
                                       Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true}
 function getindex(V::FastContiguousSubArray, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + i]
     r
@@ -298,13 +304,13 @@ end
 # For vector views with linear indexing, we disambiguate to favor the stride/offset
 # computation as that'll generally be faster than (or just as fast as) re-indexing into a range.
 function getindex(V::FastSubArray{<:Any, 1}, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + V.stride1*i]
     r
 end
 function getindex(V::FastContiguousSubArray{<:Any, 1}, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + i]
     r
@@ -312,31 +318,31 @@ end
 
 # Indexed assignment follows the same pattern as `getindex` above
 function setindex!(V::SubArray{T,N}, x, I::Vararg{Int,N}) where {T,N}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, I...)
     @inbounds V.parent[reindex(V.indices, I)...] = x
     V
 end
 function setindex!(V::FastSubArray, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + V.stride1*i] = x
     V
 end
 function setindex!(V::FastContiguousSubArray, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + i] = x
     V
 end
 function setindex!(V::FastSubArray{<:Any, 1}, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + V.stride1*i] = x
     V
 end
 function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + i] = x
     V
@@ -358,11 +364,11 @@ substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("strides is
 stride(V::SubArray, d::Integer) = d <= ndims(V) ? strides(V)[d] : strides(V)[end] * size(V)[end]
 
 compute_stride1(parent::AbstractArray, I::NTuple{N,Any}) where {N} =
-    (@_inline_meta; compute_stride1(1, fill_to_length(axes(parent), OneTo(1), Val(N)), I))
+    (@inline; compute_stride1(1, fill_to_length(axes(parent), OneTo(1), Val(N)), I))
 compute_stride1(s, inds, I::Tuple{}) = s
 compute_stride1(s, inds, I::Tuple{Vararg{ScalarIndex}}) = s
 compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) =
-    (@_inline_meta; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
+    (@inline; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
 compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1])
 compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s
 compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))"))
@@ -385,42 +391,42 @@ end
 # The running sum is `f`; the cumulative stride product is `s`.
 # If the parent is a vector, then we offset the parent's own indices with parameters of I
 compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{AbstractRange}) =
-    (@_inline_meta; first(I[1]) - stride1*first(axes1(I[1])))
+    (@inline; first(I[1]) - stride1*first(axes1(I[1])))
 # If the result is one-dimensional and it's a Colon, then linear
 # indexing uses the indices along the given dimension.
 # If the result is one-dimensional and it's a range, then linear
 # indexing might be offset if the index itself is offset
 # Otherwise linear indexing always matches the parent.
 compute_offset1(parent, stride1::Integer, I::Tuple) =
-    (@_inline_meta; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
+    (@inline; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
 compute_offset1(parent, stride1::Integer, dims::Tuple{Int}, inds::Tuple{Slice}, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1*first(axes(parent, dims[1])))  # index-preserving case
+    (@inline; compute_linindex(parent, I) - stride1*first(axes(parent, dims[1])))  # index-preserving case
 compute_offset1(parent, stride1::Integer, dims, inds::Tuple{AbstractRange}, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1*first(axes1(inds[1]))) # potentially index-offsetting case
+    (@inline; compute_linindex(parent, I) - stride1*first(axes1(inds[1]))) # potentially index-offsetting case
 compute_offset1(parent, stride1::Integer, dims, inds, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1)
+    (@inline; compute_linindex(parent, I) - stride1)
 function compute_linindex(parent, I::NTuple{N,Any}) where N
-    @_inline_meta
+    @inline
     IP = fill_to_length(axes(parent), OneTo(1), Val(N))
     compute_linindex(first(LinearIndices(parent)), 1, IP, I)
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}})
-    @_inline_meta
+    @inline
     Δi = I[1]-first(IP[1])
     compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}})
-    @_inline_meta
+    @inline
     Δi = first(I[1])-first(IP[1])
     compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 compute_linindex(f, s, IP::Tuple, I::Tuple{}) = f
 
-find_extended_dims(dim, ::ScalarIndex, I...) = (@_inline_meta; find_extended_dims(dim + 1, I...))
-find_extended_dims(dim, i1, I...) = (@_inline_meta; (dim, find_extended_dims(dim + 1, I...)...))
+find_extended_dims(dim, ::ScalarIndex, I...) = (@inline; find_extended_dims(dim + 1, I...))
+find_extended_dims(dim, i1, I...) = (@inline; (dim, find_extended_dims(dim + 1, I...)...))
 find_extended_dims(dim) = ()
-find_extended_inds(::ScalarIndex, I...) = (@_inline_meta; find_extended_inds(I...))
-find_extended_inds(i1, I...) = (@_inline_meta; (i1, find_extended_inds(I...)...))
+find_extended_inds(::ScalarIndex, I...) = (@inline; find_extended_inds(I...))
+find_extended_inds(i1, I...) = (@inline; (i1, find_extended_inds(I...)...))
 find_extended_inds() = ()
 
 function unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P}
@@ -442,10 +448,10 @@ end
 # indices are taken from the range/vector
 # Since bounds-checking is performance-critical and uses
 # indices, it's worth optimizing these implementations thoroughly
-axes(S::SubArray) = (@_inline_meta; _indices_sub(S.indices...))
-_indices_sub(::Real, I...) = (@_inline_meta; _indices_sub(I...))
+axes(S::SubArray) = (@inline; _indices_sub(S.indices...))
+_indices_sub(::Real, I...) = (@inline; _indices_sub(I...))
 _indices_sub() = ()
 function _indices_sub(i1::AbstractArray, I...)
-    @_inline_meta
+    @inline
     (axes(i1)..., _indices_sub(I...)...)
 end
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 1aa8a7803e062..4baa0e0c941b1 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -140,7 +140,7 @@ function (ss::SummarySize)(obj::Array)
             dsize += length(obj)
         end
         size += dsize
-        if !isempty(obj) && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
+        if !isempty(obj) && T !== Symbol && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
             push!(ss.frontier_x, obj)
             push!(ss.frontier_i, 1)
         end
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index cdcb304271b5d..6df8cdc56d20a 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -499,7 +499,7 @@ function which(program_name::String)
         # If we have been given just a program name (not a relative or absolute
         # path) then we should search `PATH` for it here:
         pathsep = iswindows() ? ';' : ':'
-        path_dirs = abspath.(split(get(ENV, "PATH", ""), pathsep))
+        path_dirs = map(abspath, eachsplit(get(ENV, "PATH", ""), pathsep))
 
         # On windows we always check the current directory as well
         if iswindows()
diff --git a/base/task.jl b/base/task.jl
index 0d4e5da4ccfd4..6f3deaaa0ec18 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -40,6 +40,7 @@ struct CompositeException <: Exception
 end
 length(c::CompositeException) = length(c.exceptions)
 push!(c::CompositeException, ex) = push!(c.exceptions, ex)
+pushfirst!(c::CompositeException, ex) = pushfirst!(c.exceptions, ex)
 isempty(c::CompositeException) = isempty(c.exceptions)
 iterate(c::CompositeException, state...) = iterate(c.exceptions, state...)
 eltype(::Type{CompositeException}) = Any
@@ -306,6 +307,18 @@ function _wait2(t::Task, waiter::Task)
         if !istaskdone(t)
             push!(t.donenotify.waitq, waiter)
             unlock(t.donenotify)
+            # since _wait2 is similar to schedule, we should observe the sticky
+            # bit, even if we aren't calling `schedule` due to this early-return
+            if waiter.sticky && Threads.threadid(waiter) == 0
+                # Issue #41324
+                # waiter.sticky && tid == 0 is a task that needs to be co-scheduled with
+                # the parent task. If the parent (current_task) is not sticky we must
+                # set it to be sticky.
+                # XXX: Ideally we would be able to unset this
+                current_task().sticky = true
+                tid = Threads.threadid()
+                ccall(:jl_set_task_tid, Cvoid, (Any, Cint), waiter, tid-1)
+            end
             return nothing
         else
             unlock(t.donenotify)
@@ -341,6 +354,29 @@ end
 
 ## lexically-scoped waiting for multiple items
 
+struct ScheduledAfterSyncException <: Exception
+    values::Vector{Any}
+end
+
+function showerror(io::IO, ex::ScheduledAfterSyncException)
+    print(io, "ScheduledAfterSyncException: ")
+    if isempty(ex.values)
+        print(io, "(no values)")
+        return
+    end
+    show(io, ex.values[1])
+    if length(ex.values) == 1
+        print(io, " is")
+    elseif length(ex.values) == 2
+        print(io, " and one more ")
+        print(io, nameof(typeof(ex.values[2])))
+        print(io, " are")
+    else
+        print(io, " and ", length(ex.values) - 1, " more objects are")
+    end
+    print(io, " registered after the end of a `@sync` block")
+end
+
 function sync_end(c::Channel{Any})
     local c_ex
     while isready(c)
@@ -365,6 +401,25 @@ function sync_end(c::Channel{Any})
         end
     end
     close(c)
+
+    # Capture all waitable objects scheduled after the end of `@sync` and
+    # include them in the exception. This way, the user can check what was
+    # scheduled by examining the exception object.
+    local racy
+    for r in c
+        if !@isdefined(racy)
+            racy = []
+        end
+        push!(racy, r)
+    end
+    if @isdefined(racy)
+        if !@isdefined(c_ex)
+            c_ex = CompositeException()
+        end
+        # Since this is a clear programming error, show this exception first:
+        pushfirst!(c_ex, ScheduledAfterSyncException(racy))
+    end
+
     if @isdefined(c_ex)
         throw(c_ex)
     end
@@ -434,13 +489,13 @@ function errormonitor(t::Task)
             try # try to display the failure atomically
                 errio = IOContext(PipeBuffer(), errs::IO)
                 emphasize(errio, "Unhandled Task ")
-                display_error(errio, current_exceptions(t))
+                display_error(errio, scrub_repl_backtrace(current_exceptions(t)))
                 write(errs, errio)
             catch
                 try # try to display the secondary error atomically
                     errio = IOContext(PipeBuffer(), errs::IO)
                     print(errio, "\nSYSTEM: caught exception while trying to print a failed Task notice: ")
-                    display_error(errio, current_exceptions())
+                    display_error(errio, scrub_repl_backtrace(current_exceptions()))
                     write(errs, errio)
                     flush(errs)
                     # and then the actual error, as best we can
@@ -455,6 +510,7 @@ function errormonitor(t::Task)
         end
         nothing
     end
+    t2.sticky = false
     _wait2(t, t2)
     return t
 end
diff --git a/base/timing.jl b/base/timing.jl
index ab7af23048305..20d10f26c1ef8 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff)
     diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
 end
 
-# cumulative total time spent on compilation
+# cumulative total time spent on compilation, in nanoseconds
 cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
 cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
 
@@ -115,10 +115,10 @@ function format_bytes(bytes) # also used by InteractiveUtils
     end
 end
 
-function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false)
+function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false, _lpad=true)
     timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
     str = sprint() do io
-        print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
+        _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
         print(io, timestr, " seconds")
         parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
         parens && print(io, " (")
@@ -149,9 +149,9 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, ne
     nothing
 end
 
-function timev_print(elapsedtime, diff::GC_Diff, compile_time)
+function timev_print(elapsedtime, diff::GC_Diff, compile_time, _lpad)
     allocs = gc_alloc_count(diff)
-    time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true)
+    time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true, _lpad)
     print("elapsed time (ns): $elapsedtime\n")
     padded_nonzero_print(diff.total_time,   "gc time (ns)")
     padded_nonzero_print(diff.allocd,       "bytes allocated")
@@ -164,19 +164,32 @@ function timev_print(elapsedtime, diff::GC_Diff, compile_time)
     padded_nonzero_print(diff.full_sweep,   "full collections")
 end
 
+# Like a try-finally block, except without introducing the try scope
+# NOTE: This is deprecated and should not be used from user logic. A proper solution to
+# this problem will be introduced in https://github.com/JuliaLang/julia/pull/39217
+macro __tryfinally(ex, fin)
+    Expr(:tryfinally,
+       :($(esc(ex))),
+       :($(esc(fin)))
+       )
+end
+
 """
-    @time
+    @time expr
+    @time "description" expr
 
 A macro to execute an expression, printing the time it took to execute, the number of
 allocations, and the total number of bytes its execution caused to be allocated, before
 returning the value of the expression. Any time spent garbage collecting (gc) or
 compiling is shown as a percentage.
 
+Optionally provide a description string to print before the time report.
+
 In some cases the system will look inside the `@time` expression and compile some of the
 called code before execution of the top-level expression begins. When that happens, some
 compilation time will not be counted. To include this time you can run `@time @eval ...`.
 
-See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
+See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
 [`@allocated`](@ref).
 
 !!! note
@@ -184,6 +197,9 @@ See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
     package which among other things evaluates the function multiple times in order to
     reduce noise.
 
+!!! compat "Julia 1.8"
+    The option to add a description was introduced in Julia 1.8.
+
 ```julia-repl
 julia> x = rand(10,10);
 
@@ -199,30 +215,76 @@ julia> @time begin
        end
   0.301395 seconds (8 allocations: 336 bytes)
 2
+
+julia> @time "A one second sleep" sleep(1)
+A one second sleep: 1.005750 seconds (5 allocations: 144 bytes)
+
+julia> for loop in 1:3
+            @time loop sleep(1)
+        end
+1: 1.006760 seconds (5 allocations: 144 bytes)
+2: 1.001263 seconds (5 allocations: 144 bytes)
+3: 1.003676 seconds (5 allocations: 144 bytes)
 ```
 """
 macro time(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        @time nothing $(esc(ex))
+    end
+end
+macro time(msg, ex)
+    quote
+        Experimental.@force_compile
         local stats = gc_num()
         local elapsedtime = time_ns()
         local compile_elapsedtime = cumulative_compile_time_ns_before()
-        local val = $(esc(ex))
-        compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
-        elapsedtime = time_ns() - elapsedtime
+        local val = @__tryfinally($(esc(ex)),
+            (elapsedtime = time_ns() - elapsedtime;
+            compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
+        )
         local diff = GC_Diff(gc_num(), stats)
-        time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true)
+        local _msg = $(esc(msg))
+        local has_msg = !isnothing(_msg)
+        has_msg && print(_msg, ": ")
+        time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true, !has_msg)
         val
     end
 end
 
 """
-    @timev
+    @showtime expr
+
+Like `@time` but also prints the expression being evaluated for reference.
+
+!!! compat "Julia 1.8"
+    This macro was added in Julia 1.8.
+
+See also [`@time`](@ref).
+
+```julia-repl
+julia> @showtime sleep(1)
+sleep(1): 1.002164 seconds (4 allocations: 128 bytes)
+```
+"""
+macro showtime(ex)
+    quote
+        @time $(sprint(show_unquoted,ex)) $(esc(ex))
+    end
+end
+
+"""
+    @timev expr
+    @timev "description" expr
 
 This is a verbose version of the `@time` macro. It first prints the same information as
 `@time`, then any non-zero memory allocation counters, and then returns the value of the
 expression.
 
+Optionally provide a description string to print before the time report.
+
+!!! compat "Julia 1.8"
+    The option to add a description was introduced in Julia 1.8.
+
 See also [`@time`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
 [`@allocated`](@ref).
 
@@ -249,15 +311,24 @@ pool allocs:       1
 """
 macro timev(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        @timev nothing $(esc(ex))
+    end
+end
+macro timev(msg, ex)
+    quote
+        Experimental.@force_compile
         local stats = gc_num()
         local elapsedtime = time_ns()
         local compile_elapsedtime = cumulative_compile_time_ns_before()
-        local val = $(esc(ex))
-        compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
-        elapsedtime = time_ns() - elapsedtime
+        local val = @__tryfinally($(esc(ex)),
+            (elapsedtime = time_ns() - elapsedtime;
+            compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
+        )
         local diff = GC_Diff(gc_num(), stats)
-        timev_print(elapsedtime, diff, compile_elapsedtime)
+        local _msg = $(esc(msg))
+        local has_msg = !isnothing(_msg)
+        has_msg && print(_msg, ": ")
+        timev_print(elapsedtime, diff, compile_elapsedtime, !has_msg)
         val
     end
 end
@@ -282,7 +353,7 @@ julia> @elapsed sleep(0.3)
 """
 macro elapsed(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        Experimental.@force_compile
         local t0 = time_ns()
         $(esc(ex))
         (time_ns() - t0) / 1e9
@@ -314,7 +385,7 @@ julia> @allocated rand(10^6)
 """
 macro allocated(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        Experimental.@force_compile
         local b0 = Ref{Int64}(0)
         local b1 = Ref{Int64}(0)
         gc_bytes(b0)
@@ -362,7 +433,7 @@ julia> stats.gcstats.total_time
 """
 macro timed(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        Experimental.@force_compile
         local stats = gc_num()
         local elapsedtime = time_ns()
         local val = $(esc(ex))
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index 4b2af426429a0..66db0e5695551 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -104,7 +104,7 @@ function Parser(str::String; filepath=nothing)
             IdSet{TOMLDict}(),    # defined_tables
             root,
             filepath,
-            isdefined(Base, :loaded_modules) ? get(Base.loaded_modules, DATES_PKGID, nothing) : nothing,
+            isdefined(Base, :maybe_root_module) ? Base.maybe_root_module(DATES_PKGID) : nothing,
         )
     startup(l)
     return l
diff --git a/base/tuple.jl b/base/tuple.jl
index ead8bc9919a86..5db0e40b495d3 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -51,12 +51,12 @@ true
 """
 function setindex(x::Tuple, v, i::Integer)
     @boundscheck 1 <= i <= length(x) || throw(BoundsError(x, i))
-    @_inline_meta
+    @inline
     _setindex(v, i, x...)
 end
 
 function _setindex(v, i::Integer, args...)
-    @_inline_meta
+    @inline
     return ntuple(j -> ifelse(j == i, v, args[j]), length(args))
 end
 
@@ -64,7 +64,7 @@ end
 ## iterating ##
 
 function iterate(@nospecialize(t::Tuple), i::Int=1)
-    @_inline_meta
+    @inline
     return (1 <= i <= length(t)) ? (@inbounds t[i], i + 1) : nothing
 end
 
@@ -74,19 +74,19 @@ prevind(@nospecialize(t::Tuple), i::Integer) = Int(i)-1
 nextind(@nospecialize(t::Tuple), i::Integer) = Int(i)+1
 
 function keys(t::Tuple, t2::Tuple...)
-    @_inline_meta
+    @inline
     OneTo(_maxlength(t, t2...))
 end
 _maxlength(t::Tuple) = length(t)
 function _maxlength(t::Tuple, t2::Tuple, t3::Tuple...)
-    @_inline_meta
+    @inline
     max(length(t), _maxlength(t2, t3...))
 end
 
 # this allows partial evaluation of bounded sequences of next() calls on tuples,
 # while reducing to plain next() for arbitrary iterables.
-indexed_iterate(t::Tuple, i::Int, state=1) = (@_inline_meta; (getfield(t, i), i+1))
-indexed_iterate(a::Array, i::Int, state=1) = (@_inline_meta; (a[i], i+1))
+indexed_iterate(t::Tuple, i::Int, state=1) = (@inline; (getfield(t, i), i+1))
+indexed_iterate(a::Array, i::Int, state=1) = (@inline; (a[i], i+1))
 function indexed_iterate(I, i)
     x = iterate(I)
     x === nothing && throw(BoundsError(I, i))
@@ -133,6 +133,7 @@ function rest end
 rest(t::Tuple) = t
 rest(t::Tuple, i::Int) = ntuple(x -> getfield(t, x+i-1), length(t)-i+1)
 rest(a::Array, i::Int=1) = a[i:end]
+rest(a::Core.SimpleVector, i::Int=1) = a[i:end]
 rest(itr, state...) = Iterators.rest(itr, state...)
 
 # Use dispatch to avoid a branch in first
@@ -203,13 +204,13 @@ ERROR: ArgumentError: Cannot call front on an empty tuple.
 ```
 """
 function front(t::Tuple)
-    @_inline_meta
+    @inline
     _front(t...)
 end
 _front() = throw(ArgumentError("Cannot call front on an empty tuple."))
 _front(v) = ()
 function _front(v, t...)
-    @_inline_meta
+    @inline
     (v, _front(t...)...)
 end
 
@@ -217,10 +218,10 @@ end
 
 # 1 argument function
 map(f, t::Tuple{})              = ()
-map(f, t::Tuple{Any,})          = (@_inline_meta; (f(t[1]),))
-map(f, t::Tuple{Any, Any})      = (@_inline_meta; (f(t[1]), f(t[2])))
-map(f, t::Tuple{Any, Any, Any}) = (@_inline_meta; (f(t[1]), f(t[2]), f(t[3])))
-map(f, t::Tuple)                = (@_inline_meta; (f(t[1]), map(f,tail(t))...))
+map(f, t::Tuple{Any,})          = (@inline; (f(t[1]),))
+map(f, t::Tuple{Any, Any})      = (@inline; (f(t[1]), f(t[2])))
+map(f, t::Tuple{Any, Any, Any}) = (@inline; (f(t[1]), f(t[2]), f(t[3])))
+map(f, t::Tuple)                = (@inline; (f(t[1]), map(f,tail(t))...))
 # stop inlining after some number of arguments to avoid code blowup
 const Any32{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
                        Any,Any,Any,Any,Any,Any,Any,Any,
@@ -242,10 +243,10 @@ function map(f, t::Any32)
 end
 # 2 argument function
 map(f, t::Tuple{},        s::Tuple{})        = ()
-map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (@_inline_meta; (f(t[1],s[1]),))
-map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@_inline_meta; (f(t[1],s[1]), f(t[2],s[2])))
+map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (@inline; (f(t[1],s[1]),))
+map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@inline; (f(t[1],s[1]), f(t[2],s[2])))
 function map(f, t::Tuple, s::Tuple)
-    @_inline_meta
+    @inline
     (f(t[1],s[1]), map(f, tail(t), tail(s))...)
 end
 function map(f, t::Any32, s::Any32)
@@ -261,7 +262,7 @@ heads(ts::Tuple...) = map(t -> t[1], ts)
 tails(ts::Tuple...) = map(tail, ts)
 map(f, ::Tuple{}...) = ()
 function map(f, t1::Tuple, t2::Tuple, ts::Tuple...)
-    @_inline_meta
+    @inline
     (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...)
 end
 function map(f, t1::Any32, t2::Any32, ts::Any32...)
@@ -281,7 +282,7 @@ fill_to_length(t::Tuple{}, val, ::Val{1}) = (val,)
 fill_to_length(t::Tuple{Any}, val, ::Val{2}) = (t..., val)
 fill_to_length(t::Tuple{}, val, ::Val{2}) = (val, val)
 #function fill_to_length(t::Tuple, val, ::Val{N}) where {N}
-#    @_inline_meta
+#    @inline
 #    return (t..., ntuple(i -> val, N - length(t))...)
 #end
 
@@ -318,12 +319,12 @@ Tuple(x::Array{T,0}) where {T} = tuple(getindex(x))
 _totuple(::Type{Tuple{}}, itr, s...) = ()
 
 function _totuple_err(@nospecialize T)
-    @_noinline_meta
+    @noinline
     throw(ArgumentError("too few elements for tuple type $T"))
 end
 
 function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N}
-    @_inline_meta
+    @inline
     y = iterate(itr, s...)
     y === nothing && _totuple_err(T)
     t1 = convert(fieldtype(T, 1), y[1])
@@ -354,22 +355,36 @@ _totuple(::Type{Tuple}, itr::NamedTuple) = (itr...,)
 
 end
 
+## find ##
+
+_findfirst_rec(f, i::Int, ::Tuple{}) = nothing
+_findfirst_rec(f, i::Int, t::Tuple) = (@inline; f(first(t)) ? i : _findfirst_rec(f, i+1, tail(t)))
+function _findfirst_loop(f::Function, t)
+    for i in 1:length(t)
+        f(t[i]) && return i
+    end
+    return nothing
+end
+findfirst(f::Function, t::Tuple) = length(t) < 32 ? _findfirst_rec(f, 1, t) : _findfirst_loop(f, t)
+
+function findlast(f::Function, x::Tuple)
+    r = findfirst(f, reverse(x))
+    return isnothing(r) ? r : length(x) - r + 1
+end
+
 ## filter ##
 
-filter(f, xs::Tuple) = afoldl((ys, x) -> f(x) ? (ys..., x) : ys, (), xs...)
+filter_rec(f, xs::Tuple) = afoldl((ys, x) -> f(x) ? (ys..., x) : ys, (), xs...)
 
 # use Array for long tuples
-filter(f, t::Any32) = Tuple(filter(f, collect(t)))
+filter(f, t::Tuple) = length(t) < 32 ? filter_rec(f, t) : Tuple(filter(f, collect(t)))
 
 ## comparison ##
 
 isequal(t1::Tuple, t2::Tuple) = length(t1) == length(t2) && _isequal(t1, t2)
 _isequal(::Tuple{}, ::Tuple{}) = true
-function _isequal(t1::Tuple{Any,Vararg{Any,N}}, t2::Tuple{Any,Vararg{Any,N}}) where {N}
-    isequal(t1[1], t2[1]) || return false
-    t1, t2 = tail(t1), tail(t2)
-    # avoid dynamic dispatch by telling the compiler relational invariants
-    return isa(t1, Tuple{}) ? true : _isequal(t1, t2::Tuple{Any,Vararg{Any}})
+function _isequal(t1::Tuple{Any,Vararg{Any}}, t2::Tuple{Any,Vararg{Any}})
+    return isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
 end
 function _isequal(t1::Any32, t2::Any32)
     for i = 1:length(t1)
@@ -488,17 +503,12 @@ reverse(t::Tuple) = revargs(t...)
 
 ## specialized reduction ##
 
-# TODO: these definitions cannot yet be combined, since +(x...)
-# where x might be any tuple matches too many methods.
-# TODO: this is inconsistent with the regular sum in cases where the arguments
-# require size promotion to system size.
-sum(x::Tuple{Any, Vararg{Any}}) = +(x...)
-
-# NOTE: should remove, but often used on array sizes
-# TODO: this is inconsistent with the regular prod in cases where the arguments
-# require size promotion to system size.
 prod(x::Tuple{}) = 1
-prod(x::Tuple{Any, Vararg{Any}}) = *(x...)
+# This is consistent with the regular prod because there is no need for size promotion
+# if all elements in the tuple are of system size.
+# It is defined here separately in order to support bootstrap, because it's needed earlier
+# than the general prod definition is available.
+prod(x::Tuple{Int, Vararg{Int}}) = *(x...)
 
 all(x::Tuple{}) = true
 all(x::Tuple{Bool}) = x[1]
@@ -514,7 +524,7 @@ any(x::Tuple{Bool, Bool, Bool}) = x[1]|x[2]|x[3]
 # equivalent to any(f, t), to be used only in bootstrap
 _tuple_any(f::Function, t::Tuple) = _tuple_any(f, false, t...)
 function _tuple_any(f::Function, tf::Bool, a, b...)
-    @_inline_meta
+    @inline
     _tuple_any(f, tf | f(a), b...)
 end
 _tuple_any(f::Function, tf::Bool) = tf
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index 55cdc59371674..8f80b2c8438a0 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -63,7 +63,7 @@ representation, even though it is exact from the standpoint of binary
 representation.
 
 Example:
-```julia
+```julia-repl
 julia> 1.0 + 1.0001e-15
 1.000000000000001
 
@@ -94,7 +94,7 @@ numbers. Mathematically, `zhi + zlo = x * y`, where `zhi` contains the
 most significant bits and `zlo` the least significant.
 
 Example:
-```julia
+```julia-repl
 julia> x = Float32(π)
 3.1415927f0
 
@@ -126,7 +126,7 @@ numbers. Mathematically, `zhi + zlo ≈ x / y`, where `zhi` contains the
 most significant bits and `zlo` the least significant.
 
 Example:
-```julia
+```julia-repl
 julia> x, y = Float32(π), 3.1f0
 (3.1415927f0, 3.1f0)
 
@@ -162,7 +162,18 @@ div12(x, y) = div12(promote(x, y)...)
     TwicePrecision{T}((num, denom))
 
 A number with twice the precision of `T`, e.g., quad-precision if `T =
-Float64`. `hi` represents the high bits (most significant bits) and
+Float64`.
+
+!!! warning
+    `TwicePrecision` is an internal type used to increase the
+    precision of floating-point ranges, and not intended for external use.
+    If you encounter them in real code, the most likely explanation is
+    that you are directly accessing the fields of a range. Use
+    the function interface instead, `step(r)` rather than `r.step`
+
+# Extended help
+
+`hi` represents the high bits (most significant bits) and
 `lo` the low bits (least significant bits). Rational values
 `num//denom` can be approximated conveniently using the syntax
 `TwicePrecision{T}((num, denom))`.
@@ -194,6 +205,10 @@ function TwicePrecision{T}(x) where {T}
     TwicePrecision{T}(xT, T(Δx))
 end
 
+function TwicePrecision{T}(x::TwicePrecision) where {T}
+    TwicePrecision{T}(x.hi, x.lo)
+end
+
 TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} =
     TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...)
 
@@ -207,7 +222,7 @@ end
 
 function TwicePrecision{T}(nd::Tuple{Any,Any}) where {T}
     n, d = nd
-    TwicePrecision{T}(n) / d
+    TwicePrecision{T}(TwicePrecision{T}(n) / d)
 end
 
 function TwicePrecision{T}(nd::Tuple{I,I}, nb::Integer) where {T,I}
@@ -329,13 +344,13 @@ function steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
                          step::Tuple{Integer,Integer}, nb::Integer,
                          len::Integer, offset::Integer)
     StepRangeLen(TwicePrecision{Float64}(ref),
-                 TwicePrecision{Float64}(step, nb), Int(len), offset)
+                 TwicePrecision{Float64}(step, nb), len, offset)
 end
 
 function steprangelen_hp(::Type{T}, ref::Tuple{Integer,Integer},
                          step::Tuple{Integer,Integer}, nb::Integer,
                          len::Integer, offset::Integer) where {T<:IEEEFloat}
-    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], Int(len), offset)
+    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], len, offset)
 end
 
 # AbstractFloat constructors (can supply a single number or a 2-tuple
@@ -347,14 +362,13 @@ function steprangelen_hp(::Type{Float64}, ref::F_or_FF,
                          step::F_or_FF, nb::Integer,
                          len::Integer, offset::Integer)
     StepRangeLen(TwicePrecision{Float64}(ref...),
-                 twiceprecision(TwicePrecision{Float64}(step...), nb), Int(len), offset)
+                 twiceprecision(TwicePrecision{Float64}(step...), nb), len, offset)
 end
 
 function steprangelen_hp(::Type{T}, ref::F_or_FF,
                          step::F_or_FF, nb::Integer,
                          len::Integer, offset::Integer) where {T<:IEEEFloat}
-    StepRangeLen{T}(asF64(ref),
-                    asF64(step), Int(len), offset)
+    StepRangeLen{T}(asF64(ref), asF64(step), len, offset)
 end
 
 
@@ -365,30 +379,33 @@ StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T},
 
 # Construct range for rational start=start_n/den, step=step_n/den
 function floatrange(::Type{T}, start_n::Integer, step_n::Integer, len::Integer, den::Integer) where T
+    len = len + 0 # promote with Int
     if len < 2 || step_n == 0
-        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, Int(len), 1)
+        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, len, oneunit(len))
     end
     # index of smallest-magnitude value
-    imin = clamp(round(Int, -start_n/step_n+1), 1, Int(len))
+    L = typeof(len)
+    imin = clamp(round(typeof(len), -start_n/step_n+1), oneunit(L), len)
     # Compute smallest-magnitude element to 2x precision
     ref_n = start_n+(imin-1)*step_n  # this shouldn't overflow, so don't check
     nb = nbitslen(T, len, imin)
-    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, Int(len), imin)
+    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, len, imin)
 end
 
 function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::AbstractFloat)
+    len = len + 0 # promote with Int
     T = promote_type(typeof(a), typeof(st), typeof(divisor))
     m = maxintfloat(T, Int)
     if abs(a) <= m && abs(st) <= m && abs(divisor) <= m
         ia, ist, idivisor = round(Int, a), round(Int, st), round(Int, divisor)
         if ia == a && ist == st && idivisor == divisor
             # We can return the high-precision range
-            return floatrange(T, ia, ist, Int(len), idivisor)
+            return floatrange(T, ia, ist, len, idivisor)
         end
     end
     # Fallback (misses the opportunity to set offset different from 1,
     # but otherwise this is still high-precision)
-    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), Int(len), 1)
+    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), len, oneunit(len))
 end
 
 function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
@@ -407,7 +424,7 @@ function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
                     rem(den, start_d) == 0 && rem(den, step_d) == 0      # check lcm overflow
                 start_n = round(Int, start*den)
                 step_n = round(Int, step*den)
-                len = max(0, div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d))
+                len = max(0, Int(div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d)))
                 # Integer ops could overflow, so check that this makes sense
                 if isbetween(start, start + (len-1)*step, stop + step/2) &&
                         !isbetween(start, start + len*step, stop)
@@ -418,6 +435,7 @@ function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
         end
     end
     # Fallback, taking start and step literally
+    # n.b. we use Int as the default length type for IEEEFloats
     lf = (stop-start)/step
     if lf < 0
         len = 0
@@ -436,6 +454,7 @@ step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T<:AbstractF
 step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T} = T(r.step)
 
 function range_start_step_length(a::T, st::T, len::Integer) where T<:Union{Float16,Float32,Float64}
+    len = len + 0 # promote with Int
     start_n, start_d = rat(a)
     step_n, step_d = rat(st)
     if start_d != 0 && step_d != 0 &&
@@ -455,7 +474,7 @@ end
 # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact
 function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T
     # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     u = i - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
@@ -474,31 +493,38 @@ end
 
 function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{S}) where {T, S<:Integer}
     @boundscheck checkbounds(r, s)
+    len = length(s)
+    L = typeof(len)
+    sstep = step_hp(s)
+    rstep = step_hp(r)
     if S === Bool
-        if length(s) == 0
-            return StepRangeLen(r.ref, r.step, 0, 1)
-        elseif length(s) == 1
+        #rstep *= one(sstep)
+        if len == 0
+            return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+        elseif len == 1
             if first(s)
-                return StepRangeLen(r.ref, r.step, 1, 1)
+                return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
             else
-                return StepRangeLen(r.ref, r.step, 0, 1)
+                return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
             end
-        else # length(s) == 2
-            return StepRangeLen(r[2], step(r), 1, 1)
+        else # len == 2
+            return StepRangeLen{T}(last(r), step(r), oneunit(L), oneunit(L))
         end
     else
-        soffset = 1 + round(Int, (r.offset - first(s))/step(s))
-        soffset = clamp(soffset, 1, length(s))
-        ioffset = first(s) + (soffset-1)*step(s)
-        if step(s) == 1 || length(s) < 2
-            newstep = r.step
+        soffset = round(L, (r.offset - first(s))/sstep + 1)
+        soffset = clamp(soffset, oneunit(L), len)
+        ioffset = L(first(s) + (soffset - oneunit(L)) * sstep)
+        if sstep == 1 || len < 2
+            newstep = rstep #* one(sstep)
         else
-            newstep = twiceprecision(r.step*step(s), nbitslen(T, length(s), soffset))
+            newstep = rstep * sstep
+            newstep = twiceprecision(newstep, nbitslen(T, len, soffset))
         end
+        soffset = max(oneunit(L), soffset)
         if ioffset == r.offset
-            return StepRangeLen(r.ref, newstep, length(s), max(1,soffset))
+            return StepRangeLen{T}(r.ref, newstep, len, soffset)
         else
-            return StepRangeLen(r.ref + (ioffset-r.offset)*r.step, newstep, length(s), max(1,soffset))
+            return StepRangeLen{T}(r.ref + (ioffset-r.offset)*rstep, newstep, len, soffset)
         end
     end
 end
@@ -509,30 +535,30 @@ end
 /(r::StepRangeLen{<:Real,<:TwicePrecision}, x::Real) =
     StepRangeLen(r.ref/x, twiceprecision(r.step/x, nbitslen(r)), length(r), r.offset)
 
-StepRangeLen{T,R,S}(r::StepRangeLen{T,R,S}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} = r
+StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} = r
 
-StepRangeLen{T,R,S}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} =
-    _convertSRL(StepRangeLen{T,R,S}, r)
+StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} =
+    _convertSRL(StepRangeLen{T,R,S,L}, r)
 
 StepRangeLen{Float64}(r::StepRangeLen) =
-    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
 StepRangeLen{T}(r::StepRangeLen) where {T<:IEEEFloat} =
-    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
+    _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
 
 StepRangeLen{Float64}(r::AbstractRange) =
-    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
 StepRangeLen{T}(r::AbstractRange) where {T<:IEEEFloat} =
-    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
+    _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{<:Integer}) where {T,R,S}
-    StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{<:Integer}) where {T,R,S,L}
+    StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
 end
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{<:Integer}) where {T,R,S}
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{<:Integer}) where {T,R,S,L}
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
 end
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,R,S,U}
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
     # if start and step have a rational approximation in the old type,
     # then we transfer that rational approximation to the new type
     f, s = first(r), step(r)
@@ -546,17 +572,17 @@ function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,
                 rem(den, start_d) == 0 && rem(den, step_d) == 0
             start_n = round(Int, f*den)
             step_n = round(Int, s*den)
-            return floatrange(T, start_n, step_n, length(r), den)
+            return floatrange(T, start_n, step_n, L(length(r)), den)
         end
     end
-    __convertSRL(StepRangeLen{T,R,S}, r)
+    return __convertSRL(StepRangeLen{T,R,S,L}, r)
 end
 
-function __convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{U}) where {T,R,S,U}
-    StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
+function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{U}) where {T,R,S,L,U}
+    StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
 end
-function __convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,R,S,U}
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
 end
 
 function sum(r::StepRangeLen)
@@ -567,7 +593,7 @@ function sum(r::StepRangeLen)
     np, nn = l - r.offset, r.offset - 1  # positive, negative
     # To prevent overflow in sum(1:n), multiply its factors by the step
     sp, sn = sumpair(np), sumpair(nn)
-    W = widen(Int)
+    W = widen(typeof(l))
     Δn = W(sp[1]) * W(sp[2]) - W(sn[1]) * W(sn[2])
     s = r.step * Δn
     # Add in contributions of ref
@@ -603,19 +629,20 @@ function +(r1::StepRangeLen{T,R}, r2::StepRangeLen{T,R}) where T where R<:TwiceP
         imid = r1.offset
         ref = r1.ref + r2.ref
     else
-        imid = round(Int, (r1.offset+r2.offset)/2)
+        imid = round(typeof(len), (r1.offset+r2.offset)/2)
         ref1mid = _getindex_hiprec(r1, imid)
         ref2mid = _getindex_hiprec(r2, imid)
         ref = ref1mid + ref2mid
     end
     step = twiceprecision(r1.step + r2.step, nbitslen(T, len, imid))
-    StepRangeLen{T,typeof(ref),typeof(step)}(ref, step, len, imid)
+    StepRangeLen{T,typeof(ref),typeof(step),typeof(len)}(ref, step, len, imid)
 end
 
 ## LinRange
 
 # For Float16, Float32, and Float64, this returns a StepRangeLen
 function range_start_stop_length(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
+    len = len + 0 # promote with Int
     len < 2 && return _linspace1(T, start, stop, len)
     if start == stop
         return steprangelen_hp(T, start, zero(T), 0, len, 1)
@@ -638,32 +665,35 @@ function range_start_stop_length(start::T, stop::T, len::Integer) where {T<:IEEE
 end
 
 function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
+    len = len + 0 # promote with Int
     (isfinite(start) && isfinite(stop)) || throw(ArgumentError("start and stop must be finite, got $start and $stop"))
     # Find the index that returns the smallest-magnitude element
     Δ, Δfac = stop-start, 1
     if !isfinite(Δ)   # handle overflow for large endpoints
-        Δ, Δfac = stop/len - start/len, Int(len)
+        Δ, Δfac = stop/len - start/len, len
     end
     tmin = -(start/Δ)/Δfac            # t such that (1-t)*start + t*stop == 0
-    imin = round(Int, tmin*(len-1)+1) # index approximately corresponding to t
+    L = typeof(len)
+    lenn1 = len - oneunit(L)
+    imin = round(L, tmin*lenn1 + 1) # index approximately corresponding to t
     if 1 < imin < len
         # The smallest-magnitude element is in the interior
-        t = (imin-1)/(len-1)
+        t = (imin - 1)/lenn1
         ref = T((1-t)*start + t*stop)
         step = imin-1 < len-imin ? (ref-start)/(imin-1) : (stop-ref)/(len-imin)
     elseif imin <= 1
-        imin = 1
+        imin = oneunit(L)
         ref = start
-        step = (Δ/(len-1))*Δfac
+        step = (Δ/(lenn1))*Δfac
     else
-        imin = Int(len)
+        imin = len
         ref = stop
-        step = (Δ/(len-1))*Δfac
+        step = (Δ/(lenn1))*Δfac
     end
     if len == 2 && !isfinite(step)
         # For very large endpoints where step overflows, exploit the
         # split-representation to handle the overflow
-        return steprangelen_hp(T, start, (-start, stop), 0, 2, 1)
+        return steprangelen_hp(T, start, (-start, stop), 0, len, oneunit(L))
     end
     # 2x calculations to get high precision endpoint matching while also
     # preventing overflow in ref_hi+(i-offset)*step_hi
@@ -676,23 +706,28 @@ function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
     a, b = (start - x1_hi) - x1_lo, (stop - x2_hi) - x2_lo
     step_lo = (b - a)/(len - 1)
     ref_lo = a - (1 - imin)*step_lo
-    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, Int(len), imin)
+    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, len, imin)
 end
 
 # range for rational numbers, start = start_n/den, stop = stop_n/den
 # Note this returns a StepRangeLen
 _linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where {T<:IEEEFloat} = _linspace(T, start, stop, len, one(start))
 function _linspace(::Type{T}, start_n::Integer, stop_n::Integer, len::Integer, den::Integer) where T<:IEEEFloat
+    len = len + 0 # promote with Int
     len < 2 && return _linspace1(T, start_n/den, stop_n/den, len)
-    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, 1)
+    L = typeof(len)
+    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, oneunit(L))
     tmin = -start_n/(Float64(stop_n) - Float64(start_n))
-    imin = round(Int, tmin*(len-1)+1)
-    imin = clamp(imin, 1, Int(len))
-    ref_num = Int128(len-imin) * start_n + Int128(imin-1) * stop_n
-    ref_denom = Int128(len-1) * den
+    imin = round(typeof(len), tmin*(len-1)+1)
+    imin = clamp(imin, oneunit(L), len)
+    W = widen(L)
+    start_n = W(start_n)
+    stop_n = W(stop_n)
+    ref_num = W(len-imin) * start_n + W(imin-1) * stop_n
+    ref_denom = W(len-1) * den
     ref = (ref_num, ref_denom)
-    step_full = (Int128(stop_n) - Int128(start_n), ref_denom)
-    steprangelen_hp(T, ref, step_full,  nbitslen(T, len, imin), Int(len), imin)
+    step_full = (stop_n - start_n, ref_denom)
+    steprangelen_hp(T, ref, step_full, nbitslen(T, len, imin), len, imin)
 end
 
 # For len < 2
@@ -704,7 +739,7 @@ function _linspace1(::Type{T}, start, stop, len::Integer) where T<:IEEEFloat
         # The output type must be consistent with steprangelen_hp
         if T<:Union{Float32,Float16}
             return StepRangeLen{T}(Float64(start), Float64(start) - Float64(stop), len, 1)
-        else
+        else # T == Float64
             return StepRangeLen(TwicePrecision(start, zero(T)), TwicePrecision(start, -stop), len, 1)
         end
     end
@@ -713,8 +748,8 @@ end
 
 ### Numeric utilities
 
-# Approximate x with a rational representation. Guaranteed to return,
-# but not guaranteed to return a precise answer.
+# Approximate x with a rational representation as a pair of Int values.
+# Guaranteed to return, but not guaranteed to return a precise answer.
 # https://en.wikipedia.org/wiki/Continued_fraction#Best_rational_approximations
 function rat(x)
     y = x
@@ -722,7 +757,7 @@ function rat(x)
     b = c = 0
     m = maxintfloat(narrow(typeof(x)), Int)
     while abs(y) <= m
-        f = trunc(Int,y)
+        f = trunc(Int, y)
         y -= f
         a, c = f*a + c, a
         b, d = f*b + d, b
@@ -742,7 +777,7 @@ narrow(::Type{Float32}) = Float16
 narrow(::Type{Float16}) = Float16
 
 function _tp_prod(t::TwicePrecision, x, y...)
-    @_inline_meta
+    @inline
     _tp_prod(t * x, y...)
 end
 _tp_prod(t::TwicePrecision) = t
diff --git a/base/util.jl b/base/util.jl
index a8bbd92116a83..9b89c5a40cf1e 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -97,7 +97,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
                            (bold ? disable_text_style[:bold] : "") *
                                get(disable_text_style, color, text_colors[:default])
             first = true
-            for line in split(str, '\n')
+            for line in eachsplit(str, '\n')
                 first || print(buf, '\n')
                 first = false
                 isempty(line) && continue
@@ -113,14 +113,18 @@ end
 
 Print `xs` in a color specified as a symbol or integer, optionally in bold.
 
-`color` may take any of the values $(Base.available_text_colors_docstring)
+Keyword `color` may take any of the values $(Base.available_text_colors_docstring)
 or an integer between 0 and 255 inclusive. Note that not all terminals support 256 colors.
-If the keyword `bold` is given as `true`, the result will be printed in bold.
-If the keyword `underline` is given as `true`, the result will be printed underlined.
-If the keyword `blink` is given as `true`, the result will blink.
-If the keyword `reverse` is given as `true`, the result will have foreground and background colors inversed.
-If the keyword `hidden` is given as `true`, the result will be hidden.
-Keywords can be given in any combination.
+
+Keywords `bold=true`, `underline=true`, `blink=true` are self-explanatory.
+Keyword `reverse=true` prints with foreground and background colors exchanged,
+and `hidden=true` should be invisible in the terminal but can still be copied.
+These properties can be used in any combination.
+
+See also [`print`](@ref), [`println`](@ref), [`show`](@ref).
+
+!!! compat "Julia 1.7"
+    Keywords except `color` and `bold` were added in Julia 1.7.
 """
 printstyled(io::IO, msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
     with_output_color(print, color, io, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden)
@@ -208,6 +212,9 @@ function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
     if opts.startupfile == 2
         push!(addflags, "--startup-file=no")
     end
+    if opts.use_sysimage_native_code == 0
+        push!(addflags, "--sysimage-native-code=no")
+    end
     return `$julia -C$cpu_target -J$image_file $addflags`
 end
 
@@ -536,54 +543,6 @@ function _kwdef!(blk, params_args, call_args)
     blk
 end
 
-"""
-    @invoke f(arg::T, ...; kwargs...)
-
-Provides a convenient way to call [`invoke`](@ref);
-`@invoke f(arg1::T1, arg2::T2; kwargs...)` will be expanded into `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)`.
-When an argument's type annotation is omitted, it's specified as `Any` argument, e.g.
-`@invoke f(arg1::T, arg2)` will be expanded into `invoke(f, Tuple{T,Any}, arg1, arg2)`.
-"""
-macro invoke(ex)
-    f, args, kwargs = destructure_callex(ex)
-    arg2typs = map(args) do x
-        is_expr(x, :(::)) ? (x.args...,) : (x, GlobalRef(Core, :Any))
-    end
-    args, argtypes = first.(arg2typs), last.(arg2typs)
-    return esc(:($(GlobalRef(Core, :invoke))($(f), Tuple{$(argtypes...)}, $(args...); $(kwargs...))))
-end
-
-"""
-    @invokelatest f(args...; kwargs...)
-
-Provides a convenient way to call [`Base.invokelatest`](@ref).
-`@invokelatest f(args...; kwargs...)` will simply be expanded into
-`Base.invokelatest(f, args...; kwargs...)`.
-"""
-macro invokelatest(ex)
-    f, args, kwargs = destructure_callex(ex)
-    return esc(:($(GlobalRef(Base, :invokelatest))($(f), $(args...); $(kwargs...))))
-end
-
-function destructure_callex(ex)
-    is_expr(ex, :call) || throw(ArgumentError("a call expression f(args...; kwargs...) should be given"))
-
-    f = first(ex.args)
-    args = []
-    kwargs = []
-    for x in ex.args[2:end]
-        if is_expr(x, :parameters)
-            append!(kwargs, x.args)
-        elseif is_expr(x, :kw)
-            push!(kwargs, x)
-        else
-            push!(args, x)
-        end
-    end
-
-    return f, args, kwargs
-end
-
 # testing
 
 """
@@ -611,6 +570,7 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS::Int
     seed !== nothing && push!(tests, "--seed=0x$(string(seed % UInt128, base=16))") # cast to UInt128 to avoid a minus sign
     ENV2 = copy(ENV)
     ENV2["JULIA_CPU_THREADS"] = "$ncores"
+    ENV2["JULIA_DEPOT_PATH"] = mktempdir(; cleanup = true)
     try
         run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR::String,
             Base.DATAROOTDIR, "julia", "test", "runtests.jl")) $tests`, ENV2))
diff --git a/base/version.jl b/base/version.jl
index 77676f80e3676..2ff1842b79caf 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -100,7 +100,7 @@ const VERSION_REGEX = r"^
 $"ix
 
 function split_idents(s::AbstractString)
-    idents = split(s, '.')
+    idents = eachsplit(s, '.')
     pidents = Union{UInt64,String}[occursin(r"^\d+$", ident) ? parse(UInt64, ident) : String(ident) for ident in idents]
     return tuple(pidents...)::VerTuple
 end
diff --git a/base/version_git.sh b/base/version_git.sh
index c46021097995e..fbb884648642a 100644
--- a/base/version_git.sh
+++ b/base/version_git.sh
@@ -13,6 +13,8 @@ echo "    date_string::String"
 echo "    tagged_commit::Bool"
 echo "    fork_master_distance::Int"
 echo "    fork_master_timestamp::Float64"
+echo "    build_system_commit::String"
+echo "    build_system_commit_short::String"
 echo "end"
 echo ""
 
@@ -82,6 +84,15 @@ if [ -z "$fork_master_timestamp" ]; then
     fork_master_timestamp="0"
 fi
 
+build_system_directory="../.buildkite"
+if [ -d "${build_system_directory}/.git" ]; then
+    build_system_commit=$(git -C "${build_system_directory}" rev-parse HEAD)
+    build_system_commit_short=$(git -C "${build_system_directory}" rev-parse --short HEAD)
+else
+    build_system_commit=""
+    build_system_commit_short=""
+fi
+
 echo "const GIT_VERSION_INFO = GitVersionInfo("
 echo "    \"$commit\","
 echo "    \"$commit_short\","
@@ -90,5 +101,7 @@ echo "    $build_number,"
 echo "    \"$date_string\","
 echo "    $tagged_commit,"
 echo "    $fork_master_distance,"
-echo "    $fork_master_timestamp."
+echo "    $fork_master_timestamp.,"
+echo "    \"$build_system_commit\","
+echo "    \"$build_system_commit_short\","
 echo ")"
diff --git a/base/views.jl b/base/views.jl
index f60dc04094a43..e26359a5c9fd7 100644
--- a/base/views.jl
+++ b/base/views.jl
@@ -42,7 +42,7 @@ function replace_ref_begin_end_!(ex, withex)
                 n = 1
                 J = lastindex(ex.args)
                 for j = 2:J
-                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S)),:($lastindex($S,$n))))
+                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S,$n)),:($lastindex($S,$n))))
                     used_S |= used
                     ex.args[j] = exj
                     if isa(exj,Expr) && exj.head === :...
diff --git a/cli/Makefile b/cli/Makefile
index d4a1b2472c24d..11855ee6244dc 100644
--- a/cli/Makefile
+++ b/cli/Makefile
@@ -65,7 +65,6 @@ $(DIRS):
 	@mkdir -p $@
 
 ifeq ($(OS),WINNT)
-ifneq ($(USEMSVC), 1)
 $(BUILDDIR)/julia_res.o: $(JULIAHOME)/contrib/windows/julia.rc $(JULIAHOME)/VERSION
 	JLVER=`cat $(JULIAHOME)/VERSION` && \
 	JLVERi=`echo $$JLVER | perl -nle \
@@ -75,7 +74,6 @@ $(BUILDDIR)/julia_res.o: $(JULIAHOME)/contrib/windows/julia.rc $(JULIAHOME)/VERS
 EXE_OBJS += $(BUILDDIR)/julia_res.o
 EXE_DOBJS += $(BUILDDIR)/julia_res.o
 endif
-endif
 
 # Embed an Info.plist in the julia executable
 # Create an intermediate target Info.plist for Darwin code signing.
diff --git a/cli/jl_exports.h b/cli/jl_exports.h
index 35d2767726865..e9be7c6f2f819 100644
--- a/cli/jl_exports.h
+++ b/cli/jl_exports.h
@@ -1,4 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
+
 // Bring in the curated lists of exported data and function symbols, then
 // perform C preprocessor magic upon them to generate lists of declarations and
 // functions to re-export our function symbols from libjulia-internal to libjulia.
@@ -18,25 +19,57 @@ JL_EXPORTED_DATA_SYMBOLS(XX)
 // Declare list of exported functions (sans type)
 #define XX(name)    JL_DLLEXPORT void name(void);
 typedef void (anonfunc)(void);
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
 
-// Define holder locations for function addresses as `const void * $(name)_addr = & $(name);`
-#define XX(name)    JL_HIDDEN anonfunc * name##_addr = (anonfunc*)&name;
-JL_EXPORTED_FUNCS(XX)
+// Define holder locations for function addresses as `const void * $(name)_addr = NULL;`
+#define XX(name)    JL_HIDDEN anonfunc * name##_addr = NULL;
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
 
 // Generate lists of function names and addresses
-#define XX(name)    #name,
-static const char *const jl_exported_func_names[] = {
-    JL_EXPORTED_FUNCS(XX)
+#define XX(name)    "i" #name,
+static const char *const jl_runtime_exported_func_names[] = {
+    JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+    JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+    NULL
+};
+#undef XX
+
+#define XX(name)    #name"_impl",
+static const char *const jl_codegen_exported_func_names[] = {
+    JL_CODEGEN_EXPORTED_FUNCS(XX)
+    NULL
+};
+#undef XX
+
+#define XX(name)    #name"_fallback",
+static const char *const jl_codegen_fallback_func_names[] = {
+    JL_CODEGEN_EXPORTED_FUNCS(XX)
     NULL
 };
 #undef XX
 
 #define XX(name)    &name##_addr,
-static anonfunc **const jl_exported_func_addrs[] = {
-    JL_EXPORTED_FUNCS(XX)
+static anonfunc **const jl_runtime_exported_func_addrs[] = {
+    JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+    JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+    NULL
+};
+static anonfunc **const jl_codegen_exported_func_addrs[] = {
+    JL_CODEGEN_EXPORTED_FUNCS(XX)
     NULL
 };
 #undef XX
diff --git a/cli/list_strip_symbols.h b/cli/list_strip_symbols.h
index e1a96261fe05a..5d534616e132b 100644
--- a/cli/list_strip_symbols.h
+++ b/cli/list_strip_symbols.h
@@ -3,5 +3,8 @@
 #include "jl_exported_funcs.inc"
 #include "trampolines/common.h"
 #define XX(x) --strip-symbol=CNAME(x)
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
 #undef XX
diff --git a/cli/loader.h b/cli/loader.h
index 6df1557ec2c26..70ae8750a6c0a 100644
--- a/cli/loader.h
+++ b/cli/loader.h
@@ -22,10 +22,15 @@
 #define realloc loader_realloc
 #endif
 
+#include <stdint.h>
+
 #ifdef _OS_WINDOWS_
+
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
+
 #else
+
 #ifdef _OS_DARWIN_
 #include <mach-o/dyld.h>
 #endif
@@ -33,7 +38,6 @@
 #include <stddef.h>
 #include <sys/sysctl.h>
 #endif
-
 #define _GNU_SOURCE // Need this for `dladdr()`
 #include <stdio.h>
 #include <stdlib.h>
@@ -42,6 +46,7 @@
 #include <libgen.h>
 #include <unistd.h>
 #include <dlfcn.h>
+
 #endif
 
 // Borrow definition from `support/dtypes.h`
@@ -53,7 +58,7 @@
 # endif
 #define JL_HIDDEN
 #else
-# if defined(LIBRARY_EXPORTS) && defined(_OS_LINUX)
+# if defined(LIBRARY_EXPORTS) && defined(_OS_LINUX_)
 #  define JL_DLLEXPORT __attribute__ ((visibility("protected")))
 # else
 #  define JL_DLLEXPORT __attribute__ ((visibility("default")))
@@ -91,3 +96,5 @@ int wchar_to_utf8(const wchar_t * wstr, char *str, size_t maxlen);
 int utf8_to_wchar(const char * str, wchar_t *wstr, size_t maxlen);
 void setup_stdio(void);
 #endif
+
+#include "../src/jloptions.h"
diff --git a/cli/loader_exe.c b/cli/loader_exe.c
index dd7561b14d1f6..983cef02fa037 100644
--- a/cli/loader_exe.c
+++ b/cli/loader_exe.c
@@ -1,4 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
+
 // This defines a bare-bones loader that opens `libjulia` and immediately invokes its `load_repl()` function.
 #include "loader.h"
 
@@ -13,6 +14,16 @@ extern "C" {
 
 JULIA_DEFINE_FAST_TLS
 
+#ifdef _COMPILER_ASAN_ENABLED_
+JL_DLLEXPORT const char* __asan_default_options()
+{
+    return "allow_user_segv_handler=1:detect_leaks=0";
+    // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(),
+    //        or defining __lsan_default_options = exitcode=0 once publicly available
+    //        (here and in flisp/flmain.c)
+}
+#endif
+
 #ifdef _OS_WINDOWS_
 int mainCRTStartup(void)
 {
@@ -25,6 +36,12 @@ int main(int argc, char * argv[])
 {
 #endif
 
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)
+    // ASAN/TSAN do not support RTLD_DEEPBIND
+    // https://github.com/google/sanitizers/issues/611
+    putenv("LBT_USE_RTLD_DEEPBIND=0");
+#endif
+
     // Convert Windows wchar_t values to UTF8
 #ifdef _OS_WINDOWS_
     for (int i = 0; i < argc; i++) {
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 94cec50ae7e6a..57346784aec7a 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -1,4 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
+
 // This file defines an RPATH-style relative path loader for all platforms
 #include "loader.h"
 
@@ -15,7 +16,7 @@ extern "C" {
 #endif
 
 // Save DEP_LIBS to a variable that is explicitly sized for expansion
-static char dep_libs[512] = DEP_LIBS;
+static char dep_libs[1024] = DEP_LIBS;
 
 JL_DLLEXPORT void jl_loader_print_stderr(const char * msg)
 {
@@ -30,13 +31,29 @@ void jl_loader_print_stderr3(const char * msg1, const char * msg2, const char *
 }
 
 /* Wrapper around dlopen(), with extra relative pathing thrown in*/
-static void * load_library(const char * rel_path, const char * src_dir) {
+static void * load_library(const char * rel_path, const char * src_dir, int err) {
+    void * handle = NULL;
+
+    // See if a handle is already open to the basename
+    const char *basename = rel_path + strlen(rel_path);
+    while (basename-- > rel_path)
+        if (*basename == PATHSEPSTRING[0] || *basename == '/')
+            break;
+    basename++;
+#if defined(_OS_WINDOWS_)
+    if ((handle = GetModuleHandleA(basename)))
+        return handle;
+#else
+    // if err == 0 the library is optional, so don't allow global lookups to see it
+    if ((handle = dlopen(basename, RTLD_NOLOAD | RTLD_NOW | (err ? RTLD_GLOBAL : RTLD_LOCAL))))
+        return handle;
+#endif
+
     char path[2*PATH_MAX + 1] = {0};
     strncat(path, src_dir, sizeof(path) - 1);
     strncat(path, PATHSEPSTRING, sizeof(path) - 1);
     strncat(path, rel_path, sizeof(path) - 1);
 
-    void * handle = NULL;
 #if defined(_OS_WINDOWS_)
     wchar_t wpath[2*PATH_MAX + 1] = {0};
     if (!utf8_to_wchar(path, wpath, 2*PATH_MAX)) {
@@ -45,10 +62,12 @@ static void * load_library(const char * rel_path, const char * src_dir) {
     }
     handle = (void *)LoadLibraryExW(wpath, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
 #else
-    handle = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
+    handle = dlopen(path, RTLD_NOW | (err ? RTLD_GLOBAL : RTLD_LOCAL));
 #endif
 
     if (handle == NULL) {
+        if (!err)
+            return NULL;
         jl_loader_print_stderr3("ERROR: Unable to load dependent library ", path, "\n");
 #if defined(_OS_WINDOWS_)
         LPWSTR wmsg = TEXT("");
@@ -141,32 +160,80 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
     // Pre-load libraries that libjulia-internal needs.
     int deps_len = strlen(dep_libs);
     char * curr_dep = &dep_libs[0];
+
+    // We keep track of "special" library names (ones whose name is prefixed with `@`)
+    // which are libraries that we want to load in some special, custom way, such as
+    // `libjulia-internal` or `libjulia-codegen`.
+    int special_idx = 0;
+    char * special_library_names[2] = {NULL};
     while (1) {
-        // try to find next colon character, if we can't, escape out.
+        // try to find next colon character; if we can't, break out
         char * colon = strchr(curr_dep, ':');
         if (colon == NULL)
             break;
 
-        // Chop the string at the colon, load this library.
+        // Chop the string at the colon so that `curr_dep` is NUL-terminated
         *colon = '\0';
-        load_library(curr_dep, lib_dir);
+
+        // If this library name starts with `@`, don't open it here (but record it as special)
+        if (curr_dep[0] == '@') {
+            if (special_idx >= sizeof(special_library_names)/sizeof(char *)) { // slot about to be written must be in bounds
+                jl_loader_print_stderr("ERROR: Too many special library names specified, check LOADER_BUILD_DEP_LIBS and friends!\n");
+                exit(1);
+            }
+            special_library_names[special_idx] = curr_dep + 1; // skip the leading `@`
+            special_idx += 1;
+        } else {
+            load_library(curr_dep, lib_dir, 1); // err=1: a failed load is reported as an error
+        }
 
         // Skip ahead to next dependency
         curr_dep = colon + 1;
     }
 
-    // Last dependency is `libjulia-internal`, so load that and we're done with `dep_libs`!
-    libjulia_internal = load_library(curr_dep, lib_dir);
+    if (special_idx != sizeof(special_library_names)/sizeof(char *)) {
+        jl_loader_print_stderr("ERROR: Too few special library names specified, check LOADER_BUILD_DEP_LIBS and friends!\n");
+        exit(1);
+    }
+
+    // Unpack our special library names.  This is why ordering of library names matters.
+    libjulia_internal = load_library(special_library_names[0], lib_dir, 1);
+    void *libjulia_codegen = load_library(special_library_names[1], lib_dir, 0);
+    const char * const * codegen_func_names;
+    if (libjulia_codegen == NULL) {
+        // if codegen is not available, use fallback implementation in libjulia-internal
+        libjulia_codegen = libjulia_internal;
+        codegen_func_names = jl_codegen_fallback_func_names;
+    }
+    else {
+        codegen_func_names = jl_codegen_exported_func_names;
+    }
 
     // Once we have libjulia-internal loaded, re-export its symbols:
-    for (unsigned int symbol_idx=0; jl_exported_func_names[symbol_idx] != NULL; ++symbol_idx) {
-        void *addr = lookup_symbol(libjulia_internal, jl_exported_func_names[symbol_idx]);
-        if (addr == NULL || addr == *jl_exported_func_addrs[symbol_idx]) {
-            jl_loader_print_stderr3("ERROR: Unable to load ", jl_exported_func_names[symbol_idx], " from libjulia-internal");
+    for (unsigned int symbol_idx=0; jl_runtime_exported_func_names[symbol_idx] != NULL; ++symbol_idx) {
+        void *addr = lookup_symbol(libjulia_internal, jl_runtime_exported_func_names[symbol_idx]);
+        if (addr == NULL) {
+            jl_loader_print_stderr3("ERROR: Unable to load ", jl_runtime_exported_func_names[symbol_idx], " from libjulia-internal\n");
             exit(1);
         }
-        (*jl_exported_func_addrs[symbol_idx]) = addr;
+        (*jl_runtime_exported_func_addrs[symbol_idx]) = addr;
     }
+    // jl_options must be initialized very early, in case an embedder sets some values there
+    // before calling jl_init. NOTE(review): called again after the codegen loop; confirm both are needed.
+    ((void (*)(void))jl_init_options_addr)();
+
+    for (unsigned int symbol_idx=0; codegen_func_names[symbol_idx] != NULL; ++symbol_idx) {
+        void *addr = lookup_symbol(libjulia_codegen, codegen_func_names[symbol_idx]);
+        if (addr == NULL) {
+            jl_loader_print_stderr3("ERROR: Unable to load ", codegen_func_names[symbol_idx], " from libjulia-codegen\n");
+            exit(1);
+        }
+        (*jl_codegen_exported_func_addrs[symbol_idx]) = addr;
+    }
+
+    // jl_options must be initialized very early, in case an embedder sets some
+    // values there before calling jl_init
+    ((void (*)(void))jl_init_options_addr)();
 }
 
 // Load libjulia and run the REPL with the given arguments (in UTF-8 format)
diff --git a/cli/loader_win_utils.c b/cli/loader_win_utils.c
index 46b07cb1796c7..621834a030c52 100644
--- a/cli/loader_win_utils.c
+++ b/cli/loader_win_utils.c
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 // Workarounds for compiling via mingw without using libgcc_s
 typedef struct {
     HANDLE fd;
diff --git a/cli/trampolines/common.h b/cli/trampolines/common.h
index 06d7b9e236971..00d703c341515 100644
--- a/cli/trampolines/common.h
+++ b/cli/trampolines/common.h
@@ -1,3 +1,7 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "../../src/support/platform.h"
+
 // Preprocessor annoyances
 #define CONCAT_(x,y)    x##y
 #define CONCAT(x,y)     CONCAT_(x, y)
diff --git a/cli/trampolines/trampolines_aarch64.S b/cli/trampolines/trampolines_aarch64.S
index bffeab76c1763..2d87ae6dcdb1c 100644
--- a/cli/trampolines/trampolines_aarch64.S
+++ b/cli/trampolines/trampolines_aarch64.S
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #include "common.h"
 #include "../../src/jl_exported_funcs.inc"
 
@@ -11,5 +13,9 @@ CNAME(name)##: SEP \
     br x16 SEP \
 .cfi_endproc SEP \
 
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
diff --git a/cli/trampolines/trampolines_arm.S b/cli/trampolines/trampolines_arm.S
index f99b7820360b2..5ce6617f3f04e 100644
--- a/cli/trampolines/trampolines_arm.S
+++ b/cli/trampolines/trampolines_arm.S
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #include "common.h"
 #include "../../src/jl_exported_funcs.inc"
 
@@ -14,5 +16,9 @@ CONCAT(.L,CNAMEADDR(name))##: ; \
     .word CNAMEADDR(name)##-(CONCAT(.L,CNAME(name)) + 8); \
 .cfi_endproc; \
 
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
diff --git a/cli/trampolines/trampolines_i686.S b/cli/trampolines/trampolines_i686.S
index f27949afa47b8..3d9cacf0ce652 100644
--- a/cli/trampolines/trampolines_i686.S
+++ b/cli/trampolines/trampolines_i686.S
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #include "common.h"
 #include "../../src/jl_exported_funcs.inc"
 
@@ -12,5 +14,9 @@ CNAME(name)##:; \
 .cfi_endproc; \
 EXPORT(name); \
 
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
diff --git a/cli/trampolines/trampolines_powerpc64le.S b/cli/trampolines/trampolines_powerpc64le.S
index cd64f656362d0..8b32ef91d2464 100644
--- a/cli/trampolines/trampolines_powerpc64le.S
+++ b/cli/trampolines/trampolines_powerpc64le.S
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #include "common.h"
 #include "../../src/jl_exported_funcs.inc"
 
@@ -22,5 +24,6 @@ CNAME(name)##: ; \
 .cfi_endproc; \
 .size CNAME(name)##,.-CNAME(name)##; \
 
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
diff --git a/cli/trampolines/trampolines_x86_64.S b/cli/trampolines/trampolines_x86_64.S
index e06434cf540e5..3b800da56eee1 100644
--- a/cli/trampolines/trampolines_x86_64.S
+++ b/cli/trampolines/trampolines_x86_64.S
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #include "common.h"
 #include "../../src/jl_exported_funcs.inc"
 
@@ -16,5 +18,9 @@ SEH_END(); \
 .cfi_endproc; \
 EXPORT(name); \
 
-JL_EXPORTED_FUNCS(XX)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
 #undef XX
diff --git a/contrib/README.md b/contrib/README.md
index 1f6304706548e..62eca671dc38e 100644
--- a/contrib/README.md
+++ b/contrib/README.md
@@ -23,3 +23,4 @@ Debugging
 | Name                           |  Description                                                |
 | ------------------------------ | ----------------------------------------------------------- |
 |[ debug_bootstrap.gdb ](https://github.com/JuliaLang/julia/blob/master/contrib/debug_bootstrap.gdb) | Bootstrap process using the debug build |
+|[ valgrind-julia.supp ](https://github.com/JuliaLang/julia/blob/master/contrib/valgrind-julia.supp) | Suppressions for Valgrind debugging tool |
diff --git a/contrib/add_license_to_files.jl b/contrib/add_license_to_files.jl
index c5aa0f49d99d3..9650422ee07ca 100644
--- a/contrib/add_license_to_files.jl
+++ b/contrib/add_license_to_files.jl
@@ -15,6 +15,7 @@ const print_result = true  # prints files which where not processed.
 
 const rootdirs = [
     "../base",
+    "../cli",
     "../contrib",
     "../src",
     "../stdlib",
@@ -31,6 +32,7 @@ const excludedirs = [
 
 const skipfiles = [
     "../contrib/add_license_to_files.jl",
+    "../contrib/asan/check.jl",
     # files to check - already copyright
     # see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98099389
     "../base/special/trig.jl",
@@ -44,11 +46,10 @@ const skipfiles = [
     "../src/abi_x86.cpp",
     "../src/abi_x86_64.cpp",
     "../src/disasm.cpp",
-    "../src/getopt.c",
-    "../src/getopt.h",
     "../src/support/END.h",
     "../src/support/ENTRY.amd64.h",
     "../src/support/ENTRY.i387.h",
+    "../src/support/_setjmp.win32.S",
     "../src/support/MurmurHash3.c",
     "../src/support/MurmurHash3.h",
     "../src/support/asprintf.c",
@@ -66,6 +67,7 @@ const ext_prefix = Dict([
     (".h", "// "),
     (".c", "// "),
     (".cpp", "// "),
+    (".S", "// "),
 ])
 
 const new_license = "This file is a part of Julia. License is MIT: https://julialang.org/license"
@@ -104,6 +106,7 @@ function getfilespaths!(filepaths::Vector, rootdir::AbstractString)
     abs_rootdir = abspath(rootdir)
     for name in readdir(abs_rootdir)
         path = joinpath(abs_rootdir, name)
+        islink(path) && continue
         if isdir(path)
             getfilespaths!(filepaths, path)
         else
@@ -118,6 +121,7 @@ function add_license_line!(unprocessed::Vector, src::AbstractString, new_license
 
     for name in readdir(src)
         path = normpath(joinpath(src, name))
+        islink(path) && continue
         if isdir(path)
             if path in abs_excludedirs
                 getfilespaths!(unprocessed, path)
diff --git a/contrib/asan/Make.user.asan b/contrib/asan/Make.user.asan
index 3bcc34df68323..28e56990ceb5e 100644
--- a/contrib/asan/Make.user.asan
+++ b/contrib/asan/Make.user.asan
@@ -1,10 +1,12 @@
-TOOLCHAIN=$(BUILDROOT)/../toolchain/usr/tools
+TOOLCHAIN=$(BUILDROOT)/../toolchain
+BINDIR=$(TOOLCHAIN)/usr/bin
+TOOLDIR=$(TOOLCHAIN)/usr/tools
 
 # use our new toolchain
 USECLANG=1
-override CC=$(TOOLCHAIN)/clang
-override CXX=$(TOOLCHAIN)/clang++
-export ASAN_SYMBOLIZER_PATH=$(TOOLCHAIN)/llvm-symbolizer
+override CC=$(TOOLDIR)/clang
+override CXX=$(TOOLDIR)/clang++
+export ASAN_SYMBOLIZER_PATH=$(TOOLDIR)/llvm-symbolizer
 
 USE_BINARYBUILDER_LLVM=1
 
@@ -19,6 +21,3 @@ override JULIA_BUILD_MODE=debug
 
 # make ASAN consume less memory
 export ASAN_OPTIONS=detect_leaks=0:fast_unwind_on_malloc=0:allow_user_segv_handler=1:malloc_context_size=2
-
-# tell libblastrampoline to not use RTLD_DEEPBIND
-export LBT_USE_RTLD_DEEPBIND=0
diff --git a/contrib/asan/build.sh b/contrib/asan/build.sh
index d124e0a92f1e0..77f3078b35c42 100755
--- a/contrib/asan/build.sh
+++ b/contrib/asan/build.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #
 # Usage:
 #     contrib/asan/build.sh <path> [<make_targets>...]
diff --git a/contrib/asan/check.jl b/contrib/asan/check.jl
index 0c1e12f7f471a..2933aaf3fb4e3 100755
--- a/contrib/asan/check.jl
+++ b/contrib/asan/check.jl
@@ -35,12 +35,7 @@ function main(args = ARGS)::Int
     timeout = Threads.Atomic{Bool}(false)
     isstarted = false
     mktemp() do tmppath, tmpio
-        cmd = addenv(
-            `$julia -e $code $tmppath`,
-            "ASAN_OPTIONS" =>
-                "detect_leaks=0:fast_unwind_on_malloc=0:allow_user_segv_handler=1:malloc_context_size=2",
-            "LBT_USE_RTLD_DEEPBIND" => "0",
-        )
+        cmd = `$julia -e $code $tmppath`
         # Note: Ideally, we set ASAN_SYMBOLIZER_PATH here. But there is no easy
         # way to find out the path from just a Julia binary.
 
diff --git a/contrib/bpftrace/gc_all.bt b/contrib/bpftrace/gc_all.bt
new file mode 100755
index 0000000000000..f78e8f3aa607d
--- /dev/null
+++ b/contrib/bpftrace/gc_all.bt
@@ -0,0 +1,44 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    $now = nsecs;
+    @time[pid] = $now;
+    @start[pid] = $now;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
+/@start[pid]/
+{
+    $now = nsecs;
+    @stop_the_world_usecs[pid] = hist(($now - @time[pid]) / 1000);
+    @time[pid] = $now;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__end
+/@start[pid]/
+{
+    $now = nsecs;
+    @gc_total_usecs[pid] = hist(($now - @start[pid]) / 1000);
+    @gc_phase_usecs[pid] = hist(($now - @time[pid]) / 1000);
+    @time[pid] = $now;
+    delete(@start[pid]);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__finalizer
+/@time[pid]/
+{
+    @finalizer[pid] = hist((nsecs - @time[pid]) / 1000);
+    delete(@time[pid]);
+}
+
+END
+{
+    clear(@start);
+    clear(@time);
+}
diff --git a/contrib/bpftrace/gc_simple.bt b/contrib/bpftrace/gc_simple.bt
new file mode 100755
index 0000000000000..559f41c41cf72
--- /dev/null
+++ b/contrib/bpftrace/gc_simple.bt
@@ -0,0 +1,23 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    @start[pid] = nsecs;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__end
+/@start[pid]/
+{
+    @usecs[pid] = hist((nsecs - @start[pid]) / 1000);
+    delete(@start[pid]);
+}
+
+END
+{
+    clear(@start);
+}
diff --git a/contrib/bpftrace/gc_stop_the_world_latency.bt b/contrib/bpftrace/gc_stop_the_world_latency.bt
new file mode 100755
index 0000000000000..8e541bcb421e2
--- /dev/null
+++ b/contrib/bpftrace/gc_stop_the_world_latency.bt
@@ -0,0 +1,23 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    @start[pid] = nsecs;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
+/@start[pid]/
+{
+    @usecs[pid] = hist((nsecs - @start[pid]) / 1000);
+    delete(@start[pid]);
+}
+
+END
+{
+    clear(@start);
+}
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index b5ded199688ee..3c31c4c118c00 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-if isempty(Base.ARGS) || Base.ARGS[1] !== "0"
+if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0"
 Sys.__init_build()
 # Prevent this from being put into the Main namespace
 @eval Module() begin
@@ -32,9 +32,7 @@ precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}})
 precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int})
 precompile(Tuple{typeof(getindex), Core.SimpleVector, Int})
 precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type})
-precompile(Tuple{typeof(Base.display_error), MethodError, Vector{Union{Ptr{Nothing}, Base.InterpreterIP}}})
-precompile(Tuple{typeof(Base.display_error), ErrorException})
-precompile(Tuple{typeof(Base.display_error), BoundsError})
+precompile(Tuple{typeof(Base.display_error), Base.ExceptionStack})
 precompile(Tuple{Core.kwftype(typeof(Type)), NamedTuple{(:sizehint,), Tuple{Int}}, Type{IOBuffer}})
 precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, String, Module))
 precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, Symbol, Module))
@@ -188,7 +186,7 @@ Test = get(Base.loaded_modules,
 if Test !== nothing
     hardcoded_precompile_statements *= """
     precompile(Tuple{typeof(Test.do_test), Test.ExecutionResult, Any})
-    precompile(Tuple{typeof(Test.testset_beginend), Tuple{String, Expr}, Expr, LineNumberNode})
+    precompile(Tuple{typeof(Test.testset_beginend_call), Tuple{String, Expr}, Expr, LineNumberNode})
     precompile(Tuple{Type{Test.DefaultTestSet}, String})
     precompile(Tuple{Type{Test.DefaultTestSet}, AbstractString})
     precompile(Tuple{Core.kwftype(Type{Test.DefaultTestSet}), Any, Type{Test.DefaultTestSet}, AbstractString})
@@ -222,7 +220,11 @@ Profile = get(Base.loaded_modules,
           nothing)
 if Profile !== nothing
     hardcoded_precompile_statements *= """
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
     """
 end
 
@@ -250,16 +252,20 @@ function generate_precompile_statements()
               module $pkgname
               end
               """)
-        tmp = tempname()
+        tmp_prec = tempname()
+        tmp_proc = tempname()
         s = """
             pushfirst!(DEPOT_PATH, $(repr(prec_path)));
-            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp));
+            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec));
             Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path)))
             $precompile_script
             """
-        run(`$(julia_exepath()) -O0 --sysimage $sysimg --startup-file=no -Cnative -e $s`)
-        for statement in split(read(tmp, String), '\n')
-            push!(statements, statement)
+        run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`)
+        for f in (tmp_prec, tmp_proc)
+            for statement in split(read(f, String), '\n')
+                occursin("Main.", statement) && continue
+                push!(statements, statement)
+            end
         end
     end
 
@@ -373,7 +379,7 @@ function generate_precompile_statements()
                 # XXX: precompile doesn't currently handle overloaded Vararg arguments very well.
                 # Replacing N with a large number works around it.
                 l = l.args[end]
-                if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] == :Vararg # Vararg{T}
+                if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] === :Vararg # Vararg{T}
                     push!(l.args, 100) # form Vararg{T, 100} instead
                 end
             end
diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl
index d69e09aba0581..ad275c078c49c 100755
--- a/contrib/julia-config.jl
+++ b/contrib/julia-config.jl
@@ -11,8 +11,6 @@ const options = [
     "--framework"
 ];
 
-threadingOn() = ccall(:jl_threading_enabled, Cint, ()) != 0
-
 function shell_escape(str)
     str = replace(str, "'" => "'\''")
     return "'$str'"
diff --git a/contrib/mac/app/startup.applescript b/contrib/mac/app/startup.applescript
index f02830a3902dc..9964049f34ed6 100644
--- a/contrib/mac/app/startup.applescript
+++ b/contrib/mac/app/startup.applescript
@@ -1,5 +1,4 @@
-set RootPath to POSIX path of (path to me)
-tell application id "com.apple.terminal"
-  do script ("exec '" & RootPath & "Contents/Resources/julia/bin/julia'")
-  activate
-end tell
+set RootPath to (path to me)
+set JuliaPath to POSIX path of ((RootPath as text) & "Contents:Resources:julia:bin:julia")
+set JuliaFile to POSIX file JuliaPath
+tell application id "com.apple.finder" to open JuliaFile
diff --git a/contrib/new-stdlib.sh b/contrib/new-stdlib.sh
index 323dc11271da6..15f82cffb1c46 100755
--- a/contrib/new-stdlib.sh
+++ b/contrib/new-stdlib.sh
@@ -50,14 +50,14 @@ UNAME=$(echo "$NAME" | tr [a-z] [A-Z])
 
 sed -e "/^STDLIBS_EXT =/,/^\$/s!^\$!\\
 STDLIBS_EXT += $NAME\\
-${UNAME}_GIT_URL := git://github.com/$USER/$NAME.jl.git\\
-${UNAME}_TAR_URL = https://api.github.com/repos/$USER/$NAME.jl/tarball/\$1\\
 !" "$ROOT/Makefile" >"$ROOT/Makefile.tmp"
 mv "$ROOT/Makefile.tmp" "$ROOT/Makefile"
 
 cat >"$ROOT/$NAME.version" <<EOF
 ${UNAME}_BRANCH = master
 ${UNAME}_SHA1 = $SHA1
+${UNAME}_GIT_URL := https://github.com/$USER/$NAME.jl.git
+${UNAME}_TAR_URL = https://api.github.com/repos/$USER/$NAME.jl/tarball/\$1
 EOF
 
 git add "$ROOT/$NAME.version"
diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk
index 1ccbb16a9ba4f..898bd5841ee82 100644
--- a/contrib/refresh_checksums.mk
+++ b/contrib/refresh_checksums.mk
@@ -8,6 +8,11 @@
 SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 JULIAHOME := $(abspath $(SRCDIR)/..)
 
+# force a sane / stable configuration
+export LC_ALL=C
+export LANG=C
+.SUFFIXES:
+
 # Default target that will have everything else added to it as a dependency
 all: checksum pack-checksum
 
@@ -46,7 +51,7 @@ endef
 # note that `"src"` is a special triplet value.
 # if $(3) is "assert", we set BINARYBUILDER_LLVM_ASSERTS=1
 define checksum_dep
-checksum-$(1)-$(2)-$(3):
+checksum-$(1)-$(2)-$(3): clean-$(1)
 	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" $(call make_flags,$(1),$(2),$(3)) checksum-$(1)
 .PHONY: checksum-$(1)-$(2)-$(3)
 
@@ -91,39 +96,42 @@ checksum-doc-unicodedata:
 all: checksum-doc-unicodedata
 .PHONY: checksum-doc-unicodedata
 
-# Special LLVM source hashes for optional targets
-checksum-llvm-special-src:
-	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" USE_BINARYBUILDER_LLVM=0 DEPS_GIT=0 BUILD_LLDB=1 BUILD_LLVM_CLANG=1 BUILD_CUSTOM_LIBCXX=1 USECLANG=1 checksum-llvm
-all: checksum-llvm-special-src
-.PHONY: checksum-llvm-special-src
-
 # merge substring project names to avoid races
 pack-checksum-llvm-tools: | pack-checksum-llvm
+	@# nothing to do but disable the prefix rule
 pack-checksum-llvm: | checksum-llvm-tools
 pack-checksum-csl: | pack-checksum-compilersupportlibraries
+	@# nothing to do but disable the prefix rule
 pack-checksum-compilersupportlibraries: | checksum-csl
-
-# We need to adjust to the fact that the checksum files are called `suitesparse`
 pack-checksum-libsuitesparse: | pack-checksum-suitesparse
 	@# nothing to do but disable the prefix rule
 pack-checksum-suitesparse: | checksum-libsuitesparse
+# This is a bit tricky: we want llvmunwind to be separate from unwind and llvm,
+# so we add a rule to process those first
+pack-checksum-llvm pack-checksum-unwind: | pack-checksum-llvmunwind
+# and the name for LLVMLibUnwind is awkward, so handle that with a regex
+pack-checksum-llvmunwind: | pack-checksum-llvm.*unwind
+	cd "$(JULIAHOME)/deps/checksums" && mv 'llvm.*unwind' llvmunwind
+
+clean-%: FORCE
+	-rm "$(JULIAHOME)/deps/checksums"/'$*'
 
 # define how to pack parallel checksums into a single file format
 pack-checksum-%: FORCE
-	@echo making "$(JULIAHOME)/deps/checksums/$*"
+	@echo making "$(JULIAHOME)/deps/checksums/"'$*'
 	@cd "$(JULIAHOME)/deps/checksums" && \
 		for each in $$(ls | grep -i '$*'); do \
-			if [ -d $$each ]; then \
-				for type in $$(ls $$each); do \
-					echo $$each/$$type/$$(cat $$each/$$type); \
-					rm $$each/$$type; \
+			if [ -d "$$each" ]; then \
+				for type in $$(ls "$$each"); do \
+					echo "$$each"/"$$type"/$$(cat "$$each"/"$$type"); \
+					rm "$$each"/"$$type"; \
 				done; \
-				rmdir $$each; \
+				rmdir "$$each"; \
 			fi; \
-		done >> $*
+		done >> '$*'
 	@cd "$(JULIAHOME)/deps/checksums" && \
-		sort $* > $*.tmp && \
-		mv $*.tmp $*
+		sort '$*' > '$*.tmp' && \
+		mv '$*.tmp' '$*'
 
 # This file is completely phony
 FORCE:
diff --git a/contrib/tsan/Make.user.tsan b/contrib/tsan/Make.user.tsan
new file mode 100644
index 0000000000000..01c9874a85182
--- /dev/null
+++ b/contrib/tsan/Make.user.tsan
@@ -0,0 +1,16 @@
+TOOLCHAIN=$(BUILDROOT)/../toolchain
+BINDIR=$(TOOLCHAIN)/usr/bin
+TOOLDIR=$(TOOLCHAIN)/usr/tools
+
+# use our new toolchain
+USECLANG=1
+override CC=$(TOOLDIR)/clang
+override CXX=$(TOOLDIR)/clang++
+
+USE_BINARYBUILDER_LLVM=1
+
+override SANITIZE=1
+override SANITIZE_THREAD=1
+
+# default to a debug build for better line number reporting
+override JULIA_BUILD_MODE=debug
diff --git a/contrib/tsan/build.sh b/contrib/tsan/build.sh
new file mode 100755
index 0000000000000..2c4ba3b1bde95
--- /dev/null
+++ b/contrib/tsan/build.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#
+# Usage:
+#     contrib/tsan/build.sh <path> [<make_targets>...]
+#
+# Build TSAN-enabled julia.  Given a workspace directory <path>, build
+# TSAN-enabled julia in <path>/tsan.  Required tools are installed under
+# <path>/toolchain.  Note that the same <path> passed to `contrib/asan/build.sh`
+# can be used to share the toolchain used for ASAN.  This script also takes
+# optional <make_targets> arguments which are passed to `make`.  The default
+# make target is `debug`.
+
+set -ue
+
+# `$WORKSPACE` is a directory in which we create `toolchain` and `tsan`
+# sub-directories.  `${1:-}` keeps `set -u` from aborting before the check below.
+WORKSPACE="${1:-}"
+if [ "$WORKSPACE" = "" ]; then
+    echo "Workspace directory must be specified as the first argument" >&2
+    exit 2
+fi
+shift
+
+mkdir -pv "$WORKSPACE"
+WORKSPACE="$(cd "$WORKSPACE" && pwd)"
+if [ "$WORKSPACE" = "" ]; then
+    echo "Failed to create the workspace directory." >&2
+    exit 2
+fi
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+JULIA_HOME="$HERE/../../"
+
+echo
+echo "Installing toolchain..."
+
+TOOLCHAIN="$WORKSPACE/toolchain"
+if [ ! -d "$TOOLCHAIN" ]; then
+    make -C "$JULIA_HOME" configure O="$TOOLCHAIN"
+    cp "$HERE/../asan/Make.user.tools"  "$TOOLCHAIN/Make.user"
+fi
+
+make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
+
+echo
+echo "Building Julia..."
+
+BUILD="$WORKSPACE/tsan"
+if [ ! -d "$BUILD" ]; then
+    make -C "$JULIA_HOME" configure O="$BUILD"
+    cp "$HERE/Make.user.tsan"  "$BUILD/Make.user"
+fi
+
+cd "$BUILD"  # so that we can pass `-C src` to `make`
+make "$@"
diff --git a/contrib/valgrind-julia.supp b/contrib/valgrind-julia.supp
new file mode 100644
index 0000000000000..408a48a2893cc
--- /dev/null
+++ b/contrib/valgrind-julia.supp
@@ -0,0 +1,10 @@
+# https://github.com/JuliaLang/julia/issues/4533
+{
+   msync unwind
+   Memcheck:Param
+   write(buf)
+   ...
+   fun:validate_mem
+   ...
+   fun:rec_backtrace
+}
diff --git a/deps/Makefile b/deps/Makefile
index 27b93f444580f..06897fc316493 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -39,8 +39,9 @@ unexport CONFIG_SITE
 
 DEP_LIBS :=
 
-# Always use libblastrampoline
+ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0)
 DEP_LIBS += blastrampoline
+endif
 
 ifeq ($(USE_SYSTEM_CSL), 0)
 DEP_LIBS += csl
@@ -158,9 +159,11 @@ DEP_LIBS += lapack
 endif
 endif
 
+ifeq ($(USE_SYSTEM_LIBWHICH), 0)
 ifneq ($(OS), WINNT)
 DEP_LIBS += libwhich
 endif
+endif
 
 # list all targets
 DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
diff --git a/deps/Versions.make b/deps/Versions.make
index 770828e28d93c..3fb5f2f9a2add 100644
--- a/deps/Versions.make
+++ b/deps/Versions.make
@@ -15,7 +15,8 @@ CSL_JLL_NAME := CompilerSupportLibraries
 
 # Clang (paired with LLVM, only here as a JLL download)
 CLANG_JLL_NAME := Clang
-CLANG_JLL_VER  := 12.0.0+1
+# equivalent to libLLVM 12.0.1+4
+CLANG_JLL_VER  := 12.0.1+3
 
 # DSFMT
 DSFMT_VER := 2.2.4
@@ -44,17 +45,18 @@ LIBUV_VER := 2
 LIBUV_JLL_NAME := LibUV
 
 # LLVM
-LLVM_VER := 12.0.0
-LLVM_ASSERT_JLL_VER := 12.0.0+1
+LLVM_VER := 12.0.1
+LLVM_ASSERT_JLL_VER := 12.0.1+4
 LLVM_JLL_NAME := libLLVM
 
 # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
 LLVM_TOOLS_JLL_NAME := LLVM
-LLVM_TOOLS_JLL_VER := 12.0.0+1
-LLVM_TOOLS_ASSERT_JLL_VER := 12.0.0+1
+# equivalent to libLLVM 12.0.1+4
+LLVM_TOOLS_JLL_VER := 12.0.1+3
+LLVM_TOOLS_ASSERT_JLL_VER := 12.0.1+3
 
 # LLVM libunwind
-LLVMUNWIND_VER := 11.0.1
+LLVMUNWIND_VER := 12.0.1
 LLVMUNWIND_JLL_NAME := LLVMLibUnwind
 
 # MbedTLS
@@ -79,7 +81,7 @@ BLASTRAMPOLINE_VER := 3.0.4
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 # OpenBLAS
-OPENBLAS_VER := 0.3.13
+OPENBLAS_VER := 0.3.17
 OPENBLAS_JLL_NAME := OpenBLAS
 
 # OpenLibm
@@ -87,7 +89,7 @@ OPENLIBM_VER := 0.7.3
 OPENLIBM_JLL_NAME := OpenLibm
 
 # Patchelf (we don't ship this or even use a JLL, we just always build it)
-PATCHELF_VER := 0.9
+PATCHELF_VER := 0.13
 
 # p7zip
 P7ZIP_VER := 16.2.0
@@ -104,7 +106,7 @@ LIBSUITESPARSE_JLL_NAME := SuiteSparse
 # unwind
 UNWIND_VER := 1.3.2
 UNWIND_JLL_NAME := LibUnwind
-UNWIND_JLL_VER  := 1.3.2+4
+UNWIND_JLL_VER  := 1.3.2+6
 
 # zlib
 ZLIB_VER := 1.2.11
diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk
index 1ba1d1cdada8d..a29b9b19e0eaa 100644
--- a/deps/blastrampoline.mk
+++ b/deps/blastrampoline.mk
@@ -2,8 +2,8 @@
 
 ifneq ($(USE_BINARYBUILDER_BLASTRAMPOLINE),1)
 
-BLASTRAMPOLINE_GIT_URL := git://github.com/staticfloat/libblastrampoline.git
-BLASTRAMPOLINE_TAR_URL = https://api.github.com/repos/staticfloat/libblastrampoline/tarball/$1
+BLASTRAMPOLINE_GIT_URL := https://github.com/JuliaLinearAlgebra/libblastrampoline.git
+BLASTRAMPOLINE_TAR_URL = https://api.github.com/repos/JuliaLinearAlgebra/libblastrampoline/tarball/$1
 $(eval $(call git-external,blastrampoline,BLASTRAMPOLINE,,,$(BUILDDIR)))
 
 $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/source-extracted
diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5
new file mode 100644
index 0000000000000..915ee5c4bb6bf
--- /dev/null
+++ b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5
@@ -0,0 +1 @@
+22c097ca7784442f1f10733db7961cc3
diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512
new file mode 100644
index 0000000000000..b824dbcb73a08
--- /dev/null
+++ b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512
@@ -0,0 +1 @@
+915791ab9837f09db428060bd128e182dda38c8dc10e13f32f059eb8e8b477548e8ae2cd691522f98c88c510b78b2693018264b62d9cc76d5005ea8104d1539a
diff --git a/deps/checksums/ArgTools-fa878696ff2ae4ba7ca9942bf9544556c0d86ce4.tar.gz/md5 b/deps/checksums/ArgTools-fa878696ff2ae4ba7ca9942bf9544556c0d86ce4.tar.gz/md5
deleted file mode 100644
index 916cfb7a6f0ce..0000000000000
--- a/deps/checksums/ArgTools-fa878696ff2ae4ba7ca9942bf9544556c0d86ce4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-25b06e0ff130621e94e7f0be85455c55
diff --git a/deps/checksums/ArgTools-fa878696ff2ae4ba7ca9942bf9544556c0d86ce4.tar.gz/sha512 b/deps/checksums/ArgTools-fa878696ff2ae4ba7ca9942bf9544556c0d86ce4.tar.gz/sha512
deleted file mode 100644
index 4dbab16a706d4..0000000000000
--- a/deps/checksums/ArgTools-fa878696ff2ae4ba7ca9942bf9544556c0d86ce4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c7a9e78215e362c5c2cf8ca1bbbc4d1855c49d97ddc77a6872d53daaab60a8364fbff90d0585f78b24e353c15807b3b31f239e3785b8c7b3307ba746cebd2706
diff --git a/deps/checksums/Downloads-a55825b0815fb22d0e3fe47903a180e9ffa23eab.tar.gz/md5 b/deps/checksums/Downloads-a55825b0815fb22d0e3fe47903a180e9ffa23eab.tar.gz/md5
new file mode 100644
index 0000000000000..9995f34189007
--- /dev/null
+++ b/deps/checksums/Downloads-a55825b0815fb22d0e3fe47903a180e9ffa23eab.tar.gz/md5
@@ -0,0 +1 @@
+f47e90ca5382668912a52ddfd7906cc5
diff --git a/deps/checksums/Downloads-a55825b0815fb22d0e3fe47903a180e9ffa23eab.tar.gz/sha512 b/deps/checksums/Downloads-a55825b0815fb22d0e3fe47903a180e9ffa23eab.tar.gz/sha512
new file mode 100644
index 0000000000000..92f88e492ffbe
--- /dev/null
+++ b/deps/checksums/Downloads-a55825b0815fb22d0e3fe47903a180e9ffa23eab.tar.gz/sha512
@@ -0,0 +1 @@
+7921af23c5d88edb65b586b505936454c0f0a5c1d58b3983716e216a014eabd26f4cff0e85284c9e79e4ed35dfeb006743eabdacb3521d24d2c49899068e7980
diff --git a/deps/checksums/Downloads-cd002c3c6936d144ae668d70e18337931706c63a.tar.gz/md5 b/deps/checksums/Downloads-cd002c3c6936d144ae668d70e18337931706c63a.tar.gz/md5
deleted file mode 100644
index 8b51d12bf9793..0000000000000
--- a/deps/checksums/Downloads-cd002c3c6936d144ae668d70e18337931706c63a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-53ebf53c712444f7a7e4b2b4507ed675
diff --git a/deps/checksums/Downloads-cd002c3c6936d144ae668d70e18337931706c63a.tar.gz/sha512 b/deps/checksums/Downloads-cd002c3c6936d144ae668d70e18337931706c63a.tar.gz/sha512
deleted file mode 100644
index 05665fb161f20..0000000000000
--- a/deps/checksums/Downloads-cd002c3c6936d144ae668d70e18337931706c63a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3d058995add04e2cf37b61e2c39b1606c63e50edee6d7945b59078df3f05b39b86333d03e48832871101793342182055cb604a7b0565d82b2740dedac4248461
diff --git a/deps/checksums/NetworkOptions-01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353.tar.gz/md5 b/deps/checksums/NetworkOptions-01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353.tar.gz/md5
new file mode 100644
index 0000000000000..385b70d612e3d
--- /dev/null
+++ b/deps/checksums/NetworkOptions-01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353.tar.gz/md5
@@ -0,0 +1 @@
+38005620dd59d364a3705127a2548b27
diff --git a/deps/checksums/NetworkOptions-01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353.tar.gz/sha512 b/deps/checksums/NetworkOptions-01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353.tar.gz/sha512
new file mode 100644
index 0000000000000..756079a989580
--- /dev/null
+++ b/deps/checksums/NetworkOptions-01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353.tar.gz/sha512
@@ -0,0 +1 @@
+d7247ad3285d768514683693edea3ca47ad22fc3ad6c1fcd5fbc42a38ee31e44df0c33cae9ba8448f7c4cff59c52fb8f401163716f46fb0c53b2a7959ae4d885
diff --git a/deps/checksums/NetworkOptions-6cea813ca841c400627b6cf4d4f4ec2599965ab6.tar.gz/md5 b/deps/checksums/NetworkOptions-6cea813ca841c400627b6cf4d4f4ec2599965ab6.tar.gz/md5
deleted file mode 100644
index ab9875abdaa72..0000000000000
--- a/deps/checksums/NetworkOptions-6cea813ca841c400627b6cf4d4f4ec2599965ab6.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6cfa681fc6a1c16d3dd3620c3b55d178
diff --git a/deps/checksums/NetworkOptions-6cea813ca841c400627b6cf4d4f4ec2599965ab6.tar.gz/sha512 b/deps/checksums/NetworkOptions-6cea813ca841c400627b6cf4d4f4ec2599965ab6.tar.gz/sha512
deleted file mode 100644
index 4d2e4a7acd631..0000000000000
--- a/deps/checksums/NetworkOptions-6cea813ca841c400627b6cf4d4f4ec2599965ab6.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4f0df64518b0d877462101c6cdc4c9311af74f72d5e266fd83ef320968fbf7b79a649f1e84ef1d3d8d13c41dfdd651e9164e95227896e01c423ac4f9c7e007cb
diff --git a/deps/checksums/Pkg-aec716f4360bc82bd6a7db6d0bd5dd7458dccba5.tar.gz/md5 b/deps/checksums/Pkg-aec716f4360bc82bd6a7db6d0bd5dd7458dccba5.tar.gz/md5
new file mode 100644
index 0000000000000..3f30d45576c8e
--- /dev/null
+++ b/deps/checksums/Pkg-aec716f4360bc82bd6a7db6d0bd5dd7458dccba5.tar.gz/md5
@@ -0,0 +1 @@
+e26e3146744e209424b666cc129b852f
diff --git a/deps/checksums/Pkg-aec716f4360bc82bd6a7db6d0bd5dd7458dccba5.tar.gz/sha512 b/deps/checksums/Pkg-aec716f4360bc82bd6a7db6d0bd5dd7458dccba5.tar.gz/sha512
new file mode 100644
index 0000000000000..c544a138484d4
--- /dev/null
+++ b/deps/checksums/Pkg-aec716f4360bc82bd6a7db6d0bd5dd7458dccba5.tar.gz/sha512
@@ -0,0 +1 @@
+6d14ceed5b88ad3b6e2f38261eaabe3afbd7fde3e45998426564ed0db35fc8e507e829aaa3d08d89a37ef2b34ce2e4d6eada8d65b77852f71f606c13dce7ac8d
diff --git a/deps/checksums/Pkg-e476cd0c61e19b645cc0e32bb30f8e44f60001f7.tar.gz/md5 b/deps/checksums/Pkg-e476cd0c61e19b645cc0e32bb30f8e44f60001f7.tar.gz/md5
deleted file mode 100644
index de586e869f6db..0000000000000
--- a/deps/checksums/Pkg-e476cd0c61e19b645cc0e32bb30f8e44f60001f7.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-af31db7882d62388fcc3fccf81fe699c
diff --git a/deps/checksums/Pkg-e476cd0c61e19b645cc0e32bb30f8e44f60001f7.tar.gz/sha512 b/deps/checksums/Pkg-e476cd0c61e19b645cc0e32bb30f8e44f60001f7.tar.gz/sha512
deleted file mode 100644
index e6703d6365171..0000000000000
--- a/deps/checksums/Pkg-e476cd0c61e19b645cc0e32bb30f8e44f60001f7.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6982d5a2eebf2cd2a69b43a9ee8c54c3833f494f89aef9fab852d3b9b48a0724fa58b4f59fbc8dfd27406ae9c8e8c4beedba7f7a8dc8d128d435f6d9a8a03147
diff --git a/deps/checksums/SHA-d30dbf6f75b30b84c678208bb93438bd75a5f3ef.tar.gz/md5 b/deps/checksums/SHA-d30dbf6f75b30b84c678208bb93438bd75a5f3ef.tar.gz/md5
new file mode 100644
index 0000000000000..9f07b717b57cb
--- /dev/null
+++ b/deps/checksums/SHA-d30dbf6f75b30b84c678208bb93438bd75a5f3ef.tar.gz/md5
@@ -0,0 +1 @@
+af952f7139cfa0c67812f593a3f23b85
diff --git a/deps/checksums/SHA-d30dbf6f75b30b84c678208bb93438bd75a5f3ef.tar.gz/sha512 b/deps/checksums/SHA-d30dbf6f75b30b84c678208bb93438bd75a5f3ef.tar.gz/sha512
new file mode 100644
index 0000000000000..f6d5c063aaa30
--- /dev/null
+++ b/deps/checksums/SHA-d30dbf6f75b30b84c678208bb93438bd75a5f3ef.tar.gz/sha512
@@ -0,0 +1 @@
+c915b4039308fb8c9d5a7d0cc627869bf0d3498b97ab8243c9fa5f597271dd49ff117b5173df3a9a2fd97eeb63f3b93c2b53b4cf97c160450952e6135700ac1a
diff --git a/deps/checksums/Statistics-5256d570d0a554780ed80949c79116f47eac6382.tar.gz/md5 b/deps/checksums/Statistics-5256d570d0a554780ed80949c79116f47eac6382.tar.gz/md5
new file mode 100644
index 0000000000000..cad5503d6fd27
--- /dev/null
+++ b/deps/checksums/Statistics-5256d570d0a554780ed80949c79116f47eac6382.tar.gz/md5
@@ -0,0 +1 @@
+0b43a9ed3c1f081f9510dffd6697c825
diff --git a/deps/checksums/Statistics-5256d570d0a554780ed80949c79116f47eac6382.tar.gz/sha512 b/deps/checksums/Statistics-5256d570d0a554780ed80949c79116f47eac6382.tar.gz/sha512
new file mode 100644
index 0000000000000..e9aae6344832c
--- /dev/null
+++ b/deps/checksums/Statistics-5256d570d0a554780ed80949c79116f47eac6382.tar.gz/sha512
@@ -0,0 +1 @@
+15b2ed0b269605b1dab150fccc8e202829278bc4920293b711c611627cd4a61767373895acd1d45a0b37557e8c0cd123673d75944cd08216cffa81dc5186c0fc
diff --git a/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/md5 b/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/md5
deleted file mode 100644
index 62f1954037241..0000000000000
--- a/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4c09536f4f769b23e88fee769f5a09bd
diff --git a/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/sha512 b/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/sha512
deleted file mode 100644
index a79b037b94de2..0000000000000
--- a/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e409fa943a9683a129b80c78ef74572df316ed414dfc8c208f1500d0f07d4d41870d44654446e2c20d1b9ed11e62c4fc6107b6e5789939edbd049fc2aaf22f63
diff --git a/deps/checksums/SuiteSparse-76856153eef26c008f13520ffa12288e214fe02c.tar.gz/md5 b/deps/checksums/SuiteSparse-76856153eef26c008f13520ffa12288e214fe02c.tar.gz/md5
deleted file mode 100644
index 16d45f7422a10..0000000000000
--- a/deps/checksums/SuiteSparse-76856153eef26c008f13520ffa12288e214fe02c.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a7bbe99082964df20037ba70934b2b5a
diff --git a/deps/checksums/SuiteSparse-76856153eef26c008f13520ffa12288e214fe02c.tar.gz/sha512 b/deps/checksums/SuiteSparse-76856153eef26c008f13520ffa12288e214fe02c.tar.gz/sha512
deleted file mode 100644
index 88f364d2097c9..0000000000000
--- a/deps/checksums/SuiteSparse-76856153eef26c008f13520ffa12288e214fe02c.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e22d3bb86fe7952447fe5044307f9e4613ae04eeeb63c5c685d36fa09b78762c76e2ff32bd66dd49cd5c2b8b9b986c79d4e851f76c29ad985f7266c38d795cf1
diff --git a/deps/checksums/Tar-6a946029685639b69ce5a7cc4c4a6c0e6c6b2697.tar.gz/md5 b/deps/checksums/Tar-6a946029685639b69ce5a7cc4c4a6c0e6c6b2697.tar.gz/md5
new file mode 100644
index 0000000000000..ee883809e8465
--- /dev/null
+++ b/deps/checksums/Tar-6a946029685639b69ce5a7cc4c4a6c0e6c6b2697.tar.gz/md5
@@ -0,0 +1 @@
+676c882d979bd06b8c15e860976feefd
diff --git a/deps/checksums/Tar-6a946029685639b69ce5a7cc4c4a6c0e6c6b2697.tar.gz/sha512 b/deps/checksums/Tar-6a946029685639b69ce5a7cc4c4a6c0e6c6b2697.tar.gz/sha512
new file mode 100644
index 0000000000000..da42b4d34a02f
--- /dev/null
+++ b/deps/checksums/Tar-6a946029685639b69ce5a7cc4c4a6c0e6c6b2697.tar.gz/sha512
@@ -0,0 +1 @@
+ee47eb3e8752f888f2f775531b61e3d1b5d11e9348a0539f34f83b5899e530801fdf0ec621657452c561e724bdb54d9e2953dd5850b48e620d50279dc497d8f8
diff --git a/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/md5 b/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/md5
deleted file mode 100644
index 27fb3f83ef22e..0000000000000
--- a/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bb8b923ec61cbd3160aa81017308f0f8
diff --git a/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/sha512 b/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/sha512
deleted file mode 100644
index 79a65ec673d46..0000000000000
--- a/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-79f0a94ea1fd895c6afb52e6e4f26a295cc2d2b9317e5b0f80017b036836c269a982b6028e3e8002675fb8d56e2144ba426769b9701adbf065980d11b9c1fecf
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index cdb05346bfe93..5b3a5d01c051c 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,5 +1,5 @@
-blastrampoline-23de7a09bf354fe6f655c457bab5bf47fdd2486d.tar.gz/md5/0c8016a6e30bc2237184b816b613d11c
-blastrampoline-23de7a09bf354fe6f655c457bab5bf47fdd2486d.tar.gz/sha512/7b7dbb101cf05ac833a8e5b09f1eec8eb99f0caafbe80075751a5f7e0bfe03a3b19d11d3507dadd13f503cfa9fc1a6cd53f3461af7d5afb39ca385a0ee26120b
+blastrampoline-23de7a09bf354fe6f655c457bab5bf47fdd2486d.tar.gz/md5/b81efa951fd909591339189f5909ff6b
+blastrampoline-23de7a09bf354fe6f655c457bab5bf47fdd2486d.tar.gz/sha512/1c2558bab0aeaa76e7094d8a6a9798c95f2cf4efe2960640b70f1fd752f3dfb73813d9de93b539426376571febaab22ac22c2f903ccdf3296c7b067af92fecdc
 libblastrampoline.v3.1.0+0.aarch64-apple-darwin.tar.gz/md5/9a72574c810323ebe7b496266a5b3d90
 libblastrampoline.v3.1.0+0.aarch64-apple-darwin.tar.gz/sha512/559c91374882a137ce8b9f008e9d815dfebc175f65ac07bf784d590e31a07f60302de3d498e5dcc3f409f59fc2c7fbd1fb32623d25ed65bcc31c35a74c11f260
 libblastrampoline.v3.1.0+0.aarch64-linux-gnu.tar.gz/md5/95802755d3b6205c88163ba313662fac
diff --git a/deps/checksums/clang b/deps/checksums/clang
index 125b200da0a36..b5343f4965e0f 100644
--- a/deps/checksums/clang
+++ b/deps/checksums/clang
@@ -1,58 +1,58 @@
-Clang.v12.0.0+1.aarch64-apple-darwin.tar.gz/md5/465b3375db7ab6b93fbe465ed76a275d
-Clang.v12.0.0+1.aarch64-apple-darwin.tar.gz/sha512/a39faf3bb411612da5cb29be70a3b200dac3a4bfd7eb58e4f68edfd09923819f7f465dab4dde4caa0335d8fc392b2f55db762de0e93642ff49a466b68f0fd383
-Clang.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/md5/162a2ee947742b9d0c01c95725b98d96
-Clang.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/ead804d78ab42d3e1ca6cb7f12ff7544525772badce779eaef5cf973e23966f1398343d3c3148b58195eef7d4e9c7ee897e8bfb30a24c095685cece46344671e
-Clang.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/md5/563e93b82893c8257c23878135de6687
-Clang.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/ce8103e409790b5d4993ef4259524096617ee27b82b5d1f6c55880814211da6cefb4645a9c5de4ec44f9c961d1ec62da8cde0977d8af4be6e25a638891efc372
-Clang.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/md5/d3eac2234c6fa736c3c8e8a4a7e9eb0c
-Clang.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/sha512/0a7f414dd22d34184c47695b5ad40e57286527a16fe32e90eaccad11d72192679985a9ffd6f9616564ff87de1d189726c3049100269648115f0f983d80400400
-Clang.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/md5/23cdec7b732c13313c897593c40bce2a
-Clang.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/sha512/616fcd76d887cb3188b5dab8a2a15f276f3cc7d73ebdc13e1e12d1d4eaf0a3722ee288b5531bd24ab825a463112052399ab75d839e9df1c98987313fe731a2ef
-Clang.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/f2d3d99ae67b8435021b20bd77aaf433
-Clang.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/7cb4ac7392501a849058822f88aac49f9a0a03496a5fb39b8a8fcfeecf86f004b466e9fbfe2bfdaaed6d9cbe33e61b42d89c03ffddbfc4d8015790f64ca3623b
-Clang.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/3c43e3cbbea5c8f2f197bf6c505a98c4
-Clang.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/31107abd43a3ad43a3df424e9d35739d646c955923c08ca3cc405b7677deacc0cf9aaaf29b29fdd4fd53a434087620317e1f4d91c3bcb2932716a87804388d0b
-Clang.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/dd2eecb77a209c088ad81dec1f4d9610
-Clang.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/7a5a13ed5e046fb05cba3cd7aa55cdef741a7d76274e71d2b2fa194f132bb98c40d200fb04dc111cda7d901babd4f1f5d4e9bdf1dfd635f0492c386971878b5a
-Clang.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/74684a3dfdecad109aa1cb3e5eb1f24b
-Clang.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/e1dae1ae3bd5b82d21401cbc9c6ca2281a05bb1039fffe0d94e7d5ef482a09bedbb5c512aae52873ec7ca471e543c334ff37b1e8429da79724b6b131b0502bf5
-Clang.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/32284100f6a57f5e4b32d0e6d35dff9c
-Clang.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/201cff5571219df602ea2f63d9559e8345aacc8c8691b30614fe6b6f5d0b7692f670b17366185f0e16cd1119bc49cdd1184247b32826bcc0a3a1d026c48e538b
-Clang.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/c854231dcdd3816078ffaf435b152a3c
-Clang.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/082760f1dcb39cccffd5fa0a096ff0c6d823355d22d8cc2b69aa90092fb6157c87070d932fa736e3ddf4de0199c7f06cdca91bb354ee0cc125b505e66f5df068
-Clang.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/5c909ab9d88334058e99be60e0330711
-Clang.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/abb39baac59aff579603870a31dbbbfebae3eea91caa203dbb3409d49cccd81dd96aa82a098c20265ee5f5fd851a0621c11b1a8837952a329c699b909e2c83a1
-Clang.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/5c275a64299c0d4fc51d35f53c3dab00
-Clang.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/05c167d371dc5fa0226f0505ea1339ffcec8563c76adbc1450098ec7aa096fe04d855595d3cee290c2b6dc0843d8fc1c5215c84d3b3095d2cf0d93711dd4acec
-Clang.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/md5/f27a08b7004c4a5eeb35d22198d72c82
-Clang.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/sha512/9fdcadb0d2bb56eb0e880a7b7a0c7660d614670209f76dfd5aae70ef0d3c437d6b56171b542d608784f7c4f7ef7bc5a4844baffae4fa95c25094c630ac39bfdb
-Clang.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/md5/748c1efc36a70910fb3d3fea537c3b4e
-Clang.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/sha512/bf70ba1a1576039210978488050890d6ee049fdab076a9a8498058b732c1c05d47861146aa48262a304f7dd8b24529e1a3425d0afc42c69843eac4b1317327a2
-Clang.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/md5/5233ddc66968db6bd00bcc9e2aed7f12
-Clang.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/sha512/bb9aea78c78c6cbb613b4c3eac6f08776dfafcc5402e6bf1a6670ac8d707de29b0ff8ad654d1d586f3e0b2a324c8fff49524033277f3cceb08c826243dd8a519
-Clang.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/md5/f448936531ab804b56b0d8b63a2b7cb9
-Clang.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/sha512/5ccdaad4623ec17eb72f806bea432fbe62584b512159771cb295bd273771b168f502a3a8f15588e0e9df65024271a8edb891fc48c2535e45a31289b4c3469a28
-Clang.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/md5/2e68b66d9d607414d69bf2b11609ce5d
-Clang.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/sha512/fec58abebab5d632fbfaef8335737a54d83804a823ca6aa8ab9031aa3c2a8afd5c342d6d989d7918352c241b7d7cb8f9802a1422a44316ad0f8760998e2e02ae
-Clang.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/md5/1ab89ec9072eb7d871b3a0eff78d56ac
-Clang.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/sha512/9878561bbf405ca29e1f32049c753215ecac1bbcbb62f66dc573da3826bbdfb5f4e5630273595d611c2aa315d54fdfa35a954f71ba8781e5eb659658a9157886
-Clang.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/4b8045175ede4b934e2ccc8a28920910
-Clang.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/38c952d0ed7e63c52e013e303f77c4d779ca8682926a32705402f5a275358fdd5d7f59188c0f43d994b8c2ea31939c23d0dff9a4b7312ed39574cc39c27691ea
-Clang.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/53ce55e158860afa17cc653f4fc0b500
-Clang.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/15738783329bca1720278526a306d230917d64d6ec6c9542a248953f5c1a071316de6bb80cd1d0498926ce876fd541d939023fda7b234ae9517f4583a2b6aa8c
-Clang.v12.0.0+1.x86_64-apple-darwin.tar.gz/md5/aefdcf918b508ba66cba11abd0d99ef7
-Clang.v12.0.0+1.x86_64-apple-darwin.tar.gz/sha512/c7ef9af9bbb5a1e2fe022c3996566fe774903bee67c15dd6ae304e89fc93e5080f177ea074f9736d6959edecae415586db4dfff8328e8fdfd643019ca7d41131
-Clang.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/md5/490a63fadd41b10c425c1e87170b63eb
-Clang.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/14ad28afecad4a60d0a978af75b3db68feed71badb271dee0b29bb4cb471f5e69ce85b6cc990b63a2f9db24ca87e897ee935beca532451b2ffc020d80e3cc827
-Clang.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/md5/9056064168e01462aae83721a670fc7e
-Clang.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/aa0340efa8106acf92c2528c6cd5ac8b6bfd0ca0b8041e22534db4178364467dd8c46f4c77f365fa0ddeb5aefc6ed01e4a4416e1326173a255e18cc890dfc0ef
-Clang.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/md5/b4edd5baae3b1f9b18eae365d7969621
-Clang.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/sha512/abe5ada1bfb6d69ff28fd1d16f5de3e2b12c36730f04d3fb4c9e332d4947438530f4d5a677d3dfc973769a381ec7dd1944c3ce2213bf2177d263b47b4903d186
-Clang.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/md5/d3176f49c216d11f2113caa10020745e
-Clang.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/sha512/8585c40983ab7c401bc6537cfc3ebb7a44e52738576155855db5c51fa9c0dfbec3b5c12f63575b09c038c9f047d9941a192082ba7572489b55d5bcba2e3fb27b
-Clang.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/md5/1d3bb2c77b0a3473697ca2a63de35b89
-Clang.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/sha512/de8922979ad7158fa72d0e152670ad3be75fabdb22a1e91d187e6847e9b6cbc905ca09a476dac651a52c775192b3bada333e68e38e6ba7101a472755bb7ad79f
-Clang.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/ecaf6c8831e8c6de8b8e74f95f04dbf4
-Clang.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/aec6626dd02fc6a0f82e05783dbce19d69a2fd9eec53189a1086393134ce1f972a1b0ef286b06f19be192fc4575d6e9f5f09984444cd3bd50c66c2fcabe23b73
-Clang.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/23c8b1d208dde43fe2b48f143f2108ea
-Clang.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/75118203f3ba67ca99279100e9b31eaff165f70af5a487bd9ad6dbf70da8041a374d9953b807739258be6e493a5422a2e5e1ba00b8ebd746b706de3915375c1e
+Clang.v12.0.1+3.aarch64-apple-darwin.tar.gz/md5/e48f17625df207526c56dc0010ae0daa
+Clang.v12.0.1+3.aarch64-apple-darwin.tar.gz/sha512/e7b9844e84327e8624bb689302f103ad1e621e0e31f2824af54182c597873c38f5e7028ec055aba84645e0c6890c4d39ab2bc9a1acdaa6eaf0b3d648b3e4cdfc
+Clang.v12.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/5da291e45739f3cf99c055364bd8c2fd
+Clang.v12.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/6a06c06c6b03d4dbb5fc61d922ce5ff4f394fca2b39025b7e8ee0f50d2373917ad46d8c688f0a6e073a4ad3b14c981a8353d554356d634846be940d659b1da53
+Clang.v12.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/e732781718ddbd28567247c5d1ed0d1c
+Clang.v12.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/5b8ee21c0661bd3221252964a1ef0b5be55e227a9d167453be5659bf21d006fcc921b275b35f9ee9c6535fae4e03ee9df585233af61b96d04acf3742997a20b5
+Clang.v12.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/6d93f705e632a2eef897e56510091bc6
+Clang.v12.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/75fa08aeb2c4aafa73e39109391f22c0469df2ef6a5973d7a0f72d9ba9f6602259732ec1d87ec48e019f7c1c7cb168747e1215e7a95b9faf7d12ef4928c2a087
+Clang.v12.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/92f83227bf548f2237ee367893ea5779
+Clang.v12.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/7215208979ca7b6cab569b1d23da554b62d0f09f5ce80b9240a0bdbc55307f2b2cf7dd53523800a620a635d0dc9a5355d0b62b9e943e5e054d527f12e1488283
+Clang.v12.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/cb434a7f448a848941150b676a000993
+Clang.v12.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/3c7075e543ca17d7c41d99fb123351fb5afe4b292afe45b2d5283d43b59fe07fe822721455b62dd35ce8eb72c061066eefbb9c72218a0b7cad92fbf0a7f78696
+Clang.v12.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/6867cd783ae6e8161f356c2de58b2b9e
+Clang.v12.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/8a976ff74c14b475e4e06ec95b7d24d3e6fc1f8938569c38953314e3ff1be32f03b1aa7f681fcf852cdc4041c2eacbf8ef6110615267dd74c83cbd0e308b0975
+Clang.v12.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/03d7b293eec65433a10b72fce8822c45
+Clang.v12.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/9a2b00fe099c6481122252f2079692335bdc450b67ac094272725bf992d9376a55102db8221ad56e1575c75af09fbc5919f06faea63130785973afdadd7d5920
+Clang.v12.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/a7c9d6bfc7d6f203b50f9b4dcecd6965
+Clang.v12.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/0532eeb0c391927c897d7c050913d5b41d0869bb26da2e56d782e60b228fef859bc1928e563f4ee349d4bad450588e7706a144398ca47a849ca2c36b7721fa8d
+Clang.v12.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/5cda8cbfc60a39c7e4426c0f12545337
+Clang.v12.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/ea35242499d3e658c72407e3caa42a642051d6a0245aa72605288b9c5f90335d3caf2c2f2c31768e1667fccaf3e6f3e6d003b98504aec4304f562ee6854cf434
+Clang.v12.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/460de1a7571da00e77ab43488cc5c81a
+Clang.v12.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/e275ef8b01b142e8187dfc5420342fc6d889ce97458d4178f85b7951a37e37896d3d30793856475030d635d6df7fc20a380524ef24560d4d3a1681564135517e
+Clang.v12.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/42b35ff028d6c4964083ac6218d783e6
+Clang.v12.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/d941f82bff7917d8c8b9b260a8896d1af8d61c56dcc574e7a96a3971da8622f2d5822e340f8d5cf340adcc36760a757efa68c7e6a4686ecdec49207b68761ea0
+Clang.v12.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/2fffd7d0d994b8185b6b93e43ba0d187
+Clang.v12.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/d538b96f66e9d4935391abd51ed069a323c7dde561ab5daafbc31360c422a25fffcf924a676d50642275c3047cc2c733b89a9c130f78fa9862a63146d981ae0f
+Clang.v12.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/cd41e8189cb89991cf25b2d29418b127
+Clang.v12.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/8c42ce277a94492e76c7719cc7aa9388fc167caa05bec7faec901dfaae953857c97a82c24edfa47d42e23bda3ccb13e6ef33b89d5e663177429b95304d74f6ba
+Clang.v12.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/39e81ef66d081d99e63c3df56392f518
+Clang.v12.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/fe630a179fe59fa899bab114890b3e3f8d704b08eca933289f26ac888fff2dce295328b75c77c0a837890d0ef15a07e92df30a60c71e3db5be99b4dbc8e3bbe1
+Clang.v12.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/db4f1a883aff089f3e939ea73019ce92
+Clang.v12.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/4dafc8b917a4fbfa07477d6fd95b08c14ad3a5101017e539ca6b09c572a671117f30336f705a1a494ea123b5afdfd84ae3e4d3e18d7179225fd20873e758be27
+Clang.v12.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/f06fdcc12560bfa49659fd074d7c3e85
+Clang.v12.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/5f5b70b2f52752a255ada165408adab30f5c3549a9a0c07ab02efa86af437610eb948309cf7c62282edbf0f19b6c72ef011c276405b9c80207775211a15409b9
+Clang.v12.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/a45b6e1a73bfe6c89e42e02fb0a7c8d5
+Clang.v12.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/d0a8a539e2ae9ae0c652e80c2a321b2ea5e1e0d2b7d50850ef9fb2013b5b1ee8dda86e159b17aba9e5c4fa35d777dd6cf99e10bd54fbd51d9a2e235486e70956
+Clang.v12.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/0cf24f2a471eaa7979f8b08dbc6384df
+Clang.v12.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/3cb5d20140c3e10a079e603377e5963844a85c6c50f3df7ec7981a16c0c2b7810cbab3460be9b96412ee9cfc6664d6d2dfefc554101a2b8934b4fc8551a90ece
+Clang.v12.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/01c8ca0f75ca27893933b0f56dc2e4f7
+Clang.v12.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/c00bd0c1ec5a602c2343865ba3267904b24daa92bcd384240d7f09190dfaaa9ec7d1927c60245345a9b207224ac15834332406f67a0ba152239c4064f38800de
+Clang.v12.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/901196acf617f5e7113197b9f555e357
+Clang.v12.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/efab074365fa8597fe3c383d69cad981edebe9919128c9de01801cfc2e77e83c070fad61cc606670045af696b00f724f1b4fd9166d54090984a587b4c3c193db
+Clang.v12.0.1+3.x86_64-apple-darwin.tar.gz/md5/ce31160483e0a9872391988530012ee9
+Clang.v12.0.1+3.x86_64-apple-darwin.tar.gz/sha512/a091f580ed8ffec36aecb5b7b929b0c4753ecce2f0a4421fe99a8b98d1e615bb975ffe1a82acc3921eac8a2e92055012c8222c7b8a52214acc87e2d8b0b16fb4
+Clang.v12.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/ad4f1b6309c845816fe20b974e02acf1
+Clang.v12.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/9a6b18091bdf586c554fd9c4c0a97da2324e69443ebc732d4863028f099ea0523c1ac476726b6310ef95f438673d317170cba38e571a5de16805b43d9b6fa64c
+Clang.v12.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/3cf260c1df00939fdaf322320b19264b
+Clang.v12.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/15951398a9d2c176caee370e004234e7db7eda5d590b74183ee5696678b4bb8ecad6047cdd4cb7c62f25847035b0e52d4c8ee8cb25d08e0edf00a68dbee12e98
+Clang.v12.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/ce07838922efed663d6aea0ee0b0995e
+Clang.v12.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/e05a903feda1868d17b311816943f00745ce79fc859ed92782b8606c11015f13884380ddc5fa6e1fad2174b2f5f32ec3208eb76e000ffc9825ff532926ff8859
+Clang.v12.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/a59f567abcc0adbed004ba0d0e590407
+Clang.v12.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/45e6f46f028beb879f9fec18137593882a7d81fc0825b623d9884f9b8fa400ca847e0c975ad2a7f1fce6ab7bee6d2f7d3cd3c6ad4ee49cbf879584ea7b3e2e58
+Clang.v12.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/5caa4b65706d5bcce71e72ac8dae39bc
+Clang.v12.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/d49041ff192b1af43778afed30d0ab8f8e6a6b0cf1a8b0382712dd54e0362de2a58a9ea69ef51a1924f34480a79bb6a498904fb7eecb76e6cb40bffc98427093
+Clang.v12.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/88e8ce154a83ac41fa8bdee270453f54
+Clang.v12.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/d9087edb3ee10bb91783340f27717a690a7e659aeb7bbff138f1a9ce1db2763c9f3cabd8cb0a1d1c79e2587a993ca609d815166db009b214689c7d4f1c55f05c
+Clang.v12.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/1394c306638fe127292414f6144e6d90
+Clang.v12.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/88f6aa01c851cb26f8999d4a1996527351237fd3331048b4255de303c3cd8e8e90ba217bbf709bc1277e9eab46258b831d0bdaccdcc58dbf086b8be0a1ae49d0
diff --git a/deps/checksums/curl b/deps/checksums/curl
index a1ef36078d1f0..0cddbc5be67e1 100644
--- a/deps/checksums/curl
+++ b/deps/checksums/curl
@@ -1,5 +1,5 @@
-LibCURL-cddeb7f4a7d5718a4a1be602ffcbe68299a1a37e.tar.gz/md5/eaf82b82c59404386d9f5744a6447e5c
-LibCURL-cddeb7f4a7d5718a4a1be602ffcbe68299a1a37e.tar.gz/sha512/b286be84c9bc405479d13feb44ba97109ad17e1de8be8f1b2bb3125bede884311b190e0cbb0a0c806e7034ecd688b72dd11c1133cca1a2ef74370ec1200aa790
+LibCURL-04c450c17024d5b49cb30013f1409306efd35203.tar.gz/md5/6a545e3c5dc4a0d7fe73435ec4c45dea
+LibCURL-04c450c17024d5b49cb30013f1409306efd35203.tar.gz/sha512/1308d4efde43eebd70646a77e4cf2d4a850a7c33d4a26018a1e84b4e7e1fb525ae193385fef7d47c405dbba0d685523d4b593702d93f441bcf8a495cc21fff0e
 LibCURL.v7.73.0+4.aarch64-apple-darwin.tar.gz/md5/457083bbbfe9b7602a62acf5df56c123
 LibCURL.v7.73.0+4.aarch64-apple-darwin.tar.gz/sha512/93dbb2cd0a126dca3f721370f1e92e1bd6ead5fb2971f7ec61c36b47924057ce306715b127fe1bd6e4ebb369cadfcf7f41d9f26fa367c185ee1b4c8a448f456d
 LibCURL.v7.73.0+4.aarch64-linux-gnu.tar.gz/md5/683c6cd6e9d5bec018402068678c811e
diff --git a/deps/checksums/gmp b/deps/checksums/gmp
index da510e3dc2388..6b95ca883ddf8 100644
--- a/deps/checksums/gmp
+++ b/deps/checksums/gmp
@@ -1,5 +1,3 @@
-gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea
-gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9
 GMP.v6.2.1+0.aarch64-apple-darwin.tar.gz/md5/e805c580078e4d6bcaeb6781cb6d56fa
 GMP.v6.2.1+0.aarch64-apple-darwin.tar.gz/sha512/62435e80f5fa0b67e2788c8bfc3681426add7a9b2853131bbebe890d1a2d9b54cebaea0860f6ddd0e93e1ae302baba39851d5f58a65acf0b2a9ea1226bb4eea4
 GMP.v6.2.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/5384d6ba6fd408bc71c2781b643cd59a
@@ -58,3 +56,63 @@ GMP.v6.2.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/1499a265b438cf5169286c1830eb573
 GMP.v6.2.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/d2e6fe76abe0a0cb1a7445ea93cd5bd0bf9f729aec8df9c76d06a1f6f5e67cce442be69b66950eb33aa22cfda2e5a308f2bade64018a27bebfcb4b7a97e1d047
 GMP.v6.2.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/fdb4187f617511d8eb19f67f8499a8d0
 GMP.v6.2.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/bb6d8ead1c20cffebc2271461d3787cfad794fee2b32e23583af6521c0667ed9107805268a996d23d6edcab9fe653e542a210cab07252f7713af0c23feb76fb3
+GMP.v6.2.1+1.aarch64-apple-darwin.tar.gz/md5/03cb14ac16daabb4a77fe1c78e8e48a9
+GMP.v6.2.1+1.aarch64-apple-darwin.tar.gz/sha512/5b8f974a07f579272981f5ebe44191385a4ce95f58d434a3565ffa827a6d65824cbe4173736b7328630bbccfe6af4242195aec24de3f0aa687e2e32a18a97a5c
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx03.tar.gz/md5/0ce7d419a49f2f90033618bdda2588e7
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/16363dedaae116fa0d493182aeadb2ffa7f990f1813e4b47cae3cd61ca71f23b65267ea4e2c698d52bd78d61e12feaa73179d7b86ab6d6df01eeb7b6a9b27958
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx11.tar.gz/md5/011f1cdc39b9e529b4b6ea80f4c33108
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/1ed2139580c5c78578f350ee83dbf9cd0120d9d36e1951438d757f5734cda7931600b3f83bfe0d0d806926636d6aea8048c6b64aa42a22e59310282c2428f417
+GMP.v6.2.1+1.aarch64-linux-musl-cxx03.tar.gz/md5/34f17083a1f142c284b707cc82407b00
+GMP.v6.2.1+1.aarch64-linux-musl-cxx03.tar.gz/sha512/dd32912c31a8422734c2e5d5a37001ac18f0e9de151982583d9dc185e5cc3e45076d737729345cca8e8eaf42993d4102353261a2de245e26a8a9cd86960a2fbf
+GMP.v6.2.1+1.aarch64-linux-musl-cxx11.tar.gz/md5/9ba1b822f20f88a1e4c6e81dc8c4fdc1
+GMP.v6.2.1+1.aarch64-linux-musl-cxx11.tar.gz/sha512/d8a4ecd5c35022b9c912c3b4fabe3a4c31258d6a1bd38e4fea13a3da53206a29bfd90f4d602f6e3ee3ee271d84289d1ecdf45534adfabf7e657daef5b5cb0b21
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/23e28efa2579d636cb4c80036da5d4ea
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/02c8023958fa616c1f944898e686510d449b743d053cfd42f526e9c4fe3ff3dd9de7309694b8537b4bb6dc978085339eb787983ec4ba32dc041448c912a8b982
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/bf2a2c4f81f6d04746cc528438f62639
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/1c152abeed24761c775e78a64835f8e61b28b16cbc29a6fde88fa4fdbf2a5782cd62697bd03a552d873995bda58b7bdc081c11ecd5e4badde2dea426e5218116
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/25cbceed2cf1bb12601fe285c342d6b0
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/37d8b21bf59c0c555f2b59d6dca4f486bf1725ae18a7fea9a2f31533c54ebb818b5ddb88ec8aa2b618e0ecad78973659abd1a9f095f64ef65067ab8ed08d7801
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/8ec72c769625a218c6951abed32b3684
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/4cb9ccb97859b0918002b649e1b5e74e1fc89a2daeec6f32d5a06ce0d84217f54d1ee788f472cebeefc73ef52284a3d5607efbed47058b438d2dcbcf9f384ed0
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/6f799d6516cc46af28eacf8409634825
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/541c1e03726584ddb672a83becdc9a99c68f5da9a7415750d582753b47774910bf25cee7fe21f5b5c2a80ff8ce87fc327abd45bf54d6cfe821cb202c81b67e43
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/17dba9ebcc1bf4637095a98a876375a8
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/648220e632618d23e8611e10fa4bb2e581ed4432e3fff77d0d7349a7585bffa65ae57bf1ce64c550bf6d2acc016f499c0bbbfed8088281445b9d4ecbbf9a64bc
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/79c77b81cc16fd22ad4cef75af7aa220
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/0059ba54806ef0ca6621ddcd309a18922c4c7d9d9e214bc6870b6338a9449a472934cc27569402741d41a18dd53a896aae2f68b788f853fd4ea3db63035c8153
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/87b79bfc5c780e214863d0f0c1944da9
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/88dcabcf96d8f2dcc7968333a94adcb8e8a91615b67ca23edf75c3368a89ef60a8deff8e8532d0cd4d5dd5356343b753b0ae0bf88ce7e190639468bf8170939a
+GMP.v6.2.1+1.i686-linux-gnu-cxx03.tar.gz/md5/61d39e42ab6fd5844e938605e357b336
+GMP.v6.2.1+1.i686-linux-gnu-cxx03.tar.gz/sha512/8e0d382adf6b45cbf613092cee524551a04096b0bc6fb8893701edae9c1928bda67b5522cae3ef954a882ff73b735190881ade37495d9d1a6db88ed6fbcdc6b1
+GMP.v6.2.1+1.i686-linux-gnu-cxx11.tar.gz/md5/b66b49054426adf3e1d3454a80010d97
+GMP.v6.2.1+1.i686-linux-gnu-cxx11.tar.gz/sha512/b28f22bbfbf796c4e959b1fa3433d46b4cf0dbd402c0497a6d4893c8030aa12fd038da4846d8bce02199f1da9b0158d78f2b4ff2636799ba139602775725ff6d
+GMP.v6.2.1+1.i686-linux-musl-cxx03.tar.gz/md5/69ea3b3348813777a1682e41a117d7c3
+GMP.v6.2.1+1.i686-linux-musl-cxx03.tar.gz/sha512/048dd08b5891864e69504baf6328ef5423e0f8e31c5c6cfac552eb51b3ef943af83b7ac654c33e1a0cf061c5832e08eebb9c03dbda6532fbc24e160e99c2aae6
+GMP.v6.2.1+1.i686-linux-musl-cxx11.tar.gz/md5/e7c82091d29a3e5958442c9ec631ad78
+GMP.v6.2.1+1.i686-linux-musl-cxx11.tar.gz/sha512/8574f2e42e181a7bd1cf8aa8056a14d13efe555ee74b14e14aef1bdce7f26ce2afe41b4f85ee20de6823045d5ff38e4dbcebcc7042fff4288af1b7d296202d43
+GMP.v6.2.1+1.i686-w64-mingw32-cxx03.tar.gz/md5/dcef59aa056dcd56e6e36ad49174389f
+GMP.v6.2.1+1.i686-w64-mingw32-cxx03.tar.gz/sha512/3cf3096c325ae2baea8b3c3aed4a26d649dc2bb3cf0d979809d9962521422ada3fdcdddbcfc52b27d43b473a1d3ed4a40368cdeb16cac4d32718c604dbc9f388
+GMP.v6.2.1+1.i686-w64-mingw32-cxx11.tar.gz/md5/b772a602b016e73dfc9a93908f51622b
+GMP.v6.2.1+1.i686-w64-mingw32-cxx11.tar.gz/sha512/00e06591e2cc44100dca1a8897c72933bf4bd8c3c732daea99a9efa4d0a67f6a8820bf3e5d27583dfddc50d4cda656fa7462a2c453035d03657948f0051dc2fe
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/b31c423855c4c5633b41301e3b424312
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/2565176e2bbcb9deab25a91736e8b6de01e7dca619ed1fcc98cebcaaa144eb03f89f4f6d5989aa8454b0d1c7266d1ace690e6deef67c0cf5c3fc1c2ab4d41b43
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/1ed2494342b5713308f6ffed5fe3863d
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/c600802c81c77247a24a50ec0695f742177c8c9f090b4c345f9b0cd065b35183f49592a764cdb7b1b6d5ee3722e7dd26672d85db963d1e490731545a36d1e581
+GMP.v6.2.1+1.x86_64-apple-darwin.tar.gz/md5/51e00a2b55e9f81eb62abe23bb5f6fd9
+GMP.v6.2.1+1.x86_64-apple-darwin.tar.gz/sha512/91731427afd8df54b54d87b93006190a8b959438dc591eb5fa44724056911b8bd5588b2b1e70e9da3d8d6e9ce5aaa6fea66b0706f636cb56b3c860e8f3c0550a
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx03.tar.gz/md5/3f3a6f15e4e8499470bbe69a9ea885c1
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/2659344ab097cd9542a5946c127a43af6fad05aa1445d69a4978d1a6d9a9f0e0502a5a60c6ca88acccb86d038dd10f2a72a7c2d4dd7ad5383c7d687e9720cc88
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx11.tar.gz/md5/15ee858d8e1f07f18df8a893634d859e
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/9d8ffa570eb22a5a908679e06af4dd0ce8c06cf97ff9fd766baeca352a99bcc54b4b71b9c52829ba80043a688f2ed6a33b0302072518f2b16416235d5295ea00
+GMP.v6.2.1+1.x86_64-linux-musl-cxx03.tar.gz/md5/79078a236575994696e7328e34326243
+GMP.v6.2.1+1.x86_64-linux-musl-cxx03.tar.gz/sha512/d4b77a4056a2b0dcb6f789381fff720ab7481cc7edb4672756cb2057ed6475abeb6ea414e6cec3e2450ef7302b647d7d2fc2d9f7de52feddd7767548392e84bb
+GMP.v6.2.1+1.x86_64-linux-musl-cxx11.tar.gz/md5/94f822c7521f83652d87fd5f1ad8bb19
+GMP.v6.2.1+1.x86_64-linux-musl-cxx11.tar.gz/sha512/fa4f70f81524d47b65d5cf3ff5abe38a691f09e3297c62f0db2512483702b9af33bc4a3c15f6f1465d6dce4eeb19f665f29872e6dd7caea0806f4c7fd32c2c5a
+GMP.v6.2.1+1.x86_64-unknown-freebsd.tar.gz/md5/cdb93a733763e8a4fc29652fda8c8b13
+GMP.v6.2.1+1.x86_64-unknown-freebsd.tar.gz/sha512/ec529f57eb167bfcb367310b375a3cded007cbc386cab9b09faa9fe8f37a443302c674814ada6c82125ad0ce4aebecb75bb61633a21e7a3a00fc928fbe05cb4f
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/8b5be9da6a0a293e14ab1d589a622b98
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/73287b8390cac2ce8afc4565c5218ac739ed8a23c56754f4667570039f022b777284aee25d7857a94ff46fd502ac0fabe46f509a5f870b1aa074f6ed1278dcf1
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/11bcbfc3b65b19d73c3abf92ec46cb6a
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/1dd9a6fe5c4991483a2d46420cd892271d37d9d23c409ed782b7736ab1942cd6c42360efbc308b5684bd5f991c7a96e8d375f3e855dc537bb3089e3402eed110
+gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea
+gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9
diff --git a/deps/checksums/libuv b/deps/checksums/libuv
index f03a3c6ce9a35..6c90c1b2115c7 100644
--- a/deps/checksums/libuv
+++ b/deps/checksums/libuv
@@ -1,34 +1,34 @@
-libuv-fb3e3364c33ae48c827f6b103e05c3f0e78b79a9.tar.gz/md5/dc93ae5119c8934f374570342ef036ed
-libuv-fb3e3364c33ae48c827f6b103e05c3f0e78b79a9.tar.gz/sha512/29947c236aef8931be4767df1cd8404ee9b036ee107b31cbce6fad9a97743df57d068b15bc4bd00320b9b81cd879258a9ec9dc675853e424ccdb8d6bdd226240
-LibUV.v2.0.1+2.aarch64-apple-darwin.tar.gz/md5/ed00585eb80fd82c014e2a431269ccec
-LibUV.v2.0.1+2.aarch64-apple-darwin.tar.gz/sha512/a98ffde4ff49a71699f798622c62b5f95d0dc010f1de88ad57ee437baa73cb25e263a8a6c4de86364fb31076993326d9bd0223db3e1ecf6904c1aa6e7e1f0120
-LibUV.v2.0.1+2.aarch64-linux-gnu.tar.gz/md5/3e75495795d5a4eee8ec9c1619a5caaa
-LibUV.v2.0.1+2.aarch64-linux-gnu.tar.gz/sha512/c0f1396ccc7784772d4c40f3a62d6bb22c6859a3258b07727348f436b7991a8f6d51ec46c09569f17a7bd600a321ab3b3cd59538d39c228cd3e205c33e755a51
-LibUV.v2.0.1+2.aarch64-linux-musl.tar.gz/md5/c2899ea791cfcd37ff85c1182330168e
-LibUV.v2.0.1+2.aarch64-linux-musl.tar.gz/sha512/b340ff4e28a5e566ee2640926265b2070acfcc4b3c87fc2e414e2b2a9ff23be852d92ff4f51e36e21de029f23bca5524e7e267ba091401b070f4d5cd9bd03c54
-LibUV.v2.0.1+2.armv6l-linux-gnueabihf.tar.gz/md5/6b6c080a88050051100af58a4e96f25d
-LibUV.v2.0.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/936a4f4baf21a0fe7492bc3fab3475f653824daa184030df764af1eb5e71d152aa5dd3449b1cf31f77a460169853d7371597056e641c440c7b3d4f5c6be1ce10
-LibUV.v2.0.1+2.armv6l-linux-musleabihf.tar.gz/md5/032010ad683931906d2467753cebea9e
-LibUV.v2.0.1+2.armv6l-linux-musleabihf.tar.gz/sha512/b8050662775d75e59b072c688ae44f7a3d3f54d114270902a825e01f4d74c8e131a3a75cd95e31b9ebf4488d64ff6170a67e5986e02e5fcb105bf5d3cc28706b
-LibUV.v2.0.1+2.armv7l-linux-gnueabihf.tar.gz/md5/e558bedc0b69d6575e43df0eec958ad9
-LibUV.v2.0.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/d3404b20b7e8e8fe935ca1e7da55823a6ff6703c822acf622638dc5c744bfefe1745e8e3a67054abc3aec0c10793ac46dbab29ccf7269d8a3a0d857e3a1a93e7
-LibUV.v2.0.1+2.armv7l-linux-musleabihf.tar.gz/md5/1aa605d9930ba63874483defb35a96ba
-LibUV.v2.0.1+2.armv7l-linux-musleabihf.tar.gz/sha512/927e710191e6b8d1c09054780b4af6336c6744ceb0885c7a5a0cec5e08bfb0d53ede75cc8bb145fda08f720d98a77e102a2903e08a0fef75a0b630631db6f35e
-LibUV.v2.0.1+2.i686-linux-gnu.tar.gz/md5/4ec3415ef12615581f8b26ec374a35bf
-LibUV.v2.0.1+2.i686-linux-gnu.tar.gz/sha512/fd37bb83cda297ec80332cdaed2a704ea43c3ec72fb539042ef09aa510275a0418c750278fca9e463bdecdca957f8457103f0be6eeae1017387141eb2b906694
-LibUV.v2.0.1+2.i686-linux-musl.tar.gz/md5/f24ea24837ef06be346d239cbb33ae7e
-LibUV.v2.0.1+2.i686-linux-musl.tar.gz/sha512/e9388568b20fa71d95e331a336aa3b17396e87d99aef6d752cb48416cdc9501e2ea887702a5765a22dcf6b5f7b730f5666ed3a639e7fe0113e9032f0d760b352
-LibUV.v2.0.1+2.i686-w64-mingw32.tar.gz/md5/890d1f7963a5dc927c15f8433b69dcf7
-LibUV.v2.0.1+2.i686-w64-mingw32.tar.gz/sha512/7e7d2b6405bbb1b62725a61d649fcbd53c2dcb65b8a6deea5a186717f88dbab4198a0f58d4223500aa991976725f8e1c4272ab29866174c5f555ba75a2e9b0ee
-LibUV.v2.0.1+2.powerpc64le-linux-gnu.tar.gz/md5/7fa0d0e9344f4a4c4d5075ec5d368b0a
-LibUV.v2.0.1+2.powerpc64le-linux-gnu.tar.gz/sha512/b5587e9e1072bc6becd5d1354294a3afcfda1c52e9a5f56387d43c7300369106059a2bac8669a919ce25d888b2302711c7433a82e366648935481568420daeef
-LibUV.v2.0.1+2.x86_64-apple-darwin.tar.gz/md5/716960539cbae1e38e1cf88c2670927d
-LibUV.v2.0.1+2.x86_64-apple-darwin.tar.gz/sha512/7b064d99428b312302c698e73e8a7919147c0522857a24e08d16144aea83429c5ac9526b6553697f28784457a5b417958fc5e4e28b4191861004dddc3f95566c
-LibUV.v2.0.1+2.x86_64-linux-gnu.tar.gz/md5/ff70887943a3fc68eddcb66ed941417e
-LibUV.v2.0.1+2.x86_64-linux-gnu.tar.gz/sha512/00610022d700dd6b33c97decea43490fcd4218fde2e57c0d6317abec046adf220fdf4d03f132938ec78af85653a5262d1344527c632c06aec53750710a6b317c
-LibUV.v2.0.1+2.x86_64-linux-musl.tar.gz/md5/a5834444d0b7e7d88cc87e5eb458bca3
-LibUV.v2.0.1+2.x86_64-linux-musl.tar.gz/sha512/e2e6e6726e8ef0962c35d7ff54a60b3370cd5b927fda8b4415e8d2f19b098ed9bd00e262eb18d11a73e2e27c88aefa72c3a6e9c193d27eab436c4d9d6531cd47
-LibUV.v2.0.1+2.x86_64-unknown-freebsd.tar.gz/md5/951d9da43208d2c48eb00c7ce300b4cf
-LibUV.v2.0.1+2.x86_64-unknown-freebsd.tar.gz/sha512/87e578f6cf34c9cc1c965f4958048967740b4ab530836aff33b3339c0d927beccf1f0c58f7e256c9ba98bf1fa0362186a24fcc5bb79ae1f149f86183b4b7f5c1
-LibUV.v2.0.1+2.x86_64-w64-mingw32.tar.gz/md5/4e9c2f078ed7b617a1aa447e1c44abbf
-LibUV.v2.0.1+2.x86_64-w64-mingw32.tar.gz/sha512/b3b14c5d447cd742cade43b56bf3867d530dd391c105ddbd7f2b9e0e26ee6a1f3e6fa11148a9ba1540fa598b155da3e56d369a96273a5ea1343b5c3cd4821953
+LibUV.v2.0.1+5.aarch64-apple-darwin.tar.gz/md5/54a94c839c561f5b74601d6d2bd5bf1e
+LibUV.v2.0.1+5.aarch64-apple-darwin.tar.gz/sha512/bba06826461a4f35abbe54ba5266d9bf354d22e1f33d75f4273a917ce92437432d8b2cc9d4b4670164c14542e896ee97396a1c34ce0f653d6a2787ab4b6160bb
+LibUV.v2.0.1+5.aarch64-linux-gnu.tar.gz/md5/b2680a3cebeb850bfec0df820e27072c
+LibUV.v2.0.1+5.aarch64-linux-gnu.tar.gz/sha512/9c5611ae653642ef0060c46235fa2d2e0e4094804fb52629456ae4e5deed7e5fcc88640537799d11d824b6c0c00e75fa2bbddc0206e69c587ae3a77b68e11366
+LibUV.v2.0.1+5.aarch64-linux-musl.tar.gz/md5/a50cea6c75ea4093851cd7420168a59e
+LibUV.v2.0.1+5.aarch64-linux-musl.tar.gz/sha512/51ed9be7dec0546cba4822eb116188c15c464ef155df03f0d5d8e9431ba8fe4c23dffde33c3331ef6e7ef3f8135b025fe26b01f036ab193aa340020f9d3bcb6e
+LibUV.v2.0.1+5.armv6l-linux-gnueabihf.tar.gz/md5/1b6750b5c85c5f456a448325a77bee06
+LibUV.v2.0.1+5.armv6l-linux-gnueabihf.tar.gz/sha512/06decd104aad78de07101576fab5c0200867c332d12f1cb0cbe8c558c0c2c84c918e5772fbfc62f6ce80437ad68ae97e3d180c97dd40383c80d5e81fee96ecd7
+LibUV.v2.0.1+5.armv6l-linux-musleabihf.tar.gz/md5/54e9820e027e97af7f324d7b5c12fee1
+LibUV.v2.0.1+5.armv6l-linux-musleabihf.tar.gz/sha512/a30353cbf74bf698e38fd357e57fec03345a4ce71e971d9eb034aa211b536dc83b994da533df914a65ba3f5babc7ab66423ed12da665b67c050a8e799cdeada6
+LibUV.v2.0.1+5.armv7l-linux-gnueabihf.tar.gz/md5/252f5fc6d094edea5faef71630f4ba83
+LibUV.v2.0.1+5.armv7l-linux-gnueabihf.tar.gz/sha512/79ebe1e57cefa243219525fdebad35765736534a4b036f2487d6dfa0376a685c8e9f16259bbce83155baebe5ceeeff2592933b597ceafa724060ffd4dd63b0c4
+LibUV.v2.0.1+5.armv7l-linux-musleabihf.tar.gz/md5/39bc81ad36519ee9261a662d444c13b4
+LibUV.v2.0.1+5.armv7l-linux-musleabihf.tar.gz/sha512/97a312f2a42a2377458ff5d5356905fb469c9c30f9ae3fa7d091c7e2cdab3a7ea813e1142fb7d08f2e0000a3d8388fb5fe0d82d3ff646310924439ba99f02903
+LibUV.v2.0.1+5.i686-linux-gnu.tar.gz/md5/ca4b4a317b62cd48f4277bba5ebb9b80
+LibUV.v2.0.1+5.i686-linux-gnu.tar.gz/sha512/2cf17359c976b10a2e0e08d92b43ef2d113a0071748209ad6b2896d9578cb3e96b55f7c72a7c7243ded244b95945c67ea3aa248c1513b5fd37ea714154e04c2d
+LibUV.v2.0.1+5.i686-linux-musl.tar.gz/md5/7f088f43c6ae4029e9d90c2881cf2509
+LibUV.v2.0.1+5.i686-linux-musl.tar.gz/sha512/b3653bd4cd95b2d4247b4b83215bfb756e211a3cc02e7e7ca1887e820cb1a7d461397d7259057b63e51825dc344e2f20e904d17defeba59584ddc54df94f1ccc
+LibUV.v2.0.1+5.i686-w64-mingw32.tar.gz/md5/8ec8f225a708ebb95fd6dbe6039c386d
+LibUV.v2.0.1+5.i686-w64-mingw32.tar.gz/sha512/fd9575300a65af9b7c3a59451646a5f617fd9df0fcae21db02f0f1e9c689605b1e75d12f0ee46654cb8d2b44ac044d2b44b34f9c6d008c19d41b001a69e40c6e
+LibUV.v2.0.1+5.powerpc64le-linux-gnu.tar.gz/md5/54c51f81a0b69687f0cbfce63b530991
+LibUV.v2.0.1+5.powerpc64le-linux-gnu.tar.gz/sha512/79a9daa826432da8f389bbb6788720f0bdf0e6a09a16b8296f0ead8e0eae175a72a0690e4ffa5e5d8169e22f596a8ad41607eb836d3f55b217bcf74885e707e0
+LibUV.v2.0.1+5.x86_64-apple-darwin.tar.gz/md5/9ea7e5bf6107f0773e7cdb875d831939
+LibUV.v2.0.1+5.x86_64-apple-darwin.tar.gz/sha512/07b5137c94adaf1c024373b27c2a2a0e77b20cc87f536551e6080b59bd47f65d6ccaaf40ec14068e9e24140c07ad518ef749c09d93fcc36b0507c4ed6acc7032
+LibUV.v2.0.1+5.x86_64-linux-gnu.tar.gz/md5/c4feae1cb61b43ab38b8adb80f8cb46f
+LibUV.v2.0.1+5.x86_64-linux-gnu.tar.gz/sha512/cef015385abca586215796c7d2420a4b2496b8a50a62bd9c483d76bb00adb4e3decefe17ba8398353166818bb23b758d3bdb311965849ea68f8b68377c1b08bc
+LibUV.v2.0.1+5.x86_64-linux-musl.tar.gz/md5/47f23d12e6c2094604f168c6c40ca131
+LibUV.v2.0.1+5.x86_64-linux-musl.tar.gz/sha512/abe0d74ceabc2d7efc80c1e8d0a6938205bea883257c43a637fc739c82a7085d4f0109c22d0f67e332aa14bed60433dd739676e0237fd28aba6a15c82d3e41f4
+LibUV.v2.0.1+5.x86_64-unknown-freebsd.tar.gz/md5/6a6eeb9108db8a30f776685d4f98a853
+LibUV.v2.0.1+5.x86_64-unknown-freebsd.tar.gz/sha512/e08961cfeb904145b67c2833e6ea3f91b90bc9c8948cfd61399c7d10b1a9cffe17728a6c906a9d791b71da406d8012014b7dcde70ed445084d21e99563cdd377
+LibUV.v2.0.1+5.x86_64-w64-mingw32.tar.gz/md5/7d592fefa8b295e09b4640bd999aa358
+LibUV.v2.0.1+5.x86_64-w64-mingw32.tar.gz/sha512/b4e738c5d86ad27171289f284e35124c6bcf94fc55512622563c6be75027de5033672100008e283aced530c71a6bb1da038872719e1073566d5979278ea76e0b
+libuv-3a63bf71de62c64097989254e4f03212e3bf5fc8.tar.gz/md5/a385b594c170085018bc954e50cb42cc
+libuv-3a63bf71de62c64097989254e4f03212e3bf5fc8.tar.gz/sha512/5415e992a20498ae29c09bfdb4819857d15be83367488e9fbd8c5f6a460da4cd2d0dff7eaa6087a4bcf6dee6d1c873acbe5751f5594851c978456665d6a21cf9
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 71b445c82f00c..4278c6e75c7ff 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,242 +1,234 @@
-libLLVM_assert.v12.0.0+1.aarch64-apple-darwin.tar.gz/md5/9b6a02cdb753464998c2108401321c8c
-libLLVM_assert.v12.0.0+1.aarch64-apple-darwin.tar.gz/sha512/4986a37bdfebb17a2c70f3e24dc104a2b9c48fda5149e1e8f0df1b7139689e09bc6f5438f34e3fb6ca1ccbae57792959e2ca6fe66b267a2081df6a999cc11d35
-libLLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/md5/4e6d298d5680c3a063739f508561e357
-libLLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/c70ef95a4e3c571b119f8d623b17d98591d268f7242c024e33b87ff80dc6cf96a7589c5e4e528d4f3b54fb8c7b1b6653873b4ec7a94b297524b9fda31dded006
-libLLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/md5/941d910dbfa4f7339c942f547596daa3
-libLLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/486a01f9c8f632b6f84d802b00c443cf9a5fbcca61ea7cc392c9211c71243e39f50ae8812dde7f5347965e0aa9a8dbd73a4988d780ae77dfa04fe329359c50dd
-libLLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/md5/ab8f2a3167d192a933c86214c275d1e9
-libLLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/sha512/fec5e2ea1398c226cc9a3c31c5041f9b4e111793039f7cc373ff47e86e72685a4cc254ef3d42db7cee239a7422f56fdba513f572f5156f8dd6c286c799547636
-libLLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/md5/dce59160064ef20720e2b97e903dc829
-libLLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/sha512/704d2007d0074a87a2c780e6b9f38117cee75a9e3e7a84edb9915962d7d5dc92453dc8ad3f2a73404173b7cdce5759f7764011452b9dd0da71053fa87bf89be4
-libLLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/e8e483bea9effe1a43cb14eb32d2edce
-libLLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/709e10ae71641ac285380a73457988df8745d9baaa1584ef67871daa42ef16b568aa93633f774a9d4541ed1cefe49c7614b295f16da6fab91e493a2a58ff7143
-libLLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/2ef8f91e7df587e2bd8c73140c29d771
-libLLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/6b16d86a6636b070924d251581ec01df759197e642f8703111869c2510194b40e3dd6d56e605208d9e4d1a0fbe96709d56a9d065f3111979c989cda9c44093f4
-libLLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/7ae9c70790aca9cc9ed2dc6504f44348
-libLLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/4d5aaedf590343bb788d6a8cfd714d3d659a82e44c411d93bcd8f524bd0563dc8ac2ddfafe87b2c762e7d9f22a1bc6cf7a331343738e7a38c246f41a8bf24d64
-libLLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/2be0d9c87322bf63d3081e434b3b5eda
-libLLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/45bc193f86093e479710455ac7bbd4e853537e2d22946a4c30d942a3f2f1946b2156fd80282f05a3fc73a65835d06d372b76ce90685cb4f1ca31606d2481b1d6
-libLLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/7009d45f00f0ce67ffca15a59ae0b682
-libLLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/5030c05e5b91d1fcaa866522ebabe0f112493fae9aa64cd3a003266939093bb422320f2cad31c19ee1989f5beb84781b1b54090b6a4c2a8fda3e80809ae65160
-libLLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/cd92d0ea3f9aa34c834f6ba9c5d6b199
-libLLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/b79e5f42890712e2ced99e80e1827aef0824568c34f029ed94142a29b925375a8e85d305411e42d05bee1959863906f1958485b3fb982b022cf567871005e20e
-libLLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/406cc9431dde64b98b7c2cf0d44b8528
-libLLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/426a99f9b4eac3835de4a1ae3f49a23c342f773af5a3c4c6354cb27a120f8d83513ac9dbdcd1040b018ffdf6f5092de66fb37a8eacb07b9eed67a1f6da411fe1
-libLLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/c6d6c5e91e33fa326763034b58c9c306
-libLLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/7b2557c766371fc12172ac6469624fd7452e2be49c0ee4e0658018e1e779122d89cdb81fddf8df1374ad17286b96e299320d076b2527137680423bcd3a88e5ae
-libLLVM_assert.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/md5/8f3acc592c604ea386488a28c12a69f6
-libLLVM_assert.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/sha512/049d11ea4d44a60037c25c8893a336358f2422a4ed7d350f0212736b040b7af2d50fb80681d20ff58c220f9bc4559cc2b70ead1efa923cd0a36b12f3673f343d
-libLLVM_assert.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/md5/9727f0bd0663847095a8f4574a87c318
-libLLVM_assert.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/sha512/0d3a344b37cc4345d31612964f95674b18d2d41dfb6314d12c88290772bc9ed5d15dd90c2e8940710c1accefa368046c626d12c42297de162e24f08653ef2a6b
-libLLVM_assert.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/md5/9bb55b1d0e0e52443296b96b2117b6cd
-libLLVM_assert.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/sha512/02450ebbd012aeac1e86ce22ce699dc0ebe75748335ab37ef854d3ff27d08e5762a1addf400e51aa26dda44f16c93e7b16199cc6d8e4019736bb0138044693cf
-libLLVM_assert.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/md5/f7bc96ec83a87233c3eaab79f117f9b3
-libLLVM_assert.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/sha512/c398b9684ea06e1cf4595c30eea69bce0623f858647ff686a6592d6d062479cd1ae4bb01367f7ef156ed75457454e6f4df7bcaceaa2f3f50ebdbfe2777f326d0
-libLLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/md5/b495bf59b53148178937ead404f887f2
-libLLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/sha512/ec5e2316b846e436024c16aec6a271f23ad7879119660488509e3a88e8012e1b1592f492696d645b7ec5b2d710c9524100a186df836ff03ed9b8728b009a381f
-libLLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/md5/57e237b29d01266c97c2dc716f8648a7
-libLLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/sha512/742eeba173b2ad95b8ac9ba3b21afc49a081aee6ed12607d1e17e648310372dc95c5f839cac8db74a49a0eed8b82700922176b55a57b3dc9ee4a820da9816283
-libLLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/5d1c3f5cbf94df631b7f1a73a0a2063a
-libLLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/8072be35d0ab07d853815ccc84013f76895603297bb4f4b77b22fe538467b273a6e6918486db8170f7e2ba2507b5e36ef42f9f321c797ac5e871618d3372ce66
-libLLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/5d1ea307a7481ca0393f88f3929de53b
-libLLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/c62e3f7807390144f65cf9081b3f6710c1022f260a2ffda0d7e72583224cac638e87655d472de2490ce465279d79edea18a6fea99b6afbbdd9a427ef5d6439ad
-libLLVM_assert.v12.0.0+1.x86_64-apple-darwin.tar.gz/md5/f7764a7eb7bb79439bcc8a853dff5b22
-libLLVM_assert.v12.0.0+1.x86_64-apple-darwin.tar.gz/sha512/b1cdc7f1078eb07e2d37482c96b14c288f6c0d5c0860854e02c3c1a407fcb66d4f03abccf1b74b7ee9a6eb503b3c60e67e7ed75ece765b385f17c6f502a5d489
-libLLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/md5/99a5965cc849e3dd79878a7c6b29b6df
-libLLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/9b138f955de9266ff364022f53b6b8641670a8221e0bb6b61d5bbc9151e8c4ce08c61fcb90d6572c51853723f53c6b6afebc5c4e17e6e33e39590c53c5206f91
-libLLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/md5/2a27b02a388679164b58dbd853f5145a
-libLLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/febee5224a7e4499509105c59365eca340526e002f12395bfb2a31c944fdaf89345fec971c67874356c8235abd14bdff3d7485c13bb4af944105dbb43b2d665c
-libLLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/md5/f23b627174133dd8e94733c6bef0ac89
-libLLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/sha512/b7b2fdc4d154b2e1aa805a98d6b15c6a88a1dd59812dba1030ca0628192b993c965ccc7e0aefb723ce6ce8ecd3a6788fdd0cdcdc5c966bef55f67834b056e52b
-libLLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/md5/0dbab594b2066ffdcf4c1fe47612cab1
-libLLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/sha512/86e4d06a6c52d04175d8751cd84cfe1896ac66614aa92c7d60674a3ba49aa6a68e9700cc8150abecc7acdae4786d9899eb052a015a902a2459858156b0ffc2cd
-libLLVM_assert.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/md5/5b39f69421ab3c5a234436a7b8430c82
-libLLVM_assert.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/sha512/2dc735103c4c4eed676a149ddb8d4959ed8c6c00698683b5213d22661d5a1c079fdaf2c21196bf5d4e3841692bc09dfc4c8b723d808b33b94bb06a407a8ad809
-libLLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/21bd6a1e696b015a880efd795fa0e136
-libLLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/218bf452d5336d1ed69879b33f50f92f0118fefe21366733746cf50d6accd7b0ce71659233470f1f3eff26180a5245550b2c53b2678ded317bfd0a5c2a4fc2df
-libLLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/cdce26125c4f37ee3c9f92cdd7598da5
-libLLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/9bed9f8b1fc7f94f67aa5d2496a5081ab594d3210b93e5ee1f71e32bb053e87229afd783d34c8d048afbd396451c75c82b7c7d5f60d018b7325f703687eaa07f
-libLLVM.v12.0.0+1.aarch64-apple-darwin.tar.gz/md5/4ac10ba89a464473457448f2f697abc0
-libLLVM.v12.0.0+1.aarch64-apple-darwin.tar.gz/sha512/ee80c94d8a4c89910d170a9781f72bc53e0b3e3b853157d0830b16626ba7d188d4a3c1c7b723c4c4d428676e4f8c3120928dc7b59a5f950d096d25b5d488ed31
-libLLVM.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/md5/95500f6f9eef53d670fd77a6949d43b0
-libLLVM.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/e3099e8587aa36785fc48d05691236c62c7121c953f645ac5fc7d9068127c8bc7e3aa48f0a1ec8a994c3729f75ae3bf155baaa4f1c7e761860e38d7f89d29117
-libLLVM.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/md5/a900d9889fc22a19e005e550f0154709
-libLLVM.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/acc1bc5338d8ded0898818fac5b3a1e0528df3902ae9e4062ce2db3a769ead7168ee152262ad98cfada19941f9c49a72fad7e356d1b7f046bf0a05db83af3f9d
-libLLVM.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/md5/4d62b2d7e0de328a8f1d497acd9dec4c
-libLLVM.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/sha512/6c199d25f038bb619e8c7449d65dd61be4d264109acfe5e694881bd7ae7d207cc714bf3350442c27a73a0d3702a5ab386c90ac6c7147b3b0b7de862f28584e74
-libLLVM.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/md5/52af995cd60019b0fc607f7d89d1db73
-libLLVM.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/sha512/98c412b6ef5d12f6a0fbf2b9a848a3e26ec98f9eaafa9c7048d52164df69716aa2ce172e546c56627d208e9aff03f1fd0b011eeed842cbc92280bda5c42e1e5c
-libLLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/ba0a2f3e9ba09b2081062437d12c3b58
-libLLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/a4f5a47a876b660c1d08805ce296ae59ec65dfa89334542cbbdd696909b2be8697b104006f365ea404c2967928dd5580bd2c7d9a03f3894952742456f450415a
-libLLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/d2c4da0530fea40e082404886e86caf7
-libLLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/a66ec214d80023110e281b2029d9dc8ad61a1a3be9bba96ed7d5bbfcacba76d7359c152982036d317d383a390e27a6eea969d6bd5fc50ef7b879dccac5dd320c
-libLLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/abd5d1cd535566f61ff0138ca852b771
-libLLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/f247d4127798576e0a39cb2b78c359c30679b2f5ade08a39973989fbd72387a06ef35b3421c4ebde564240776a4be9b3a24f78eebc22ea192e72941558264093
-libLLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/abbe7e449a3a0d78ba320b0cf9b98534
-libLLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/8b2b50d972d76b18938856546f7fa13b243cffb348f64f0c528668c1ad6e28bb03f3af05fa5af8ca01a4cb12062719665fdce6018c6141ae7bdeef1cb876351a
-libLLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/a5d0b33472dc32278cacc0b5904aa569
-libLLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/501f6a43cd75f33056723781becc40fa1babab2a0e720700bcf4708782cecff20140d753141de8a2bf06fa8d97360b9e7533a3947a5eb39453fca5ba6d94fd15
-libLLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/b9f62938d21eef15bf58b799d06f049e
-libLLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/6430c18e13c7f97c2b12594d6ec0afee9ffc8fdd0e1608a40df79345932b9739bfd8f7d15b494bd576784b12b4a1400c9fdaaeb350f74bf2f15a3dd66bea4d4c
-libLLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/c87fd44a812346a6c471a7992c1898fb
-libLLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/f00d6caafc3298ac8fd7ecff937e453c97e41167dc2e0f43abf83cf6b899380d1db70e74245ebcb1bba8022e0f86d30dfae214005dc6b02232b0bba6045ddbca
-libLLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/260f3e0ed2bcea0122f4c104cf115661
-libLLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/3a853ca386e8cf578d92ec55c824e0e1b6ef8551dfbd4bd042f81e3063fc9a1bd25a3a3b182444e178e34c4a477bc4293a3b4ab1eea1915f9c063c396c548ee4
-libLLVM.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/md5/eedcb1470ba914c6df43d41aab2f336e
-libLLVM.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/sha512/4634c7bdcb1a7ec31d7f629da1418dbf7c2da8ef35c59daa8acde5ddfb04acf617e60368c1ddf21d7ce3684f0d091471264107a3674ac7cb8eccf05e8d7ef82f
-libLLVM.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/md5/26854ac7b1d8b5cc516310c6f0603e28
-libLLVM.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/sha512/2579b123cbfd7f306747ea0eb9830228b330c2443153dcee3d97388465c974cc1b67ad729a5be8519c31e90a85221c03c56dc244a202a190cc9b54b1e9b4bf09
-libLLVM.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/md5/844eb964e50bdcca7c7c046bc3f7be99
-libLLVM.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/sha512/da0f995f96f060933f828a38c1d1b586ee5ec9a97f22626fb6297d1da7c9be4a79fb17b5207f51e96b0454c9684bd7c371a0f43585e135ccc28d873ca9215649
-libLLVM.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/md5/b3ab85c5a4d2aee0aa07d75e49d35f88
-libLLVM.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/sha512/b1656422f172685ebd8ef090ecf4c06f7364612f4b195820d2782d5252dc965322b66ae3f0d42817685a12ba2460e66ea201bf2bf5e48199156c4f124862097f
-libLLVM.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/md5/619ef68aad7d051b32777551b6465432
-libLLVM.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/sha512/fdedb19322bd216d6aa4908afbc99b0789424c6b27d6be7769c9b1b80e35ae04b3da290d7a5002bf95bee725684d5c8303b3fb762807cde5b5451b5a107a6e5d
-libLLVM.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/md5/9a62570aaafc78a52bfd0799ef27d38e
-libLLVM.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/sha512/3b629bb4611f6ec388c4ffd03f2456780f41528f6f99446f2a0adb35c1953921641d2fffed5224817dfe76a6faad832a11267e57b01469959242ab34d1b1823f
-libLLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/fea7ed9b9d20e76d2fe31ca3e5cfcd3a
-libLLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/5448b21d8746f15c8c496b2123963c4bab271770003bad45150e93d6dd69651b604720bfadd5fe95a9034cf5bceb41c2f85efb0bd3420bc2ff0c0e100a51f10b
-libLLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/04700344e64312cf3d8e0cac982e8a33
-libLLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/f89b350716508a9278ebc64cadc6cac0f7bcf76e966facfa6e8713103554806d8d788d2a8bc8ad7f8c21d5ddb3e943fa11db9cf51aa24f92302d4f592fa5a601
-libLLVM.v12.0.0+1.x86_64-apple-darwin.tar.gz/md5/37dd7c11c6533af26774c8d60f47b944
-libLLVM.v12.0.0+1.x86_64-apple-darwin.tar.gz/sha512/793b04f255546fc76158b21c1e8018700416341199c2681a14a3bd28f9799fe2e7bab40b05421e6bbd9bd08608af3db705e14d1fa52f213821a8a80c69d2ccdb
-libLLVM.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/md5/88b02af758c3373c026f6c727431a1aa
-libLLVM.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/eb29edd8b35417adcbd400fa5c0327d5c03ca014e6699330d08488c4d83d10aeeb21b60acfc51f5fc05c9fe46a048edd12702790cbd731db653d06b1746e5650
-libLLVM.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/md5/c3c759d8706cc339077d2f084f36d6c7
-libLLVM.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/595edb80d42b47835ae435526711ec3fae39d12fcad28e1c2abdfb3dd414f71c7ac3e1ac5ac16c25e7e1c0885c6591358ed066d9be0d701c17d155fbc0f9f701
-libLLVM.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/md5/5e297c6897cf1bc9e0d5627a31295473
-libLLVM.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/sha512/3a18eb2fb891071516fec4340809eb511369f70674c96db02558ea07f3819680f4f63cf20fd57a76293ed9c18746b3c2572cbd1b1621721142e7c4526294baf8
-libLLVM.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/md5/a3f0facc8ffe1a806f38a7441e307be5
-libLLVM.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/sha512/751a55ac5ecbe328097998c6a9c57198cf326bd5bbbadcdfc3abdcdbf6802b4a039df8a6a5020fea911fabff7eb113207e87d4fa1a7debef59832d5aa9e10846
-libLLVM.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/md5/2728fc4d132e5f6e955ca684c75dc021
-libLLVM.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/sha512/64b28ec60e564d815f3554f363e70770cec9936264ec5c3022a422acfff836aafce0d7bd3a83819735fec203f62af43e84f8770b89784f3a8e71890d1bc900a3
-libLLVM.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/82abd763588375fa58700085edf4d1b8
-libLLVM.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/89de803c913967c087851197d18f2ae9d6ea10c653b3988ab77edd1a90b94073fe2477f4291f10fb9496dd5c77c952e7358f68788d487989fb164001b6522d41
-libLLVM.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/6805ae4e8276b1e774decc8d755b518a
-libLLVM.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/002bf892daa13ce1200996035ad7159d90c5f5974989c5c20a4ebc74c0f82553a2d0198459dc4ee7f7363cc3d10e5b11bf7088bd1288af9fac0cce707de8452c
-llvm-12.0.0.src.tar.xz/md5/ceab21c9081e122a88d82216a80d0dc0
-llvm-12.0.0.src.tar.xz/sha512/ec17153ef774a1e08085763bda7d0dfce6802fbaa17e89831695ce1b2eb015a6c2aebfaa9fe7985a83b9c51bd75d40bb4f1fc706dc16d4c0dc2b2722a1d8a24e
-LLVM_assert.v12.0.0+1.aarch64-apple-darwin.tar.gz/md5/a6ac9c9c1831bf01edf1bcb526e7c62d
-LLVM_assert.v12.0.0+1.aarch64-apple-darwin.tar.gz/sha512/4473ba37065329cf91192c0496136f161030da03a9a7e72106f41403e5b49debe7f0c203839f7deac7ca0b61b3d136404bf442b4f044f10e636a994e34994a40
-LLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/md5/0ccdf2fadf185c90e1938d946a6542e3
-LLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/d35e314b2928ce16b0369c1b7b38682d42d1443e0ca72f77e1b32d2231af5dc5a66eaee1881832ade8777566a0447319805e8fc1f2b702b19097240ac36da1fb
-LLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/md5/3acde3cd87feb3984f45f0ef323002e1
-LLVM_assert.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/be8d40794a5eee7ea775e33c8173deb3a775e5eaebfdc20341ad66a6d12d48692e0838e872245c74a8bea46ca085f8f3e7995f6ba33df2444fdf79d251ee272b
-LLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/md5/760edd92d241365409c59ca9ba46ce6c
-LLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/sha512/45b52ec2f516a7098f89f6ca6027d677329780ce61886821478d6889b1b1624d7f43e6f23daa2037c4fdbb25a71f97ef5a1ad9e0574760f0f304a2eb01cf6793
-LLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/md5/907174cef57551c44825034487417b31
-LLVM_assert.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/sha512/5abac8be17a83108ce3bf0c6d48342cc5db506049623c35859045758a3c31829bf388fc9c6ecc2ad1438e464930d9e1870cadc895eb7dbc72bdafd04666f60d7
-LLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/f90b16fffb796299a4e061d302a9855d
-LLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/cc3f019f88f167ee45638e3bf23f04ec8e4b021000e53342189bd75a2145f99523aa501e60457fc5cbbf5d35e1f8fd17a97fa9b18ebed5a04c2366333094e440
-LLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/3aa7211a765fe31a56d48151b7ec820d
-LLVM_assert.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/1e4e6a1a91ff1093b6441b43e50dd2cffa14d8cacecb6d812fde94164961e44569c17d74c65feccd613dee14311478bda112088d245e89bb307a3453e4d4d378
-LLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/0155f81732916dcad2aa40b14d9db752
-LLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/7521fb007a0d589c9e70f7434afdc9e12867b6f82e6b7bc4c1f59fa337149648d6ea0367ae9dc7bb88840f6db05734d921f8da86633776a48529a0d6838feeee
-LLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/90095ad6a9506c7e83b195becc9f9cf4
-LLVM_assert.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/15d34c652513a25d967d2721b4ab196f570fa80e02f17a905240ae838aafeb4fc88c99810c26558babf35ef2b478b067126d76f1e4c9366a143b139640a14939
-LLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/0384e43e079b05f8cc2bbc23012794be
-LLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/c599404cfc5b78342ce8f4790b6f9b1d7e7cc0e26e674040d1538d676e11c1c95734bfec7a7869a5eeef20641ac3c4b510bc40f2c6a1ef40dfd196d975c0d596
-LLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/7d1c3fd8fe1458ad5e8996dd282087e8
-LLVM_assert.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/c5b378c395be0339639baf8f10918ca2c6eed59a8fa8a34601e26c2433c6b543e92f71b73ceda254fa41f5b73796062758df26fe46335882c70c31c97ec128b5
-LLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/6b42ab461336ea90274db8d059b7eec6
-LLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/3cb43d785825705567cb2b557a19486c75810d223e8ae828bfa9502de38f4bf76a3913634402529bda9f4a7525cc04fc33ab2a152c5dabb5adbbfd2497ade3fd
-LLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/8fa56521559c7bc15c52e3282b75e377
-LLVM_assert.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/9632ff9a34cb92527eda6b3904f3444dfc79577a30d047062d1a1320fb0b91fa8bd2ff061ef075b2a7340db330800e9ca14f26090ede82707e92f6bb449be3ad
-LLVM_assert.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/md5/b93efe32fa26b868a37b698d27aff94b
-LLVM_assert.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/sha512/fc3d05bea5f40ae893585314f6f7fbf92eebd58676489760c8dc3e5a7bba1963fc251a87d685c3fa1922f9d5e63cb59645b2126b3746258bc01acaeeb425796a
-LLVM_assert.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/md5/01cb79e3c4d4d198f53fe0f79223ad03
-LLVM_assert.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/sha512/8620f2339e270c3cb5f455cc95acaa11bd9539475e64786c1869cd8582442812e732b903cda89a06bca8abf2aad3ff111f320fdd54e52d96ece6d57b35da3678
-LLVM_assert.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/md5/84948b34ac2d1a3d3e5011df423070a9
-LLVM_assert.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/sha512/df02613ed334d9d1d59daeb99e346e824e2cb0a35f092be5521e520ef0255c90a1d80b3b65db05162be2ae9707b7655da048f958b3dbcb7b57302a4105a57ada
-LLVM_assert.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/md5/d2e047b04a3a7aa8006b310f105f6503
-LLVM_assert.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/sha512/3f574b15d3341e2897986a1cab9a883582c1eff594b4d952550ac4753b2ee795f8f2c6947cfd8dc1ee2dd2a5ed4414a11675dd86f53088274aa6d5c71111dd32
-LLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/md5/1bc857bdf2d143fdf808a956ca528f05
-LLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/sha512/0c2357fa990b20f247487a22a2d73289b7288a23269d7212c4066dd86ba02a18eff59be86e77b6cd50cd20ef05cc50bba8ed7fb7cddcae7c61603ff556e01fd0
-LLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/md5/baed06cea973e29804f05ed77c6974fa
-LLVM_assert.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/sha512/b119901a524211cab7fd5eff962722fe5768975775267134094990a682d68779dafc9a9fba0b011e6927fca65e2cd52bee3e80fc505eb9571f32ccf41dbc1bb4
-LLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/be94db30f358856a1e246be7355be7cf
-LLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/de63c33dec3440626315457a93a860230cce3c8cb0bd967459ffc12af9bd0684fdf3c81edf7440ce2dcd02367dd3e5679238c588ca52daa870308bb6b3e56fce
-LLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/df5844b2dd49fe0eb4eac58fb22b7f8d
-LLVM_assert.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/ef4a99aa6957d11aaa8a94027f6a7c2b7800f9b8f3fd8f7a7e9762383ac0064245e695c7040d9189a47ad7f95b1a0760fa6b655aa0f44e95d7ba684b9ccf174e
-LLVM_assert.v12.0.0+1.x86_64-apple-darwin.tar.gz/md5/bdb347aa9b67ce6d24aba48d34299f0b
-LLVM_assert.v12.0.0+1.x86_64-apple-darwin.tar.gz/sha512/527164a4db3e146f72eee0f2972209cf9eb705c3d614f0e505e0b503ef2a0e7986eeacdbf3e1197631454c90bfdc6f55dfc80c85a1537c3791ea0360610ffe19
-LLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/md5/a900923c8800c1860d5c98f9172b4c75
-LLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/04732d78b628b8fb06b2368d1ed48d21a3aefd5def2566f7fa17ab279b99e9fb0c0cb7f1d65933cbc6ff473c4e566034066666838644f9792eaa1d4ed0a8122e
-LLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/md5/7669055a5e43049fc14282d9a37bd39d
-LLVM_assert.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/71a8daba8baae164a6af16353d49a3f5bcb4b41714813659d8d488d7c8d26ffd8fbec92c6b53b98f05a4234224df81eebe7006e0ec5567518d7b42fa285be1a3
-LLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/md5/1c38ee7ccb6dcbe22698f2eb08ffa627
-LLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/sha512/dcd059ac9121f59507391657baf4c8eaaea7f531adc0ca02ea688d09643ada9e10a74e6441755014914f3f1764b90f99299ed4a3af5b234ec64cb5ed84934cb0
-LLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/md5/a98e8304dd300af4b14211fd5f5c8a68
-LLVM_assert.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/sha512/2a4a567d681a42cb6b05059dd68646f85841ba431eecfa36d0b8d0d0a9290d3aa9a628317050711e6c3a599e48b77d1988cfa9e4429b07e9f371741aa8d6f841
-LLVM_assert.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/md5/9c92e5737faf62609844983f1603f75f
-LLVM_assert.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/sha512/dfe1f1836d74693f4061a07d834f622c6c9142ce8c949c07a5ca9a0976feecc67a483edcb1ff4dfab04fd54f063c0890655e855f55734d81fa3d595fcfc23109
-LLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/578f4ab4fd290342f3290e5b182af9c4
-LLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/f8d23c8b994f8e503d041322dbd68b0d5682d8939480afab620ee861799000ba91571f59832f148dcd97cb0ab2bac9691eaa83107b98b8dbdf99569bc1b77432
-LLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/818b94c89b0af9f9e1dbfc98db278d03
-LLVM_assert.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/672853d70cd25b13249479c1c831ba121e164a1aefbf46d6b9647501bf746b0925538bf126c552317482b9729fc9653800246ff25206775f5f5c6fb4196bffe9
-LLVM_full_assert.v9.0.1-1.x86_64-linux-gnu-cxx11.tar.gz/md5/1faea42ae449b43c9f3119ccc0c1b375
-LLVM_full_assert.v9.0.1-1.x86_64-linux-gnu-cxx11.tar.gz/sha512/c99baba5a831405d4ad4d8636eefbc260fcffcb8026fa1537cf63dc79d71dea7e57b52acf6815cbbc0aec8ae8c9aadfb1349791b0b31cfa8418529faa494195e
-LLVM_full.v10.0.1-0.i686-linux-gnu-cxx03.tar.gz/md5/68d90f435664d4fef450eff70539dca9
-LLVM_full.v10.0.1-0.i686-linux-gnu-cxx03.tar.gz/sha512/65b07bc5f6145b063d7bc8d35fd22a2d578cd972c74b08ec827b9980e4ceb1fd77ce5ba921cd0d907643594fe73d67a7b56c699d3e64f9ac6b2b6953972a4179
-LLVM_full.v10.0.1-0.i686-linux-gnu-cxx11.tar.gz/md5/2128286984b014f41e1449e884a8f7c4
-LLVM_full.v10.0.1-0.i686-linux-gnu-cxx11.tar.gz/sha512/d8e99a78e29a7eec99bf5924c8f095b82fd7b020c19f8f41401eec514e468c48e489fd047401d751c8273f264fafd13964d1dd160d2be1e19d50f48133a7da9b
-LLVM_full.v10.0.1-0.x86_64-linux-gnu-cxx11.tar.gz/md5/7718b789700c0f6f5329efabf6b0bfd0
-LLVM_full.v10.0.1-0.x86_64-linux-gnu-cxx11.tar.gz/sha512/94d6a951e5f6d6b9a2399a80b9cae5a52d37145956e4752f0bf2fb9b8b9e43e16987ed3833126efacef32831a200925f040fc2eeaf7b1f83817ed17954a993c2
-LLVM.v12.0.0+1.aarch64-apple-darwin.tar.gz/md5/e438402bcff0a77f20510badbb1f3c0f
-LLVM.v12.0.0+1.aarch64-apple-darwin.tar.gz/sha512/14cafe77b353fbede64c736052032db77ced2757bb53696925899b79037a3b5a72d75f1ad8972a191e216a25a83f26517b0291fd246e0aad7a1ca040bdc7011d
-LLVM.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/md5/8ff58f44d6f27d3dfb2b3670533a472c
-LLVM.v12.0.0+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/b9738d4b4f00c08cc06b3f1d73bdcaec96e578e02de08f14e2e084eabba934c24f9c4e65758e102515e3c9a1527612c468d8b178f2e87ca667f2e7a62343b1f2
-LLVM.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/md5/f4994c8abfd4fe71004b499bd95140bc
-LLVM.v12.0.0+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/fa01e3d6f154b695e9230c13365098af60ca5f6077d63e3cbc6c410a89615eb7634413532db4f93661289c590f4576c18488cae20ec5dfabe1c273cd97c18f7e
-LLVM.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/md5/52066c3a492d69cc0da23f0861f1f6a9
-LLVM.v12.0.0+1.aarch64-linux-musl-cxx03.tar.gz/sha512/db60079a09654f00a452c70862d1453c2ca8bf9b5ef055b66e99edfcc35a52a94245e21b0cd6acd88f556dd99c492b6f057dd48e3047286af0ed612493e895c7
-LLVM.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/md5/b02a6d93f190aff2de4f03a1cd121a30
-LLVM.v12.0.0+1.aarch64-linux-musl-cxx11.tar.gz/sha512/3c0c9a3753b8e709361f1faceb02b3d19cc8e0c7a183601be528283c8871c92a234c3805206c37e1ec04577a53f57282b4874e4175e2ffb83b4b1ead744b9c0b
-LLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/88858237d7941887f27af62b7e249a62
-LLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/342d4bd418547f3b6543182a5593e21fd4af84eaab0b91ae04723e485b48accb2a119c7c89d766fcee2fa13af9ec3454da310d452e1262aa0e97fab9aa498422
-LLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/1cdef3420fc7fba9e74765c839742350
-LLVM.v12.0.0+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/d7af8ec5abdc1ff75efa3f4f0546efa7d2328893b11533cde68a515a0d117272efa5e8bb1e821996ad12214668644bdf7a9a3f5e7578b24a174b5b04bc0cf1db
-LLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/3ee4b9db085916dc73aa4131ff6a73a5
-LLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/b456e096bbe2964f678f09edcfd49e0e8b4922984cfb1fc47358868eb099b82655ba8d5209ac31452af511d7c773de2a2f56a24ad0b36841d9f557ddb2c09a88
-LLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/bcfe6505477413c52c3193dd8d0c22fc
-LLVM.v12.0.0+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/50a2af7655acaa0e13f15cfcf490b334e104cdc847ddd5900fae04b322ecdecc69d85bd3b15b6170f584fe364dedea140e1fbd9a8835499067da59f4b665f849
-LLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/c11d635651c82dbd5f0038f24afdcb03
-LLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/da3676b598851f07a875fb3282f2a1c078d0f447213049dac6e33bb07b5334d332df6f26b1e0e851f85bba20a72e65e158f848c67563be9a6e23c9d06b1a7f19
-LLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/a299db2b5337c8b79bb0f37a39929c13
-LLVM.v12.0.0+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/46c7f0737dd3f46cbd30173d39c268a4606b169d80b146070fd2c61f32eb31b1cc8ef509bd1bc7f0c197a4b7b4a6e3d45a320b4a11a27d6a0233bbc1d79dd90b
-LLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/3f8c17f38d47008ef26457ca29cd626e
-LLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/265dad34a231c55b07f9fcc059dde0ef72c156374d7334372f0ee486f9c1ebe95e8ec187cc4b07623dc94b8f8def7d3235fba0ee11a1b8419d9b7a7182a91d5e
-LLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/5e49fc12546f4b1deaefa249f7e1f77f
-LLVM.v12.0.0+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/85940dac65199307fe6a571b7f369b02430c4ac980c7fe35c1ca4dae6d29a1e5ceaa72151ecc4d8fd29116b82e1d1d4b9054fc0fdc1de31becbdef1365164ecc
-LLVM.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/md5/74c5e7b4e2bb44879f8a907d54c57398
-LLVM.v12.0.0+1.i686-linux-gnu-cxx03.tar.gz/sha512/41124a32c83e19e0fbd0f0c870bdda7d9a8f59ae51ec1017eba203e01e7fbc33b43334e12154ab0be57892f5bf85faf0c76c375da77e7c9097caf54b45a0d19b
-LLVM.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/md5/6ff76e552a78453ac1e149a85179d7e8
-LLVM.v12.0.0+1.i686-linux-gnu-cxx11.tar.gz/sha512/8623d31b9bcc59ef3565db310e323e8b165c9c35a0ce170f5f5e2d58ff218974d2ad35ce1781a9558556a33247bcba2bcb1e1171254838fdc3780e9408df7525
-LLVM.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/md5/8e097ffb90dfb3786ae1d9074314632c
-LLVM.v12.0.0+1.i686-linux-musl-cxx03.tar.gz/sha512/96291e3c1620a358594be1dfd99354dcae76b9c7536ffb93bac93bf6523f323dc44d55b1faec85a763f1579f9ce0f1ae220f2943100d2f286dcb4cbd043ec088
-LLVM.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/md5/bc19a91fcca8d8aad83f5b38c8db2a0c
-LLVM.v12.0.0+1.i686-linux-musl-cxx11.tar.gz/sha512/ce5914eaa76e9eb21a218755211d613c4261ae284773222e40b383c20c5e7c6ef00be2836750cb1e85d2b1f6e65869e016d49f365d554ce845486fdd352c1b7c
-LLVM.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/md5/67f319faa58c136910cc6497eb73b234
-LLVM.v12.0.0+1.i686-w64-mingw32-cxx03.tar.gz/sha512/e5697c29f54bae9d9e551084ca702aafb09ff11f6de60aa723d90cad9675d18846c1cf2fc093ec1eb54c5db7f2328aa05555bf453f798cc3e96f79f2afebb283
-LLVM.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/md5/e5d38ab0e686a0cc9452c1f1c81c21a7
-LLVM.v12.0.0+1.i686-w64-mingw32-cxx11.tar.gz/sha512/9cdfd778255faed49a53fcbc691a3a1c234b0649b13ba8499a90a61ffe7ee864ace982fd33e6e736f4466fc11d5a2c0b735bd68f7e5ac4eca5892ee76a5f0bd6
-LLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/c300da4c0c1a26115756d5b34583b0c2
-LLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/1ef72ad1007363f25a410733efd5ce8c69d47718bd3df8d3e00e8cf22fd8ec3dbc4a19d4211a72877f90767a9b5d46318d5c059642de21a32072dd8099225add
-LLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/32863e5fc4dbc7003326474e3b057412
-LLVM.v12.0.0+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/2f3b726d787a6bb7cf92e7e4e91a9f72eac0da7fbf61b5cb94a49de812ec07b05b2cdb37800fce556071b410328dd7db1ed375961684e141fc54bc632ff10df8
-LLVM.v12.0.0+1.x86_64-apple-darwin.tar.gz/md5/0bd322858942def11a7a331211782917
-LLVM.v12.0.0+1.x86_64-apple-darwin.tar.gz/sha512/a33160fdee69ca06bcc1fc6843d19f306dacbf88037108c9fe5a0c57e2e4176c9ec108bd286486029312617a86da4791e123e89407a2ae71cae6f1726f44e9b3
-LLVM.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/md5/3efdba4e35e0c76b3d60bf8ad03fc321
-LLVM.v12.0.0+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/0018caf8c878b482a7621d950687018586b5c0aaaa08a68d26855b40bc9b2ba60600e83d6bfd729b69d25eee3f0e4126afab9f7f523b41d5532400f6289508f8
-LLVM.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/md5/4ed4c4245ead0905cece3c5ac4c48886
-LLVM.v12.0.0+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/609fba22a5d438d039f77fbb8c726766e71da9c768df83033d9b7cc8f178d2b25175427a5adfa6a004d61b610e4303f2a301e4ff5af51cf90296b883bd33c824
-LLVM.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/md5/d78fe12460c99fc47f9139325c19d377
-LLVM.v12.0.0+1.x86_64-linux-musl-cxx03.tar.gz/sha512/698fd57da10afef2c5c43915292a4e9778bb9a005694391273cd2c2184aa792f914bf3f9e37bc31bb541054ddbe75122d01d34a4ac688a8b87ea22c795369fd6
-LLVM.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/md5/49855ae6aaa48f3b6d55cf6a8c354449
-LLVM.v12.0.0+1.x86_64-linux-musl-cxx11.tar.gz/sha512/06bec6da4dba5956b35571dec54f81ccbd55a0e88aafa4d2d4477f50ca8f4c875853ce25db5fee236a2cd8be9f4b98528d051690db61efafb51c964e1953f159
-LLVM.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/md5/f2362ea7f927839c1a12724e11c721da
-LLVM.v12.0.0+1.x86_64-unknown-freebsd.tar.gz/sha512/e42248a84d8bc52d3cb23cb8c5b5d51560480333ff367d95ba5390e956bc78b8a48d5d7f70809aaa9288997db4b99a13e0e99f484f97713da9b167666c2fa284
-LLVM.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/4115b1aed76cfab64e6327593be41e2e
-LLVM.v12.0.0+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/0c184e7e788dd5b9e76a10a6a0ca367eb0df76608d1d773abeec4583fcc8c59014eaf09ff9af67815c935a29816e055f9c4cf8d9c1ab5c98840bb24515f6ec22
-LLVM.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/2e663337eecbd7745e9decd60030f12d
-LLVM.v12.0.0+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/198c8340de6ea5f01347d1465e0059ac6f83e23ab603c5789ed12a5a85eb738544cd490cf42a7799b44203fa19d55034c65b5a3f8e85eb2eade188d2bd32d43b
+LLVM.v12.0.1+3.aarch64-apple-darwin.tar.gz/md5/030bb63a3b713d16f0ac3435c0a43e09
+LLVM.v12.0.1+3.aarch64-apple-darwin.tar.gz/sha512/db1bbf60acef681a8305fc0e4112d6833dc1ea700a1620e0bcd8d03df558dd4acf0581c6a7f11bafa09dcc96c49c8acc0b3d5d958a232ac389890fd9ad618b7c
+LLVM.v12.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/fe34d2e02cee760c62f1f41fe47ae253
+LLVM.v12.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/66e825ff27e9dbbfa751c96484ff62335b5630e71278e4b10bb965ce4b5e73766a0ac5f50c2a5ed4383550220f45c5fc408a944f1948e0a7a2b4447893cc1d8d
+LLVM.v12.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/85a6bbde762fafa2a1807852f56248d4
+LLVM.v12.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/0c6d130888eaff79b7d754a169330f7e8faa11d783953bc53822a29c312f60402bf151286f6f621ab08e6d25119f54becfceef6714e3a5aa388dcb0d2254332a
+LLVM.v12.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/3d1eb8fd9704fefac898226b3d4ab97d
+LLVM.v12.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/d0f1f76d408102dc2cbeb4ea14718e00b829c8a7f5d5673dca285025d123d2fc8d523729ff637339f32067347bc1e7a235fe209b9d7a997c642499094a2cbc86
+LLVM.v12.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/6369070026fc863a83ec0ffca0ee86f4
+LLVM.v12.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/7895fddea3a29073cf317699b44b84cdea93c30897da2b6adde23831d44414ef21f09a33107084eefb596bac114a40b13d6387820fe0a2b380ec657eebeea2e8
+LLVM.v12.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/56508e7869dfd808945120b5a821d076
+LLVM.v12.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/f55cfa6cb399bcba239e7669e0223ede9908204d3f7af880cf83f7e75eac3f33e0555d38c72a7dab8e5a0b69b9c829ebe045a9a5823561f2f8ecc34406d43d56
+LLVM.v12.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/254db0016b8f28e79637bc6fcb802b3e
+LLVM.v12.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/c84b5ca1c0db254c567613748034a4be7682d96462abbb842bf64924b35f48a999a43feeacb3f4e2603eb343fc4dac3cc3fd6f30520c915236637bca8e8bd040
+LLVM.v12.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/6e0c7c26a2e4b3a740d13c2fa8a8ffb3
+LLVM.v12.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/78c214b5e8330663de05f376107778457a94df300660142ac5598b41aad52b7cf39de47b9fc7a589c90425e6adba41e1e816dbe41705897f2eafa060faae60cc
+LLVM.v12.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/47349be8a91bcb4e9c02c6b3d1a48834
+LLVM.v12.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/9c6c0820c60de53512e8b0f35f6fca87d9bc96f37fcfae616fd49dc37f6f0ba46a5d9cf8fe61d2a6b3248c1add386cd850e0faf6049dbe4b399fe43f3e9ddd6a
+LLVM.v12.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/0f192ef028db44efcdba51d82f263449
+LLVM.v12.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/65eed323640babaf50adb8274e1b69dc71132991d2e67523f52550267a4021dad159bbaf66ce94b7bc05b5b449440aa043025533d57d7c6e94d2d1c7b9b15450
+LLVM.v12.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/90bfc9c6cf1699b912dac03305c515f4
+LLVM.v12.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/ad3a2e3e6d6f699145e4752786ba807715c33312d86ac0f14b17d31ac252d0a99ace0238ae7cd5ca439c5a48ad9451fa8eab9efd0a954e6df96ed99612234812
+LLVM.v12.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/42a829237e9b00b53ecb4ab0c34ef1eb
+LLVM.v12.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/8b7aa5e5da8bad8760ad43d647c7b2c0ff26282d79310c78661be43817bef03adfa5b0084f441571e74eb4733cb5fa088952563faabdd473e2f997aa06035913
+LLVM.v12.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/b60ffd48c020b1ac576d27cce13e03e3
+LLVM.v12.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/de804988c639dfab72a6850e7513340f0bd927f0e5ab0dc059f58967f7d296f71d84771533fbc5869a540c6d25b668f3f239f6ccbb7f6ccbce02bc978f457f73
+LLVM.v12.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/70922a7a3c7e140d603c419b89ff309e
+LLVM.v12.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/340f438b091f88f637d86dae2494fcc2bbd874a61dec3b52a2a336328da650a787f310c5b53e52a6c0afd0b2821b44784451e3ecd6419a81f1159f345b4a31ac
+LLVM.v12.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/0d3f5843efc9ff41ec26b0d2fe06d1d0
+LLVM.v12.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/6c7afe936a6fea6c546b86365649c5e3eaeabb676155b1de6a51040d3851bdf6a3cd9ac4b18ad4293e809cde6452adf6ce82dbe34e87dcb063dbf8e20e6e1848
+LLVM.v12.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/af24092be60361066596900ca710068c
+LLVM.v12.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/73733c177e32cd04227b480974e9fb46f4c3aad294a024be971a2ca6b902c711449bc610947a4dfeff596f45f3b384d03b77478a9c552328be11282e6d58fd4a
+LLVM.v12.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/423b80b7428ef930a9f1491544ab265c
+LLVM.v12.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/050f92f6700289aea6cfcef16cf3dd74d6aa67fcb1dfab3d3c763d8079e3981277e1466910b5d292cd3be8abcafa683c4b6666bf8eeaf0ec239b970a2aea46d0
+LLVM.v12.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/f29d5c7daa02e68011359b4e833ee93f
+LLVM.v12.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/647a861b043411ff0ee543d2e348a8a67d4287708444df16b61a184d42a03749ce31f3a1c3ba0854c30c1b94998e263f992a69b0989ce2bd83227aeb779bcb35
+LLVM.v12.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/3cea1896bf46e17788a7bf3cfcda1572
+LLVM.v12.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/d077a8cf530391fbaf7f7ff9866fc006ffe875c63a2b2757e5585cd96fcf50a71aa67bcde7f3d122286c978b91f69949542add4f94d18b36c18fd88dd9d14640
+LLVM.v12.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/52a64aae6d3502b6b6da316826a6f781
+LLVM.v12.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/ec631dc651f97d7c5406439f2c43c1975cb15903593c808749b419904ee19505e404cf6a2c2030928972bbdf3c135443978e6710b628549a52f18be4860447db
+LLVM.v12.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/fd65e34e2fdbe1db1936bca6a60dceb9
+LLVM.v12.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/788efc06e2c48d409e3bfa3be1cbc52884c41884af409d90fb666ed2f8c0ddae2ff0e440a189d64db98919d8630dc6aecf033cef73cb363a2882dcd005e9fa58
+LLVM.v12.0.1+3.x86_64-apple-darwin.tar.gz/md5/114ce26c4df444bab96f990241ce9bed
+LLVM.v12.0.1+3.x86_64-apple-darwin.tar.gz/sha512/fd3d929389c56574fb21ec1c0f587801f312da6938a09c6ca521a378c6e0b268157f4b712b335652033fabcea1baae376d26d52f54022c615fb0d23edc805d47
+LLVM.v12.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/ab03079efbe7d09f21b338810ce3eeae
+LLVM.v12.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/804fb8b233f2b07f2485be6f9335b3a55e2c9905147f628f949dd12295c2f2fd8fd50b6a4a2332660430bf967402d9ad9adb8eab1990d548a6e5c6bab7afbf45
+LLVM.v12.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/d0990981455842c45be8d65d8d546ba1
+LLVM.v12.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/95394cc64bf53487c855179a8e415cfd6cf31165d8ddb42c00121212b37c1e2024c6183e3a52f6e251ffd153437b115a5601780530d8c97bd428b961d7b43196
+LLVM.v12.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/d5386c4593c7ab43b19d2a9606bf5832
+LLVM.v12.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/4413d5bddaa3b9e90c2e6913ce62046c2b6325f602c8e8922ef32c8f801ba258d1e4cc15e7865b5ca43ffaa9202e84d7f595959863dca8ab4a34bff4f4e228f8
+LLVM.v12.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/8ea5455eade726090f609be3150dfa60
+LLVM.v12.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/06499fbe80700ca5b9c935a24c5e12ed68a17aa8fc57b7b51cf6b35e9ad4866ed4c7cae60a9cf68c89bc662a7854e61e6d8e4580bc1e5f05f53e6943ad2c2dd0
+LLVM.v12.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/79e28ee303ac6b62a2573defe7465e50
+LLVM.v12.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/1b563fa34102d4354ef5703875bc4d3814dedce22891e1ec70f418af523a2dc759f10d1c063b571fc098660a0269cc92e72b6239d30098ff6bb0f190e3c3037e
+LLVM.v12.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/5907621595ffb08e6e4b32662f864d04
+LLVM.v12.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/c28d2d41e97609f43b820f0702ab73d099a84e33e0dcc73f60e7d9d780ac14cbb041f508cf7948af7647086495e6704b49671ee32454309c57cca7275d3da3f2
+LLVM.v12.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/d1947f14710a789be153ac7dbaf07fd6
+LLVM.v12.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/a1872be87a399b2488fc71106c6deeaf76508efbf3a8149dabcd9bf704fdf365071b2c159f46834401c17c2a677dc3e94b877f759ba421a5f8fa44a9f6f41c01
+LLVM_assert.v12.0.1+3.aarch64-apple-darwin.tar.gz/md5/4552a6310882e53f4811c8c489cc606c
+LLVM_assert.v12.0.1+3.aarch64-apple-darwin.tar.gz/sha512/ca06e15aa74aff65ddd860f4cc0277996f34411f88a83e8719233b9b4aaa23a2c8a1299215e44a55fa9810b9f762be896a41ac643791ac06fdd0fcafc98e7ee2
+LLVM_assert.v12.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/b748609f646cb3764890a8dfb7e1bd8e
+LLVM_assert.v12.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/9ef7ee15bcc062f329951e90c743b3abd8ea522f7a1464a9e250483a6f087f3e74a70f7b0344fa882b2cd3e00af2b0f436789111885248b0ef0ce846123c84eb
+LLVM_assert.v12.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/63e23a25c16b1eee756cb5030a37a13f
+LLVM_assert.v12.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/08493eb6d8b3599f216417590775394cef147ddc4b4582c4176a39890084432c05bd1806256953310a0a18e6d3d76e498f75bbb8994af0b4e13b7e4144f60499
+LLVM_assert.v12.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/46a0faaee1285bab78d8c3eae37172c6
+LLVM_assert.v12.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/110370eec758240e347d3ed749ed4ec2734f30887cf1ba433f88eb9ce86d5392fb566e41b8834a1dd802aad9c06ae2997050c4e67ac3d9d8bd02a5607cd155a7
+LLVM_assert.v12.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/3118c11faac4a3a6ae28816620e1e5cf
+LLVM_assert.v12.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/8659202d60c105e15f761417e5e234a9a28e3d430f76919a3a97b3468f82a83773fa82b71b388f9db427f64b37cb32850631e69541dad9e4bf3879aa9b259ae5
+LLVM_assert.v12.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/c85321f3bf4921332771f52f91bc1242
+LLVM_assert.v12.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/a364948840635cebd140d3392e1f13ce91e91ba3621b542ff8944fbf467aaeeebe3a2de29faf0a5a0704244f7befc2bacd27637aac8fa82027cddb158a279f22
+LLVM_assert.v12.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/8cf84c038afa0f88c433ef80de7fcf78
+LLVM_assert.v12.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/a212d9468bb65aa940bc80cd4e3bd325359997231a1d1b736cf3374c8eaabd1e542483a915b9b31dded4fc954e8969dad89a8d80af66e20030b3ec68ee910e44
+LLVM_assert.v12.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/be16a49eae21f6cc64ebb702942fe056
+LLVM_assert.v12.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/4b3a8a5ffdb699267a8664e774dad3a3fc57459d2d5e91e58382ce0cb62bad43e6e6fd40bca70c0e09131a7a6c11106e14099efcbdb80100391a50db13d74a39
+LLVM_assert.v12.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/24bc7545947c7849e4a682681963792e
+LLVM_assert.v12.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/be3a91ea0c48dcca90d537db774fd02c1a7eed1e260c3b5e15d9cf1659dc7abae74375e85203dac9ffd218c69700b3ce2f411d01fb8c31380f0c662dad3ce8b2
+LLVM_assert.v12.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/b426daf1d4f60f22f4640a0a8cbe2c1b
+LLVM_assert.v12.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/b7f1cb3611fe0b0e66904654e7afb7e1f122a25b17505eaf9000b3832924b8801166ffda3e9195ac30cf0dd97c74847418f25701169e561807bd638acb9f16f8
+LLVM_assert.v12.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/1027a5d4137f2454cc431b42a9a5669e
+LLVM_assert.v12.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/96d3df07e31cb91dee4f179bf82e38c6c81de711a05aae69342d04f615e1f87cd3e51b977cd66603577a79b59485dc6253aebe55b75cac0016227d2b86c5a348
+LLVM_assert.v12.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/63c82c7c3d0a641433c724409fb98a8a
+LLVM_assert.v12.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/905404dcde1b59b6a43ec5bae571bb1c50d8571e5add5fe97d72dcfed48d825419bad03a5d54bf9c86387d97b1e01411ab6bc62be736aa944ca559317c7a036b
+LLVM_assert.v12.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/2135e5b0431edb042700f39ed23d277d
+LLVM_assert.v12.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/415d81437863cb9b86d68e31c676c30e011af59c2399446d96b99adc9e928a5af346ecd26119fb9a170d2f89a7bd7abfbd07021ace5a4f2e8cab7abd5d158f7a
+LLVM_assert.v12.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/bc45e5019e32802cc430c3e9b267e3ff
+LLVM_assert.v12.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/a84fd21fb29781b07ae20a7bcd3204fc48ed109d66318310d95fdf03633eeb02fd81d0ea9647502b0696940b8b03af790846a936d7fe9a0cfc906e6a8150d309
+LLVM_assert.v12.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/e30e5b78b50aeeb5f26b1c9825ebf82e
+LLVM_assert.v12.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/38e96b6b214bb731f339cd48387bcdad8efdce1be8e10e6b9762ffdc22c18938112a2f3886d60ab439ce0ed6c6b9f65553ea0316a46f373def02ee481cacb953
+LLVM_assert.v12.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/33574d54af8ecaef67643f1d23c97a54
+LLVM_assert.v12.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/5bbeac360ea5fe8cd3f6d341463bac7aff8b173cc6e3c1566957c8f88e50e51197d094cdc6fdcf01437d0c4a0a9eadc912cab94084c9f549bd29d449b2d82981
+LLVM_assert.v12.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/ba2480e263bc3c97d580686ddc0b3683
+LLVM_assert.v12.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/9c4ac694c45482f7a156a00e357fc966262f26e1dbfbce1c0812824c629f38bbda457d49e1952656993d0e7b8ac72dd56dbabef30c2192d42885aca0db419329
+LLVM_assert.v12.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/11a8ffa4d3e4786bf68b810ae6f66369
+LLVM_assert.v12.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/dca1cf597ac3014f8e91b9049aab559b6648d9e6e143d926621f45ec0b949ed3527df017258645ce85556f3cc89e15ba92850e53977b94315041a4ae4788ab2c
+LLVM_assert.v12.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/af7db3b8fad4eedd384f79c87c24eefc
+LLVM_assert.v12.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/ecce308df2f2beb782a3daec890e05f5231ae3f0d92ade72429f2c4b5e53d8003f72b4e580397c5199f1cfdbba31b9d1439cb7ee921a1b7480d16ad38a4f6ffe
+LLVM_assert.v12.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/207ece8c5dfb46a0fb8da91424a98e72
+LLVM_assert.v12.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/1a89cd456656933d341a742411d8439aea4968c94b23e42fb776690c82630e15fbaf9c3f71f4577377dd6b719a3f8e3c45d9b2e7887b15748f39d8c852943979
+LLVM_assert.v12.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/557492c1ee401357e59412728bff6606
+LLVM_assert.v12.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/166376da0e6a53bd613f8029bccd1ed348a16226fee7bfeb2ea8682f3a919a243d54c08708bdd0bfed053383f5c9e53ccc494e92289d1418b9dfc7fbca3e0596
+LLVM_assert.v12.0.1+3.x86_64-apple-darwin.tar.gz/md5/789248a43777cac637c111c29f7bf455
+LLVM_assert.v12.0.1+3.x86_64-apple-darwin.tar.gz/sha512/6638ef4237961821cedb69b54993609274c10e9b8116b1a64e8cce68c6a09cda4e65f2661a5ea37a359e6bdde7e7b1055571434d852e6edb57f95fb7490b0036
+LLVM_assert.v12.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/e9c6659bbb538ef970e9b678d2c0e382
+LLVM_assert.v12.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/b167c002d394890b304ad245f644754c0077fed559bb0570001cb01bb14341d6f7807a1b231a12aba274a097ccf347fec2e87b5d86a21381685e27bffbfea9d7
+LLVM_assert.v12.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/a19ade942624f4e0bd589419fd4a4a3d
+LLVM_assert.v12.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/a0e1376c598706dccb681acc13a4e653e2cc8d0a9ffbaab39fc242ca4a14bb24ad5579d0caa716f21de9b5cae365c1aaf9245d05364d5d9fbf5bd820585bc824
+LLVM_assert.v12.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/8c9729ce6a72e13c43cd135437e2d4ed
+LLVM_assert.v12.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/45cff53b9d7444ad50f3e6769b54d5de8ae971909839e8a96207671b6e693ab9583c4d28c9e892190e5826d8c571f746de6298e37dc308c5f79dbc4fc2f4342a
+LLVM_assert.v12.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/5ac2b72a7a51d90a1479b6bd09f1c584
+LLVM_assert.v12.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/1f64459828c02f76d4a03bf49157ee3e617cf39ffcb4cdadc74444eebd9485393751bfb2fc00b552a52cdfbe2a4a068e8c39ac7eda9744ee0f70159299e8fe25
+LLVM_assert.v12.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/2fe9735db03a1e3a86065c09e660b589
+LLVM_assert.v12.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/d846b78f90c3fda31e228b12c5444fc58a657b7c1a2cedcd2965ae798ce114039b81eb531327a0ee53a87822a2610b11f6a38c5e55fd3f26b37fff2fe6387972
+LLVM_assert.v12.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/22fc04ad75e0ecc828b8a41f5aff6741
+LLVM_assert.v12.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/a78770fb2af0ebd4a020b9dcbbfc30b0f422b4d4c2d52e49714f1c3e67bccc492c2ef6e322326899586de45fc2628a36d3a86fba26672f1ac13c152f545725c5
+LLVM_assert.v12.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/03f376024f9a8b757c148f23a09f0ffe
+LLVM_assert.v12.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/8ef022894b6d488a93d38837a629ee11b6c6ee2962e7bfc8b169effb6ff7f949aa50b6282b212ca6b3e1cf8fd1c94f2ee4621619c5e1b842385758a07c42be43
+libLLVM.v12.0.1+4.aarch64-apple-darwin.tar.gz/md5/621f8bfbcbe5f9b7438803620be58eb0
+libLLVM.v12.0.1+4.aarch64-apple-darwin.tar.gz/sha512/9699c073dbbaa33b9d31fbca5d0ea6d7306d771462f257dce29d95e02bf6272ed3dc16049521ec4628967bf3bb86467b0eb659ca85c12d5c5c80b9a333d8b785
+libLLVM.v12.0.1+4.aarch64-linux-gnu-cxx03.tar.gz/md5/c06a14c4a509e1257ac2f15abd8e8003
+libLLVM.v12.0.1+4.aarch64-linux-gnu-cxx03.tar.gz/sha512/e410ce7602e7c167742f865963435c200321d8bb76bfb23c1239008b8c62660bba4735a3b6e0121127db7624d0ff209211b66ca2f57cc736bfc3177d68c1b22c
+libLLVM.v12.0.1+4.aarch64-linux-gnu-cxx11.tar.gz/md5/c1c8af2f5c640d160283a4d2a54520c1
+libLLVM.v12.0.1+4.aarch64-linux-gnu-cxx11.tar.gz/sha512/e4fe03a8065358d30954a935307b5589afb8cd979b05d21c1d19efe6bea7b698bcff338c5fe9ff16c2eb03475414b4dce1dbfb513ce43ba7e82ad61c827969af
+libLLVM.v12.0.1+4.aarch64-linux-musl-cxx03.tar.gz/md5/8ed77b634120fa18d2835470b1967948
+libLLVM.v12.0.1+4.aarch64-linux-musl-cxx03.tar.gz/sha512/11494d13f855a411a41faf64a59269d9fbe0a82cdc2ce48e435d3d6883e3e48dd8a1c157aa0de2545ef020f061271dadbf1988053d3a908a7e64f8efbda1f9e2
+libLLVM.v12.0.1+4.aarch64-linux-musl-cxx11.tar.gz/md5/a952dd78e79a3a1923d9cdc9836bc1e7
+libLLVM.v12.0.1+4.aarch64-linux-musl-cxx11.tar.gz/sha512/64281d032eef688af5ff9828564b1f919526129a7a8b3e14d39819763e65805eb64ddb9cd1c037c76b3fa4f9a31ff5116d5838d36ead33840dd0ef159400d46c
+libLLVM.v12.0.1+4.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/0cf98dfeb06bbfe271d039c02e1f65a5
+libLLVM.v12.0.1+4.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/344f163964299d40c6124375d8ef825afd77616440f0bfa7888407c7ad4ab9b89b605dbb9a8fff78ee14b79fdb56102852e9243a77839902683b87094e808cc3
+libLLVM.v12.0.1+4.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/8d414f272c7c85806f231219dfb31601
+libLLVM.v12.0.1+4.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/4cd1d2b5fb8fab8f97fd409a7e444add73904b5db0ab590efe0c687d6b2a9e12cf3e2937c52b520ad95cfe88ca2ed5f565eb2dfd50be0dfa0e58f694ea456488
+libLLVM.v12.0.1+4.armv6l-linux-musleabihf-cxx03.tar.gz/md5/7d91435a2ad1e33f0c774437674573eb
+libLLVM.v12.0.1+4.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/150a220b18f8afbda55a818bbe00f68a64c04f824fbe948c9a227f002d1fe3dbe7cf23f0ff1edb38b42085174f0f8f3a80f512924729ba44b11ee6d5d872b75f
+libLLVM.v12.0.1+4.armv6l-linux-musleabihf-cxx11.tar.gz/md5/13ca171abf028cd207863967fe4472ca
+libLLVM.v12.0.1+4.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/4d7a38de2a2d39f06bb37c3621f04ebe23769ccd9958d1235c940b5b12e94128ed0623e7b86a5d579582ec0b6b2191349306cbe1d6af8202e0932ec7609c829b
+libLLVM.v12.0.1+4.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/a7f2e34c93ec4bbecea33a22c64d889e
+libLLVM.v12.0.1+4.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/18c0066daa48fbc87e9501e646e7b3ecf34e8778f159d9ad623950c7a87d27b2cd46a204d948223aa1fdd1c283f89fbaae9096a3b381f641450b4ad720b07738
+libLLVM.v12.0.1+4.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/84c827833c3d4a3ccee9e31634abd568
+libLLVM.v12.0.1+4.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/8aa13a464ff45402cf95b31b15ff410aa6986cc5328867f8b443b940e144eaf7491290c92177b2b3355f40accec3a27254978efb095e5a88be8f4096f2aa464c
+libLLVM.v12.0.1+4.armv7l-linux-musleabihf-cxx03.tar.gz/md5/0f009e3e1496d6dba9b00a13c2b3f78b
+libLLVM.v12.0.1+4.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/5f3115b2fcfb5e92c0cbfa1e9f22e62d6ae7ff8b8febd147fa01acfb52ac67a99f7b38df379374fc91506e5b2178ab4672788ee9e39776afe0172485c0585412
+libLLVM.v12.0.1+4.armv7l-linux-musleabihf-cxx11.tar.gz/md5/2fd2e1f3f61566f99d7532d423c345e1
+libLLVM.v12.0.1+4.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/2cded91fe2cf936e358204372002abf6b1b3dc4b19f7f858e8d240d7d5f401d28992df9bb06b342217588a2a0f59743e53bd763c5d20da7d6bd28e267a526e9f
+libLLVM.v12.0.1+4.i686-linux-gnu-cxx03.tar.gz/md5/8f05950d76774eb69496c0405b7867d3
+libLLVM.v12.0.1+4.i686-linux-gnu-cxx03.tar.gz/sha512/7dabedad8408d7420112c26b34273e40f8a0407115a200dcb3e44865cc74a35102615c75819c5fc07e280e35f7391bd67b5a67db62d2aeaf46fa56005543e1fd
+libLLVM.v12.0.1+4.i686-linux-gnu-cxx11.tar.gz/md5/faeb19a3c2ba3f960357d792e78367b4
+libLLVM.v12.0.1+4.i686-linux-gnu-cxx11.tar.gz/sha512/0629a2ff321f6c8e9cefbd40838dea3cd728fafeb2ae65c3feb2296922e07015a6c97161e16a9792bd4675c87f13ea9ad3295c343170ebc99eaf36a444580ada
+libLLVM.v12.0.1+4.i686-linux-musl-cxx03.tar.gz/md5/ecd268fc164b119365262a7b576ed6c2
+libLLVM.v12.0.1+4.i686-linux-musl-cxx03.tar.gz/sha512/666978c259f5e7762a75f5fcf9ef0f480cd75d710c14a9e2dd2cf95522404f51e412934c9ffba644b5d065bf8d9e01fb4e8552be1fedd8e01b888de8ab7f30f2
+libLLVM.v12.0.1+4.i686-linux-musl-cxx11.tar.gz/md5/44f5b84a241ae43381fe63484599a7c0
+libLLVM.v12.0.1+4.i686-linux-musl-cxx11.tar.gz/sha512/feedcafc79c4d83908ffb06b4d4ae05273c92f2fd775511603af90a154a8be3b61f3d3998366c343b405a6e4365ebd540b29cfe6a3cea7deb843907bff277c27
+libLLVM.v12.0.1+4.i686-w64-mingw32-cxx03.tar.gz/md5/36476c00f600b9e3aa11e4ff3b2b737a
+libLLVM.v12.0.1+4.i686-w64-mingw32-cxx03.tar.gz/sha512/8b85b80588a6fdddfc6ac52fb27e5fe337789ae82de86ac436aff05ee18d1771c43628fde7bfe9f4fabdb34cacdc198dc05c1f2c9c1b0ce18f244201a91c94b1
+libLLVM.v12.0.1+4.i686-w64-mingw32-cxx11.tar.gz/md5/c8594e76923ddb14e054b469b693b1e5
+libLLVM.v12.0.1+4.i686-w64-mingw32-cxx11.tar.gz/sha512/f846ff64003ec35b608798e4f31f404f0f228b20d0c8d499131ec62d0f3ac688c51a773149a9a3be65e8dedd0bc6ccd76a337ebe347f439290fbb35835aaab10
+libLLVM.v12.0.1+4.powerpc64le-linux-gnu-cxx03.tar.gz/md5/407e8dbbb31748898960b50b457f643f
+libLLVM.v12.0.1+4.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/0acf0afe1686943c7c69e55b6419a32a071490fc104cac815aee9529c4dad60ef94617bfc2e865d56a5f064302d1bb69d9caa1211c24364afdea34e7a1d56214
+libLLVM.v12.0.1+4.powerpc64le-linux-gnu-cxx11.tar.gz/md5/65e5cd3e35cf6823e42e79ea39b1cc1c
+libLLVM.v12.0.1+4.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/4a3908c699c1d90bf845d1e7dab0a1be1eb32dce5db8916f3498514ba0a09099dada2f2632c418b14b33731d18b802f60844eb333ef53ff642e9d3cd7c2ea1c3
+libLLVM.v12.0.1+4.x86_64-apple-darwin.tar.gz/md5/848ae9725ce3d3a86c6af68d2fabbbb8
+libLLVM.v12.0.1+4.x86_64-apple-darwin.tar.gz/sha512/57a445e0e7fe344f3ad486c518b01b1c2713c8b7b5bbe0ff066bd64b49a6c6755f01455745c2c618988655f90977b1562a438d947121c93cefc9a2a5dd3b32a6
+libLLVM.v12.0.1+4.x86_64-linux-gnu-cxx03.tar.gz/md5/7aada269027b825655011c2f2a797a9e
+libLLVM.v12.0.1+4.x86_64-linux-gnu-cxx03.tar.gz/sha512/b05e052c403d26e689c625e6ed9c01fac5d5d5e5af246066c3c83a16b92b927a21525955a3a6ddf64afd6921883a5c1631055a6783630455db3815a630a5c172
+libLLVM.v12.0.1+4.x86_64-linux-gnu-cxx11.tar.gz/md5/6c3c26c9d9af98ed13e1d74287e8a8fa
+libLLVM.v12.0.1+4.x86_64-linux-gnu-cxx11.tar.gz/sha512/a1530c1c69e44fa7a737219c58a36ebdbdd0fba7abc175660e37a0ee095a11b46e2aa2cf9858a69ca26654293d1ceeb024520762a78ed02178b5979dce6b26ed
+libLLVM.v12.0.1+4.x86_64-linux-musl-cxx03.tar.gz/md5/c0e3c8d1755dcdbc63c4ae401deddd5e
+libLLVM.v12.0.1+4.x86_64-linux-musl-cxx03.tar.gz/sha512/f1de0aec33fa628d965d43037dcd1de7838668ac98e7fc338b956f18a770ff1372bc05e13288c5bd45b18b15a6ca0a3ce5c090d869605bd5476bf9ec1c0cf884
+libLLVM.v12.0.1+4.x86_64-linux-musl-cxx11.tar.gz/md5/252cb346d0f14220ab59150e9f3cba3d
+libLLVM.v12.0.1+4.x86_64-linux-musl-cxx11.tar.gz/sha512/2fb74c90e9df2d06e7a6dbdc62d6b80b1e9b7d2db80b487210d130788cbc749853776276ad5790d4efc2edf950a7fecbb809b2502a2a44f930518139326cf781
+libLLVM.v12.0.1+4.x86_64-unknown-freebsd.tar.gz/md5/5f6b694aa723b4596aa5fe494e599769
+libLLVM.v12.0.1+4.x86_64-unknown-freebsd.tar.gz/sha512/4c1f256522de561d1edd305822da9aed067614e8c46b26bd1448c713e9587ad1f44dd4f539e9eb3f53f6d84a3846567dbbbc6638751afeea17cace6dffa853da
+libLLVM.v12.0.1+4.x86_64-w64-mingw32-cxx03.tar.gz/md5/55331e2ed8c38591ae62b17d53c99775
+libLLVM.v12.0.1+4.x86_64-w64-mingw32-cxx03.tar.gz/sha512/b45e87e50f8161b5f234539ab796aa2e0d7845878b6b6aa73e96708a27480ba9dffa0b6d2bb15f19f6b269046b75c67902c7913bdb1fef33a74a78488f7031ae
+libLLVM.v12.0.1+4.x86_64-w64-mingw32-cxx11.tar.gz/md5/19996847c13ee330b6e3716296c9e0e3
+libLLVM.v12.0.1+4.x86_64-w64-mingw32-cxx11.tar.gz/sha512/17b8ac789369c27eed7c0cdddffc758a9dc795f38d3cd862d8e87a68186a451a92641cee0ba2dbd5f80bd0b42d832898d79db633bae34fc7659d57308d470df7
+libLLVM_assert.v12.0.1+4.aarch64-apple-darwin.tar.gz/md5/fb6217c318081e176aa16109f84a5f50
+libLLVM_assert.v12.0.1+4.aarch64-apple-darwin.tar.gz/sha512/4fa1c310185b81a4ea9908b9fe0c0947f8bd012ec1ff7eab7a12ea37d26d0556b576bab5531c2262f638728efa83d5d7843fe381dd070b45741725171b1c9635
+libLLVM_assert.v12.0.1+4.aarch64-linux-gnu-cxx03.tar.gz/md5/7bc0b9dfb642a3ddefca68d8ee0a2a6a
+libLLVM_assert.v12.0.1+4.aarch64-linux-gnu-cxx03.tar.gz/sha512/7b7dbbddb69cb9a261d44dfc01b1443de65770b37ab2e2d4130b34fa00328b166afbed5361b84f92047c152a2754402b4a6bc4d61134f956d38685ab0e514693
+libLLVM_assert.v12.0.1+4.aarch64-linux-gnu-cxx11.tar.gz/md5/6041ae140d2bcffd58a385f6901ab939
+libLLVM_assert.v12.0.1+4.aarch64-linux-gnu-cxx11.tar.gz/sha512/1873b4861540ce349d5f6ebd939af793ba876c6270f4f9f11afcda98768201f28c36eb80c3310133be833938367960b4464efaf94051edb21ce332c1b13069af
+libLLVM_assert.v12.0.1+4.aarch64-linux-musl-cxx03.tar.gz/md5/dba4630abf36ff9e1cc20bd7f18b1982
+libLLVM_assert.v12.0.1+4.aarch64-linux-musl-cxx03.tar.gz/sha512/9b9b506a0b733f8f61af197b6de3365059346b66c014cfce1488ad03760898ec52ac505e41e14b6dc6db0481cc97b85b6f82b64132060e8c0d40777ba4d20e40
+libLLVM_assert.v12.0.1+4.aarch64-linux-musl-cxx11.tar.gz/md5/aa120cd6debf20831208543e027199a1
+libLLVM_assert.v12.0.1+4.aarch64-linux-musl-cxx11.tar.gz/sha512/92bc4d48a8f57939c61c3a4bf8efdf552a43b46898bc67379c541dbfee58641b1591519f55abb35a1f342b43ecd9563d663361e80ca42816b5e89bdaca0eab34
+libLLVM_assert.v12.0.1+4.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/dea1228bd476eb8550bd3bc47ee5b973
+libLLVM_assert.v12.0.1+4.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/f7e878e1c1206cd7a666f3741e12ff09931f13ced2c5eb24222feb10189c690cea294087b50ea974b90386cf91214dc91b0e70f0e2e1cda52f9b6abce65bba8b
+libLLVM_assert.v12.0.1+4.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/e6927105c60d50d1f6045e65e066f382
+libLLVM_assert.v12.0.1+4.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/d17a31d3219c07d4633afc3fcc08e1031cb5a0f2c90b749ce078d3f5ce9cdf653d2cac565ac0ff01813266ae3d5eba899469db7e9ceec5849e89286ce68910f9
+libLLVM_assert.v12.0.1+4.armv6l-linux-musleabihf-cxx03.tar.gz/md5/740fd5056703049ffee3cfa983f8c2fb
+libLLVM_assert.v12.0.1+4.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/4eb0a0e5ab3b9d358e1cd266c83a330d507a2ad70719cd0be28ea7bc465d4cef1134cd75f186b9ec7a4932be77e32fa1a0ba3304575d28198c12a3384c3a8aa3
+libLLVM_assert.v12.0.1+4.armv6l-linux-musleabihf-cxx11.tar.gz/md5/fa4bfeca2b20e59548f1a3b8b033eec9
+libLLVM_assert.v12.0.1+4.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/7a5294805627f03ebc307a40d623e24b07bc428db200ae7f9dcf19d977426ab996920ea8e023155fedb5dabc6e8bfa12669bc5ebbab579c9380f61fa1082d4b0
+libLLVM_assert.v12.0.1+4.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/f67bb394d4dc62872ea1e4dc86444e8c
+libLLVM_assert.v12.0.1+4.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/885fd434ac972badb040f0a70dfdf4c6dd27e4cd30395b9c6bdd78caf2703ebbda61b5a7137dc42dd4458276bd6821e0027c77e21405812c33999cc910a0d04d
+libLLVM_assert.v12.0.1+4.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/040dc97394599f9b8bc3f776e0586c66
+libLLVM_assert.v12.0.1+4.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/1ec3fe1fac87169667928723f2426ed5321052f6b6731ddb30b42cf50a0e791dfdd706f9c0d3f9c216ac3589baccd08e0f9ac1bc2b4f2974883f13d83367c0cf
+libLLVM_assert.v12.0.1+4.armv7l-linux-musleabihf-cxx03.tar.gz/md5/aa7320fafbb6289e5814cd30ee433268
+libLLVM_assert.v12.0.1+4.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/809aa4d7b2e0fb65f8100e75ada90e9bdb8f7f2ef4d30b91af110ed4dd0a5ff7faa35d8384469fef0eb4306bf1f3c7ba678aceeaa1552047b88a5bd3566faeb1
+libLLVM_assert.v12.0.1+4.armv7l-linux-musleabihf-cxx11.tar.gz/md5/e52889326dfd0987ef586354d2616eb0
+libLLVM_assert.v12.0.1+4.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/5fde4dd0ec4ed45c11e4c2c44516609c02cf24a7de8bbe2e6c6181738969f32a44d50e1bb0350be250353161b5dd5586b272e1fde135adf834b7114272ab14cd
+libLLVM_assert.v12.0.1+4.i686-linux-gnu-cxx03.tar.gz/md5/cb0a3775ce8f814a02006abdd14a09b5
+libLLVM_assert.v12.0.1+4.i686-linux-gnu-cxx03.tar.gz/sha512/e47411e25df40b39cd4ba30fa0525ed563c3664a8381e2d103c77d39e799dfda5f2aa2e5ab188796db5bcc39018a5db85a82b412177982b9fd4aa83d2fffa5ad
+libLLVM_assert.v12.0.1+4.i686-linux-gnu-cxx11.tar.gz/md5/e96885bc37f46e254df788d9ef617acf
+libLLVM_assert.v12.0.1+4.i686-linux-gnu-cxx11.tar.gz/sha512/ceee99f7dd64cd2785dfad4d7b172250e12e560fc5e489d907b0b5ea9a363b50e833377ae31ce12e6bf7f640841a108fbea15408bfa15a8609ce3599ab1c67c6
+libLLVM_assert.v12.0.1+4.i686-linux-musl-cxx03.tar.gz/md5/ed57762608e698bbc1aa47fbebdc4300
+libLLVM_assert.v12.0.1+4.i686-linux-musl-cxx03.tar.gz/sha512/440061bf11373eb9e1a1d549a0fb7b89010bec464b2da64ad2a067fa53c297d1d8e3590bf5724d656a2664f1ebb3c168c011a267436ccb3a354f9e2cf3e303b4
+libLLVM_assert.v12.0.1+4.i686-linux-musl-cxx11.tar.gz/md5/a712486b6fa6b1275bc17ed7b7daed94
+libLLVM_assert.v12.0.1+4.i686-linux-musl-cxx11.tar.gz/sha512/9ba2f48ab4a3c1b25a5e792a4ca54b525b1a54546354baa63169b7eda02456bb7b7c725ce99af5f5ece3ae4a1d342fe535807ff53ba4f72d849846c430e1893f
+libLLVM_assert.v12.0.1+4.i686-w64-mingw32-cxx03.tar.gz/md5/b34bf1afd4ac90460ff2c7eb2123b1c7
+libLLVM_assert.v12.0.1+4.i686-w64-mingw32-cxx03.tar.gz/sha512/160e27cb5fefd19759aac18ac720194897142e1aa6d82225ab1c752df121cd10734e24183958ae3b734dfb56bdda73761846d1d051559a8d67e0c64db61b2455
+libLLVM_assert.v12.0.1+4.i686-w64-mingw32-cxx11.tar.gz/md5/74b2df8045efc59e33a0300d183557b5
+libLLVM_assert.v12.0.1+4.i686-w64-mingw32-cxx11.tar.gz/sha512/81680815c716e9a50f6c4fa3dbe19d646115eaea19f076ae1a6820cf2b0363862ce8e077fdb1e819e6b33ad040245487281cc3a928798db77fa3ef4719749615
+libLLVM_assert.v12.0.1+4.powerpc64le-linux-gnu-cxx03.tar.gz/md5/7211cd3d6dc3776f75c2ac9f4391bb38
+libLLVM_assert.v12.0.1+4.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/1bc70c15fec2f4558e815c6160e84d4802039ec5d2159c71299f7a830a87cd4e5fa7c88ce214bf65b18146c0de178e11ca00f19bb9998567e57c6e0cd9ed4734
+libLLVM_assert.v12.0.1+4.powerpc64le-linux-gnu-cxx11.tar.gz/md5/cfd50e9f55a048c792baa2750e028084
+libLLVM_assert.v12.0.1+4.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/0e76ac71af7d1d8af63f245bd6d23b853e477079c469d2ea41fde991a100c9fa5670abdf0c50d62b8ec046bec816f14438d241c6c65790edd315547eac105e51
+libLLVM_assert.v12.0.1+4.x86_64-apple-darwin.tar.gz/md5/de4388d774bd849388028e45f4746bf6
+libLLVM_assert.v12.0.1+4.x86_64-apple-darwin.tar.gz/sha512/1196ebc9ccc29ee0ba351fa6d96c07d09a6d7f3aa35928ef6673eb7155a7b0d2658f7eae5ddbda48f567b07736be0473e32d5b4ae72627e500ec68aa44cc09ac
+libLLVM_assert.v12.0.1+4.x86_64-linux-gnu-cxx03.tar.gz/md5/282a8a04447201dbce2e5d3f4f6dd693
+libLLVM_assert.v12.0.1+4.x86_64-linux-gnu-cxx03.tar.gz/sha512/23899c5c597f6fdd122eee2cad30acfa81cefc4990665dfe6fe86477a215a4729a5e50c8880a7c8614edcf23fc951fcc4022c4868ea8bc9babe05d373f75fa46
+libLLVM_assert.v12.0.1+4.x86_64-linux-gnu-cxx11.tar.gz/md5/cdb8ef28e47de1273401bfdc1cc502b1
+libLLVM_assert.v12.0.1+4.x86_64-linux-gnu-cxx11.tar.gz/sha512/5d8380428f60a06500b0afa3d808d1d01060c28401dc7b9323823011dfe9264d0abf72482c0d3775b586276ec5509aec7bc619c031d31984408cbb6027703ac1
+libLLVM_assert.v12.0.1+4.x86_64-linux-musl-cxx03.tar.gz/md5/cd0c6b627367cbb82d5ca8c1eca94c33
+libLLVM_assert.v12.0.1+4.x86_64-linux-musl-cxx03.tar.gz/sha512/c20e070f33ba4bc654dc9e53d5e884b86d0beed9f83a4eacc8eb0792113f1689bda42af6609cde8edf262d4e2325558b06f0acb85a2f0bfc53a220ae54f87bc0
+libLLVM_assert.v12.0.1+4.x86_64-linux-musl-cxx11.tar.gz/md5/6cf2033565bd51c057113d2ec0cb46f6
+libLLVM_assert.v12.0.1+4.x86_64-linux-musl-cxx11.tar.gz/sha512/75fd6f53dcf01a40aac5cfd9a7a0edecd3dc99bc1ac32591d8152f3c9930dd1d255219c74e4f609131d271343802a31d487387072ecf6ead3f72a8562513bf57
+libLLVM_assert.v12.0.1+4.x86_64-unknown-freebsd.tar.gz/md5/28c0970426c16c31d247f521cc4a4196
+libLLVM_assert.v12.0.1+4.x86_64-unknown-freebsd.tar.gz/sha512/388837a840d4fb07cc991ddbe06e56d3c9747439a11578a565b89b7f44a298d0d1a4aaeb9ead1e4219ea2410215cf8ff51410a93e7389bc9caf4b9b0229ce98a
+libLLVM_assert.v12.0.1+4.x86_64-w64-mingw32-cxx03.tar.gz/md5/cc94941da66349a81a6e2f7ac39e7282
+libLLVM_assert.v12.0.1+4.x86_64-w64-mingw32-cxx03.tar.gz/sha512/3300cf6c1d7c860e80a3013f3ca0f2de269543372621a1de0550dd80c198bb500688fb58261f3ddecb4d0cb955c9049c584476faff9762e17e469d5d419e7f8b
+libLLVM_assert.v12.0.1+4.x86_64-w64-mingw32-cxx11.tar.gz/md5/fd9779e1d84db70068119d36f991114d
+libLLVM_assert.v12.0.1+4.x86_64-w64-mingw32-cxx11.tar.gz/sha512/70fbee1a6f050c0a0610b246085e1d3dc4e9a3192ba22185752b7e09d2a67f68522572eea7d1d6c5ef04c06b0b6e8053c993a9fd6b42028e1d137c0fd10aefe3
+llvm-julia-12.0.1-4.tar.gz/md5/1e19759d7c91fed6d2331335140f2dd8
+llvm-julia-12.0.1-4.tar.gz/sha512/476c5488123cda48a864028068253133811f46ef702014ca0c33d5747a4b9408d5952b84154386fe1ac25f31d7f84ea0ee593ef1875b6ae800dd5b81c427c856
diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind
index c2876364d4066..678ae7b0c3fc4 100644
--- a/deps/checksums/llvmunwind
+++ b/deps/checksums/llvmunwind
@@ -1,2 +1,34 @@
-llvmunwind-11.0.1.tar.xz/md5/b030a6d5807d797e505e4fbd32a36c2a
-llvmunwind-11.0.1.tar.xz/sha512/9cfa1eae720a99ac10d05aa4d1e1b205da5c78841aafd6022a87d1272c821a43402309dfa42d8863bc6dea330ab2c0917d62284b572abb56641aa80e56a9be69
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/sha512/d3b0c81498220d77e4f3cc684fb2cc0653792c381207390e695ac30bc74249f96a333a406b2cebdaca14e0b0a27b188cba6209bb5c1cbbb5c184d5626dbdc7a0
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/md5/052a35e879d52244e4b0804be875a38f
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/sha512/d1b34fb97f9928e046d3131a050454710a93d38e60287b7e3c92f179f436586d3230cf90b0ca0eb8a3f9ef89fef7b1ffd7d52871645dfa233a8b07ca87ea2ee4
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/1ad96a03a5dde506b5c05773b1849ec4
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/82306fb7b920fa7c71bd53b23d6915e7f256e8da9679cc926a53bb0d879f1f4469f43efe556ca32c9ef59e27b435572c7b39859090652635db4eeefdec0d1685
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a24fcd3a4dc3b1a98bb7963b1bb4930
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/9ba6b83ccec061a1e5260c807dc8afd6e18799431b25a7e65b97662cc4db02509d02ea07fe12025d80914cec7383624b1c8fc9add46511c668e184ede263ac52
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/09f1bfcf58a4124561553ab5005f9538
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/b0907cb857131183ffc338780c6c6dd1d48bf0ba61c3da1b8f20cf9a943373173b621cf9b2e8f1fbc657059a896b84aa025e6d4f0f1d1e8b623fac3e96541765
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/19158bcfae716b26f924d67c4e719342
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/a90be57990b6699cb737ba96904e94e1f082601ca9d01e670f025b5500f526980741921c9cf672accab78cb5327714ab6ecdbb875174088f0773ebb627a98819
+LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/md5/ba75556eb96b2bcdaf73ff68386d3bc3
+LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/sha512/612fb765695b7aae11ef29608eedf8b959f60c021287a67b03a2a0f57a5814001ffa9b261c9d60d5f3d0582c06c2b41f75fd3afb66a045a248bd43d29e304c97
+LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/md5/2fcbceeb1bfde29be0cbca8bb6718bfe
+LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/sha512/58f281cfc70b3f8a59cf4faa7732824637c811ddc5ea6a058f294f4c3ed4fa6c8ddab5c007567b439f2854635cf4fd146284059bfbc73e7006000ced9383f705
+LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/md5/153c028d97dceb6924414a7a9a137e1e
+LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/sha512/7ae1f197600eabde9036ae58623de34a6d25636d7861777e324eb97902f65e26c6f3775e757178f8914b0cb6c2e925413f5ffc6abc9b6138470dc9e67a17f212
+LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/c08a6cf3e1baf156eb05003ed4e9ebe9
+LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f74e44986622329990842cb3ff549ff9254c81863d8bee468b0e58b7621067e7e7f7f18e4cbeafad6a05e0c107323de6828a78dc7afbcd7cd1892383ff417968
+LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/caf151150e56827be09acca6964d2b18
+LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cb3e7aa71367ec4a115bccc2e8ac6bd5d9f22b3935b3889eee1fbf7303c5f553d7d3108977bc1f6c9b6917a6ed9e10bff211fd56b8169233ceae287b112894c2
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/md5/d95874cbf6f8b55bc314c3968a6a4563
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/sha512/4986a8d9cc9d8761a99a4f02d017b424484233d4cbe2d4f49ccd371591384b1b8d1c4d31cb908505b86b00f2b164568e57751dd949d91af203ee4a582971798a
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/md5/89077d871e15425b1f4c2451fb19a1b2
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/sha512/b65a218b05ade2e2d1582188897b036a4596d09cf65558f178c49c1a1a62b7d992b1d99fbe86a027dc83b614f178e6061f3dfb695b18a8e2b6bf76779b741d96
+LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8829dad5e34
+LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
+LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
+LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
+llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
+llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index 4b4d477ddc3d2..31e6e27d61d20 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,92 +1,94 @@
-OpenBLAS.v0.3.13+6.aarch64-apple-darwin-libgfortran5.tar.gz/md5/db35f9fcf744a3d86c0a20b8ab39c7c1
-OpenBLAS.v0.3.13+6.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/45ffdac8aa4150e96981cdc3d32242ebf7b7987fed5408d6cd6a9a6518cf7b7f204ca2a4226a0837cae76c2d4d4822d4ae93d2de1164428830c04e2147976341
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran3.tar.gz/md5/aecd6bc6356314108c4ef2bfd768d006
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1f5e861d3e14d64d0574a4dde4909f21cc0c22d1bfe052aae93a7ba3b3a9e564e01a7e1c921cb6175ba251a8aadfb0c4fce345803966dd7452db3d289bf144d3
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran4.tar.gz/md5/7d20f2dd20459cee45adb024adb43efd
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/b06dcbf820f74e3e3cda4cfdf76113e7f518639642f141dac13ca19b05fec09160c3728fb4f7b001a9aa63300f6e289cd2126da1179ef4efce18e814d8c32bbf
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran5.tar.gz/md5/5f0a683b55fc2f5a4dcd134a0d03c175
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/89c25976dd89e2e1c856790aaa4d0951f912fd7ded92223952316e40b08e4b9d9218b25a35cf9ab19021b356ccbb72c8bab2237bc62b8dac37317abe31edff6d
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran3.tar.gz/md5/5a7815b5981d30b89cb48a3e3bbf8f4d
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran3.tar.gz/sha512/358b7d25a069d50434b6621d1831903b88f6e120f10b5978235cc82f795da4d31ca4e6d02eb5eb1fd5587085828e95835e2ad84b2042865c552c5493cc272227
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran4.tar.gz/md5/02062032841900e941cfc66a0ef94dae
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran4.tar.gz/sha512/86f3072c3b8e36f3b33d90da755bf9d2a95ba0317852eaf1c74deb8a0f62a2b5c19a3b1d551c054536277da50865ef341c5e05fbab195edc4cd1fb160b4203b8
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran5.tar.gz/md5/ec50a9a3e5078726d6e3dd011b7a4713
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran5.tar.gz/sha512/548d4b893648de6c1a3d6b24f4531c4b190afc338887d1b8eb9040a9aae72cf846127e9c545841568a2f358f090451e13cf12a191456d4c27431661ca41f6e10
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/15cb058e906a1f042d51e8dcc44dac90
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/d74ba1d83199259b07787424832318e31013384d4f7217f6d7adb47dcbfe0836147997c044c8ca7a27b5a5eea435948a42d7f81a38014b7f7b3f4fb049e3578b
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/14b991b59eb27538331fae0544130d8a
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/d2c194692265325f9e5b6c09c23d7dcb45f1f2ce88edf2fbe6f9b021bfedf6b0c7c4b94a7ff5aa7095b7a131870759cd81ec80369e315660f5cbb0ac1c133e76
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/161a6506630eb035bc6afae69aea91dd
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/f64ce6bbaac4e16d5956b6298204628648b35e76f14a528aa8df815b0021053e4e1963438edc7e5f66fd82ea1e1d7bc38b14c52ad0ea7b90eeb1ee59d0927fd8
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b4d102165aff04f4a3ff583c754ec90c
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/f488da922deaac3fa42f5637003c9dbfd943aa267104e6fce46b77fd9f10dfc580191bd5aa4c97bf5b41ad6a92fd669daca8b11479a3a7e28f41047826f0e6bd
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/9411f83736cbcef0b840914ace71d869
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/3b0c9077255fa639d8798193fb1c5fd8ad7824f58889d0c99b388b3ddc7622122387acc49fc29f7c5b5a62ff7dd2335a47b6e60c14d613ba37e11b79faddf7d2
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/1222f66dbd5eb8dc910efe04d37fb763
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/a747df8a04d50ef4a4b90bb66e682cd7414b6d2f0cd9577e25b18c80d36b599e9506e8fcf24729a8bc0f5ef464c57d86a87e1e74140597466dbd862eeb9a0b18
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/35fd828c77d3e1817bebef49aa045f02
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/fd4ce90ea21f64abde4497d3d6518c341383eae4c8f5052951b5c1469c87f1464cc1c57f7047bd4881b55d70d6453ef558e6d6e1986fe463a98a0567bbb876a5
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/deaa63f74369dbf358946c6796e8bd6b
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/7b16a7f5b5710de0b38122af6ed9e4a6b3ede4cd9c18c79314fbde366ca92c2dae17d1ab9e43213b5a6f80470455afbb06d54ff326e0404d60f5454164f2c62a
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/450506080f49538628dc2407461b894d
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/95dc7f14c1b1f450de59a3f95673dc510bcd0e38b6d82a8657d4dbdd97158d2095002a61ecb4a4c514e530c0a9879afd232f24a71561e8516683c564406a0a55
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/cadda67c770ea3835170c63cf5c1a93f
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/ccd326df1d3ce8e138fc22db37880a0f15b3b5740b75f4d6e54c6496735dea48d1011c31d0fbf6fcaf7f4ccc565cb2aa59bac473b9b12251da1adaa992998373
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/e89c9935ed19d9b6bedd1b70cbe1ea27
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/d537e954d424240315280fe632bfa83088825dd770042a750448e1553b2887a8c3d4edf193c89d2bccb7b0c3eae560937156eb989373accca1dbecee47e32cc4
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/7072bd88910ce5402e18527f178dcd56
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/0e4e038f317faa7a14cc29267202ad781a2551ef444b27f841ad2a39f5fb5032d20d50749d1b5a925e6552247aca40d84a1464c268022d8b9560c6e6fcf9a9bd
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran3.tar.gz/md5/261636c2b2b3a734e0d054b67fc0e617
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran3.tar.gz/sha512/0777c0cccb6f688024756e12f8a09ca107cf6f2408d04fb1efeae67299eb8de834de158b9ada232e3e50d4bb0481810181c54f6b63238ba8d4f1a779bf30ceab
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran4.tar.gz/md5/af9998d911a0919bbc611279f6957d8f
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran4.tar.gz/sha512/639d0d837dd62f4eff32071e2ef5d95d3d1a80995dc9da0a97e0a2f8bedf4637e3082acec309744d0d36dca8e82b3f7bf792ffb9ba47c18d8b9a44aa0f368adf
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran5.tar.gz/md5/7bee1a7c3470c32c10e3776289ce730f
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran5.tar.gz/sha512/ff76d5fc5ff2432dfcd9a36cfb95943fecab3e75153c12260b729a89c6bc2269f7f0ad256f6334d58445de27d32f6073e830cee4a59e9196a0b7395c3a3b7ab0
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran3.tar.gz/md5/362e299c65ed4011563caf8555f55738
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran3.tar.gz/sha512/45eeae6bc817e8d78c0daa69ca2add3c32d714766e1e1341d14c445a1beb5a5a7ae93e88649c9a62f07c5463b6ee300b60acc06d9d29974cc6725d08d9df66d9
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran4.tar.gz/md5/791075ccd19280d58209f48b487ec42b
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran4.tar.gz/sha512/44b9bf0b5d31048fe05f78a71fe9ddee799bd70f7586061fdd9a1390a894701eb96678ad9c332a21f2c2b079896924bee14d64ea89f6314babae1faac289d6eb
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran5.tar.gz/md5/712e9c7ef4640dbc150371ef3a10e249
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran5.tar.gz/sha512/3407fab09ae6e2b12c2b586915557d121bfa345a4bf66597bec2d5850ce33ad70dddb45ad08a975097e2a428e65abffdbd9747f1b46fa944bc52218798fd2e34
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran3.tar.gz/md5/93d7254e1e03f4ef1acb6b4e8d63c813
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran3.tar.gz/sha512/198d4d0455f981345f20ff4a196cca056fbd7c5fd4d6a2b11e0ec6ba695c362d309947b9fcc13a6c51a44cc3ea73e559c0246a98b26fd6baa6cf07a055f5c972
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran4.tar.gz/md5/728d9f80b9e6b5ecce0ffab86b7e1c52
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran4.tar.gz/sha512/1b8fc2e3e14fb172ec7d99d5beef54bcabdc807318f1b0415f1bdf7bb97a1e49c20168a9bfc0e89f4f9367dfbd1011e3cffe74b515da53fce00f06896387ca72
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran5.tar.gz/md5/b989478ab0496a27daf87f8ebb396316
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran5.tar.gz/sha512/c56ae711ecc9c6fe9e65e7610011f7189ecda4c0e94cfdd6bb150a32eac6f3d2343c671005f4008873e2f026fa312ce0257716a47fb4e91f82a6d29013dfc303
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/194ec8e4078fc6624acfefb29a9a1177
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/ecdd5b17232ae08e76f6822ec52cc96e4b5cde0748baf799799aa7946966b61f83c5b1d8a70e4f14b4e074e13e0cc72f2261f2a304ab8d8be15e68a004210be1
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/f08aad57a0d92ba7811b40deb7c40e5a
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/f5759dfce2854f929a73e11253edd37e100b9437829eca893f97a2c08a7ebc7af4815f588466cc8230985932f47b150e671d3a822e8463c1461bc3ce698f222d
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/bf291c76d9c9642e6964141eb541e4e0
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/0792f5d3c4c7f1ff5f43bcf6aafc8547c742e969ef4fc056f098649f7d99470538827349e5f39f0ce81ac15ec992f11d93a78f1ea9673a67ec076787b6d7b9c5
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran3.tar.gz/md5/9a1979528b2b54df3012e2182b834bbd
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/1752e0ee45107eec916a42370e19b6091b41423eb0f9443f23f78c3e8dd8db5fa0b8b72f5edf2d26e759e0f44056034dde1bce38b9c12f58d6c931ec873bd67c
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran4.tar.gz/md5/1b30b010ee8ecf949d83d98be7cd59a0
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/bab954ecbc2e9ece41807409bfef66063dc98cc7fbdbb0bbce24a331d5b121b0c63432a13cea935c5c27090f790e9fba599e1c129e0005656952805260732da6
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran5.tar.gz/md5/da031443b1bd5ed8abb8e956a05c616c
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/0009d10265ff16603c8552663b3c71ab619905b18fe87119a3203fe24d531148b8b18f727260fc125362c58a6226d1dca98a6517e9b7a93418a2cdbb2c66806e
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran3.tar.gz/md5/133b638a2efa22381cd70abe871e6ebe
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/98067cbaf1f5cf4a6ba01cf09ec9de044c04007f3a1953e51a75439cfb7215caa5b1a7f1b848b216926231a9511c45e78ba78abd39da06c6fbec4ce9542890f2
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran4.tar.gz/md5/3590e16f503a615a8c8886af39d3fd14
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/b7f3fd487e44a4f6cbbf035bc9fb433aa761f05bc1cf0c5351e6f9a9e5b80450ffbd11f86f904477c89aadbe24e22780ce108e228585e701d92141a735b454fd
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran5.tar.gz/md5/05472a418ff1d7f6bedb58894d6f5356
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/fc8a84b28db834b93a0c9a9c96ba22dfc6018cba90c0d43f4e1db7fcbda73c0aec04d7347db02b94df5375e785d447b3aeb993bf0ded69e5d43c2486c13b2aa5
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran3.tar.gz/md5/22200029744717079b3b8663d683273a
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran3.tar.gz/sha512/664bc2e95f10ac5668d51a2ffae488ad002f00995e5e7b620dd894e816bcaeeb7ccffb45f448365484f97f7aa5ac7b237ca1767e2a9421fd5c5fa39098c9fcb4
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran4.tar.gz/md5/b9fb6101fa172dd0f1a00c07673b308e
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran4.tar.gz/sha512/cf49792da8bc3e3a971b0361f2bdd835db46764c308d4ad0e20215c8bba5d6bd9b96e9e8fe2cdfb835bba4f21e62287f7b67245ff1d00a9ef3f9e44201b53412
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran5.tar.gz/md5/17c0ab204c65b252988bf873226f003d
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran5.tar.gz/sha512/02f493a6cb20c51c38203928a5a9e4890fc9285ce1907a552b61bd96bc64bc50a1932236d7617e83edc5ae1c40da84cc1d8db80c190605676869a8d1a57c4d7e
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/24e787f88452b2f304c269061ad07b0a
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/d45272120a6e15431b9a08afe5648afa903b588e2d65541f80ce123117dfc0e6d3b620ce4063211a420f1cfd398e969be69eb6a6302211fc368c4af3c9d6d3ef
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/9aa8cd86c2de41ed2ed47bccc315f19f
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/1c42e55fef774a34d3b0e0b0f899418a501cc9d56c4d38cfa0b4823a7622c7eb594f4ab222bd6994ba1c1eb7b69a37b10ec78b206a24d54276b03f69133b7b40
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/e09d926e41b3a52188cac7efe9d9aeed
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/eddc11f4b5535e629af6fe2705f24b142e457fd7721d6f9892e1c951d2722e996f32a59d05df803bc7a77c15ae011cc5f36a88709a7ebc9e6be00cd52789083b
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/5f09322a961185e965f8914b87fb769c
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/531860456a4604d7743b52632ca1562448e3b34015e0a7082935a12fe7537c3824fd6eca29813b8b28043c85db4c748ca2e42dfb443149e225b2ae1ebf641ece
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/68bf07ec07fab8eb000742f5b34a297a
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/5cf754e09737a9ccf67998a0dd64a6eb836784489b337bd9cd3379773ccc0d8261f6eb91ae6811dc45f3dd13480c6e0abc603f13add94bc5505ed4aa41e82951
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d30d1b10c1a98ecbed686a1d133f4abc
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/6a61cd1eb2b20f33bb6370d760cf98c8c3af2f323b3c83c866ab8d2e3010771da7345fccbbb94880ca0c0956b711d3127566f040bbb79166e281b9ea6d14b2c7
+OpenBLAS.v0.3.17+2.aarch64-apple-darwin-libgfortran5.tar.gz/md5/9020e93ed6349bab95c2ca7cf21b2ebf
+OpenBLAS.v0.3.17+2.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/3058c47b1fecc9d9d63dee30d277fbe665b3641850e72349415c18dc8372971c3f1c36c9cf62ceec672604e70f5b5a0c118e484f63aaf1aba37075324537908b
+OpenBLAS.v0.3.17+2.aarch64-linux-gnu-libgfortran3.tar.gz/md5/02f560828fab7c2df6ce7d81927045ed
+OpenBLAS.v0.3.17+2.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/54f9acd7842ad8305073dde0e0e689a35e79cdee8f843560091fa3277957b9ca298d1516d027c6f0870d48743a70285714fec4f09e0eb43bd6954e8d6bea3843
+OpenBLAS.v0.3.17+2.aarch64-linux-gnu-libgfortran4.tar.gz/md5/24f4d8eea07a992735fc4433d24cdd74
+OpenBLAS.v0.3.17+2.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8e1fb731cb9e3e0a9214c01538b2974eb6ed1a69857327e29dd166719491015d9a0695a75100ec804a5f9beaec121cc095f1ddf8c7a417f18a046035f1969c06
+OpenBLAS.v0.3.17+2.aarch64-linux-gnu-libgfortran5.tar.gz/md5/de3d9d1bd4b8d148084499f97ef9eff3
+OpenBLAS.v0.3.17+2.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/d7d31bc345389c5240a5dc7341741264ea328adc3604c8fea3e21914c13c3a1720270427465daccdfce080d2df6723384d2d9e9907db2a24c8fde32e492ccae4
+OpenBLAS.v0.3.17+2.aarch64-linux-musl-libgfortran3.tar.gz/md5/665a8dd827b32769fd307f65f18ce09f
+OpenBLAS.v0.3.17+2.aarch64-linux-musl-libgfortran3.tar.gz/sha512/070d015f72d0030838985e949f1855e40997fcf31e1c51a1cc5666d681cb47fb02a289435cebd8ef15346bcb85140b0e164874dcf9e269e8799253fb538ea3f7
+OpenBLAS.v0.3.17+2.aarch64-linux-musl-libgfortran4.tar.gz/md5/fe47ac70b33442c9c7d882ea87e86901
+OpenBLAS.v0.3.17+2.aarch64-linux-musl-libgfortran4.tar.gz/sha512/d97588cb9511225e160fd6fc828a13e8f99ca6e16ecdbf57bc8e7a95296c004ca11316854f90421cf0ac7935a7ec09045324af2de6084b11c62dcdc3e96d1249
+OpenBLAS.v0.3.17+2.aarch64-linux-musl-libgfortran5.tar.gz/md5/fd550b91aec55ed97c86c876f2339edd
+OpenBLAS.v0.3.17+2.aarch64-linux-musl-libgfortran5.tar.gz/sha512/53c258962bff09e8a4642c6bd02949792e36b7681bad45b3d21b711428025262cac3b8171530fe97fcf09b31e1e2029c6e32300ee1facb9c7de497beb8a99edb
+OpenBLAS.v0.3.17+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/eb8996220a8d2ab0ff3fccf791c19d2d
+OpenBLAS.v0.3.17+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/f2a91bb7523ed50607936774c6d31bba81584046e0bfffb2cccb84ac3319fd1700003991edf54d1c0af4b0558637275309d826fac76a908e46f5f58f006baba9
+OpenBLAS.v0.3.17+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/02b7b39750d7f4dd4b37c0260dd5ecea
+OpenBLAS.v0.3.17+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/1017388c9141381e37625ade63ad58ee16c0da6ec775e0c8f20e13912e155e9e868024595accc388708c22341e36b5b9cd8f9343c904ea8e7d30ec1bf6c05310
+OpenBLAS.v0.3.17+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/56cc6e5f74809a81319ed36ca783bb81
+OpenBLAS.v0.3.17+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/fc416c3842ffd49a1a201138559f4271d92d6840847b8b224046c6a6310f30044c598aee453ac4f5ea52e5aafe1b3ebe1dd55486883d5197f15bc4dfe0262af6
+OpenBLAS.v0.3.17+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/78d82e6b98ce18f3a0ea92f2e18eb1bb
+OpenBLAS.v0.3.17+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/bc7476532fed7efa0726937cc6ae8e4a693929cff2dc49fe28dc16ad4d3b18265b907ec0c14e12822d00a018d49dfa487fc3d7867da5c428ced381ccfdf346c0
+OpenBLAS.v0.3.17+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/e55e149728e4e2c18957f6db4dc38c4f
+OpenBLAS.v0.3.17+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/e0403a40a91b2f7db4b23ba46b221b39996f7e6c8a417a4b0346c728e1e8520651e0a3a9ef6bcc0214251f34a968a42bfc124ddf4ea6b4fa2d1122a1e7540365
+OpenBLAS.v0.3.17+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/71f7071a2702ccb32cb9eb296c921210
+OpenBLAS.v0.3.17+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/29861c10bc8fbdb9163c21e133ac972898ce01eadfc38af089cab680d1d059cbd40ed16304ea3b256844c68070233dfce4197d690080cc9ec12961b8d56b5a94
+OpenBLAS.v0.3.17+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/b6c52ebccedf4d31ad03e4e883c9cb85
+OpenBLAS.v0.3.17+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/f9c04600842629b4ad4dea8afcfa54bc2e06bc4f204714d725e1e87044b155261870ec74bebd05ed21739c6e81e2876226732cf65367e12cb3e52c4fac1db332
+OpenBLAS.v0.3.17+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/3c154804cea0f5b83a5bb278d8a2bac0
+OpenBLAS.v0.3.17+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/5ccf2cab5a473619cfca7f381aa4c5da1f2057d10235224aad76d40c9349880d4e0f84dfe173f1f47653c82ff523fffd01bb6360179d2b1e4687029f64fc2d81
+OpenBLAS.v0.3.17+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/f5cecf92901773f2aebb13cf23e9603b
+OpenBLAS.v0.3.17+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/855763d0071009c4d799942e86808c90e06c00a78db4350f8b798a414fad333e5b3fca7397cfcdfc06c5718497d1f19a4c19bc79f8d23685d064947585e98a4f
+OpenBLAS.v0.3.17+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/16376d821f9b6b16d7b0ee1890ae79af
+OpenBLAS.v0.3.17+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/68319193bfc960d7879cf2370fe17415d15086587958dfc85bb781c26332399b75cf3928ac0e6d727f6d54ecb41425f1bd724eba4bdba2648c73cc860ff7eba6
+OpenBLAS.v0.3.17+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/7d8099352db1e40a02bf80172979b2f3
+OpenBLAS.v0.3.17+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/5e73b0b13fe6db964332d663475590d750c3a21c85dd9d2bf181acc7834d22ae94eca7cd69f0dfe58fc4b195dfcdb28bdf526d3603e5706350153a71223f377e
+OpenBLAS.v0.3.17+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/efd2b34c3931fe3354ab49f8d6fb330c
+OpenBLAS.v0.3.17+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/ce5f743e261e2801beb075b48d87ff756c8fe157042beb2ffc3d7b506cdf182da11d07bd24dd543103d549f20b83212a0d390eb36c3d9ad715d9ca2cabdeca50
+OpenBLAS.v0.3.17+2.i686-linux-gnu-libgfortran3.tar.gz/md5/f52216036e4f1be71257bc876c67d95b
+OpenBLAS.v0.3.17+2.i686-linux-gnu-libgfortran3.tar.gz/sha512/f83db9977940844b220a1ba0e2c2f3c63dfd355301e5d14b01ad85599fb931f5b797bc2ace5563ee5df47a243cac1800514cbe4884ca2a33db78cb1f9937185d
+OpenBLAS.v0.3.17+2.i686-linux-gnu-libgfortran4.tar.gz/md5/381088794504a68c826d62cc27d14b9c
+OpenBLAS.v0.3.17+2.i686-linux-gnu-libgfortran4.tar.gz/sha512/60b8fa109d32764ad9306e386aabb1ee6809aa03e04253a23a6ea97626d520bafa2ae09ea2f6762fa6bc9d88295bf7dd59fd2978e510c3c63925e7a6560947c2
+OpenBLAS.v0.3.17+2.i686-linux-gnu-libgfortran5.tar.gz/md5/f560fcacad77bf87d8d5945c921938e2
+OpenBLAS.v0.3.17+2.i686-linux-gnu-libgfortran5.tar.gz/sha512/9741eea135584ca23b74827ae02c8f2a91dc8a54b83401e0b2e119aca8c48736ba9816fc224a57f853cfe18fd10467b7f9934f3a10a50073af333270622b4796
+OpenBLAS.v0.3.17+2.i686-linux-musl-libgfortran3.tar.gz/md5/2c52064ddbd658e158347b62ffaa1cb2
+OpenBLAS.v0.3.17+2.i686-linux-musl-libgfortran3.tar.gz/sha512/4fba023c3caefe5fdddf27bac7915d075073c6ed0589348c26864686680710b7e84518072c8e94bdf444e25b5063ee6655afefcb1bf72e64ee5e3247e16fb39a
+OpenBLAS.v0.3.17+2.i686-linux-musl-libgfortran4.tar.gz/md5/66da3df20820d2ee0de93e8a512aa5dc
+OpenBLAS.v0.3.17+2.i686-linux-musl-libgfortran4.tar.gz/sha512/dca0075ba332ce1e68543f77b4ef666265d8e0bb443171d8cd53775800a3b8e13a755a9de067bcf4503835949bd1bc123f241a32fb74ec0014ef642151f36f1c
+OpenBLAS.v0.3.17+2.i686-linux-musl-libgfortran5.tar.gz/md5/2df728b678feae582515a048abc6a3d0
+OpenBLAS.v0.3.17+2.i686-linux-musl-libgfortran5.tar.gz/sha512/755480899352f501fd2bc98adf5cd38a0869b7afbb8d3eb4de173d51ab355f31f03937d6fc2a8f560ca840f3adc04084090a11e495b00b04b465ffb1e0d003e5
+OpenBLAS.v0.3.17+2.i686-w64-mingw32-libgfortran3.tar.gz/md5/52b682596ac8a728bef3baa4e3bcc156
+OpenBLAS.v0.3.17+2.i686-w64-mingw32-libgfortran3.tar.gz/sha512/a6b59fef2d03da5a6246bf1832f0dfa654ab99d0275f69f280bdc54d9a8ab19d2ecce4f53d0f2406114ebdac43b09131c7c3982311f627810cd1de3001bd06b9
+OpenBLAS.v0.3.17+2.i686-w64-mingw32-libgfortran4.tar.gz/md5/0b63ad0bbada8158a000b2f1f64579df
+OpenBLAS.v0.3.17+2.i686-w64-mingw32-libgfortran4.tar.gz/sha512/ace0c217299296662ed2e2a479096f26e0bf3a14166429b089ca856214c3d46442ad1b71ae94e2b14fe654fc5acdd940e3ad3970f956e75377601fd99f82b270
+OpenBLAS.v0.3.17+2.i686-w64-mingw32-libgfortran5.tar.gz/md5/a03556c3a4ee2d02f956aa011e5a53ad
+OpenBLAS.v0.3.17+2.i686-w64-mingw32-libgfortran5.tar.gz/sha512/dde7ea92fdd47ec05edbeeb71fd3d75cb8b5ba5893e18419e47fd1f06032177a9453fc5920c6bd08aec4e2381c5f2c606ce9df7cbbecdda67d2e67aec8be3265
+OpenBLAS.v0.3.17+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/8c8b0dbb3e0c81d9430460c421dd76ab
+OpenBLAS.v0.3.17+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/8639a186f74c9bf4bf5f9e2f69becf700a3ebec4e119519bdbad53fef559fd525e5f532bf7ea5a63bd29059d9c0564eec89a1cf7802cc7f6a3aeb4be9af3cbec
+OpenBLAS.v0.3.17+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/e67d9c5a54b6a5dda63e0fe5ef5b24ad
+OpenBLAS.v0.3.17+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/960cd0bf59fed7c70115358a673cc049cb539aa1b015cb473697309327e3b9afb9447b62239d58d8c56a9e8b1955b2b097b31c14b0013cafe77fbb4b967679be
+OpenBLAS.v0.3.17+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/028c1ed0a8b84c83ec64b2970b1739fc
+OpenBLAS.v0.3.17+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/2427b8f4de817ffbbd697f8b7caf710c3a3d9c02045a9650e8fde26c891c7cdc70482bda14f067b0cfa29d436a53f4484a00da8caba6188cba9fe25e7b57dc4c
+OpenBLAS.v0.3.17+2.x86_64-apple-darwin-libgfortran3.tar.gz/md5/0277b078caf9b0f0a33bf1da351fcac0
+OpenBLAS.v0.3.17+2.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/52c11d822859209f989462aa38cb8c3a7886cd881da40699a06998498d59bfe40276196218c122b8c0c314384a27e7e4b1b6181c818ad1e543cd2af896be521c
+OpenBLAS.v0.3.17+2.x86_64-apple-darwin-libgfortran4.tar.gz/md5/d43dd98167a2c99bd4bbd3f52271595b
+OpenBLAS.v0.3.17+2.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/5eef221ed4e30090feec8dfa32a732a1987c692dbd2cf943aafb733ad4e5bd669ec55919ca5c89562e2500b4b1fbaffd6b1bbc8de3f71c9dc0037104412bb234
+OpenBLAS.v0.3.17+2.x86_64-apple-darwin-libgfortran5.tar.gz/md5/e93a6128adb949c43ea946ceca159d38
+OpenBLAS.v0.3.17+2.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/3fa4829b0c18085e935b1c3b7b5062a06ab4ebff60948ae6196ada22476798ee68b4e7b637cf3e5df9dc4dc8a5dbf7c924960b89d58de5c45dc8c8ca4834532a
+OpenBLAS.v0.3.17+2.x86_64-linux-gnu-libgfortran3.tar.gz/md5/eddb496fe2c7915d61a4ead82c2622ff
+OpenBLAS.v0.3.17+2.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/071d471c973bab1986fe32cd76f4f93eba49fbdf0f72561b90d09b846ce8990e20f328ef1ddfa5e0aa1483f4d95ede80d66fde197bdfec47ea9642a2f16b85d0
+OpenBLAS.v0.3.17+2.x86_64-linux-gnu-libgfortran4.tar.gz/md5/91050bb45fc71c6532d9b3a204903cab
+OpenBLAS.v0.3.17+2.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/b02a226dab088e289b4bdcbf6f3ad2319ba26fa880ade277383b482c1e65bc056b834056d7eec0c75b425615d4167bfca581252eb31b87bd2b53d597fb8a47f0
+OpenBLAS.v0.3.17+2.x86_64-linux-gnu-libgfortran5.tar.gz/md5/87a0516c856af6128e2ecd2631c19d34
+OpenBLAS.v0.3.17+2.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/73012b9e99c57fc812e0f64fda6233ce204f2cdfc255ebbea221f614fd1d7ccdf5b2e1f017f55864a5dae8febbd1ed2fafb1fb3a79a53b8c1f1c7d6455ab7fed
+OpenBLAS.v0.3.17+2.x86_64-linux-musl-libgfortran3.tar.gz/md5/6446a0328a83c504740b81e0a93087c5
+OpenBLAS.v0.3.17+2.x86_64-linux-musl-libgfortran3.tar.gz/sha512/8f77e02f32e69bf24205f10a3524d96d8bf79050d73f51a522db4228744ad9745a02c1bae1fdd3236a195481b93bec06e92a266fcdc36ea1bcedde33362c51d5
+OpenBLAS.v0.3.17+2.x86_64-linux-musl-libgfortran4.tar.gz/md5/6de9e28283dc703e8597cfe81cb036be
+OpenBLAS.v0.3.17+2.x86_64-linux-musl-libgfortran4.tar.gz/sha512/9d99cc42bf17ef982c4884774a43beeb2a160db950a31a5b1970dcdac38ffad316bc21830878aae818cfb4235fe486d757c5d67816ffd556b161acbe66c686fd
+OpenBLAS.v0.3.17+2.x86_64-linux-musl-libgfortran5.tar.gz/md5/f1ebb2a6447a2a44c52dafe94499b2f3
+OpenBLAS.v0.3.17+2.x86_64-linux-musl-libgfortran5.tar.gz/sha512/9d1b57a4fff907e7f730de7090e285c5158bcda0867730c23e32cfde4e1b4e5d9be27d19df26178d35fc6f578290e43e120ddcd76854df3c9155b6144ab85dcc
+OpenBLAS.v0.3.17+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/e12409bcb87b4889aef1ee6055193777
+OpenBLAS.v0.3.17+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/f93f703bc74ab355b7fd09f057d7cc0de0bc3a21193e7515bdc4601612ae8d2cfdb4afa61c9450db28058c0cf311e93a2c12a0f921633003df7fca0f4a2e47c4
+OpenBLAS.v0.3.17+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/80e9374a5c694c62085099d16e12b0c5
+OpenBLAS.v0.3.17+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/cb235f5415fbf7b96c5013e9931b5790e15262f2bb65512064af31e1ec31af86f9a64f4b9874ec97c861ed001ebd0602bff860dda0703bf174db80332e77dd02
+OpenBLAS.v0.3.17+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/42a455ed7d2f102617f7344684c6b532
+OpenBLAS.v0.3.17+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/8e254f1eca11673c859255f257f2015a1fa285554c0697f4602e64770dfa6f7738149d4aadb5f6451cfa2a21c963f61233535ca98af9f0e1b71137eedef99c22
+OpenBLAS.v0.3.17+2.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/d648f4a82c849bb7d6d6a5290868403c
+OpenBLAS.v0.3.17+2.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/a80c9d4af3f4bff2803a1adf1439e1894197a4a86660e5c4bb25741be590e81785711022928910267c862c4368e5aea2f645bb159e23c403135019c6be31780b
+OpenBLAS.v0.3.17+2.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/3e1be20b44219134e47e816682b0b8eb
+OpenBLAS.v0.3.17+2.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/03c64778515e007574c9d14b2dc3dc53dddbb01f6af4872858f5006da446be2ed91b0e07d119651d40d8018968cdf2d3fcc8eebd4834d07b25c2201bb6c3183a
+OpenBLAS.v0.3.17+2.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/bc04ffe4100d89fc5eced47d1ac894c4
+OpenBLAS.v0.3.17+2.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/ab8aea7d065a560305821d199d216e3dfe556e3ec1ebfc98507914fab355e2a0231f628fc7fe4c48dffd80d5d4c4a5a90fd540c8ba90236702ef660af635c09e
+openblas-d909f9f3d4fc4ccff36d69f178558df154ba1002.tar.gz/md5/4acd59865ca8b50c823bef1354148930
+openblas-d909f9f3d4fc4ccff36d69f178558df154ba1002.tar.gz/sha512/227ee7decccf9bdd2e5754757f590e32ada95b576db9eddc2c74ef06d35aba1db9438acaf57750184baacac741917f7f5ad9f15991d31314480db371fe59cc17
diff --git a/deps/checksums/patchelf b/deps/checksums/patchelf
index d30c53e73a1ac..a7122c400749a 100644
--- a/deps/checksums/patchelf
+++ b/deps/checksums/patchelf
@@ -1,2 +1,2 @@
-patchelf-0.9.tar.gz/md5/3c265508526760f233620f35d79c79fc
-patchelf-0.9.tar.gz/sha512/715db21156e6bd91cfa626f5201b32a6619e51532f5635ef52396da8193738ba66113485b61cc1e218b16737e66f72cc2e4bb3a7a33e73061ac2ef2c6330a299
+patchelf-0.13.tar.bz2/md5/d387eee9325414be0b1a80c8fbd2745f
+patchelf-0.13.tar.bz2/sha512/43c3f99fe922e2f34d860389165bcc2b0f3f3317e124eb8443017f71b1f223d96a7c815dc81f51b14958b7dc316f75c4ab367ccc287cd99c82abe890b09a478d
diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse
index 25123ba015294..001141075298d 100644
--- a/deps/checksums/suitesparse
+++ b/deps/checksums/suitesparse
@@ -1,5 +1,7 @@
 SuiteSparse-5.10.1.tar.gz/md5/68bb912f3cf3d2b01f30ebafef690302
 SuiteSparse-5.10.1.tar.gz/sha512/8f85c6d63b76cba95707dfa732c51200df7794cb4c2599dbd92100475747b8d02b05089a47096e85c60b89bc852a8e768e0670f24902a82d29494a80ccf2bb5f
+SuiteSparse-e4df734c3e0b54cd2275adbd923b5afaf0f7e3d0.tar.gz/md5/719674d1c50606bbf74da55654b94e37
+SuiteSparse-e4df734c3e0b54cd2275adbd923b5afaf0f7e3d0.tar.gz/sha512/fbc2e04bf2cfed1913d417febe814efe6b0396d43f9cb14fb409bc0eff840679e06bc305defd9f025039e7962650da9068dfa315c7acddaa774f3993108a9852
 SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/md5/b9392f8e71c0c40d37489e7b2071c5ad
 SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/sha512/109d67cb009e3b2931b94d63cbdaaee29d60dc190b731ebe3737181cd48d913b8a1333043c67be8179c73e4d3ae32ed1361ab4e34312c0f42e4b29f8a7afda3e
 SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/md5/1b2651ede4a74cd57f65505a65093314
diff --git a/deps/checksums/unwind b/deps/checksums/unwind
index dc14fbffdd765..e3a049eb685f4 100644
--- a/deps/checksums/unwind
+++ b/deps/checksums/unwind
@@ -1,56 +1,26 @@
-LibUnwind.v1.3.2+4.aarch64-linux-gnu.tar.gz/md5/8f5cbf9820033211513f6d33e36194f1
-LibUnwind.v1.3.2+4.aarch64-linux-gnu.tar.gz/sha512/589886c4f141064126aecc1bf63365c610a4c3dd70e386aa8e17ce562505cac873542fa92cea023850e9bf54fcef3cf475d52f035d17d830a81c01d06d0454e4
-LibUnwind.v1.3.2+4.aarch64-linux-musl.tar.gz/md5/836a2d8ea7a11d87a74aee09f82582b5
-LibUnwind.v1.3.2+4.aarch64-linux-musl.tar.gz/sha512/4cd3805ae59854fdceee441967ba4b812246cf1a1e9ed20367f5bbbad9a47f0093731b4f78f881c696e52c101dec83498398c7b798c81c1a441232cd4ee96b58
-LibUnwind.v1.3.2+4.armv6l-linux-gnueabihf.tar.gz/md5/0047d02c4b4888050b363c77106d4ea1
-LibUnwind.v1.3.2+4.armv6l-linux-gnueabihf.tar.gz/sha512/8b02fb5189ca749e421fc17d560601e8624cbcc19a4c5c45e38828323b33db30ced8a92e08ebd429c663e52358c486d3e284e7e04898229cff2839cc01c067d5
-LibUnwind.v1.3.2+4.armv6l-linux-musleabihf.tar.gz/md5/1fe78c6f0ff7120b35c6745b16c6f838
-LibUnwind.v1.3.2+4.armv6l-linux-musleabihf.tar.gz/sha512/9576f913fbc40d00b42573f600c038fea85eb3c9b88a4878cff0e041c4096d9d005b856dbcd0d057dc40a3cdb74deeca6e9c1cc5c213e6e062328f75633ba8e3
-LibUnwind.v1.3.2+4.armv7l-linux-gnueabihf.tar.gz/md5/510db51b0364cf17207eb00e44d58974
-LibUnwind.v1.3.2+4.armv7l-linux-gnueabihf.tar.gz/sha512/76f119654a65b460917f41a321008c5a0593523db53fa12ac9aa82732368ebdee05d6366fdfdcdd300ba0fe4c7239aac25d80fb3b1ad0235f79b235dab68c796
-LibUnwind.v1.3.2+4.armv7l-linux-musleabihf.tar.gz/md5/4bb58bdc423312c74eafe52a781dd712
-LibUnwind.v1.3.2+4.armv7l-linux-musleabihf.tar.gz/sha512/02b69ec40dfcacc447169786bab3aac39c6db6b07874e9657c49a2907654be79efe16863abf09ee1e2a647cd6a651155b65bdbbd6d810a3ceaa332fc0a3ace4b
-LibUnwind.v1.3.2+4.i686-linux-gnu.tar.gz/md5/76f549ae171aad91570d7874e73f44f6
-LibUnwind.v1.3.2+4.i686-linux-gnu.tar.gz/sha512/a5a654dd6233099e841d1b9c54b16cb99d736549d063e28d17d5f2014c3090d829a4a8dc4fee042d0f4a9d8a155fb30c5840cb84b9fd71758256fa072137baad
-LibUnwind.v1.3.2+4.i686-linux-musl.tar.gz/md5/f8b58f061a03f24111f39f2f8cf72c61
-LibUnwind.v1.3.2+4.i686-linux-musl.tar.gz/sha512/cc6dedc551ee4d5e131cdd7ea7dd4a9cc64efe930d16cddb0c21dca7b13076b6810e00e406acb949404c80b506ca9e09d1e223069d8159e9f73fa8aa022e3f41
-LibUnwind.v1.3.2+4.powerpc64le-linux-gnu.tar.gz/md5/2fd4fda3c82c99ff102b630d078723f5
-LibUnwind.v1.3.2+4.powerpc64le-linux-gnu.tar.gz/sha512/b1c7f16d2877e08cfc9d1aa63c5c9acf30049bd11bdad90c6b1425a09f86762c76f0c1a27817ea1b939244f6e24320854552bc860c95f297a772403eeddc053d
-LibUnwind.v1.3.2+4.x86_64-linux-gnu.tar.gz/md5/cd98359922fddcbbcfda56fbc011bea4
-LibUnwind.v1.3.2+4.x86_64-linux-gnu.tar.gz/sha512/7b2d78869be148db23ab8372bb6699abcf26cc58718871f050b8e67054c0c6c909f9a8c59d27c748abeef0ecb5eabc09484043c3b2232469d03c78a42a590e13
-LibUnwind.v1.3.2+4.x86_64-linux-musl.tar.gz/md5/bd8ea5006d6078a1d91743f599f37732
-LibUnwind.v1.3.2+4.x86_64-linux-musl.tar.gz/sha512/1c7feea46d70c60dbecfe6b945a29a086dc120e0d674ea9d488dc7943901711ba0505288694c94a2b0804bab6cd826b32e58912e407ed918724d16b6b6ec1d3d
-LibUnwind.v1.3.2+4.x86_64-unknown-freebsd.tar.gz/md5/e72c36f0563a088282147275de90048b
-LibUnwind.v1.3.2+4.x86_64-unknown-freebsd.tar.gz/sha512/3aaa7e5c21b3bcc30ff7826af4bc0b926865cac3a5b14dfa7f27f0c5d4344fa2a568a78c0c4ee32a18e668758cdac70c09f31f5ca55cc56c3d6a88654aa906fa
-LLVMLibUnwind.v11.0.1+1.aarch64-apple-darwin.tar.gz/md5/aceea9c7eca53a8da86c6d0b713a8c99
-LLVMLibUnwind.v11.0.1+1.aarch64-apple-darwin.tar.gz/sha512/621b6c23b852332039bcd856ff330cc6109f5f18e646a7863900dd5ae9115a1f8a2f5da3fd50de2589da1af5e2326634259dec505972e2033f0772b7c38c5944
-LLVMLibUnwind.v11.0.1+1.aarch64-linux-gnu.tar.gz/md5/53999245ae1b82eb15baa9aaad078365
-LLVMLibUnwind.v11.0.1+1.aarch64-linux-gnu.tar.gz/sha512/a6a49da09b476eb87fab7e472a45d8417a701693ea928aa1c753722e430eb17a4b4ce3b8711de0238705eac3436391b806b8eff9b0ee922db095ed4a87ded67e
-LLVMLibUnwind.v11.0.1+1.aarch64-linux-musl.tar.gz/md5/d900059ad3eb62827b1efc19343fe288
-LLVMLibUnwind.v11.0.1+1.aarch64-linux-musl.tar.gz/sha512/14cc60ff4ddfe4ed8568457069614d463d9d8ecd89633c05986da97374a6c1317c363c80654fe61731b574638e1ee62d38c399c850ffa4c0245d5046cc091b75
-LLVMLibUnwind.v11.0.1+1.armv6l-linux-gnueabihf.tar.gz/md5/5c1f9945de5d3537aff8623551eeb92e
-LLVMLibUnwind.v11.0.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/a9d1b6973f2cd681572d86ef5c0d26fce3c7a8eb3a40e3c0344c3356da71d0b9feb05ae33cfc9d28c722054925eb8b7918eaf9edb47536b55497815002852cb1
-LLVMLibUnwind.v11.0.1+1.armv6l-linux-musleabihf.tar.gz/md5/77292837c1947f211a161e26ffddb71d
-LLVMLibUnwind.v11.0.1+1.armv6l-linux-musleabihf.tar.gz/sha512/522a2942796c165e9c060dca9507ec86adb3110b524b856c5e5f364a0540b9f3842fb5d9c8ffdb1af06bdd6d6b1d4de6ae45d9fa274b1e45af0ce7e84785f9ee
-LLVMLibUnwind.v11.0.1+1.armv7l-linux-gnueabihf.tar.gz/md5/4e40751ed7a622970c81c29a28280341
-LLVMLibUnwind.v11.0.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/11c33018723fe2f96e61a7e6126ccef09cfc8504d93e01c8ab4b74acd71230bec2d5be4a3bd65a582cdd582ef81a00095668ab0068c4084ceed6ee8653472090
-LLVMLibUnwind.v11.0.1+1.armv7l-linux-musleabihf.tar.gz/md5/c5d5b263a9291433c222d5812d8ee232
-LLVMLibUnwind.v11.0.1+1.armv7l-linux-musleabihf.tar.gz/sha512/b9e3abea035aafd4cebc00d63d30a15aedd702125d1b1e8988f3563d5adf495fcce86200fad95ab1db0721a533dc0af7c61fd678f17203d629525bc283f74632
-LLVMLibUnwind.v11.0.1+1.i686-linux-gnu.tar.gz/md5/bd3186039f77f656838c73cf6ebcb0ff
-LLVMLibUnwind.v11.0.1+1.i686-linux-gnu.tar.gz/sha512/01b4a99d25a69b367b540b363bcc71fa10d6cd34e53ceeeba7f81f372873bb1cf3bde66fa280bbb60b65b679639479f46acd76823d79418c641d3354cc1e0c01
-LLVMLibUnwind.v11.0.1+1.i686-linux-musl.tar.gz/md5/58bec4c7eef0b79d3878b465f3f52d6d
-LLVMLibUnwind.v11.0.1+1.i686-linux-musl.tar.gz/sha512/b5467a4f25abbc6097d83f414ace31ac58429ce4ee5d8ca7cc0a77612a237e3e8050578b2c90bc8f30e7c346c3eb64a939c854614e1d51dad0133d52905dbce8
-LLVMLibUnwind.v11.0.1+1.i686-w64-mingw32.tar.gz/md5/40ead04fdf8afc1cd96fe6da98bb7095
-LLVMLibUnwind.v11.0.1+1.i686-w64-mingw32.tar.gz/sha512/269fe9159714819dae63afa1fca28fea885eb4d855715c5c38e7ab63f9ff87b7a5c717228b73c8957404a025a8439e711edb15e1df5c70180dc642fbdeab8264
-LLVMLibUnwind.v11.0.1+1.powerpc64le-linux-gnu.tar.gz/md5/cf1fcc8d88040c7c55ff198201b9b22f
-LLVMLibUnwind.v11.0.1+1.powerpc64le-linux-gnu.tar.gz/sha512/ec417cf4d55f471bea1ed3adfe0ce2686f61f34011c6006289bcad923ba1a53d98271decf43fcbaf6fecb0cffe7c8d771200d6fd4783fc26894044da69c3a04d
-LLVMLibUnwind.v11.0.1+1.x86_64-apple-darwin.tar.gz/md5/0bca573e5476ae37191d8ae3a75721de
-LLVMLibUnwind.v11.0.1+1.x86_64-apple-darwin.tar.gz/sha512/90287c998f0d141703a2388d2ff2a496d4f7b57cb89b658f26ea0447edcc095a6342dd4a02d152638c4466dadf2527ad374e3f5ef905cc7ced7c16b5b1ea4d54
-LLVMLibUnwind.v11.0.1+1.x86_64-linux-gnu.tar.gz/md5/88d17930d6e1b95da3d9a0f07a43fa49
-LLVMLibUnwind.v11.0.1+1.x86_64-linux-gnu.tar.gz/sha512/c6969c1380ca02be47010ef3882cde815e27a4e23bdbefcb94aa1af76889519ff8e43d83d3156df21be5c909ab5ca7a2199ec742f9711f11d49a743c8a5cac10
-LLVMLibUnwind.v11.0.1+1.x86_64-linux-musl.tar.gz/md5/6033356ddd94624e785d3820ab091eab
-LLVMLibUnwind.v11.0.1+1.x86_64-linux-musl.tar.gz/sha512/da7d45efb4b53033031b4656a8877250520b905c4103d0e49079f1c0d80360f258694515bbeb8f687bbe0cf931e357855dea0602b7ef5b50b25f7ccd4ea92829
-LLVMLibUnwind.v11.0.1+1.x86_64-unknown-freebsd.tar.gz/md5/bed33a2f926ae888a97f87e68919eb95
-LLVMLibUnwind.v11.0.1+1.x86_64-unknown-freebsd.tar.gz/sha512/a043e01e2e18c4c7b907cc37a7af2231ca3895edaf69ba0df62f5d615e594d2d75193c03d902beb4c39a06cbcb9f394959de527ce7144374b22371f4c47b9b43
-LLVMLibUnwind.v11.0.1+1.x86_64-w64-mingw32.tar.gz/md5/c9f98a3a63137d4a5ad3c81f24d8e3cb
-LLVMLibUnwind.v11.0.1+1.x86_64-w64-mingw32.tar.gz/sha512/4b27d33dc31511e3a7913f360dee4e0ff7f3bf9f6256f4e71473e3b3fafbedf27fbe87d6a96ed4bba57be8c2150f9e42ee2627a48c6d3d87e466c274cd5949cb
+LibUnwind.v1.3.2+6.aarch64-linux-gnu.tar.gz/md5/dcd327c5b3d7b2ba082f2ad7f11939d3
+LibUnwind.v1.3.2+6.aarch64-linux-gnu.tar.gz/sha512/fffedf6df127538dff7cd394b4a780862fded082c7299e5ac36dc682dcd28a35db596c4621e94d9dce8483fb8053c6d0030a25b7a4bbbecc39af3efd3af14ab3
+LibUnwind.v1.3.2+6.aarch64-linux-musl.tar.gz/md5/d1582fc675158d3838f4d36214e51105
+LibUnwind.v1.3.2+6.aarch64-linux-musl.tar.gz/sha512/0636bfc0159b66d6427ae3437be3989930b6d404f94b2ffac92bd9115d87a5509a9bddc723f0c38b7c30ba2098da48c92a74b4648d6361dbebf547ccc4139813
+LibUnwind.v1.3.2+6.armv6l-linux-gnueabihf.tar.gz/md5/3e621439132504c14daae4944fb5eab8
+LibUnwind.v1.3.2+6.armv6l-linux-gnueabihf.tar.gz/sha512/d324018841343873b256df9684b51fdc9108d5b1af73f5275cd2e81d31a4c00917027afac1554b0cdc5bffd5d52a6417612d3f9399bedfb6a49100df594709f4
+LibUnwind.v1.3.2+6.armv6l-linux-musleabihf.tar.gz/md5/4603961011230cf0ef8c24881d7add7a
+LibUnwind.v1.3.2+6.armv6l-linux-musleabihf.tar.gz/sha512/64e87f9074cb51442539a7907c979424682dc5a9a4cf4d538fbdc5f3bcba169c9b4d5aeb445c76060c0b0604d3fb5b0b88dbc7d3d919a4153c403d9c39110115
+LibUnwind.v1.3.2+6.armv7l-linux-gnueabihf.tar.gz/md5/2c8c2ca6b238bfdbd1c4d021df7ac7a6
+LibUnwind.v1.3.2+6.armv7l-linux-gnueabihf.tar.gz/sha512/b665d8d6b5de0b4a23f2577fe12208c4921f7ee6c3643f1434732c3c5203d1892d86f84875e3488cfc85efb250ceb0c66d02f2356e0accb3c24f200c936eeb84
+LibUnwind.v1.3.2+6.armv7l-linux-musleabihf.tar.gz/md5/0f786d4baf3d5f4fc94884ad7ae1b74b
+LibUnwind.v1.3.2+6.armv7l-linux-musleabihf.tar.gz/sha512/1e4c447983d3aee05705b90962f0bbccd58f255b1d7b582069408d8927d21697fdeefea3aeb7ad84d7d087a70417164194d60c66e57bbb3a587845bbf636c06d
+LibUnwind.v1.3.2+6.i686-linux-gnu.tar.gz/md5/d022d2a6a88ccf741c19889055132882
+LibUnwind.v1.3.2+6.i686-linux-gnu.tar.gz/sha512/75615cba1b4e945cd5969fd4debf7edd3913d5ae3819abf8ca724b0f1ece8f03064dcd2fde54d14d052f7e3e15bcb0efa5c7d9b55c4875e7dedee38034038b7f
+LibUnwind.v1.3.2+6.i686-linux-musl.tar.gz/md5/5e57a072a0c683005921db5597f43d64
+LibUnwind.v1.3.2+6.i686-linux-musl.tar.gz/sha512/4bbaac240ef37ed48d1273cb68302e1b856622804f7704917a3db2a213118a9e0bd95ca504555a537aeec7f5baa2057cf93ca62e350a64b428a34770e64602a6
+LibUnwind.v1.3.2+6.powerpc64le-linux-gnu.tar.gz/md5/1ee397961af5567c37b5429d7b7a52cd
+LibUnwind.v1.3.2+6.powerpc64le-linux-gnu.tar.gz/sha512/902017d1c64d00a67a378d0e7aef64493655a88480d27a5f720cac363bbd0aeab2f03f2b77560fae395a5799ae3da1f4122b6e8cda8d80f158c751215a1848bb
+LibUnwind.v1.3.2+6.x86_64-linux-gnu.tar.gz/md5/e45a0c38c35ed4afbdcffe385998e6d3
+LibUnwind.v1.3.2+6.x86_64-linux-gnu.tar.gz/sha512/049ac0d6b74fbc6a96a7abe345b4ec783968a90bd0f3c230558ad9b3a44cbe65cf4553bfc9abdc9128529d746077308570a14f55317ffef5f65836a8413aa938
+LibUnwind.v1.3.2+6.x86_64-linux-musl.tar.gz/md5/77f053b93396484f3e4d37af9a294ad6
+LibUnwind.v1.3.2+6.x86_64-linux-musl.tar.gz/sha512/e87406503348d316940ea28f09b304c9349f3915e1ed193c87b823b7c5c7a1f6046e6b0e5eeba3b4760d5a403def5f87aa42a3f5f4d8c5f540dd4fba3743394b
+LibUnwind.v1.3.2+6.x86_64-unknown-freebsd.tar.gz/md5/33c56decf549b45712642ebc73b622bd
+LibUnwind.v1.3.2+6.x86_64-unknown-freebsd.tar.gz/sha512/ffb3866b2ccd3ddea168b7ce0b345d475914fa4f87a3743b92e3c07ac9453b4ad929ff01596677e00f08d4d30cf67676052cc3c4f985f722a800b82cba8334c3
+libunwind-1.3.2.tar.gz/md5/52a8be39f0d6fd4efb7409973e425fa8
+libunwind-1.3.2.tar.gz/sha512/221864eae6bf0fde281d9551662af1e539ce919fbb7050947e60dbcc09efed4f5d34574dbce11792513e63151e0af72f02801b7bcd37a6a519e6d868abb8b509
diff --git a/deps/csl.mk b/deps/csl.mk
index 423201cbb5a0c..9f95c00f3cfe7 100644
--- a/deps/csl.mk
+++ b/deps/csl.mk
@@ -1,5 +1,3 @@
-ifeq ($(USE_BINARYBUILDER_CSL),0)
-
 # Interrogate the fortran compiler (which is always GCC based) on where it is keeping its libraries
 STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs | grep '^programs: =' | sed -e "s/^programs: =//")
 STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs | grep '^libraries: =' | sed -e "s/^libraries: =//")
@@ -12,6 +10,44 @@ define pathsearch
 $(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))))
 endef
 
+# CSL bundles lots of system compiler libraries, and while it is quite bleeding-edge
+# as compared to what most distros ship, if someone tries to build an older branch,
+# the version of CSL that ships with that branch may become relatively old.  This is
+# not a problem for code that is built in BB, but when we build Julia with the system
+# compiler, that compiler uses the version of `libstdc++` that it is bundled with,
+# and we can get linker errors when trying to run that `julia` executable with the
+# `libstdc++` that comes from the (now old) BB-built CSL.
+#
+# To fix this, we take note when the system `libstdc++.so` is newer than whatever we
+# would get from CSL (by searching for a `GLIBCXX_3.4.X` symbol that does not exist
+# in our CSL, but would in a newer one), and default to `USE_BINARYBUILDER_CSL=0` in
+# this case.
+CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.30|GLIBCXX_3\.5\.|GLIBCXX_4\.
+
+# Pick the default for USE_BINARYBUILDER_CSL (`?=` keeps any user-provided value).
+ifeq ($(USE_BINARYBUILDER),0)
+USE_BINARYBUILDER_CSL ?= 0
+else
+# BB is on globally; USE_SYSTEM_CSL=1 still opts out of the BB-built CSL.
+ifeq ($(USE_SYSTEM_CSL),1)
+USE_BINARYBUILDER_CSL ?= 0
+else
+# Locate the compiler's `libstdc++` (no `eval` here: it always expands to nothing).
+LIBSTDCXX_PATH := $(call pathsearch,libstdc++.so.6,$(STD_LIB_PATH))
+ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p '$(LIBSTDCXX_PATH)' | grep -E '$(CSL_NEXT_GLIBCXX_VERSION)')))
+# Found `libstdc++` and it defines a `GLIBCXX` version newer than anything
+# in our CSL (pattern is quoted so `|` is alternation, not a shell pipe).
+USE_BINARYBUILDER_CSL ?= 0
+else
+# Either we didn't find `libstdc++` (e.g. we're using `clang`), or it has
+# no such version (it is no newer than what BB provides), so default to
+# using BB.  NOTE(review): assumes the ELF soname `libstdc++.so.6`; confirm.
+USE_BINARYBUILDER_CSL ?= 1
+endif
+endif
+endif
+
+ifeq ($(USE_BINARYBUILDER_CSL),0)
 define copy_csl
 install-csl: | $$(build_shlibdir) $$(build_shlibdir)/$(1)
 $$(build_shlibdir)/$(1): | $$(build_shlibdir)
diff --git a/deps/dsfmt.mk b/deps/dsfmt.mk
index bf6062c2130f6..e22f1b51fe8f7 100644
--- a/deps/dsfmt.mk
+++ b/deps/dsfmt.mk
@@ -3,12 +3,8 @@
 ifneq ($(USE_BINARYBUILDER_DSFMT),1)
 
 DSFMT_CFLAGS := $(CFLAGS) -DNDEBUG -DDSFMT_MEXP=19937 $(fPIC) -DDSFMT_DO_NOT_USE_OLD_NAMES -DDSFMT_SHLIB
-ifneq ($(USEMSVC), 1)
 DSFMT_CFLAGS += -O3 -finline-functions -fomit-frame-pointer -fno-strict-aliasing \
 		--param max-inline-insns-single=1800 -Wall  -std=c99 -shared
-else
-DSFMT_CFLAGS += -Wl,-dll,-def:../../libdSFMT.def
-endif
 ifeq ($(ARCH), x86_64)
 DSFMT_CFLAGS += -msse2 -DHAVE_SSE2
 endif
diff --git a/deps/gmp.mk b/deps/gmp.mk
index 9093817b86829..a37327d82101e 100644
--- a/deps/gmp.mk
+++ b/deps/gmp.mk
@@ -22,12 +22,30 @@ $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2
 checksum-gmp: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2
 	$(JLCHECKSUM) $<
 
-$(SRCCACHE)/gmp-$(GMP_VER)/build-patched: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/gmp-exception.patch
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch
+# Apply fix to avoid using Apple ARM reserved register X18
+# Necessary for version 6.2.1, remove after next gmp release
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
+	cd $(dir $@) && \
+		patch -p1 < $(SRCDIR)/patches/gmp-HG-changeset.patch
+	echo 1 > $@
+
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied
+	cd $(dir $@) && \
+		patch -p1 < $(SRCDIR)/patches/gmp-exception.patch
+	echo 1 > $@
+
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied
+	cd $(dir $@) && \
+		patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch
+	echo 1 > $@
+
+$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: \
+	$(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied \
+	$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied \
+	$(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied
 	echo 1 > $@
 
-$(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted $(SRCCACHE)/gmp-$(GMP_VER)/build-patched
+$(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted $(SRCCACHE)/gmp-$(GMP_VER)/source-patched
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(dir $<)/configure $(CONFIGURE_COMMON) F77= --enable-cxx --enable-shared --disable-static $(GMP_CONFIGURE_OPTS)
diff --git a/deps/libgit2.mk b/deps/libgit2.mk
index f9b6aba547f7e..8f6ab58bc00da 100644
--- a/deps/libgit2.mk
+++ b/deps/libgit2.mk
@@ -1,7 +1,7 @@
 ## libgit2
 ifneq ($(USE_BINARYBUILDER_LIBGIT2),1)
 
-LIBGIT2_GIT_URL := git://github.com/libgit2/libgit2.git
+LIBGIT2_GIT_URL := https://github.com/libgit2/libgit2.git
 LIBGIT2_TAR_URL = https://api.github.com/repos/libgit2/libgit2/tarball/$1
 $(eval $(call git-external,libgit2,LIBGIT2,CMakeLists.txt,,$(SRCCACHE)))
 
diff --git a/deps/libssh2.mk b/deps/libssh2.mk
index 53e6659304770..990de300fac71 100644
--- a/deps/libssh2.mk
+++ b/deps/libssh2.mk
@@ -1,6 +1,6 @@
 ## libssh2
 ifneq ($(USE_BINARYBUILDER_LIBSSH2), 1)
-LIBSSH2_GIT_URL := git://github.com/libssh2/libssh2.git
+LIBSSH2_GIT_URL := https://github.com/libssh2/libssh2.git
 LIBSSH2_TAR_URL = https://api.github.com/repos/libssh2/libssh2/tarball/$1
 $(eval $(call git-external,libssh2,LIBSSH2,CMakeLists.txt,,$(SRCCACHE)))
 
diff --git a/deps/libuv.mk b/deps/libuv.mk
index ff749fef38651..a51cc5a9f6bb7 100644
--- a/deps/libuv.mk
+++ b/deps/libuv.mk
@@ -1,24 +1,13 @@
 ## LIBUV ##
 ifneq ($(USE_BINARYBUILDER_LIBUV),1)
-LIBUV_GIT_URL:=git://github.com/JuliaLang/libuv.git
+LIBUV_GIT_URL:=https://github.com/JuliaLang/libuv.git
 LIBUV_TAR_URL=https://api.github.com/repos/JuliaLang/libuv/tarball/$1
 $(eval $(call git-external,libuv,LIBUV,configure,,$(SRCCACHE)))
 
 UV_CFLAGS := -O2
-ifeq ($(USEMSVC), 1)
-UV_CFLAGS += -DBUILDING_UV_SHARED
-endif
-ifeq ($(USEICC), 1)
-UV_CFLAGS += -static-intel
-endif
 
 UV_FLAGS := LDFLAGS="$(LDFLAGS) $(CLDFLAGS) -v"
-ifneq ($(UV_CFLAGS),)
-UV_FLAGS += CFLAGS="$(CFLAGS) $(UV_CFLAGS)"
-endif
-ifeq ($(USEMSVC), 1)
-UV_FLAGS += --disable-shared
-endif
+UV_FLAGS += CFLAGS="$(CFLAGS) $(UV_CFLAGS) $(SANITIZE_OPTS)"
 
 ifneq ($(VERBOSE), 0)
 UV_MFLAGS += V=1
@@ -26,6 +15,9 @@ endif
 
 LIBUV_BUILDDIR := $(BUILDDIR)/$(LIBUV_SRC_DIR)
 
+ifneq ($(CLDFLAGS)$(SANITIZE_LDFLAGS),)
+$(LIBUV_BUILDDIR)/build-configured: LDFLAGS:=$(LDFLAGS) $(CLDFLAGS) $(SANITIZE_LDFLAGS)
+endif
 $(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracted
 	touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/aclocal.m4 # touch a few files to prevent autogen from getting called
 	touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/Makefile.in
diff --git a/deps/libuv.version b/deps/libuv.version
index 339cba4441875..7339533223083 100644
--- a/deps/libuv.version
+++ b/deps/libuv.version
@@ -1,2 +1,2 @@
-LIBUV_BRANCH=julia-uv2-1.39.0
-LIBUV_SHA1=fb3e3364c33ae48c827f6b103e05c3f0e78b79a9
+LIBUV_BRANCH=julia-uv2-1.42.0
+LIBUV_SHA1=3a63bf71de62c64097989254e4f03212e3bf5fc8
diff --git a/deps/libwhich.mk b/deps/libwhich.mk
index 5f4a50c4bf07e..aae5dead6f9f4 100644
--- a/deps/libwhich.mk
+++ b/deps/libwhich.mk
@@ -1,10 +1,10 @@
 ## LIBWHICH ##
-LIBWHICH_GIT_URL := git://github.com/vtjnash/libwhich.git
+LIBWHICH_GIT_URL := https://github.com/vtjnash/libwhich.git
 LIBWHICH_TAR_URL = https://api.github.com/repos/vtjnash/libwhich/tarball/$1
 $(eval $(call git-external,libwhich,LIBWHICH,,,$(BUILDDIR)))
 
 LIBWHICH_OBJ_LIB := $(build_depsbindir)/libwhich
-LIBWHICH_MFLAGS := CC="$(CC)"
+LIBWHICH_MFLAGS := CC="$(HOSTCC)"
 
 $(BUILDDIR)/$(LIBWHICH_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBWHICH_SRC_DIR)/source-extracted
 	$(MAKE) -C $(dir $<) $(LIBWHICH_MFLAGS) libwhich
diff --git a/deps/llvm-options.mk b/deps/llvm-options.mk
index 2ba3809614d47..723a4bb8e5d39 100644
--- a/deps/llvm-options.mk
+++ b/deps/llvm-options.mk
@@ -15,21 +15,11 @@ LLVM_FLAVOR := $(LLVM_BUILDTYPE)
 ifeq ($(LLVM_SANITIZE),1)
 ifeq ($(SANITIZE_MEMORY),1)
 LLVM_BUILDTYPE := $(LLVM_BUILDTYPE)+MSAN
-else
+endif
+ifeq ($(SANITIZE_ADDRESS),1)
 LLVM_BUILDTYPE := $(LLVM_BUILDTYPE)+ASAN
 endif
+ifeq ($(SANITIZE_THREAD),1)
+LLVM_BUILDTYPE := $(LLVM_BUILDTYPE)+TSAN
 endif
-
-
-ifeq ($(LLVM_VER),svn)
-LLVM_BARESRC_DIR:=$(SRCCACHE)/llvm-project-bare
-LLVM_MONOSRC_DIR:=$(SRCCACHE)/llvm-project-$(LLVM_VER)-${LLVM_GIT_VER}
-LLVM_SRC_DIR:=$(LLVM_MONOSRC_DIR)/llvm
-LIBCXX_ROOT_DIR:=$(LLVM_MONOSRC_DIR)
-else
-LLVM_MONOSRC_DIR:=
-LLVM_SRC_DIR:=$(SRCCACHE)/llvm-$(LLVM_VER)
-LIBCXX_ROOT_DIR:=$(LLVM_SRC_DIR)/projects
 endif
-LLVM_BUILD_DIR:=$(BUILDDIR)/llvm-$(LLVM_VER)
-LLVM_BUILDDIR_withtype := $(LLVM_BUILD_DIR)/build_$(LLVM_BUILDTYPE)
diff --git a/deps/llvm.mk b/deps/llvm.mk
index b6f6455f88255..ff30446df9fe6 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -1,47 +1,44 @@
 ## LLVM ##
 include $(SRCDIR)/llvm-ver.make
+include $(SRCDIR)/llvm-options.mk
 
 ifneq ($(USE_BINARYBUILDER_LLVM), 1)
-LLVM_GIT_URL ?= https://github.com/llvm/llvm-project.git
+LLVM_GIT_URL:=https://github.com/JuliaLang/llvm-project.git
+LLVM_TAR_URL=https://api.github.com/repos/JuliaLang/llvm-project/tarball/$1
+$(eval $(call git-external,llvm,LLVM,CMakeLists.txt,,$(SRCCACHE)))
+
+LLVM_BUILDDIR := $(BUILDDIR)/$(LLVM_SRC_DIR)
+LLVM_BUILDDIR_withtype := $(LLVM_BUILDDIR)/build_$(LLVM_BUILDTYPE)
 
 ifeq ($(BUILD_LLDB), 1)
 BUILD_LLVM_CLANG := 1
 # because it's a build requirement
 endif
 
-ifeq ($(USE_RV),1)
+ifeq ($(BUILD_LIBCXX), 1)
 BUILD_LLVM_CLANG := 1
 # because it's a build requirement
 endif
 
-
-ifeq ($(USE_SYSTEM_LLVM),0)
-ifneq ($(LLVM_VER),svn)
-ifeq ($(USE_POLLY),1)
-$(error USE_POLLY=1 requires LLVM_VER=svn)
-endif
-
-ifeq ($(USE_MLIR),1)
-$(error USE_MLIR=1 requires LLVM_VER=svn)
-endif
-
 ifeq ($(USE_RV),1)
-$(error USE_RV=1 requires LLVM_VER=svn)
-endif
-endif
+BUILD_LLVM_CLANG := 1
+# because it's a build requirement
 endif
 
-ifneq ($(USE_RV),)
-LLVM_RV_GIT_URL ?= https://github.com/cdl-saarland/rv
-LLVM_RV_GIT_VER ?= release_90
-endif
+# TODO: Add RV support back in
+# ifneq ($(USE_RV),)
+# LLVM_RV_GIT_URL ?= https://github.com/cdl-saarland/rv
+# LLVM_RV_GIT_VER ?= release_90
+# endif
 
 
 # for Monorepo
 LLVM_ENABLE_PROJECTS :=
 LLVM_EXTERNAL_PROJECTS :=
+LLVM_ENABLE_RUNTIMES :=
 ifeq ($(BUILD_LLVM_CLANG), 1)
-LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);clang;compiler-rt
+LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);clang
+LLVM_ENABLE_RUNTIMES := $(LLVM_ENABLE_RUNTIMES);compiler-rt
 endif
 ifeq ($(USE_POLLY), 1)
 LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);polly
@@ -55,41 +52,12 @@ endif
 ifeq ($(USE_RV), 1)
 LLVM_EXTERNAL_PROJECTS := $(LLVM_EXTERNAL_PROJECTS);rv
 endif
-
-include $(SRCDIR)/llvm-options.mk
-LLVM_LIB_FILE := libLLVMCodeGen.a
-
-ifeq (,$(findstring rc,$(LLVM_VER)))
-LLVM_TAR_EXT:=$(LLVM_VER).src.tar.xz
-else
-LLVM_VER_SPLIT := $(subst -rc, ,$(LLVM_VER))
-LLVM_TAR_EXT:=$(word 1,$(LLVM_VER_SPLIT))rc$(word 2,$(LLVM_VER_SPLIT)).src.tar.xz
+ifeq ($(BUILD_LIBCXX), 1)
+LLVM_ENABLE_RUNTIMES := $(LLVM_ENABLE_RUNTIMES);libcxx;libcxxabi
 endif
 
-ifneq ($(LLVM_VER),svn)
-LLVM_TAR:=$(SRCCACHE)/llvm-$(LLVM_TAR_EXT)
 
-ifeq ($(BUILD_LLDB),1)
-LLVM_LLDB_TAR:=$(SRCCACHE)/lldb-$(LLVM_TAR_EXT)
-endif # BUILD_LLDB
-
-ifeq ($(BUILD_LLVM_CLANG),1)
-ifeq ($(LLVM_VER_MAJ).$(LLVM_VER_MIN),9.0)
-LLVM_CLANG_TAR:=$(SRCCACHE)/cfe-$(LLVM_TAR_EXT)
-else
-LLVM_CLANG_TAR:=$(SRCCACHE)/clang-$(LLVM_TAR_EXT)
-endif
-LLVM_COMPILER_RT_TAR:=$(SRCCACHE)/compiler-rt-$(LLVM_TAR_EXT)
-else
-LLVM_CLANG_TAR:=
-LLVM_COMPILER_RT_TAR:=
-LLVM_LIBCXX_TAR:=
-endif # BUILD_LLVM_CLANG
-
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-LLVM_LIBCXX_TAR:=$(SRCCACHE)/libcxx-$(LLVM_TAR_EXT)
-endif
-endif # LLVM_VER != svn
+LLVM_LIB_FILE := libLLVMCodeGen.a
 
 # Figure out which targets to build
 LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF
@@ -100,16 +68,14 @@ LLVM_CPPFLAGS :=
 LLVM_LDFLAGS :=
 LLVM_CMAKE :=
 
-# MONOREPO
-ifeq ($(LLVM_VER),svn)
 LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)"
 LLVM_CMAKE += -DLLVM_EXTERNAL_PROJECTS="$(LLVM_EXTERNAL_PROJECTS)"
+LLVM_CMAKE += -DLLVM_ENABLE_RUNTIMES="$(LLVM_ENABLE_RUNTIMES)"
 
 ifeq ($(USE_RV),1)
 LLVM_CMAKE += -DLLVM_EXTERNAL_RV_SOURCE_DIR=$(LLVM_MONOSRC_DIR)/rv
 LLVM_CMAKE += -DLLVM_CXX_STD=c++14
 endif
-endif
 
 # Allow adding LLVM specific flags
 LLVM_CFLAGS += $(CFLAGS)
@@ -117,7 +83,8 @@ LLVM_CXXFLAGS += $(CXXFLAGS)
 LLVM_CPPFLAGS += $(CPPFLAGS)
 LLVM_LDFLAGS += $(LDFLAGS)
 LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)"
-LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=OFF -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_ROOT="$(build_prefix)" # FindZLIB hint: install prefix (ZLIB_LIBRARY would need a library file, not a directory)
 LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF
 ifeq ($(USE_POLLY_ACC),1)
 LLVM_CMAKE += -DPOLLY_ENABLE_GPGPU_CODEGEN=ON
@@ -177,12 +144,19 @@ LLVM_CFLAGS += -fsanitize=memory -fsanitize-memory-track-origins
 LLVM_LDFLAGS += -fsanitize=memory -fsanitize-memory-track-origins
 LLVM_CXXFLAGS += -fsanitize=memory -fsanitize-memory-track-origins
 LLVM_CMAKE += -DLLVM_USE_SANITIZER="MemoryWithOrigins"
-else
+endif
+ifeq ($(SANITIZE_ADDRESS),1)
 LLVM_CFLAGS += -fsanitize=address
 LLVM_LDFLAGS += -fsanitize=address
 LLVM_CXXFLAGS += -fsanitize=address
 LLVM_CMAKE += -DLLVM_USE_SANITIZER="Address"
 endif
+ifeq ($(SANITIZE_THREAD),1)
+LLVM_CFLAGS += -fsanitize=thread
+LLVM_LDFLAGS += -fsanitize=thread
+LLVM_CXXFLAGS += -fsanitize=thread
+LLVM_CMAKE += -DLLVM_USE_SANITIZER="Thread"
+endif
 endif # LLVM_SANITIZE
 
 ifeq ($(LLVM_LTO),1)
@@ -201,23 +175,6 @@ ifeq ($(fPIC),)
 LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF
 endif
 
-# disable ABI breaking checks: by default only enabled for asserts build, in which case
-# it is then impossible to call non-asserts LLVM libraries (like out-of-tree backends)
-LLVM_CMAKE += -DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF
-
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-LLVM_LDFLAGS += -Wl,-rpath,$(build_libdir)
-LLVM_CPPFLAGS += -I$(build_includedir)
-# We don't want to link to libc++ while trying to build it, so we define these
-# flags separately so that we can still pass them to the main LLVM build
-LLVM_LIBCXX_LDFLAGS := -lc++ -lc++abi
-ifeq ($(USEICC),1)
-LLVM_LDFLAGS += -no_cpprt
-endif # USEICC
-else
-LLVM_LIBCXX_LDFLAGS :=
-endif # BUILD_CUSTOM_LIBCXX
-
 LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \
 	-DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS)"
 ifeq ($(OS),Darwin)
@@ -235,37 +192,6 @@ ifeq ($(BUILD_LLDB),0)
 LLVM_CMAKE += -DLLVM_TOOL_LLDB_BUILD=OFF
 endif
 
-ifneq ($(LLVM_VER),svn)
-LLVM_SRC_URL := https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVM_VER)
-
-ifneq ($(LLVM_CLANG_TAR),)
-$(LLVM_CLANG_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-$(LLVM_COMPILER_RT_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-
-ifneq ($(LLVM_LIBCXX_TAR),)
-$(LLVM_LIBCXX_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-ifneq ($(LLVM_VER),svn)
-$(LLVM_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-
-ifneq ($(LLVM_LLDB_TAR),)
-$(LLVM_LLDB_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-ifeq ($(BUILD_LLDB),1)
-$(LLVM_SRC_DIR)/tools/lldb:
-$(LLVM_SRC_DIR)/source-extracted: $(LLVM_SRC_DIR)/tools/lldb
-endif
-endif
-
 # LLDB still relies on plenty of python 2.x infrastructure, without checking
 llvm_python_location=$(shell /usr/bin/env python2 -c 'import sys; print(sys.executable)')
 llvm_python_workaround=$(SRCCACHE)/python2_path
@@ -276,185 +202,32 @@ $(llvm_python_workaround):
 	ln -sf $(llvm_python_location) "$@/python" && \
 	ln -sf $(llvm_python_location)-config "$@/python-config"
 
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-
-# Take a snapshot of the CMake flags before linking to -lc++ and -lc++abi
-# These are added to the LLVM CMake flags further down
-LLVM_CMAKE_LIBCXX := $(LLVM_CMAKE) \
-	-DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
+LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
 	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)"
 
-ifeq ($(USEICC),1)
-LIBCXX_EXTRA_FLAGS := -Bstatic -lirc -Bdynamic
-endif
-
-# These libraries require unwind.h from the libunwind dependency
-ifeq ($(USE_SYSTEM_LIBUNWIND),0)
-ifeq ($(OS),Darwin)
-BUILT_UNWIND := $(build_prefix)/manifest/llvmunwind
-else
-BUILT_UNWIND := $(build_prefix)/manifest/unwind
-endif # Darwin
-else
-BUILT_UNWIND :=
-endif # Building libunwind
-
-$(LIBCXX_ROOT_DIR)/libcxx: $(LLVM_LIBCXX_TAR) | $(LLVM_SRC_DIR)/source-extracted
-$(LIBCXX_ROOT_DIR)/libcxxabi: $(LLVM_LIBCXXABI_TAR) | $(LLVM_SRC_DIR)/source-extracted
-$(LLVM_BUILD_DIR)/libcxx-build/Makefile: | $(LIBCXX_ROOT_DIR)/libcxx $(LIBCXX_ROOT_DIR)/libcxxabi $(BUILT_UNWIND)
-	mkdir -p $(dir $@)
-	cd $(dir $@) && \
-		$(CMAKE) -G "Unix Makefiles" $(CMAKE_COMMON) $(LLVM_CMAKE_LIBCXX) -DLIBCXX_CXX_ABI=libcxxabi -DLIBCXX_CXX_ABI_INCLUDE_PATHS="$(LIBCXX_ROOT_DIR)/libcxxabi/include" $(LIBCXX_ROOT_DIR)/libcxx -DCMAKE_SHARED_LINKER_FLAGS="$(LDFLAGS) -L$(build_libdir) $(LIBCXX_EXTRA_FLAGS)"
-$(LLVM_BUILD_DIR)/libcxxabi-build/Makefile: | $(LIBCXX_ROOT_DIR)/libcxxabi $(LIBCXX_ROOT_DIR)/libcxx $(BUILT_UNWIND)
-	mkdir -p $(dir $@)
-	cd $(dir $@) && \
-		$(CMAKE) -G "Unix Makefiles" $(CMAKE_COMMON) $(LLVM_CMAKE_LIBCXX) -DLLVM_ABI_BREAKING_CHECKS="WITH_ASSERTS" -DLLVM_PATH="$(LLVM_SRC_DIR)" $(LIBCXX_ROOT_DIR)/libcxxabi -DLIBCXXABI_CXX_ABI_LIBRARIES="$(LIBCXX_EXTRA_FLAGS)" -DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS) -std=c++11"
-$(LLVM_BUILD_DIR)/libcxxabi-build/lib/libc++abi.so.1.0: $(LLVM_BUILD_DIR)/libcxxabi-build/Makefile $(LIBCXX_ROOT_DIR)/libcxxabi/.git/HEAD
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxxabi-build
-	touch -c $@
-$(build_libdir)/libc++abi.so.1.0: $(LLVM_BUILD_DIR)/libcxxabi-build/lib/libc++abi.so.1.0
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxxabi-build install
-	touch -c $@
-	# Building this library installs these headers, which breaks other dependencies
-	-rm -rf $(build_includedir)/c++
-$(LLVM_BUILD_DIR)/libcxx-build/lib/libc++.so.1.0: $(build_libdir)/libc++abi.so.1.0 $(LLVM_BUILD_DIR)/libcxx-build/Makefile $(LIBCXX_ROOT_DIR)/libcxx/.git/HEAD
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxx-build
-$(build_libdir)/libc++.so.1.0: $(LLVM_BUILD_DIR)/libcxx-build/lib/libc++.so.1.0
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxx-build install
-	touch -c $@
-	# Building this library installs these headers, which breaks other dependencies
-	-rm -rf $(build_includedir)/c++
-get-libcxx: $(LIBCXX_ROOT_DIR)/libcxx
-get-libcxxabi: $(LIBCXX_ROOT_DIR)/libcxxabi
-install-libcxxabi: $(build_libdir)/libc++abi.so.1.0
-install-libcxx: $(build_libdir)/libc++.so.1.0
-endif # BUILD_CUSTOM_LIBCXX
-
-# We want to be able to clean without having to pass BUILD_CUSTOM_LIBCXX=1, so define these
-# outside of the conditional above, can't use `LIBCXX_ROOT_DIR` since that might come from
-# the monorepo.
-clean-libcxx:
-	-$(MAKE) -C $(LLVM_BUILD_DIR)/libcxx-build clean
-clean-libcxxabi:
-	-$(MAKE) -C $(LLVM_BUILD_DIR)/libcxxabi-build clean
-distclean-libcxx:
-	-rm -rf $(LLVM_LIBCXX_TAR) $(LLVM_SRC_DIR)/projects/libcxx $(LLVM_BUILD_DIR)/libcxx-build
-distclean-libcxxabi:
-	-rm -rf $(LLVM_LIBCXXABI_TAR) $(LLVM_SRC_DIR)/projects/libcxxabi $(LLVM_BUILD_DIR)/libcxxabi-build
-
-
-# We want to ensure that the libcxx linking flags don't get passed to the libcxx build, since it will
-# error on a fresh build
-LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS) $(LLVM_LIBCXX_LDFLAGS)" \
-	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS) $(LLVM_LIBCXX_LDFLAGS)"
-
 # change the SONAME of Julia's private LLVM
 # i.e. libLLVM-6.0jl.so
 # see #32462
 LLVM_CMAKE += -DLLVM_VERSION_SUFFIX:STRING="jl"
 
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-LIBCXX_DEPENDENCY := $(build_libdir)/libc++abi.so.1.0 $(build_libdir)/libc++.so.1.0
-get-llvm: get-libcxx get-libcxxabi
-endif
-
-checksum-llvm: $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
-ifneq ($(LLVM_CLANG_TAR),)
-	$(JLCHECKSUM) $(LLVM_CLANG_TAR)
-endif
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-	$(JLCHECKSUM) $(LLVM_COMPILER_RT_TAR)
-endif
-ifneq ($(LLVM_LIBCXX_TAR),)
-	$(JLCHECKSUM) $(LLVM_LIBCXX_TAR)
-endif
-ifneq ($(LLVM_VER),svn)
-	$(JLCHECKSUM) $(LLVM_TAR)
-endif
-ifneq ($(LLVM_LLDB_TAR),)
-	$(JLCHECKSUM) $(LLVM_LLDB_TAR)
-endif
-
-$(LLVM_SRC_DIR)/source-extracted: | $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
-ifneq ($(LLVM_CLANG_TAR),)
-	$(JLCHECKSUM) $(LLVM_CLANG_TAR)
-endif
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-	$(JLCHECKSUM) $(LLVM_COMPILER_RT_TAR)
-endif
-ifneq ($(LLVM_LIBCXX_TAR),)
-	$(JLCHECKSUM) $(LLVM_LIBCXX_TAR)
-endif
-ifneq ($(LLVM_VER),svn)
-	$(JLCHECKSUM) $(LLVM_TAR)
-endif
-ifneq ($(LLVM_LLDB_TAR),)
-	$(JLCHECKSUM) $(LLVM_LLDB_TAR)
-endif
-	-rm -rf $(LLVM_SRC_DIR)
-ifneq ($(LLVM_VER),svn)
-	mkdir -p $(LLVM_SRC_DIR)
-	$(TAR) -C $(LLVM_SRC_DIR) --strip-components 1 -xf $(LLVM_TAR)
-else
-	([ ! -d $(LLVM_BARESRC_DIR) ] && \
-		git clone --bare $(LLVM_GIT_URL) $(LLVM_BARESRC_DIR) ) || \
-		(cd $(LLVM_BARESRC_DIR) && \
-		git fetch)
-	([ ! -d $(LLVM_MONOSRC_DIR) ] && \
-		git clone --dissociate --reference $(LLVM_BARESRC_DIR) $(LLVM_GIT_URL) $(LLVM_MONOSRC_DIR) ) || \
-		(cd $(LLVM_MONOSRC_DIR) && \
-		git pull --ff-only)
-ifneq ($(LLVM_GIT_VER),)
-	(cd $(LLVM_MONOSRC_DIR) && \
-		git checkout $(LLVM_GIT_VER))
-endif # LLVM_GIT_VER
-	# Debug output only. Disable pager and ignore error.
-	(cd $(LLVM_SRC_DIR) && \
-		git show HEAD --stat | cat) || true
-ifeq ($(USE_RV),1)
-	git clone -b $(LLVM_RV_GIT_VER) $(LLVM_RV_GIT_URL) $(LLVM_MONOSRC_DIR)/rv
-	(cd $(LLVM_MONOSRC_DIR)/rv && \
-		git submodule update --init) || true
-endif
-endif # LLVM_VER
-ifneq ($(LLVM_VER),svn)
-ifneq ($(LLVM_CLANG_TAR),)
-	mkdir -p $(LLVM_SRC_DIR)/tools/clang
-	$(TAR) -C $(LLVM_SRC_DIR)/tools/clang --strip-components 1 -xf $(LLVM_CLANG_TAR)
-endif # LLVM_CLANG_TAR
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-	mkdir -p $(LLVM_SRC_DIR)/projects/compiler-rt
-	$(TAR) -C $(LLVM_SRC_DIR)/projects/compiler-rt --strip-components 1 -xf $(LLVM_COMPILER_RT_TAR)
-endif # LLVM_COMPILER_RT_TAR
-ifneq ($(LLVM_LLDB_TAR),)
-	mkdir -p $(LLVM_SRC_DIR)/tools/lldb
-	$(TAR) -C $(LLVM_SRC_DIR)/tools/lldb --strip-components 1 -xf $(LLVM_LLDB_TAR)
-endif # LLVM_LLDB_TAR
-endif # LLVM_VER
-	# touch some extra files to ensure bisect works pretty well
-	touch -c $(LLVM_SRC_DIR).extracted
-	touch -c $(LLVM_SRC_DIR)/configure
-	touch -c $(LLVM_SRC_DIR)/CMakeLists.txt
-	echo 1 > $@
-
 # Apply version-specific LLVM patches sequentially
 LLVM_PATCH_PREV :=
 define LLVM_PATCH
-$$(LLVM_SRC_DIR)/$1.patch-applied: $$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
-	cd $$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch
+$$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
+	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 < $$(SRCDIR)/patches/$1.patch
 	echo 1 > $$@
 # declare that applying any patch must re-run the compile step
-$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(LLVM_SRC_DIR)/$1.patch-applied
-LLVM_PATCH_PREV := $$(LLVM_SRC_DIR)/$1.patch-applied
+$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
+LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
 endef
 
 define LLVM_PROJ_PATCH
-$$(LLVM_SRC_DIR)/$1.patch-applied: $$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
-	cd $$(LLVM_SRC_DIR) && patch -p2 < $$(SRCDIR)/patches/$1.patch
+$$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
+	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch
 	echo 1 > $$@
 # declare that applying any patch must re-run the compile step
-$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(LLVM_SRC_DIR)/$1.patch-applied
-LLVM_PATCH_PREV := $$(LLVM_SRC_DIR)/$1.patch-applied
+$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
+LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
 endef
 
 ifeq ($(LLVM_VER_SHORT),11.0)
@@ -481,7 +254,7 @@ ifeq ($(LLVM_VER_PATCH), 0)
 $(eval $(call LLVM_PATCH,llvm-11-D85313-debuginfo-empty-arange)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LLVM 12
 endif # LLVM_VER 11.0.0
-$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # upstreamed as D92210
+$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # Still being upstreamed as D92210
 $(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) # remove for LLVM 12
@@ -499,22 +272,7 @@ $(eval $(call LLVM_PROJ_PATCH,llvm-11-D97571-AArch64-loh)) # remove for LLVM 13
 $(eval $(call LLVM_PROJ_PATCH,llvm-11-aarch64-addrspace)) # remove for LLVM 13
 endif # LLVM_VER 11.0
 
-ifeq ($(LLVM_VER_SHORT),12.0)
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485)) # Needs upstreaming
-$(eval $(call LLVM_PATCH,llvm-12-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-ifeq ($(BUILD_LLVM_CLANG),1)
-$(eval $(call LLVM_PATCH,llvm-D88630-clang-cmake))
-endif
-$(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as))
-$(eval $(call LLVM_PATCH,llvm-11-D94813-mergeicmps))
-$(eval $(call LLVM_PROJ_PATCH,llvm-11-AArch64-FastIsel-bug))
-$(eval $(call LLVM_PATCH,llvm-12-D97435-AArch64-movaddrreg))
-$(eval $(call LLVM_PROJ_PATCH,llvm-11-D97571-AArch64-loh)) # remove for LLVM 13
-$(eval $(call LLVM_PROJ_PATCH,llvm-11-aarch64-addrspace)) # remove for LLVM 13
-$(eval $(call LLVM_PROJ_PATCH,llvm-12-fde-symbols-aarch64)) # remove for LLVM 13
-$(eval $(call LLVM_PROJ_PATCH,llvm-12-force-eh_frame-aarch64)) # remove for LLVM 13
-endif # LLVM_VER 12.0
+# NOTE: LLVM 12 and 13 have their patches applied to JuliaLang/llvm-project
 
 # Add a JL prefix to the version map. DO NOT REMOVE
 ifneq ($(LLVM_VER), svn)
@@ -522,13 +280,13 @@ $(eval $(call LLVM_PATCH,llvm7-symver-jlprefix))
 endif
 
 # declare that all patches must be applied before running ./configure
-$(LLVM_BUILDDIR_withtype)/build-configured: | $(LLVM_PATCH_PREV)
+$(LLVM_BUILDDIR_withtype)/build-configured: | $(LLVM_PATCH_PREV) $(build_prefix)/manifest/zlib
 
-$(LLVM_BUILDDIR_withtype)/build-configured: $(LLVM_SRC_DIR)/source-extracted | $(llvm_python_workaround) $(LIBCXX_DEPENDENCY)
+$(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted | $(llvm_python_workaround)
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 		export PATH=$(llvm_python_workaround):"$$PATH" && \
-		$(CMAKE) $(LLVM_SRC_DIR) $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
+		$(CMAKE) $(SRCCACHE)/$(LLVM_SRC_DIR)/llvm $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
 		|| { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; }
 	echo 1 > $@
 
@@ -552,7 +310,7 @@ $(build_prefix)/manifest/llvm: | $(llvm_python_workaround)
 
 LLVM_INSTALL = \
 	cd $1 && mkdir -p $2$$(build_depsbindir) && \
-    cp -r $$(LLVM_SRC_DIR)/utils/lit $2$$(build_depsbindir)/ && \
+    cp -r $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit $2$$(build_depsbindir)/ && \
     $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P cmake_install.cmake
 ifeq ($(OS), WINNT)
 LLVM_INSTALL += && cp $2$$(build_shlibdir)/libLLVM.dll $2$$(build_depsbindir)
@@ -562,40 +320,22 @@ ifeq ($(OS),Darwin)
 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib
 endif
 
-$(eval $(call staged-install,llvm,llvm-$$(LLVM_VER)/build_$$(LLVM_BUILDTYPE), \
+$(eval $(call staged-install, \
+	llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \
 	LLVM_INSTALL,,,))
 
-clean-llvm: clean-libcxx clean-libcxxabi
+clean-llvm:
 	-rm $(LLVM_BUILDDIR_withtype)/build-configured $(LLVM_BUILDDIR_withtype)/build-compiled
 	-$(MAKE) -C $(LLVM_BUILDDIR_withtype) clean
 
-distclean-llvm: distclean-libcxx distclean-libcxxabi
-	-rm -rf $(LLVM_TAR) $(LLVM_CLANG_TAR) \
-		$(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR) \
-		$(LLVM_SRC_DIR) $(LLVM_BUILDDIR_withtype)
-
-
-ifneq ($(LLVM_VER),svn)
-get-llvm: $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
-else
-get-llvm: $(LLVM_SRC_DIR)/source-extracted
-endif
-extract-llvm: $(LLVM_SRC_DIR)/source-extracted
+get-llvm: $(LLVM_SRC_FILE)
+extract-llvm: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted
 configure-llvm: $(LLVM_BUILDDIR_withtype)/build-configured
 compile-llvm: $(LLVM_BUILDDIR_withtype)/build-compiled
 fastcheck-llvm: #none
 check-llvm: $(LLVM_BUILDDIR_withtype)/build-checked
 #todo: LLVM make check target is broken on julia.mit.edu (and really slow elsewhere)
 
-
-ifeq ($(LLVM_VER),svn)
-update-llvm:
-	(cd $(LLVM_BARESRC_DIR) && \
-		git fetch)
-	(cd $(LLVM_MONOSRC_DIR) && \
-		git fetch $(LLVM_BARESRC_DIR) +refs/remotes/*:refs/remotes/* && \
-		git pull --ff-only)
-endif
 else # USE_BINARYBUILDER_LLVM
 
 # We provide a way to subversively swap out which LLVM JLL we pull artifacts from
diff --git a/deps/llvm.version b/deps/llvm.version
new file mode 100644
index 0000000000000..45b9c6a057346
--- /dev/null
+++ b/deps/llvm.version
@@ -0,0 +1,2 @@
+LLVM_BRANCH=julia-12.0.1-4
+LLVM_SHA1=julia-12.0.1-4
diff --git a/deps/openblas.mk b/deps/openblas.mk
index a1ce15100ac4c..50873c9220f08 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -1,7 +1,7 @@
 ## OpenBLAS ##
 ifneq ($(USE_BINARYBUILDER_OPENBLAS), 1)
 # LAPACK is built into OpenBLAS by default
-OPENBLAS_GIT_URL := git://github.com/xianyi/OpenBLAS.git
+OPENBLAS_GIT_URL := https://github.com/xianyi/OpenBLAS.git
 OPENBLAS_TAR_URL = https://api.github.com/repos/xianyi/OpenBLAS/tarball/$1
 $(eval $(call git-external,openblas,OPENBLAS,,,$(BUILDDIR)))
 
@@ -12,20 +12,7 @@ ifeq ($(OPENBLAS_USE_THREAD), 1)
 OPENBLAS_BUILD_OPTS += USE_THREAD=1
 OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=50
 # Maximum number of threads for parallelism
-ifneq ($(ARCH),x86_64)
-# Assume we can't address much memory to spawn many threads
-# It is also unlikely that 32-bit architectures have too many cores
-OPENBLAS_BUILD_OPTS += NUM_THREADS=8
-else ifeq ($(OS),WINNT)
-# Windows seems unable to handle very many
-OPENBLAS_BUILD_OPTS += NUM_THREADS=16
-else ifeq ($(OS),Darwin)
-# This should suffice for the largest macs
-OPENBLAS_BUILD_OPTS += NUM_THREADS=16
-else
-# On linux, try to provision for the largest possible machine currently
-OPENBLAS_BUILD_OPTS += NUM_THREADS=16
-endif
+OPENBLAS_BUILD_OPTS += NUM_THREADS=512
 else
 OPENBLAS_BUILD_OPTS += USE_THREAD=0
 endif
@@ -42,8 +29,8 @@ endif
 ifeq ($(USE_BLAS64), 1)
 OPENBLAS_BUILD_OPTS += INTERFACE64=1 SYMBOLSUFFIX="$(OPENBLAS_SYMBOLSUFFIX)" LIBPREFIX="libopenblas$(OPENBLAS_LIBNAMESUFFIX)"
 ifeq ($(OS), Darwin)
-OPENBLAS_BUILD_OPTS += OBJCONV=$(abspath $(BUILDDIR)/objconv/objconv)
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: | $(BUILDDIR)/objconv/build-compiled
+OPENBLAS_BUILD_OPTS += OBJCONV=$(abspath $(build_bindir)/objconv)
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: | $(build_prefix)/manifest/objconv
 endif
 endif
 
@@ -103,22 +90,17 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/
 		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-exshift.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
-	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-exshift.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-filter-out-mavx-flag-on-zgemm-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-exshift.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-julia42415-lapack625-openblas3392.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
 	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-filter-out-mavx-flag-on-zgemm-kernels.patch
+		patch -p1 -f < $(SRCDIR)/patches/openblas-julia42415-lapack625-openblas3392.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-Only-filter-out-mavx-on-Sandybridge.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-filter-out-mavx-flag-on-zgemm-kernels.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-julia42415-lapack625-openblas3392.patch-applied
 	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-Only-filter-out-mavx-on-Sandybridge.patch
+		patch -p1 -f < $(SRCDIR)/patches/neoverse-generic-kernels.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-Only-filter-out-mavx-on-Sandybridge.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
diff --git a/deps/openblas.version b/deps/openblas.version
index 7814f78841767..346e75dac614b 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -1,2 +1,2 @@
-OPENBLAS_BRANCH=v0.3.13
-OPENBLAS_SHA1=d2b11c47774b9216660e76e2fc67e87079f26fa1
+OPENBLAS_BRANCH=v0.3.17
+OPENBLAS_SHA1=d909f9f3d4fc4ccff36d69f178558df154ba1002
diff --git a/deps/openlibm.mk b/deps/openlibm.mk
index 6e63571b07cc3..544519e12f0d0 100644
--- a/deps/openlibm.mk
+++ b/deps/openlibm.mk
@@ -1,6 +1,6 @@
 ## openlibm ##
 ifneq ($(USE_BINARYBUILDER_OPENLIBM), 1)
-OPENLIBM_GIT_URL := git://github.com/JuliaMath/openlibm.git
+OPENLIBM_GIT_URL := https://github.com/JuliaMath/openlibm.git
 OPENLIBM_TAR_URL = https://api.github.com/repos/JuliaMath/openlibm/tarball/$1
 $(eval $(call git-external,openlibm,OPENLIBM,,,$(BUILDDIR)))
 
diff --git a/deps/patchelf.mk b/deps/patchelf.mk
index 60e50423568ed..e3a8c6fb9bf1a 100644
--- a/deps/patchelf.mk
+++ b/deps/patchelf.mk
@@ -1,18 +1,22 @@
 ## patchelf ##
 
-$(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://nixos.org/releases/patchelf/patchelf-$(PATCHELF_VER)/patchelf-$(PATCHELF_VER).tar.gz
+$(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/NixOS/patchelf/releases/download/$(PATCHELF_VER)/patchelf-$(PATCHELF_VER).tar.bz2
 
-$(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz
+$(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
 	$(JLCHECKSUM) $<
-	cd $(dir $<) && $(TAR) zxf $<
+	mkdir -p $(dir $@) # -p: do not fail on a directory left behind by an interrupted extract
+	cd $(dir $@) && $(TAR) jxf $< --strip-components=1
 	touch -c $(SRCCACHE)/patchelf-$(PATCHELF_VER)/configure # old target
 	echo 1 > $@
 
-checksum-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz
+checksum-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
 	$(JLCHECKSUM) $<
 
-$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted | $(LIBCXX_DEPENDENCY)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: CC:=$(HOSTCC)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: CXX:=$(HOSTCXX)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: XC_HOST:=$(BUILD_MACHINE)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)"
@@ -39,12 +43,12 @@ clean-patchelf:
 	-$(MAKE) -C $(BUILDDIR)/patchelf-$(PATCHELF_VER) clean
 
 distclean-patchelf:
-	-rm -rf $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz \
+	-rm -rf $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2 \
 		$(SRCCACHE)/patchelf-$(PATCHELF_VER) \
 		$(BUILDDIR)/patchelf-$(PATCHELF_VER)
 
 
-get-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz
+get-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
 extract-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted
 configure-patchelf: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured
 compile-patchelf: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-compiled
diff --git a/deps/patches/gmp-HG-changeset.patch b/deps/patches/gmp-HG-changeset.patch
new file mode 100644
index 0000000000000..7437fb6f2f748
--- /dev/null
+++ b/deps/patches/gmp-HG-changeset.patch
@@ -0,0 +1,520 @@
+
+# HG changeset patch
+# User Torbjorn Granlund <tg@gmplib.org>
+# Date 1606685500 -3600
+# Node ID 5f32dbc41afc1f8cd77af1614f0caeb24deb7d7b
+# Parent  94c84d919f83ba963ed1809f8e80c7bef32db55c
+Avoid the x18 register since it is reserved on Darwin.
+
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aors_n.asm
+--- a/mpn/arm64/aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -68,7 +68,7 @@
+ EPILOGUE()
+ PROLOGUE(func_n)
+ 	CLRCY
+-L(ent):	lsr	x18, n, #2
++L(ent):	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x7, [up]
+@@ -77,7 +77,7 @@
+ 	str	x13, [rp],#8
+ 	tbnz	n, #1, L(b11)
+ 
+-L(b01):	cbz	x18, L(ret)
++L(b01):	cbz	x17, L(ret)
+ 	ldp	x4, x5, [up,#8]
+ 	ldp	x8, x9, [vp,#8]
+ 	sub	up, up, #8
+@@ -88,7 +88,7 @@
+ 	ldp	x10, x11, [vp,#8]
+ 	add	up, up, #8
+ 	add	vp, vp, #8
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	tbnz	n, #1, L(b10)
+@@ -101,7 +101,7 @@
+ 
+ L(b10):	ldp	x6, x7, [up]
+ 	ldp	x10, x11, [vp]
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#16]
+@@ -114,8 +114,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	stp	x12, x13, [rp],#16
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	ADDSUBC	x12, x6, x10
+ 	ADDSUBC	x13, x7, x11
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsmul_1.asm
+--- a/mpn/arm64/aorsmul_1.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/aorsmul_1.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -32,10 +32,15 @@
+ 
+ include(`../config.m4')
+ 
+-C	     cycles/limb
+-C Cortex-A53	9.3-9.8
+-C Cortex-A57	 7.0
+-C X-Gene	 5.0
++C	       addmul_1        submul_1
++C	     cycles/limb     cycles/limb
++C Cortex-A53	9.3-9.8		9.3-9.8
++C Cortex-A55    9.0-9.5		9.3-9.8
++C Cortex-A57	 7		 7
++C Cortex-A72
++C Cortex-A73	 6		 6
++C X-Gene	 5		 5
++C Apple M1	 1.75		 1.75
+ 
+ C NOTES
+ C  * It is possible to keep the carry chain alive between the addition blocks
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsorrlshC_n.asm
+--- a/mpn/arm64/aorsorrlshC_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/aorsorrlshC_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -65,14 +65,14 @@
+ 
+ ASM_START()
+ PROLOGUE(func_n)
+-	lsr	x18, n, #2
++	lsr	x6, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x5, [up]
+ 	tbnz	n, #1, L(b11)
+ 
+ L(b01):	ldr	x11, [vp]
+-	cbz	x18, L(1)
++	cbz	x6, L(1)
+ 	ldp	x8, x9, [vp,#8]
+ 	lsl	x13, x11, #LSH
+ 	ADDSUB(	x15, x13, x5)
+@@ -94,7 +94,7 @@
+ 	ADDSUB(	x17, x13, x5)
+ 	str	x17, [rp],#8
+ 	sub	up, up, #8
+-	cbz	x18, L(end)
++	cbz	x6, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	tbnz	n, #1, L(b10)
+@@ -107,7 +107,7 @@
+ L(b10):	CLRRCY(	x9)
+ 	ldp	x10, x11, [vp]
+ 	sub	up, up, #16
+-	cbz	x18, L(end)
++	cbz	x6, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#16]
+@@ -124,8 +124,8 @@
+ 	ADDSUBC(x16, x12, x4)
+ 	ADDSUBC(x17, x13, x5)
+ 	stp	x16, x17, [rp],#16
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x6, x6, #1
++	cbnz	x6, L(top)
+ 
+ L(end):	ldp	x4, x5, [up,#16]
+ 	extr	x12, x10, x9, #RSH
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/cnd_aors_n.asm
+--- a/mpn/arm64/cnd_aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/cnd_aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -65,7 +65,7 @@
+ 
+ 	CLRCY
+ 
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x13, [vp]
+@@ -75,7 +75,7 @@
+ 	str	x9, [rp]
+ 	tbnz	n, #1, L(b11)
+ 
+-L(b01):	cbz	x18, L(rt)
++L(b01):	cbz	x17, L(rt)
+ 	ldp	x12, x13, [vp,#8]
+ 	ldp	x10, x11, [up,#8]
+ 	sub	up, up, #8
+@@ -86,7 +86,7 @@
+ L(b11):	ldp	x12, x13, [vp,#8]!
+ 	ldp	x10, x11, [up,#8]!
+ 	sub	rp, rp, #8
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	ldp	x12, x13, [vp]
+@@ -99,7 +99,7 @@
+ 	b	L(mid)
+ 
+ L(b10):	sub	rp, rp, #16
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	bic	x6, x12, cnd
+@@ -116,8 +116,8 @@
+ 	ADDSUBC	x9, x11, x7
+ 	ldp	x10, x11, [up,#32]!
+ 	stp	x8, x9, [rp,#32]!
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	bic	x6, x12, cnd
+ 	bic	x7, x13, cnd
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/logops_n.asm
+--- a/mpn/arm64/logops_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/logops_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -78,7 +78,7 @@
+ 
+ ASM_START()
+ PROLOGUE(func)
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x7, [up]
+@@ -88,7 +88,7 @@
+ 	str	x15, [rp],#8
+ 	tbnz	n, #1, L(b11)
+ 
+-L(b01):	cbz	x18, L(ret)
++L(b01):	cbz	x17, L(ret)
+ 	ldp	x4, x5, [up,#8]
+ 	ldp	x8, x9, [vp,#8]
+ 	sub	up, up, #8
+@@ -99,7 +99,7 @@
+ 	ldp	x10, x11, [vp,#8]
+ 	add	up, up, #8
+ 	add	vp, vp, #8
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	tbnz	n, #1, L(b10)
+@@ -110,7 +110,7 @@
+ 
+ L(b10):	ldp	x6, x7, [up]
+ 	ldp	x10, x11, [vp]
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#16]
+@@ -127,8 +127,8 @@
+ 	POSTOP(	x12)
+ 	POSTOP(	x13)
+ 	stp	x12, x13, [rp],#16
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	LOGOP(	x12, x6, x10)
+ 	LOGOP(	x13, x7, x11)
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshift.asm
+--- a/mpn/arm64/lshift.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/lshift.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -61,7 +61,7 @@
+ 	add	rp, rp_arg, n, lsl #3
+ 	add	up, up, n, lsl #3
+ 	sub	tnc, xzr, cnt
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x4, [up,#-8]
+@@ -69,7 +69,7 @@
+ 
+ L(b01):	NSHIFT	x0, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt1)
++	cbnz	x17, L(gt1)
+ 	str	x2, [rp,#-8]
+ 	ret
+ L(gt1):	ldp	x4, x5, [up,#-24]
+@@ -89,7 +89,7 @@
+ 	PSHIFT	x13, x5, cnt
+ 	NSHIFT	x10, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt2)
++	cbnz	x17, L(gt2)
+ 	orr	x10, x10, x13
+ 	stp	x2, x10, [rp,#-16]
+ 	ret
+@@ -123,11 +123,11 @@
+ 	orr	x11, x12, x2
+ 	stp	x10, x11, [rp,#-32]!
+ 	PSHIFT	x2, x4, cnt
+-L(lo0):	sub	x18, x18, #1
++L(lo0):	sub	x17, x17, #1
+ L(lo3):	NSHIFT	x10, x6, tnc
+ 	PSHIFT	x13, x7, cnt
+ 	NSHIFT	x12, x7, tnc
+-	cbnz	x18, L(top)
++	cbnz	x17, L(top)
+ 
+ L(end):	orr	x10, x10, x13
+ 	orr	x11, x12, x2
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshiftc.asm
+--- a/mpn/arm64/lshiftc.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/lshiftc.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -61,7 +61,7 @@
+ 	add	rp, rp_arg, n, lsl #3
+ 	add	up, up, n, lsl #3
+ 	sub	tnc, xzr, cnt
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x4, [up,#-8]
+@@ -69,7 +69,7 @@
+ 
+ L(b01):	NSHIFT	x0, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt1)
++	cbnz	x17, L(gt1)
+ 	mvn	x2, x2
+ 	str	x2, [rp,#-8]
+ 	ret
+@@ -90,7 +90,7 @@
+ 	PSHIFT	x13, x5, cnt
+ 	NSHIFT	x10, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt2)
++	cbnz	x17, L(gt2)
+ 	eon	x10, x10, x13
+ 	mvn	x2, x2
+ 	stp	x2, x10, [rp,#-16]
+@@ -125,11 +125,11 @@
+ 	eon	x11, x12, x2
+ 	stp	x10, x11, [rp,#-32]!
+ 	PSHIFT	x2, x4, cnt
+-L(lo0):	sub	x18, x18, #1
++L(lo0):	sub	x17, x17, #1
+ L(lo3):	NSHIFT	x10, x6, tnc
+ 	PSHIFT	x13, x7, cnt
+ 	NSHIFT	x12, x7, tnc
+-	cbnz	x18, L(top)
++	cbnz	x17, L(top)
+ 
+ L(end):	eon	x10, x10, x13
+ 	eon	x11, x12, x2
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/mul_1.asm
+--- a/mpn/arm64/mul_1.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/mul_1.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -56,7 +56,7 @@
+ 
+ PROLOGUE(mpn_mul_1)
+ 	adds	x4, xzr, xzr		C clear register and cy flag
+-L(com):	lsr	x18, n, #2
++L(com):	lsr	x17, n, #2
+ 	tbnz	n, #0, L(bx1)
+ 
+ L(bx0):	mov	x11, x4
+@@ -65,7 +65,7 @@
+ L(b10):	ldp	x4, x5, [up]
+ 	mul	x8, x4, v0
+ 	umulh	x10, x4, v0
+-	cbz	x18, L(2)
++	cbz	x17, L(2)
+ 	ldp	x6, x7, [up,#16]!
+ 	mul	x9, x5, v0
+ 	b	L(mid)-8
+@@ -80,7 +80,7 @@
+ 	str	x9, [rp],#8
+ 	tbnz	n, #1, L(b10)
+ 
+-L(b01):	cbz	x18, L(1)
++L(b01):	cbz	x17, L(1)
+ 
+ L(b00):	ldp	x6, x7, [up]
+ 	mul	x8, x6, v0
+@@ -90,8 +90,8 @@
+ 	adcs	x12, x8, x11
+ 	umulh	x11, x7, v0
+ 	add	rp, rp, #16
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x17, x17, #1
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	mul	x8, x4, v0
+@@ -110,8 +110,8 @@
+ 	stp	x12, x13, [rp],#32
+ 	adcs	x12, x8, x11
+ 	umulh	x11, x7, v0
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	mul	x8, x4, v0
+ 	adcs	x13, x9, x10
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rsh1aors_n.asm
+--- a/mpn/arm64/rsh1aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/rsh1aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -59,7 +59,7 @@
+ 
+ ASM_START()
+ PROLOGUE(func_n)
+-	lsr	x18, n, #2
++	lsr	x6, n, #2
+ 
+ 	tbz	n, #0, L(bx0)
+ 
+@@ -69,7 +69,7 @@
+ 
+ L(b01):	ADDSUB	x13, x5, x9
+ 	and	x10, x13, #1
+-	cbz	x18, L(1)
++	cbz	x6, L(1)
+ 	ldp	x4, x5, [up],#48
+ 	ldp	x8, x9, [vp],#48
+ 	ADDSUBC	x14, x4, x8
+@@ -80,8 +80,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	str	x17, [rp], #24
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x6, x6, #1
++	cbz	x6, L(end)
+ 	b	L(top)
+ 
+ L(1):	cset	x14, COND
+@@ -97,7 +97,7 @@
+ 	ldp	x8, x9, [vp],#32
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+-	cbz	x18, L(3)
++	cbz	x6, L(3)
+ 	ldp	x4, x5, [up,#-16]
+ 	ldp	x8, x9, [vp,#-16]
+ 	extr	x17, x12, x15, #1
+@@ -117,7 +117,7 @@
+ 	ADDSUB	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	and	x10, x12, #1
+-	cbz	x18, L(2)
++	cbz	x6, L(2)
+ 	ldp	x4, x5, [up,#-16]
+ 	ldp	x8, x9, [vp,#-16]
+ 	ADDSUBC	x14, x4, x8
+@@ -134,8 +134,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	add	rp, rp, #16
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x6, x6, #1
++	cbz	x6, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#-16]
+@@ -152,8 +152,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	stp	x16, x17, [rp],#32
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x6, x6, #1
++	cbnz	x6, L(top)
+ 
+ L(end):	extr	x16, x15, x14, #1
+ 	extr	x17, x12, x15, #1
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rshift.asm
+--- a/mpn/arm64/rshift.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/rshift.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -60,7 +60,7 @@
+ PROLOGUE(mpn_rshift)
+ 	mov	rp, rp_arg
+ 	sub	tnc, xzr, cnt
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x5, [up]
+@@ -68,7 +68,7 @@
+ 
+ L(b01):	NSHIFT	x0, x5, tnc
+ 	PSHIFT	x2, x5, cnt
+-	cbnz	x18, L(gt1)
++	cbnz	x17, L(gt1)
+ 	str	x2, [rp]
+ 	ret
+ L(gt1):	ldp	x4, x5, [up,#8]
+@@ -89,7 +89,7 @@
+ 	PSHIFT	x13, x4, cnt
+ 	NSHIFT	x10, x5, tnc
+ 	PSHIFT	x2, x5, cnt
+-	cbnz	x18, L(gt2)
++	cbnz	x17, L(gt2)
+ 	orr	x10, x10, x13
+ 	stp	x10, x2, [rp]
+ 	ret
+@@ -121,11 +121,11 @@
+ 	orr	x11, x12, x2
+ 	stp	x11, x10, [rp,#32]!
+ 	PSHIFT	x2, x5, cnt
+-L(lo0):	sub	x18, x18, #1
++L(lo0):	sub	x17, x17, #1
+ L(lo3):	NSHIFT	x10, x7, tnc
+ 	NSHIFT	x12, x6, tnc
+ 	PSHIFT	x13, x6, cnt
+-	cbnz	x18, L(top)
++	cbnz	x17, L(top)
+ 
+ L(end):	orr	x10, x10, x13
+ 	orr	x11, x12, x2
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/sqr_diag_addlsh1.asm
+--- a/mpn/arm64/sqr_diag_addlsh1.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/sqr_diag_addlsh1.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -47,7 +47,7 @@
+ ASM_START()
+ PROLOGUE(mpn_sqr_diag_addlsh1)
+ 	ldr	x15, [up],#8
+-	lsr	x18, n, #1
++	lsr	x14, n, #1
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	adds	x7, xzr, xzr
+@@ -62,8 +62,8 @@
+ 	ldr	x17, [up],#16
+ 	ldp	x6, x7, [tp],#32
+ 	umulh	x11, x15, x15
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x14, x14, #1
++	cbz	x14, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	extr	x9, x6, x5, #63
+@@ -84,8 +84,8 @@
+ 	extr	x8, x5, x4, #63
+ 	stp	x12, x13, [rp],#16
+ 	adcs	x12, x8, x10
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x14, x14, #1
++	cbnz	x14, L(top)
+ 
+ L(end):	extr	x9, x6, x5, #63
+ 	mul	x10, x17, x17
diff --git a/deps/patches/libunwind-dwarf-table.patch b/deps/patches/libunwind-dwarf-table.patch
new file mode 100644
index 0000000000000..5905982f9a349
--- /dev/null
+++ b/deps/patches/libunwind-dwarf-table.patch
@@ -0,0 +1,36 @@
+From a5b5fd28ed03cb1ab524d24dc534c1fa167bf5a1 Mon Sep 17 00:00:00 2001
+From: Alex Arslan <ararslan@comcast.net>
+Date: Fri, 5 Nov 2021 16:58:41 -0700
+Subject: [PATCH] Fix table indexing in `dwarf_search_unwind_table`
+
+`table_len` is used as an index into `table`, assuming it represents the
+number of entries. However, it is defined as the number of entries
+multiplied by `sizeof(unw_word_t)`. This is accounted for in other
+places that use `table_len`, e.g. in `lookup`, which divides out the
+size of `unw_word_t`, but the indexing expression uses `table_len`
+directly. So when `table` has say 2 entries, we're actually looking at
+index 15 rather than 1 in the comparison. This can cause the conditional
+to erroneously evaluate to true, allowing the following line to
+segfault.
+
+This was observed with JIT compiled code from Julia with LLVM on
+FreeBSD.
+
+Co-Authored-By: Jameson Nash <vtjnash@gmail.com>
+---
+ src/dwarf/Gfind_proc_info-lsb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c
+index 5e27a501..af4cbce8 100644
+--- a/src/dwarf/Gfind_proc_info-lsb.c
++++ b/src/dwarf/Gfind_proc_info-lsb.c
+@@ -866,7 +866,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+   if (as == unw_local_addr_space)
+     {
+       e = lookup (table, table_len, ip - ip_base);
+-      if (e && &e[1] < &table[table_len])
++      if (e && &e[1] < &table[table_len / sizeof (unw_word_t)])
+ 	last_ip = e[1].start_ip_offset + ip_base;
+       else
+ 	last_ip = di->end_ip;
diff --git a/deps/patches/llvm-12-D75072-SCEV-add-type.patch b/deps/patches/llvm-12-D75072-SCEV-add-type.patch
deleted file mode 100644
index c74cd70b20e65..0000000000000
--- a/deps/patches/llvm-12-D75072-SCEV-add-type.patch
+++ /dev/null
@@ -1,425 +0,0 @@
-From 4827d22b3e297b82c7689f0fb06b38e67d92b578 Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Wed, 21 Apr 2021 12:25:07 -0400
-Subject: [PATCH] [SCEV] Record NI types in add exprs
-
-This fixes a case where loop-reduce introduces ptrtoint/inttoptr for
-non-integral address space pointers. Over the past several years, we
-have gradually improved the SCEVExpander to actually do something
-sensible for non-integral pointer types. However, that obviously
-relies on the expander knowing what the type of the SCEV expression is.
-That is usually the case, but there is one important case where it's
-not: The type of an add expression is just the type of the last operand,
-so if the non-integral pointer is not the last operand, later uses of
-that SCEV may not realize that the given add expression contains
-non-integral pointers and may try to expand it as integers.
-
-One interesting observation is that we do get away with this scheme in
-shockingly many cases. The reason for this is that SCEV expressions
-often have an `scUnknown` pointer base, which our sort order on the
-operands of add expressions sort behind basically everything else,
-so it usually ends up as the last operand.
-
-One situation where this fails is included as a test case. This test
-case was bugpoint-reduced from the issue reported at
-https://github.com/JuliaLang/julia/issues/31156. What happens here
-is that the pointer base is an scAddRec from an outer loop, plus an
-scUnknown integer offset. By our sort order, the scUnknown gets sorted
-after the scAddRec pointer base, thus making an add expression of these
-two operands have integer type. This then confuses the expander, into
-attempting to expand the whole thing as integers, which will obviously
-fail when reaching the non-integral pointer.
-
-I considered a few options to solve this, but here's what I ended up
-settling on: The AddExpr class gains a new subclass that explicitly
-stores the type of the expression. This subclass is used whenever one
-of the operands is a non-integral pointer. To reduce the impact for the
-regular case (where the SCEV expression contains no non-integral
-pointers), a bit flag is kept in each flag expression to indicate
-whether it is of non-integral pointer type (this should give the same
-answer as asking if getType() is non-integral, but performing that
-query may involve a pointer chase and requires the DataLayout). For
-add expressions that flag is also used to indicate whether we're using
-the subclass or not. This is slightly inefficient, because it uses
-the subclass even in the (not uncommon) case where the last operand
-does actually accurately reflect the non-integral pointer type. However,
-it didn't seem worth the extra flag bit and complexity to do this
-micro-optimization.
-
-I had hoped that we could additionally restrict mul exprs from
-containing any non-integral pointers, and also require add exprs to
-only have one operand containg such pointers (but not more), but this
-turned out not to work. The reason for this is that SCEV wants to
-form differences between pointers, which it represents as `A + B*-1`,
-so we need to allow both multiplication by `-1` and addition with
-multiple non-integral pointer arguments. I'm not super happy with
-that situation, but I think it exposes a more general problem with
-non-integral pointers in LLVM. We don't actually have a way to express
-the difference between two non-integral pointers at the IR level.
-In theory this is a problem for SCEV, because it means that we can't
-materialize such SCEV expression. However, in practice, these
-expressions generally have the same base pointer, so SCEV will
-appropriately simplify them to just the integer components.
-Nevertheless it is a bit unsatisfying. Perhaps we could have an
-intrinsic that takes the byte difference between two pointers to the
-same allocated object (in the same sense as is used in getelementptr),
-which should be a sensible operation even for non-integral pointers.
-However, given the practical considerations above, that's a project
-for another time. For now, simply allowing the existing pointer-diff
-pattern for non-integral pointers seems to work ok.
-
-Differential Revision: https://reviews.llvm.org/D75072
----
- llvm/include/llvm/Analysis/ScalarEvolution.h  | 21 ++++-
- .../Analysis/ScalarEvolutionExpressions.h     | 81 ++++++++++++++++---
- llvm/lib/Analysis/ScalarEvolution.cpp         | 41 +++++++---
- .../LoopStrengthReduce/nonintegral.ll         | 35 +++++++-
- 4 files changed, 155 insertions(+), 23 deletions(-)
-
-diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h
-index b3f199de2cfa..d98fbeb5dcf7 100644
---- llvm/include/llvm/Analysis/ScalarEvolution.h
-+++ llvm/include/llvm/Analysis/ScalarEvolution.h
-@@ -120,6 +120,19 @@ public:
-     NoWrapMask = (1 << 3) - 1
-   };
- 
-+  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
-+  ///
-+  /// When constructing SCEV expressions for LLVM expressions with non-integral
-+  /// pointer types, some additional processing is required to ensure that we
-+  /// don't introduce any illegal transformations. However, non-integral pointer
-+  /// types are a very rarely used feature, so we want to make sure to only do
-+  /// such processing if they are actually used. To ensure minimal performance
-+  /// impact, we memoize that fact in using these flags.
-+  enum HasNonIntegralPointerFlag {
-+    FlagNoNIPointers = 0,
-+    FlagHasNIPointers = (1 << 3)
-+  };
-+
-   explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
-                 unsigned short ExpressionSize)
-       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
-@@ -156,6 +169,10 @@ public:
-     return ExpressionSize;
-   }
- 
-+  bool hasNonIntegralPointers() const {
-+    return SubclassData & FlagHasNIPointers;
-+  }
-+
-   /// Print out the internal representation of this scalar to the specified
-   /// stream.  This should really only be used for debugging purposes.
-   void print(raw_ostream &OS) const;
-@@ -745,7 +762,7 @@ public:
-                                         const BasicBlock *ExitingBlock);
- 
-   /// The terms "backedge taken count" and "exit count" are used
--  /// interchangeably to refer to the number of times the backedge of a loop 
-+  /// interchangeably to refer to the number of times the backedge of a loop
-   /// has executed before the loop is exited.
-   enum ExitCountKind {
-     /// An expression exactly describing the number of times the backedge has
-@@ -758,7 +775,7 @@ public:
-   };
- 
-   /// Return the number of times the backedge executes before the given exit
--  /// would be taken; if not exactly computable, return SCEVCouldNotCompute. 
-+  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
-   /// For a single exit loop, this value is equivelent to the result of
-   /// getBackedgeTakenCount.  The loop is guaranteed to exit (via *some* exit)
-   /// before the backedge is executed (ExitCount + 1) times.  Note that there
-diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index 37e675f08afc..6e532b22f5b3 100644
---- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-+++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -228,6 +228,13 @@ class Type;
-       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
-     }
- 
-+    void setHasNIPtr(bool HasNIPtr) {
-+      if (HasNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+      else
-+        SubclassData &= ~FlagHasNIPointers;
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-@@ -264,19 +271,16 @@ class Type;
- 
-     Type *Ty;
- 
-+  protected:
-     SCEVAddExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-         : SCEVCommutativeExpr(ID, scAddExpr, O, N) {
--      auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) {
--        return Op->getType()->isPointerTy();
--      });
--      if (FirstPointerTypedOp != operands().end())
--        Ty = (*FirstPointerTypedOp)->getType();
--      else
--        Ty = getOperand(0)->getType();
-+
-     }
- 
-   public:
--    Type *getType() const { return Ty; }
-+    // Returns the type of the add expression, by looking either at the last operand
-+    // or deferring to the SCEVAddNIExpr subclass.
-+    Type *getType() const;
- 
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-@@ -284,6 +288,46 @@ class Type;
-     }
-   };
- 
-+  /// This node represents an addition of some number of SCEVs, one which
-+  /// is a non-integral pointer type, requiring us to know the type exactly for
-+  /// correctness.
-+  class SCEVAddNIExpr : public SCEVAddExpr {
-+    friend class ScalarEvolution;
-+    PointerType *NIType;
-+
-+    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
-+                  PointerType *NIType)
-+        : SCEVAddExpr(ID, O, N), NIType(NIType) {
-+      SubclassData |= FlagHasNIPointers;
-+    }
-+
-+  public:
-+    Type *getType() const { return NIType; }
-+
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) {
-+      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
-+    }
-+  };
-+
-+  inline Type *SCEVAddExpr::getType() const {
-+    // In general, use the type of the last operand, which is likely to be a
-+    // pointer type, if there is one. This doesn't usually matter, but it can
-+    // help reduce casts when the expressions are expanded. In the (unusual)
-+    // case that we're working with non-integral pointers, we have a subclass
-+    // that stores that type explicitly.
-+    if (hasNonIntegralPointers())
-+      return cast<SCEVAddNIExpr>(this)->getType();
-+
-+    auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) {
-+      return Op->getType()->isPointerTy();
-+    });
-+    if (FirstPointerTypedOp != operands().end())
-+      return (*FirstPointerTypedOp)->getType();
-+    else
-+      return  getOperand(0)->getType();
-+  }
-+
-   /// This node represents multiplication of some number of SCEVs.
-   class SCEVMulExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
-@@ -293,6 +337,18 @@ class Type;
-       : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
- 
-   public:
-+    Type *getType() const {
-+      // In general, we can't form SCEVMulExprs with non-integral pointer types,
-+      // but for the moment we need to allow a special case: Multiplying by
-+      // -1 to be able express the difference between two pointers. In order
-+      // to maintain the invariant that SCEVs with the NI flag set should have
-+      // a type corresponding to the contained NI ptr, we need to return the
-+      // type of the pointer here.
-+      if (hasNonIntegralPointers())
-+        return getOperand(getNumOperands() - 1)->getType();
-+      return SCEVCommutativeExpr::getType();
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scMulExpr;
-@@ -531,9 +587,12 @@ class Type;
-     /// instances owned by a ScalarEvolution.
-     SCEVUnknown *Next;
- 
--    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
--                ScalarEvolution *se, SCEVUnknown *next) :
--      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
-+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
-+                SCEVUnknown *next, bool ValueIsNIPtr)
-+        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
-+      if (ValueIsNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+    }
- 
-     // Implement CallbackVH.
-     void deleted() override;
-diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp
-index fe9d8297d679..1fa7b8ce1451 100644
---- llvm/lib/Analysis/ScalarEvolution.cpp
-+++ llvm/lib/Analysis/ScalarEvolution.cpp
-@@ -389,12 +389,13 @@ Type *SCEV::getType() const {
-   case scSignExtend:
-     return cast<SCEVCastExpr>(this)->getType();
-   case scAddRecExpr:
--  case scMulExpr:
-   case scUMaxExpr:
-   case scSMaxExpr:
-   case scUMinExpr:
-   case scSMinExpr:
-     return cast<SCEVNAryExpr>(this)->getType();
-+  case scMulExpr:
-+    return cast<SCEVMulExpr>(this)->getType();
-   case scAddExpr:
-     return cast<SCEVAddExpr>(this)->getType();
-   case scUDivExpr:
-@@ -2679,16 +2680,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddExpr);
--  for (const SCEV *Op : Ops)
--    ID.AddPointer(Op);
-+  bool HasNIPtr = false;
-+  PointerType *NIPtrType = nullptr;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    ID.AddPointer(Ops[i]);
-+    if (Ops[i]->hasNonIntegralPointers()) {
-+      HasNIPtr = true;
-+      NIPtrType = cast<PointerType>(Ops[i]->getType());
-+    }
-+  }
-   void *IP = nullptr;
-   SCEVAddExpr *S =
-       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-   if (!S) {
-     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--    S = new (SCEVAllocator)
--        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-+    if (HasNIPtr)
-+      S = new (SCEVAllocator)
-+          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
-+    else
-+      S = new (SCEVAllocator)
-+          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-     UniqueSCEVs.InsertNode(S, IP);
-     addToLoopUseLists(S);
-   }
-@@ -2701,8 +2713,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-                                        const Loop *L, SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddRecExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
-     ID.AddPointer(Ops[i]);
-+  }
-   ID.AddPointer(L);
-   void *IP = nullptr;
-   SCEVAddRecExpr *S =
-@@ -2716,6 +2730,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   setNoWrapFlags(S, Flags);
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
-   return S;
- }
- 
-@@ -2724,8 +2739,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scMulExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  bool HasNIPtr = false;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
-     ID.AddPointer(Ops[i]);
-+  }
-   void *IP = nullptr;
-   SCEVMulExpr *S =
-     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-@@ -2738,6 +2756,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(HasNIPtr);
-   return S;
- }
- 
-@@ -3615,8 +3634,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
-     return ExistingSCEV;
-   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator)
-+  SCEVMinMaxExpr *S = new (SCEVAllocator)
-       SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
-+  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
-+  // operands all need to be of the same type.
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
- 
-   UniqueSCEVs.InsertNode(S, IP);
-   addToLoopUseLists(S);
-@@ -3716,8 +3738,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
-            "Stale SCEVUnknown in uniquing map!");
-     return S;
-   }
-+  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
-   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
--                                            FirstUnknown);
-+                                            FirstUnknown, ValueIsNIPtr);
-   FirstUnknown = cast<SCEVUnknown>(S);
-   UniqueSCEVs.InsertNode(S, IP);
-   return S;
-diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-index 5648e3aa74af..6936521f3a64 100644
---- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-+++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-@@ -2,7 +2,7 @@
- 
- ; Address Space 10 is non-integral. The optimizer is not allowed to use
- ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
-@@ -43,3 +43,36 @@ if38:                                             ; preds = %L119
- done:                                             ; preds = %if38
-   ret void
- }
-+
-+; This is a bugpoint-reduced regression test - It doesn't make too much sense by itself,
-+; but creates the correct SCEV expressions to reproduce the issue. See
-+; https://github.com/JuliaLang/julia/issues/31156 for the original bug report.
-+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 {
-+; CHECK-NOT: inttoptr
-+; CHECK-NOT: ptrtoint
-+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
-+top:
-+  br label %L42.L46_crit_edge.us
-+
-+L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
-+  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
-+  %0 = sub i64 %value_phi11.us, %b
-+  %1 = add i64 %0, %c
-+  %spec.select = select i1 %g, i64 %d, i64 0
-+  br label %L62.us.us
-+
-+L82.us.us.loopexit:                               ; preds = %L62.us.us
-+  %2 = add i64 %e, %value_phi11.us
-+  br label %L42.L46_crit_edge.us
-+
-+L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
-+  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
-+  %3 = add i64 %1, %value_phi21.us.us
-+  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
-+  %5 = load i8, i8 addrspace(13)* %4, align 1
-+  %6 = add i64 %f, %value_phi21.us.us
-+  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
-+}
-+
-+!1 = distinct !{!1, !2}
-+!2 = !{!"llvm.loop.isvectorized", i32 1}
--- 
-2.31.1
-
diff --git a/deps/patches/llvm-12-D97435-AArch64-movaddrreg.patch b/deps/patches/llvm-12-D97435-AArch64-movaddrreg.patch
deleted file mode 100644
index 01d49a85f1007..0000000000000
--- a/deps/patches/llvm-12-D97435-AArch64-movaddrreg.patch
+++ /dev/null
@@ -1,164 +0,0 @@
-From 3adadbab531e0d7dc17499a6570b129e87f00c77 Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Wed, 21 Apr 2021 12:38:40 -0400
-Subject: [PATCH] [Aarch64] Correct register class for pseudo instructions
-
-This constrains the Mov* and similar pseudo instructions to take
-GPR64common register classes rather than GPR64. GPR64 includes XZR
-which is invalid here, because this pseudo instruction expands
-into an adrp/add pair sharing a destination register. XZR is invalid
-on add and attempting to encode it will instead increment the stack
-pointer causing crashes (downstream report at [1]). The test case
-there reproduces on LLVM11, but I do not have a test case that
-reaches this code path on main, since it is being masked by
-improved dead code elimination introduced in D91513. Nevertheless,
-this seems like a good thing to fix in case there are other cases
-that dead code elimination doesn't clean up (e.g. if `optnone` is
-used and the optimization is skipped).
-
-I think it would be worth auditing uses of GPR64 in pseudo
-instructions to see if there are any similar issues, but I do not
-have a high enough view of the backend or knowledge of the
-Aarch64 architecture to do this quickly.
-
-[1] https://github.com/JuliaLang/julia/issues/39818
-
-Reviewed By: t.p.northover
-
-Differential Revision: https://reviews.llvm.org/D97435
----
- .../AArch64/AArch64ExpandPseudoInsts.cpp      |  1 +
- llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 32 +++++++++----------
- .../GlobalISel/select-blockaddress.mir        |  5 +--
- .../select-jump-table-brjt-constrain.mir      |  2 +-
- .../GlobalISel/select-jump-table-brjt.mir     |  2 +-
- 5 files changed, 22 insertions(+), 20 deletions(-)
-
-diff --git llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
-index e57650ae60b1..612fbeb5f531 100644
---- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
-+++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
-@@ -886,6 +886,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
-   case AArch64::MOVaddrEXT: {
-     // Expand into ADRP + ADD.
-     Register DstReg = MI.getOperand(0).getReg();
-+    assert(DstReg != AArch64::XZR);
-     MachineInstrBuilder MIB1 =
-         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
-             .add(MI.getOperand(1));
-diff --git llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64InstrInfo.td
-index 171d3dbaa814..6fe0bd1ef168 100644
---- llvm/lib/Target/AArch64/AArch64InstrInfo.td
-+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
-@@ -656,40 +656,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in {
- // removed, along with the AArch64Wrapper node.
- 
- let AddedComplexity = 10 in
--def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
--                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
-+def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
-+                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
-               Sched<[WriteLDAdr]>;
- 
- // The MOVaddr instruction should match only when the add is not folded
- // into a load or store address.
- def MOVaddr
--    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
-+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
-+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
-                                             tglobaladdr:$low))]>,
-       Sched<[WriteAdrAdr]>;
- def MOVaddrJT
--    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
-+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
-+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
-                                              tjumptable:$low))]>,
-       Sched<[WriteAdrAdr]>;
- def MOVaddrCP
--    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
-+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
-+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
-                                              tconstpool:$low))]>,
-       Sched<[WriteAdrAdr]>;
- def MOVaddrBA
--    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
-+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
-+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
-                                              tblockaddress:$low))]>,
-       Sched<[WriteAdrAdr]>;
- def MOVaddrTLS
--    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
-+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
-+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
-                                             tglobaltlsaddr:$low))]>,
-       Sched<[WriteAdrAdr]>;
- def MOVaddrEXT
--    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
-+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
-+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
-                                             texternalsym:$low))]>,
-       Sched<[WriteAdrAdr]>;
- // Normally AArch64addlow either gets folded into a following ldr/str,
-@@ -697,8 +697,8 @@ def MOVaddrEXT
- // might appear without either of them, so allow lowering it into a plain
- // add.
- def ADDlowTLS
--    : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
--             [(set GPR64:$dst, (AArch64addlow GPR64:$src,
-+    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
-+             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
-                                             tglobaltlsaddr:$low))]>,
-       Sched<[WriteAdr]>;
- 
-diff --git llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
-index 45012f23de62..70cb802ed3a3 100644
---- llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
-+++ llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
-@@ -30,9 +30,10 @@ registers:
- body:             |
-   ; CHECK-LABEL: name: test_blockaddress
-   ; CHECK: bb.0 (%ir-block.0):
--  ; CHECK:   [[MOVaddrBA:%[0-9]+]]:gpr64 = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block)
-+  ; CHECK:   [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block)
-   ; CHECK:   [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr
--  ; CHECK:   STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store 8 into @addr)
-+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]]
-+  ; CHECK:   STRXui [[COPY]], [[MOVaddr]], 0 :: (store 8 into @addr)
-   ; CHECK:   BR [[MOVaddrBA]]
-   ; CHECK: bb.1.block (address-taken):
-   ; CHECK:   RET_ReallyLR
-diff --git llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir
-index 440a03173c83..59b8dea2d0ce 100644
---- llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir
-+++ llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir
-@@ -30,7 +30,7 @@ body:             |
-   ; CHECK:   Bcc 8, %bb.3, implicit $nzcv
-   ; CHECK: bb.1:
-   ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
--  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
-+  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
-   ; CHECK:   early-clobber %6:gpr64, early-clobber %7:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
-   ; CHECK:   BR %6
-   ; CHECK: bb.2:
-diff --git llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
-index 6b84c6d10843..b8c9a6c881da 100644
---- llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
-+++ llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
-@@ -65,7 +65,7 @@ body:             |
-   ; CHECK: bb.1.entry:
-   ; CHECK:   successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab)
-   ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
--  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
-+  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
-   ; CHECK:   early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
-   ; CHECK:   BR %18
-   ; CHECK: bb.2.sw.bb:
--- 
-2.31.1
-
diff --git a/deps/patches/llvm-12-fde-symbols-aarch64.patch b/deps/patches/llvm-12-fde-symbols-aarch64.patch
deleted file mode 100644
index c62e65765ae21..0000000000000
--- a/deps/patches/llvm-12-fde-symbols-aarch64.patch
+++ /dev/null
@@ -1,158 +0,0 @@
-From 7133a3d3b0bd639d36d9d40f1135159442ab73c7 Mon Sep 17 00:00:00 2001
-From: Cody Tapscott <cody+github@tapscott.me>
-Date: Mon, 24 May 2021 15:11:39 -0700
-Subject: [PATCH] Do not patch FDE symbols in RuntimeDyld, on targets that use
- non-absolute symbol relocations in `.eh_frame`
-
-Since processFDE adds a delta to the values in the FDE, it assumes that the relocations for the .eh_frame section have not been applied by RuntimeDyld. It expects instead that only the relocation addend has been written to the symbol locations, and that the section-to-section offset needs to be added.
-
-However, there are platform differences that interfere with this:
-1) X86-64 has DwarfFDESymbolsUseAbsDiff enabled in its AsmInfo, causing an absolute symbol to be emitted for the FDE pcStart.  Absolute symbols are skipped as a relocation by RuntimeDyld, so the processFDE function in RuntimeDyldMachO.cpp calculates the relocation correctly.
-2) AArch64 has DwarfFDESymbolsUseAbsDiff disabled, so a relocation is emitted in the eh_frame section. Since this isn't absolute, the relocation is applied by RuntimeDyld. This means that processFDE ends up adding an additional section-to-section offset to the pcStart field, generating an incorrect FDE.
-
-Differential Revision: https://reviews.llvm.org/D103052
----
- .../RuntimeDyld/RuntimeDyldMachO.cpp          | 37 +++++++++++--------
- .../RuntimeDyld/RuntimeDyldMachO.h            |  8 +++-
- .../Targets/RuntimeDyldMachOAArch64.h         |  2 +
- .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h |  2 +
- .../Targets/RuntimeDyldMachOI386.h            |  2 +
- .../Targets/RuntimeDyldMachOX86_64.h          |  2 +
- 6 files changed, 35 insertions(+), 18 deletions(-)
-
-diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
-index 9ca76602ea18..e61bfd1bd31c 100644
---- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
-+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
-@@ -272,9 +272,9 @@ RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
- }
- 
- template <typename Impl>
--unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(uint8_t *P,
--                                                          int64_t DeltaForText,
--                                                          int64_t DeltaForEH) {
-+unsigned char *RuntimeDyldMachOCRTPBase<Impl>::patchFDERelocations(uint8_t *P,
-+                                                                   int64_t DeltaForText,
-+                                                                   int64_t DeltaForEH) {
-   typedef typename Impl::TargetPtrT TargetPtrT;
- 
-   LLVM_DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText
-@@ -324,19 +324,24 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
-       continue;
-     SectionEntry *Text = &Sections[SectionInfo.TextSID];
-     SectionEntry *EHFrame = &Sections[SectionInfo.EHFrameSID];
--    SectionEntry *ExceptTab = nullptr;
--    if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
--      ExceptTab = &Sections[SectionInfo.ExceptTabSID];
--
--    int64_t DeltaForText = computeDelta(Text, EHFrame);
--    int64_t DeltaForEH = 0;
--    if (ExceptTab)
--      DeltaForEH = computeDelta(ExceptTab, EHFrame);
--
--    uint8_t *P = EHFrame->getAddress();
--    uint8_t *End = P + EHFrame->getSize();
--    while (P != End) {
--      P = processFDE(P, DeltaForText, DeltaForEH);
-+
-+    // If the FDE includes absolute symbol relocations (not supported
-+    // by RuntimeDyld), we need to manually patch-up the values
-+    if (doDwarfFDESymbolsUseAbsDiff()) {
-+      SectionEntry *ExceptTab = nullptr;
-+      if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
-+        ExceptTab = &Sections[SectionInfo.ExceptTabSID];
-+
-+      int64_t DeltaForText = computeDelta(Text, EHFrame);
-+      int64_t DeltaForEH = 0;
-+      if (ExceptTab)
-+        DeltaForEH = computeDelta(ExceptTab, EHFrame);
-+
-+      uint8_t *P = EHFrame->getAddress();
-+      uint8_t *End = P + EHFrame->getSize();
-+      while (P != End) {
-+        P = patchFDERelocations(P, DeltaForText, DeltaForEH);
-+      }
-     }
- 
-     MemMgr.registerEHFrames(EHFrame->getAddress(), EHFrame->getLoadAddress(),
-diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
-index 650e7b79fbb8..a7e5c9cb56e8 100644
---- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
-+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
-@@ -43,6 +43,10 @@ protected:
-     SID ExceptTabSID;
-   };
- 
-+  // Returns true if the FDE section includes absolute symbol relocations
-+  // on this platform.
-+  virtual bool doDwarfFDESymbolsUseAbsDiff() = 0;
-+
-   // When a module is loaded we save the SectionID of the EH frame section
-   // in a table until we receive a request to register all unregistered
-   // EH frame sections with the memory manager.
-@@ -147,8 +151,8 @@ private:
-   Impl &impl() { return static_cast<Impl &>(*this); }
-   const Impl &impl() const { return static_cast<const Impl &>(*this); }
- 
--  unsigned char *processFDE(uint8_t *P, int64_t DeltaForText,
--                            int64_t DeltaForEH);
-+  unsigned char *patchFDERelocations(uint8_t *P, int64_t DeltaForText,
-+                                     int64_t DeltaForEH);
- 
- public:
-   RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr,
-diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
-index f2ee1b06d494..90a9a4c44c84 100644
---- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
-+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
-@@ -30,6 +30,8 @@ public:
- 
-   unsigned getStubAlignment() override { return 8; }
- 
-+  bool doDwarfFDESymbolsUseAbsDiff() override { return false; }
-+  
-   /// Extract the addend encoded in the instruction / memory location.
-   Expected<int64_t> decodeAddend(const RelocationEntry &RE) const {
-     const SectionEntry &Section = Sections[RE.SectionID];
-diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
-index a76958a9e2c2..7281249d25bf 100644
---- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
-+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
-@@ -33,6 +33,8 @@ public:
- 
-   unsigned getStubAlignment() override { return 4; }
- 
-+  bool doDwarfFDESymbolsUseAbsDiff() override { return false; }
-+
-   Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &SR) override {
-     auto Flags = RuntimeDyldImpl::getJITSymbolFlags(SR);
-     if (!Flags)
-diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
-index 523deb29b723..755bc13afeb4 100644
---- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
-+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
-@@ -30,6 +30,8 @@ public:
- 
-   unsigned getStubAlignment() override { return 1; }
- 
-+  bool doDwarfFDESymbolsUseAbsDiff() override { return true; }
-+
-   Expected<relocation_iterator>
-   processRelocationRef(unsigned SectionID, relocation_iterator RelI,
-                        const ObjectFile &BaseObjT,
-diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
-index 28febbdb948c..9854da24a2ce 100644
---- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
-+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
-@@ -30,6 +30,8 @@ public:
- 
-   unsigned getStubAlignment() override { return 8; }
- 
-+  bool doDwarfFDESymbolsUseAbsDiff() override { return true; }
-+
-   Expected<relocation_iterator>
-   processRelocationRef(unsigned SectionID, relocation_iterator RelI,
-                        const ObjectFile &BaseObjT,
--- 
-2.30.1 (Apple Git-130)
-
diff --git a/deps/patches/llvm-12-force-eh_frame-aarch64.patch b/deps/patches/llvm-12-force-eh_frame-aarch64.patch
deleted file mode 100644
index aff55f35e7bd8..0000000000000
--- a/deps/patches/llvm-12-force-eh_frame-aarch64.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 026f3518c4713e388a8ed06fa032e0925d35c6f5 Mon Sep 17 00:00:00 2001
-From: Cody Tapscott <cody+github@tapscott.me>
-Date: Mon, 24 May 2021 16:36:06 -0700
-Subject: [PATCH] Force `.eh_frame` emission on AArch64
-
-We need to force the emission of the EH Frame section (currently done via SupportsCompactUnwindWithoutEHFrame in the MCObjectFileInfo for the target), since libunwind doesn't yet support dynamically registering compact unwind information at run-time.
----
- llvm/lib/MC/MCObjectFileInfo.cpp | 7 ++++---
- 1 file changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
-index 1a448f040b3b..e12154deca5f 100644
---- a/llvm/lib/MC/MCObjectFileInfo.cpp
-+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
-@@ -57,9 +57,10 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
-           MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
-       SectionKind::getReadOnly());
- 
--  if (T.isOSDarwin() &&
--      (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
--    SupportsCompactUnwindWithoutEHFrame = true;
-+  // Disabled for now, since we need to emit EH Frames for stack unwinding in the JIT
-+  // if (T.isOSDarwin() &&
-+  //     (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
-+  //   SupportsCompactUnwindWithoutEHFrame = true;
- 
-   if (T.isWatchABI())
-     OmitDwarfIfHaveCompactUnwind = true;
--- 
-2.30.1 (Apple Git-130)
-
diff --git a/deps/patches/llvm-D75072-SCEV-add-type.patch b/deps/patches/llvm-D75072-SCEV-add-type.patch
deleted file mode 100644
index 9a9e801e970a4..0000000000000
--- a/deps/patches/llvm-D75072-SCEV-add-type.patch
+++ /dev/null
@@ -1,415 +0,0 @@
-commit a55a3ab4dc5c66c153b2988fc4fa46b39bfc92fc
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Mon Feb 24 14:18:22 2020 -0500
-
-    [SCEV] Record NI types in add exprs
-    
-    Summary:
-    This fixes a case where loop-reduce introduces ptrtoint/inttoptr for
-    non-integral address space pointers. Over the past several years, we
-    have gradually improved the SCEVExpander to actually do something
-    sensible for non-integral pointer types. However, that obviously
-    relies on the expander knowing what the type of the SCEV expression is.
-    That is usually the case, but there is one important case where it's
-    not: The type of an add expression is just the type of the last operand,
-    so if the non-integral pointer is not the last operand, later uses of
-    that SCEV may not realize that the given add expression contains
-    non-integral pointers and may try to expand it as integers.
-    
-    One interesting observation is that we do get away with this scheme in
-    shockingly many cases. The reason for this is that SCEV expressions
-    often have an `scUnknown` pointer base, which our sort order on the
-    operands of add expressions sorts behind basically everything else,
-    so it usually ends up as the last operand.
-    
-    One situation where this fails is included as a test case. This test
-    case was bugpoint-reduced from the issue reported at
-    https://github.com/JuliaLang/julia/issues/31156. What happens here
-    is that the pointer base is an scAddRec from an outer loop, plus an
-    scUnknown integer offset. By our sort order, the scUnknown gets sorted
-    after the scAddRec pointer base, thus making an add expression of these
-    two operands have integer type. This then confuses the expander, into
-    attempting to expand the whole thing as integers, which will obviously
-    fail when reaching the non-integral pointer.
-    
-    I considered a few options to solve this, but here's what I ended up
-    settling on: The AddExpr class gains a new subclass that explicitly
-    stores the type of the expression. This subclass is used whenever one
-    of the operands is a non-integral pointer. To reduce the impact for the
-    regular case (where the SCEV expression contains no non-integral
-    pointers), a bit flag is kept in each flag expression to indicate
-    whether it is of non-integral pointer type (this should give the same
-    answer as asking if getType() is non-integral, but performing that
-    query may involve a pointer chase and requires the DataLayout). For
-    add expressions that flag is also used to indicate whether we're using
-    the subclass or not. This is slightly inefficient, because it uses
-    the subclass even in the (not uncommon) case where the last operand
-    does actually accurately reflect the non-integral pointer type. However,
-    it didn't seem worth the extra flag bit and complexity to do this
-    micro-optimization.
-    
-    I had hoped that we could additionally restrict mul exprs from
-    containing any non-integral pointers, and also require add exprs to
-    only have one operand containing such pointers (but not more), but this
-    turned out not to work. The reason for this is that SCEV wants to
-    form differences between pointers, which it represents as `A + B*-1`,
-    so we need to allow both multiplication by `-1` and addition with
-    multiple non-integral pointer arguments. I'm not super happy with
-    that situation, but I think it exposes a more general problem with
-    non-integral pointers in LLVM. We don't actually have a way to express
-    the difference between two non-integral pointers at the IR level.
-    In theory this is a problem for SCEV, because it means that we can't
-    materialize such SCEV expressions. However, in practice, these
-    expressions generally have the same base pointer, so SCEV will
-    appropriately simplify them to just the integer components.
-    Nevertheless it is a bit unsatisfying. Perhaps we could have an
-    intrinsic that takes the byte difference between two pointers to the
-    same allocated object (in the same sense as is used in getelementptr),
-    which should be a sensible operation even for non-integral pointers.
-    However, given the practical considerations above, that's a project
-    for another time. For now, simply allowing the existing pointer-diff
-    pattern for non-integral pointers seems to work ok.
-    
-    Reviewers: sanjoy, reames, vtjnash, vchuravy
-    
-    Subscribers: hiraditya, javed.absar, llvm-commits
-    
-    Tags: #llvm
-    
-    Differential Revision: https://reviews.llvm.org/D75072
-
-diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h
-index 0bd98ef37e7..317bdeac3f0 100644
---- llvm/include/llvm/Analysis/ScalarEvolution.h
-+++ llvm/include/llvm/Analysis/ScalarEvolution.h
-@@ -118,6 +118,19 @@ public:
-     NoWrapMask = (1 << 3) - 1
-   };
- 
-+  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
-+  ///
-+  /// When constructing SCEV expressions for LLVM expressions with non-integral
-+  /// pointer types, some additional processing is required to ensure that we
-+  /// don't introduce any illegal transformations. However, non-integral pointer
-+  /// types are a very rarely used feature, so we want to make sure to only do
-+  /// such processing if they are actually used. To ensure minimal performance
-+  /// impact, we memoize that fact using these flags.
-+  enum HasNonIntegralPointerFlag {
-+    FlagNoNIPointers = 0,
-+    FlagHasNIPointers = (1 << 3)
-+  };
-+
-   explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
-                 unsigned short ExpressionSize)
-       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
-@@ -154,6 +167,10 @@ public:
-     return ExpressionSize;
-   }
- 
-+  bool hasNonIntegralPointers() const {
-+    return SubclassData & FlagHasNIPointers;
-+  }
-+
-   /// Print out the internal representation of this scalar to the specified
-   /// stream.  This should really only be used for debugging purposes.
-   void print(raw_ostream &OS) const;
-@@ -747,7 +764,7 @@ public:
-                                         BasicBlock *ExitingBlock);
- 
-   /// Return the number of times the backedge executes before the given exit
--  /// would be taken; if not exactly computable, return SCEVCouldNotCompute. 
-+  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
-   /// For a single exit loop, this value is equivelent to the result of
-   /// getBackedgeTakenCount.  The loop is guaranteed to exit (via *some* exit)
-   /// before the backedge is executed (ExitCount + 1) times.  Note that there
-diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index d008af7b7e6..39ab35a8b8c 100644
---- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-+++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -188,6 +188,13 @@ class Type;
-       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
-     }
- 
-+    void setHasNIPtr(bool HasNIPtr) {
-+      if (HasNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+      else
-+        SubclassData &= ~FlagHasNIPointers;
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-@@ -222,24 +229,54 @@ class Type;
-   class SCEVAddExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
- 
-+  protected:
-     SCEVAddExpr(const FoldingSetNodeIDRef ID,
-                 const SCEV *const *O, size_t N)
-       : SCEVCommutativeExpr(ID, scAddExpr, O, N) {}
- 
-   public:
--    Type *getType() const {
--      // Use the type of the last operand, which is likely to be a pointer
--      // type, if there is one. This doesn't usually matter, but it can help
--      // reduce casts when the expressions are expanded.
--      return getOperand(getNumOperands() - 1)->getType();
-+    /// Returns the type of the add expression, by looking either at the last
-+    /// operand or deferring to the SCEVAddNIExpr subclass for non-integral
-+    /// pointers.
-+    Type *getType() const;
-+
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; }
-+  };
-+
-+  /// This node represents an addition of some number of SCEVs, one of which
-+  /// is a non-integral pointer type, requiring us to know the type exactly for
-+  /// correctness.
-+  class SCEVAddNIExpr : public SCEVAddExpr {
-+    friend class ScalarEvolution;
-+    PointerType *NIType;
-+
-+    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
-+                  PointerType *NIType)
-+        : SCEVAddExpr(ID, O, N), NIType(NIType) {
-+      SubclassData |= FlagHasNIPointers;
-     }
- 
-+  public:
-+    Type *getType() const { return NIType; }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr;
-+      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
-     }
-   };
- 
-+  inline Type *SCEVAddExpr::getType() const {
-+    // In general, use the type of the last operand, which is likely to be a
-+    // pointer type, if there is one. This doesn't usually matter, but it can
-+    // help reduce casts when the expressions are expanded. In the (unusual)
-+    // case that we're working with non-integral pointers, we have a subclass
-+    // that stores that type explicitly.
-+    if (hasNonIntegralPointers())
-+      return cast<SCEVAddNIExpr>(this)->getType();
-+    return getOperand(getNumOperands() - 1)->getType();
-+  }
-+
-   /// This node represents multiplication of some number of SCEVs.
-   class SCEVMulExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
-@@ -249,6 +286,18 @@ class Type;
-       : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
- 
-   public:
-+    Type *getType() const {
-+      // In general, we can't form SCEVMulExprs with non-integral pointer types,
-+      // but for the moment we need to allow a special case: Multiplying by
-+      // -1 to be able to express the difference between two pointers. In order
-+      // to maintain the invariant that SCEVs with the NI flag set should have
-+      // a type corresponding to the contained NI ptr, we need to return the
-+      // type of the pointer here.
-+      if (hasNonIntegralPointers())
-+        return getOperand(getNumOperands() - 1)->getType();
-+      return SCEVCommutativeExpr::getType();
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scMulExpr;
-@@ -475,9 +524,12 @@ class Type;
-     /// instances owned by a ScalarEvolution.
-     SCEVUnknown *Next;
- 
--    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
--                ScalarEvolution *se, SCEVUnknown *next) :
--      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
-+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
-+                SCEVUnknown *next, bool ValueIsNIPtr)
-+        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
-+      if (ValueIsNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+    }
- 
-     // Implement CallbackVH.
-     void deleted() override;
-diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp
-index bc2cfd6fcc4..2f8eb665c5d 100644
---- llvm/lib/Analysis/ScalarEvolution.cpp
-+++ llvm/lib/Analysis/ScalarEvolution.cpp
-@@ -358,12 +358,13 @@ Type *SCEV::getType() const {
-   case scSignExtend:
-     return cast<SCEVCastExpr>(this)->getType();
-   case scAddRecExpr:
--  case scMulExpr:
-   case scUMaxExpr:
-   case scSMaxExpr:
-   case scUMinExpr:
-   case scSMinExpr:
-     return cast<SCEVNAryExpr>(this)->getType();
-+  case scMulExpr:
-+    return cast<SCEVMulExpr>(this)->getType();
-   case scAddExpr:
-     return cast<SCEVAddExpr>(this)->getType();
-   case scUDivExpr:
-@@ -2441,8 +2442,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-   }
- 
-   // Limit recursion calls depth.
--  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
-+  if (Depth > MaxArithDepth || hasHugeExpression(Ops)) {
-     return getOrCreateAddExpr(Ops, Flags);
-+  }
- 
-   // Okay, check to see if the same value occurs in the operand list more than
-   // once.  If so, merge them together into an multiply expression.  Since we
-@@ -2783,16 +2785,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddExpr);
--  for (const SCEV *Op : Ops)
--    ID.AddPointer(Op);
-+  bool HasNIPtr = false;
-+  PointerType *NIPtrType = nullptr;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    ID.AddPointer(Ops[i]);
-+    if (Ops[i]->hasNonIntegralPointers()) {
-+      HasNIPtr = true;
-+      NIPtrType = cast<PointerType>(Ops[i]->getType());
-+    }
-+  }
-   void *IP = nullptr;
-   SCEVAddExpr *S =
-       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-   if (!S) {
-     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--    S = new (SCEVAllocator)
--        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-+    if (HasNIPtr)
-+      S = new (SCEVAllocator)
-+          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
-+    else
-+      S = new (SCEVAllocator)
-+          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-     UniqueSCEVs.InsertNode(S, IP);
-     addToLoopUseLists(S);
-   }
-@@ -2805,8 +2818,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-                                        const Loop *L, SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddRecExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
-     ID.AddPointer(Ops[i]);
-+  }
-   ID.AddPointer(L);
-   void *IP = nullptr;
-   SCEVAddRecExpr *S =
-@@ -2820,6 +2835,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
-   return S;
- }
- 
-@@ -2828,8 +2844,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scMulExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  bool HasNIPtr = false;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
-     ID.AddPointer(Ops[i]);
-+  }
-   void *IP = nullptr;
-   SCEVMulExpr *S =
-     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-@@ -2842,6 +2861,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(HasNIPtr);
-   return S;
- }
- 
-@@ -3666,8 +3686,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
-     return ExistingSCEV;
-   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
-+  SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr(
-       ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
-+  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
-+  // operands all need to be of the same type.
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
- 
-   UniqueSCEVs.InsertNode(S, IP);
-   addToLoopUseLists(S);
-@@ -3744,8 +3767,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
-            "Stale SCEVUnknown in uniquing map!");
-     return S;
-   }
-+  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
-   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
--                                            FirstUnknown);
-+                                            FirstUnknown, ValueIsNIPtr);
-   FirstUnknown = cast<SCEVUnknown>(S);
-   UniqueSCEVs.InsertNode(S, IP);
-   return S;
-diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-index 5648e3aa74a..6936521f3a6 100644
---- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-+++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-@@ -2,7 +2,7 @@
- 
- ; Address Space 10 is non-integral. The optimizer is not allowed to use
- ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
-@@ -43,3 +43,36 @@ if38:                                             ; preds = %L119
- done:                                             ; preds = %if38
-   ret void
- }
-+
-+; This is a bugpoint-reduced regression test - It doesn't make too much sense by itself,
-+; but creates the correct SCEV expressions to reproduce the issue. See
-+; https://github.com/JuliaLang/julia/issues/31156 for the original bug report.
-+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 {
-+; CHECK-NOT: inttoptr
-+; CHECK-NOT: ptrtoint
-+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
-+top:
-+  br label %L42.L46_crit_edge.us
-+
-+L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
-+  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
-+  %0 = sub i64 %value_phi11.us, %b
-+  %1 = add i64 %0, %c
-+  %spec.select = select i1 %g, i64 %d, i64 0
-+  br label %L62.us.us
-+
-+L82.us.us.loopexit:                               ; preds = %L62.us.us
-+  %2 = add i64 %e, %value_phi11.us
-+  br label %L42.L46_crit_edge.us
-+
-+L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
-+  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
-+  %3 = add i64 %1, %value_phi21.us.us
-+  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
-+  %5 = load i8, i8 addrspace(13)* %4, align 1
-+  %6 = add i64 %f, %value_phi21.us.us
-+  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
-+}
-+
-+!1 = distinct !{!1, !2}
-+!2 = !{!"llvm.loop.isvectorized", i32 1}
diff --git a/deps/patches/llvm-libunwind-force-dwarf.patch b/deps/patches/llvm-libunwind-force-dwarf.patch
deleted file mode 100644
index 697782afe5e07..0000000000000
--- a/deps/patches/llvm-libunwind-force-dwarf.patch
+++ /dev/null
@@ -1,181 +0,0 @@
-An updated version of this libosxunwind commit:
-
-Author: Keno Fischer <kfischer@college.harvard.edu>
-Date:   Tue Aug 27 15:01:22 2013 -0400
-
-    Add option to step with DWARF
-
----
-diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
-index 23ef47f4ac83..ea6c5cb86438 100644
---- a/libunwind/include/libunwind.h
-+++ b/libunwind/include/libunwind.h
-@@ -102,6 +102,7 @@ extern "C" {
- 
- extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL;
- extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL;
-+extern int unw_init_local_dwarf(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL;
- extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL;
- extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL;
- extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL;
-diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
-index f346c720d22c..e44f22a91513 100644
---- a/libunwind/src/UnwindCursor.hpp
-+++ b/libunwind/src/UnwindCursor.hpp
-@@ -436,6 +436,9 @@ public:
-   virtual bool isSignalFrame() {
-     _LIBUNWIND_ABORT("isSignalFrame not implemented");
-   }
-+  virtual void setForceDWARF(bool) {
-+    _LIBUNWIND_ABORT("setForceDWARF not implemented");
-+  }
-   virtual bool getFunctionName(char *, size_t, unw_word_t *) {
-     _LIBUNWIND_ABORT("getFunctionName not implemented");
-   }
-@@ -891,6 +894,7 @@ public:
-   virtual void        getInfo(unw_proc_info_t *);
-   virtual void        jumpto();
-   virtual bool        isSignalFrame();
-+  virtual void        setForceDWARF(bool force);
-   virtual bool        getFunctionName(char *buf, size_t len, unw_word_t *off);
-   virtual void        setInfoBasedOnIPRegister(bool isReturnAddress = false);
-   virtual const char *getRegisterName(int num);
-@@ -938,7 +942,7 @@ private:
-                                             const UnwindInfoSections &sects);
-   int stepWithCompactEncoding() {
-   #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
--    if ( compactSaysUseDwarf() )
-+    if ( _forceDwarf || compactSaysUseDwarf() )
-       return stepWithDwarfFDE();
-   #endif
-     R dummy;
-@@ -1173,13 +1177,14 @@ private:
-   unw_proc_info_t  _info;
-   bool             _unwindInfoMissing;
-   bool             _isSignalFrame;
-+  bool             _forceDwarf;
- };
- 
- 
- template <typename A, typename R>
- UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
-     : _addressSpace(as), _registers(context), _unwindInfoMissing(false),
--      _isSignalFrame(false) {
-+      _isSignalFrame(false), _forceDwarf(false) {
-   static_assert((check_fit<UnwindCursor<A, R>, unw_cursor_t>::does_fit),
-                 "UnwindCursor<> does not fit in unw_cursor_t");
-   memset(&_info, 0, sizeof(_info));
-@@ -1187,7 +1192,8 @@ UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
- 
- template <typename A, typename R>
- UnwindCursor<A, R>::UnwindCursor(A &as, void *)
--    : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) {
-+    : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false),
-+    _forceDwarf(false) {
-   memset(&_info, 0, sizeof(_info));
-   // FIXME
-   // fill in _registers from thread arg
-@@ -1243,6 +1249,10 @@ template <typename A, typename R> bool UnwindCursor<A, R>::isSignalFrame() {
-   return _isSignalFrame;
- }
- 
-+template <typename A, typename R> void UnwindCursor<A, R>::setForceDWARF(bool force) {
-+  _forceDwarf = force;
-+}
-+
- #endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
- 
- #if defined(_LIBUNWIND_ARM_EHABI)
-@@ -1895,7 +1905,13 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
-         // record that we have no unwind info.
-         if (_info.format == 0)
-           _unwindInfoMissing = true;
-+  #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-+        if (!(_forceDwarf || compactSaysUseDwarf(&dwarfOffset)))
-+          return;
-+  #else
-         return;
-+  #endif
-+
-       }
-     }
- #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
-index fd079da30895..206afcbbaf78 100644
---- a/libunwind/src/libunwind.cpp
-+++ b/libunwind/src/libunwind.cpp
-@@ -69,6 +69,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
-   new (reinterpret_cast<UnwindCursor<LocalAddressSpace, REGISTER_KIND> *>(cursor))
-       UnwindCursor<LocalAddressSpace, REGISTER_KIND>(
-           context, LocalAddressSpace::sThisAddressSpace);
-+  static_assert(sizeof(unw_cursor_t) >= sizeof(UnwindCursor<LocalAddressSpace,REGISTER_KIND>), "libunwind header outdated");
- #undef REGISTER_KIND
-   AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
-   co->setInfoBasedOnIPRegister();
-@@ -77,6 +78,54 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
- }
- _LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local)
- 
-+_LIBUNWIND_HIDDEN int __unw_init_local_dwarf(unw_cursor_t *cursor,
-+                                       unw_context_t *context) {
-+  _LIBUNWIND_TRACE_API("__unw_init_local_dwarf(cursor=%p, context=%p)",
-+                       static_cast<void *>(cursor),
-+                       static_cast<void *>(context));
-+#if defined(__i386__)
-+# define REGISTER_KIND Registers_x86
-+#elif defined(__x86_64__)
-+# define REGISTER_KIND Registers_x86_64
-+#elif defined(__powerpc64__)
-+# define REGISTER_KIND Registers_ppc64
-+#elif defined(__ppc__)
-+# define REGISTER_KIND Registers_ppc
-+#elif defined(__aarch64__)
-+# define REGISTER_KIND Registers_arm64
-+#elif defined(__arm__)
-+# define REGISTER_KIND Registers_arm
-+#elif defined(__or1k__)
-+# define REGISTER_KIND Registers_or1k
-+#elif defined(__hexagon__)
-+# define REGISTER_KIND Registers_hexagon
-+#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32
-+# define REGISTER_KIND Registers_mips_o32
-+#elif defined(__mips64)
-+# define REGISTER_KIND Registers_mips_newabi
-+#elif defined(__mips__)
-+# warning The MIPS architecture is not supported with this ABI and environment!
-+#elif defined(__sparc__)
-+# define REGISTER_KIND Registers_sparc
-+#elif defined(__riscv) && __riscv_xlen == 64
-+# define REGISTER_KIND Registers_riscv
-+#else
-+# error Architecture not supported
-+#endif
-+  // Use "placement new" to allocate UnwindCursor in the cursor buffer.
-+  new (reinterpret_cast<UnwindCursor<LocalAddressSpace, REGISTER_KIND> *>(cursor))
-+      UnwindCursor<LocalAddressSpace, REGISTER_KIND>(
-+          context, LocalAddressSpace::sThisAddressSpace);
-+  static_assert(sizeof(unw_cursor_t) >= sizeof(UnwindCursor<LocalAddressSpace,REGISTER_KIND>), "libunwind header outdated");
-+#undef REGISTER_KIND
-+  AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
-+  co->setForceDWARF(true);
-+  co->setInfoBasedOnIPRegister();
-+
-+  return UNW_ESUCCESS;
-+}
-+_LIBUNWIND_WEAK_ALIAS(__unw_init_local_dwarf, unw_init_local_dwarf)
-+
- /// Get value of specified register at cursor position in stack frame.
- _LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
-                                     unw_word_t *value) {
-diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h
-index 316dee298246..5b9f7e2f56cd 100644
---- a/libunwind/src/libunwind_ext.h
-+++ b/libunwind/src/libunwind_ext.h
-@@ -25,6 +25,7 @@ extern "C" {
- 
- extern int __unw_getcontext(unw_context_t *);
- extern int __unw_init_local(unw_cursor_t *, unw_context_t *);
-+extern int __unw_init_local_dwarf(unw_cursor_t *, unw_context_t *);
- extern int __unw_step(unw_cursor_t *);
- extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *);
- extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *);
diff --git a/deps/patches/llvm-libunwind-prologue-epilogue.patch b/deps/patches/llvm-libunwind-prologue-epilogue.patch
deleted file mode 100644
index 7dadca728f9cf..0000000000000
--- a/deps/patches/llvm-libunwind-prologue-epilogue.patch
+++ /dev/null
@@ -1,183 +0,0 @@
-An updated version of this libosxunwind commit:
-
-commit ca57a5b60de4cd1daa42ed2e5d1d4aa3e96a09d1
-Author: Keno Fischer <kfischer@college.harvard.edu>
-Date:   Mon Aug 26 15:28:08 2013 -0400
-
-    Add support for unwinding during prologue/epilogue
-
----
-diff --git a/libunwind/src/CompactUnwinder.hpp b/libunwind/src/CompactUnwinder.hpp
-index 1c3175dff50a..78a658ccbc27 100644
---- a/libunwind/src/CompactUnwinder.hpp
-+++ b/libunwind/src/CompactUnwinder.hpp
-@@ -310,6 +310,50 @@ int CompactUnwinder_x86_64<A>::stepWithCompactEncodingRBPFrame(
-   uint32_t savedRegistersLocations =
-       EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
- 
-+  // If we have not stored EBP yet
-+  if (functionStart == registers.getIP()) {
-+    uint64_t rsp = registers.getSP();
-+    // old esp is ebp less return address
-+    registers.setSP(rsp+8);
-+    // pop return address into eip
-+    registers.setIP(addressSpace.get64(rsp));
-+
-+    return UNW_STEP_SUCCESS;
-+  } else if (functionStart + 1 == registers.getIP()) {
-+    uint64_t rsp = registers.getSP();
-+    // old esp is ebp less return address
-+    registers.setSP(rsp + 16);
-+    // pop return address into eip
-+    registers.setIP(addressSpace.get64(rsp + 8));
-+
-+    return UNW_STEP_SUCCESS;
-+  }
-+
-+  // If we're about to return, we've already popped the base pointer
-+  uint8_t b = addressSpace.get8(registers.getIP());
-+
-+  // This is a hack to detect VZEROUPPER but in between popq rbp and ret
-+  // It's not pretty but it works
-+  if (b == 0xC5) {
-+    if ((b = addressSpace.get8(registers.getIP() + 1)) == 0xF8 &&
-+        (b = addressSpace.get8(registers.getIP() + 2)) == 0x77)
-+      b = addressSpace.get8(registers.getIP() + 3);
-+    else
-+      goto skip_ret;
-+  }
-+
-+  if (b == 0xC3 || b == 0xCB || b == 0xC2 || b == 0xCA) {
-+    uint64_t rbp = registers.getSP();
-+    // old esp is ebp less return address
-+    registers.setSP(rbp + 16);
-+    // pop return address into eip
-+    registers.setIP(addressSpace.get64(rbp + 8));
-+
-+    return UNW_STEP_SUCCESS;
-+  }
-+
-+  skip_ret:
-+
-   uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset;
-   for (int i = 0; i < 5; ++i) {
-     switch (savedRegistersLocations & 0x7) {
-@@ -430,6 +474,118 @@ int CompactUnwinder_x86_64<A>::stepWithCompactEncodingFrameless(
-       }
-     }
-   }
-+
-+  // Note that the order of these registers is so that
-+  // registersSaved[0] is the one that will be pushed onto the stack last.
-+  // Thus, if we want to walk this from the top, we need to go in reverse.
-+  assert(regCount <= 6);
-+
-+  // check whether we are still in the prologue
-+  uint64_t curAddr = functionStart;
-+  if (regCount > 0) {
-+    for (int8_t i = (int8_t)(regCount) - 1; i >= 0; --i) {
-+      if (registers.getIP() == curAddr) {
-+        // None of the registers have been modified yet, so we don't need to reload them
-+        framelessUnwind(addressSpace, registers.getSP() + 8 * (regCount - (uint64_t)(i + 1)), registers);
-+        return UNW_STEP_SUCCESS;
-+      } else {
-+        assert(curAddr < registers.getIP());
-+      }
-+
-+
-+      // pushq %rbp and pushq %rbx is 1 byte. Everything else 2
-+      if ((UNWIND_X86_64_REG_RBP == registersSaved[i]) ||
-+          (UNWIND_X86_64_REG_RBX == registersSaved[i]))
-+        curAddr += 1;
-+      else
-+        curAddr += 2;
-+    }
-+  }
-+  if (registers.getIP() == curAddr) {
-+    // None of the registers have been modified yet, so we don't need to reload them
-+    framelessUnwind(addressSpace, registers.getSP() + 8*regCount, registers);
-+    return UNW_STEP_SUCCESS;
-+  } else {
-+    assert(curAddr < registers.getIP());
-+  }
-+
-+
-+  // And now for the epilogue
-+  {
-+    uint8_t  i  = 0;
-+    uint64_t p  = registers.getIP();
-+    uint8_t  b  = 0;
-+
-+    while (true) {
-+      b = addressSpace.get8(p++);
-+      // This is a hack to detect VZEROUPPER but in between the popq's and ret
-+      // It's not pretty but it works
-+      if (b == 0xC5) {
-+        if ((b = addressSpace.get8(p++)) == 0xF8 && (b = addressSpace.get8(p++)) == 0x77)
-+          b = addressSpace.get8(p++);
-+        else
-+          break;
-+      }
-+      //  popq %rbx    popq %rbp
-+      if (b == 0x5B || b == 0x5D) {
-+        i++;
-+      } else if (b == 0x41) {
-+        b = addressSpace.get8(p++);
-+        if (b == 0x5C || b == 0x5D || b == 0x5E || b == 0x5F)
-+          i++;
-+        else
-+          break;
-+      } else if (b == 0xC3 || b == 0xCB || b == 0xC2 || b == 0xCA) {
-+        // i pop's haven't happened yet
-+        uint64_t savedRegisters = registers.getSP() + 8 * i;
-+        if (regCount > 0) {
-+          for (int8_t j = (int8_t)(regCount) - 1; j >= (int8_t)(regCount) - i; --j) {
-+            uint64_t addr = savedRegisters - 8 * (regCount - (uint64_t)(j));
-+            switch (registersSaved[j]) {
-+              case UNWIND_X86_64_REG_RBX:
-+                registers.setRBX(addressSpace.get64(addr));
-+                break;
-+              case UNWIND_X86_64_REG_R12:
-+                registers.setR12(addressSpace.get64(addr));
-+                break;
-+              case UNWIND_X86_64_REG_R13:
-+                registers.setR13(addressSpace.get64(addr));
-+                break;
-+              case UNWIND_X86_64_REG_R14:
-+                registers.setR14(addressSpace.get64(addr));
-+                break;
-+              case UNWIND_X86_64_REG_R15:
-+                registers.setR15(addressSpace.get64(addr));
-+                break;
-+              case UNWIND_X86_64_REG_RBP:
-+                registers.setRBP(addressSpace.get64(addr));
-+                break;
-+              default:
-+                _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for "
-+                             "function starting at 0x%llX",
-+                              encoding, functionStart);
-+                _LIBUNWIND_ABORT("invalid compact unwind encoding");
-+            }
-+          }
-+        }
-+        framelessUnwind(addressSpace, savedRegisters, registers);
-+        return UNW_STEP_SUCCESS;
-+      } else {
-+        break;
-+      }
-+    }
-+  }
-+
-+  /*
-+   0x10fe2733a:  5b                             popq   %rbx
-+   0x10fe2733b:  41 5c                          popq   %r12
-+   0x10fe2733d:  41 5d                          popq   %r13
-+   0x10fe2733f:  41 5e                          popq   %r14
-+   0x10fe27341:  41 5f                          popq   %r15
-+   0x10fe27343:  5d                             popq   %rbp
-+   */
-+
-+
-   uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount;
-   for (uint32_t i = 0; i < regCount; ++i) {
-     switch (registersSaved[i]) {
diff --git a/deps/patches/neoverse-generic-kernels.patch b/deps/patches/neoverse-generic-kernels.patch
new file mode 100644
index 0000000000000..ab37e3783bf3e
--- /dev/null
+++ b/deps/patches/neoverse-generic-kernels.patch
@@ -0,0 +1,19 @@
+diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1
+index ea010db4..074d7215 100644
+--- a/kernel/arm64/KERNEL.NEOVERSEN1
++++ b/kernel/arm64/KERNEL.NEOVERSEN1
+@@ -91,10 +91,10 @@ IDAMAXKERNEL   = iamax_thunderx2t99.c
+ ICAMAXKERNEL   = izamax_thunderx2t99.c
+ IZAMAXKERNEL   = izamax_thunderx2t99.c
+ 
+-SNRM2KERNEL    = scnrm2_thunderx2t99.c
+-DNRM2KERNEL    = dznrm2_thunderx2t99.c
+-CNRM2KERNEL    = scnrm2_thunderx2t99.c
+-ZNRM2KERNEL    = dznrm2_thunderx2t99.c
++SNRM2KERNEL    = nrm2.S
++DNRM2KERNEL    = nrm2.S
++CNRM2KERNEL    = znrm2.S
++ZNRM2KERNEL    = znrm2.S
+ 
+ DDOTKERNEL     = dot_thunderx2t99.c
+ SDOTKERNEL     = dot_thunderx2t99.c
diff --git a/deps/patches/openblas-Only-filter-out-mavx-on-Sandybridge.patch b/deps/patches/openblas-Only-filter-out-mavx-on-Sandybridge.patch
deleted file mode 100644
index 10ba3b4a5c6b5..0000000000000
--- a/deps/patches/openblas-Only-filter-out-mavx-on-Sandybridge.patch
+++ /dev/null
@@ -1,221 +0,0 @@
-From c4da892ba0798f8697e7b3219fd631651647e45f Mon Sep 17 00:00:00 2001
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Fri, 14 May 2021 23:19:10 +0200
-Subject: [PATCH 2/2] Only filter out -mavx on Sandybridge ZGEMM/ZTRMM kernels
-
----
- kernel/Makefile.L3 | 86 ++++++++++++++++++++++++++++++++++++----------
- 1 file changed, 68 insertions(+), 18 deletions(-)
-
-diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
-index be10ee01..2d9e3ec3 100644
---- a/kernel/Makefile.L3
-+++ b/kernel/Makefile.L3
-@@ -818,8 +818,10 @@ ifeq ($(OS), AIX)
- 	m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@
- 	rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s
--else
-+else ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@
- endif
- 
- $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
-@@ -828,8 +830,10 @@ ifeq ($(OS), AIX)
- 	m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@
- 	rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s
--else
-+else ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@
- endif
- 
- $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
-@@ -838,8 +842,10 @@ ifeq ($(OS), AIX)
- 	m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@
- 	rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s
--else
-+else ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@
- endif
- 
- $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
-@@ -848,8 +854,10 @@ ifeq ($(OS), AIX)
- 	m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@
- 	rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s
--else
-+else ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@
- endif
- 
- $(KDIR)xgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
-@@ -1044,8 +1052,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
- 	rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1054,8 +1064,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
- 	rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1064,8 +1076,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
- 	rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1074,8 +1088,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
- 	rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s 
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1084,8 +1100,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
- 	rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1094,8 +1112,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
- 	rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1104,8 +1124,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
- 	rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1114,8 +1136,10 @@ ifeq ($(OS), AIX)
- 	m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
- 	rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s
--else
-+else ifeq ($(CORE), SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
- endif
- 
- else
-@@ -1187,28 +1211,54 @@ $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
- 
- $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
-+endif
- 
- $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
--
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
-+endif
- $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
--
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
-+endif
- $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
--
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
-+endif
- $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
--
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
-+endif
- $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
--
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
-+endif
- $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
--
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
-+endif
- $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
-+ifeq ($(CORE),SANDYBRIDGE)
- 	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
-+else
-+	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
-+endif
- endif
- 
- 
--- 
-2.31.1
-
diff --git a/deps/patches/openblas-exshift.patch b/deps/patches/openblas-exshift.patch
deleted file mode 100644
index 4a0016ef66252..0000000000000
--- a/deps/patches/openblas-exshift.patch
+++ /dev/null
@@ -1,149 +0,0 @@
-commit c4b5abbe43d7c22215ef36ef4f7c1413c975678c
-Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date:   Fri Jan 29 10:45:36 2021 +0100
-
-    fix data type
-
-commit f87842483eee9d158f44d51d4c09662c3cff7526
-Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date:   Fri Jan 29 09:56:12 2021 +0100
-
-    fix calculation of non-exceptional shift (from Reference-LAPACK PR 477)
-
-commit 856bc365338f7559639f341d76ca8746d1628ee5
-Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date:   Wed Jan 27 13:41:45 2021 +0100
-
-    Add exceptional shift to fix rare convergence problems
-
----
-diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f
-index 73d35621..4725e716 100644
---- a/lapack-netlib/SRC/chgeqz.f
-+++ b/lapack-netlib/SRC/chgeqz.f
-@@ -320,12 +320,13 @@
-      $                   C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
-       COMPLEX            ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
-      $                   CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
--     $                   U12, X
-+     $                   U12, X, ABI12, Y
- *     ..
- *     .. External Functions ..
-+      COMPLEX            CLADIV
-       LOGICAL            LSAME
-       REAL               CLANHS, SLAMCH
--      EXTERNAL           LSAME, CLANHS, SLAMCH
-+      EXTERNAL           CLADIV, LLSAME, CLANHS, SLAMCH
- *     ..
- *     .. External Subroutines ..
-       EXTERNAL           CLARTG, CLASET, CROT, CSCAL, XERBLA
-@@ -729,22 +730,34 @@
-             AD22 = ( ASCALE*H( ILAST, ILAST ) ) /
-      $             ( BSCALE*T( ILAST, ILAST ) )
-             ABI22 = AD22 - U12*AD21
-+            ABI12 = AD12 - U12*AD11
- *
--            T1 = HALF*( AD11+ABI22 )
--            RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 )
--            TEMP = REAL( T1-ABI22 )*REAL( RTDISC ) +
--     $             AIMAG( T1-ABI22 )*AIMAG( RTDISC )
--            IF( TEMP.LE.ZERO ) THEN
--               SHIFT = T1 + RTDISC
--            ELSE
--               SHIFT = T1 - RTDISC
-+            SHIFT = ABI22
-+            CTEMP = SQRT( ABI12 )*SQRT( AD21 )
-+            TEMP = ABS1( CTEMP )
-+            IF( CTEMP.NE.ZERO ) THEN
-+               X = HALF*( AD11-SHIFT )
-+               TEMP2 = ABS1( X )
-+               TEMP = MAX( TEMP, ABS1( X ) )
-+               Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 )
-+               IF( TEMP2.GT.ZERO ) THEN
-+                  IF( REAL( X / TEMP2 )*REAL( Y )+
-+     $                AIMAG( X / TEMP2 )*AIMAG( Y ).LT.ZERO )Y = -Y
-+               END IF
-+               SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) )
-             END IF
-          ELSE
- *
- *           Exceptional shift.  Chosen for no particularly good reason.
- *
--            ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/
--     $                        (BSCALE*T(ILAST-1,ILAST-1))
-+            IF( ( IITER / 20 )*20.EQ.IITER .AND. 
-+     $         BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST ) )/( BSCALE*T( ILAST, ILAST ) )
-+            ELSE
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) )
-+            END IF
-             SHIFT = ESHIFT
-          END IF
- *
-diff --git a/lapack-netlib/SRC/zhgeqz.f b/lapack-netlib/SRC/zhgeqz.f
-index b51cba4f..b28ae47a 100644
---- a/lapack-netlib/SRC/zhgeqz.f
-+++ b/lapack-netlib/SRC/zhgeqz.f
-@@ -320,12 +320,13 @@
-      $                   C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
-       COMPLEX*16         ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
-      $                   CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
--     $                   U12, X
-+     $                   U12, X, ABI12, Y
- *     ..
- *     .. External Functions ..
-+      COMPLEX*16         ZLADIV
-       LOGICAL            LSAME
-       DOUBLE PRECISION   DLAMCH, ZLANHS
--      EXTERNAL           LSAME, DLAMCH, ZLANHS
-+      EXTERNAL           ZLADIV, LSAME, DLAMCH, ZLANHS
- *     ..
- *     .. External Subroutines ..
-       EXTERNAL           XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL
-@@ -730,22 +731,34 @@
-             AD22 = ( ASCALE*H( ILAST, ILAST ) ) /
-      $             ( BSCALE*T( ILAST, ILAST ) )
-             ABI22 = AD22 - U12*AD21
-+            ABI12 = AD12 - U12*AD11
- *
--            T1 = HALF*( AD11+ABI22 )
--            RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 )
--            TEMP = DBLE( T1-ABI22 )*DBLE( RTDISC ) +
--     $             DIMAG( T1-ABI22 )*DIMAG( RTDISC )
--            IF( TEMP.LE.ZERO ) THEN
--               SHIFT = T1 + RTDISC
--            ELSE
--               SHIFT = T1 - RTDISC
-+            SHIFT = ABI22
-+            CTEMP = SQRT( ABI12 )*SQRT( AD21 )
-+            TEMP = ABS1( CTEMP )
-+            IF( CTEMP.NE.ZERO ) THEN
-+               X = HALF*( AD11-SHIFT )
-+               TEMP2 = ABS1( X )
-+               TEMP = MAX( TEMP, ABS1( X ) )
-+               Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 )
-+               IF( TEMP2.GT.ZERO ) THEN
-+                  IF( DBLE( X / TEMP2 )*DBLE( Y )+
-+     $                DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y
-+               END IF
-+               SHIFT = SHIFT - CTEMP*ZLADIV( CTEMP, ( X+Y ) )
-             END IF
-          ELSE
- *
- *           Exceptional shift.  Chosen for no particularly good reason.
- *
--            ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/
--     $                        (BSCALE*T(ILAST-1,ILAST-1))
-+            IF( ( IITER / 20 )*20.EQ.IITER .AND. 
-+     $         BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST ) )/( BSCALE*T( ILAST, ILAST ) )
-+            ELSE
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) )
-+            END IF
-             SHIFT = ESHIFT
-          END IF
- *
diff --git a/deps/patches/openblas-filter-out-mavx-flag-on-zgemm-kernels.patch b/deps/patches/openblas-filter-out-mavx-flag-on-zgemm-kernels.patch
deleted file mode 100644
index 5c9fa4aaa59e9..0000000000000
--- a/deps/patches/openblas-filter-out-mavx-flag-on-zgemm-kernels.patch
+++ /dev/null
@@ -1,162 +0,0 @@
-From bd60fb6ffc9d14834ed03bed0f7e6e44126c6c05 Mon Sep 17 00:00:00 2001
-From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date: Thu, 13 May 2021 23:05:00 +0200
-Subject: [PATCH 1/2] filter out -mavx flag on zgemm kernels as it can cause
- problems with older gcc
-
----
- kernel/Makefile.L3 | 40 ++++++++++++++++++++--------------------
- 1 file changed, 20 insertions(+), 20 deletions(-)
-
-diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
-index d8d73996..be10ee01 100644
---- a/kernel/Makefile.L3
-+++ b/kernel/Makefile.L3
-@@ -819,7 +819,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@
- 	rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DNN $< -o $@
- endif
- 
- $(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
-@@ -829,7 +829,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@
- 	rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DCN $< -o $@
- endif
- 
- $(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
-@@ -839,7 +839,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@
- 	rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DNC $< -o $@
- endif
- 
- $(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
-@@ -849,7 +849,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@
- 	rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DDOUBLE -DCOMPLEX -DCC $< -o $@
- endif
- 
- $(KDIR)xgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
-@@ -1045,7 +1045,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
- 	rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1055,7 +1055,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
- 	rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1065,7 +1065,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
- 	rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1075,7 +1075,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
- 	rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s 
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1085,7 +1085,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
- 	rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1095,7 +1095,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
- 	rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1105,7 +1105,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
- 	rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
- endif
- 
- $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
-@@ -1115,7 +1115,7 @@ ifeq ($(OS), AIX)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
- 	rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s
- else
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
- endif
- 
- else
-@@ -1187,28 +1187,28 @@ $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL)
- 	$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
- 
- $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
- 
- $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
- 
- $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
- 
- $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
- 
- $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
- 
- $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
- 
- $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
- 
- $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
--	$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
-+	$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
- endif
- 
- 
--- 
-2.31.1
-
diff --git a/deps/patches/openblas-julia42415-lapack625-openblas3392.patch b/deps/patches/openblas-julia42415-lapack625-openblas3392.patch
new file mode 100644
index 0000000000000..e7b874b961cca
--- /dev/null
+++ b/deps/patches/openblas-julia42415-lapack625-openblas3392.patch
@@ -0,0 +1,95 @@
+From 2be5ee3cca97a597f2ee2118808a2d5eacea050c Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Fri, 1 Oct 2021 11:17:21 +0200
+Subject: [PATCH 1/4] Fix out of bounds read in ?larrv (Reference-LAPACK PR
+ 625)
+
+---
+ lapack-netlib/SRC/clarrv.f | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lapack-netlib/SRC/clarrv.f b/lapack-netlib/SRC/clarrv.f
+index a45f55ac3b..26a9febc87 100644
+--- a/lapack-netlib/SRC/clarrv.f
++++ b/lapack-netlib/SRC/clarrv.f
+@@ -351,7 +351,7 @@ SUBROUTINE CLARRV( N, VL, VU, D, L, PIVMIN,
+ *
+ *     Quick return if possible
+ *
+-      IF( N.LE.0 ) THEN
++      IF( (N.LE.0) .OR. (M.LE.0) ) THEN
+          RETURN
+       END IF
+ *
+
+From fe497efa0510466fd93578aaf9da1ad8ed4edbe7 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Fri, 1 Oct 2021 11:18:20 +0200
+Subject: [PATCH 2/4] Fix out of bounds read in ?larrv (Reference-LAPACK PR
+ 625)
+
+---
+ lapack-netlib/SRC/dlarrv.f | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lapack-netlib/SRC/dlarrv.f b/lapack-netlib/SRC/dlarrv.f
+index 4a59a2bbf9..a1c6e9c9d7 100644
+--- a/lapack-netlib/SRC/dlarrv.f
++++ b/lapack-netlib/SRC/dlarrv.f
+@@ -353,7 +353,7 @@ SUBROUTINE DLARRV( N, VL, VU, D, L, PIVMIN,
+ *
+ *     Quick return if possible
+ *
+-      IF( N.LE.0 ) THEN
++      IF( (N.LE.0).OR.(M.LE.0) ) THEN
+          RETURN
+       END IF
+ *
+
+From ddb0ff5353637bb5f5ad060c9620e334c143e3d7 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Fri, 1 Oct 2021 11:19:07 +0200
+Subject: [PATCH 3/4] Fix out of bounds read in ?larrv (Reference-LAPACK PR
+ 625)
+
+---
+ lapack-netlib/SRC/slarrv.f | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lapack-netlib/SRC/slarrv.f b/lapack-netlib/SRC/slarrv.f
+index 04519fde8c..9448b2fd92 100644
+--- a/lapack-netlib/SRC/slarrv.f
++++ b/lapack-netlib/SRC/slarrv.f
+@@ -353,7 +353,7 @@ SUBROUTINE SLARRV( N, VL, VU, D, L, PIVMIN,
+ *
+ *     Quick return if possible
+ *
+-      IF( N.LE.0 ) THEN
++      IF( (N.LE.0).OR.(M.LE.0) ) THEN
+          RETURN
+       END IF
+ *
+
+From 337b65133df174796794871b3988cd03426e6d41 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Fri, 1 Oct 2021 11:19:53 +0200
+Subject: [PATCH 4/4] Fix out of bounds read in ?larrv (Reference-LAPACK PR
+ 625)
+
+---
+ lapack-netlib/SRC/zlarrv.f | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lapack-netlib/SRC/zlarrv.f b/lapack-netlib/SRC/zlarrv.f
+index 23976dbefe..8d10e3c2e3 100644
+--- a/lapack-netlib/SRC/zlarrv.f
++++ b/lapack-netlib/SRC/zlarrv.f
+@@ -351,7 +351,7 @@ SUBROUTINE ZLARRV( N, VL, VU, D, L, PIVMIN,
+ *
+ *     Quick return if possible
+ *
+-      IF( N.LE.0 ) THEN
++      IF( (N.LE.0).OR.(M.LE.0) ) THEN
+          RETURN
+       END IF
+ *
diff --git a/deps/patches/openblas-ofast-power.patch b/deps/patches/openblas-ofast-power.patch
index 2bb01c9b08115..c741496cae757 100644
--- a/deps/patches/openblas-ofast-power.patch
+++ b/deps/patches/openblas-ofast-power.patch
@@ -1,19 +1,17 @@
- Makefile.power | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
 diff --git a/Makefile.power b/Makefile.power
-index c7e97229..8426e816 100644
+index 946f5523..19593050 100644
 --- a/Makefile.power
 +++ b/Makefile.power
-@@ -10,13 +10,13 @@ USE_OPENMP = 1
- endif
- 
+@@ -11,14 +11,14 @@ endif
+
  ifeq ($(CORE), POWER10)
+ ifneq ($(C_COMPILER), PGI)
 -CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
 +CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
  FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10  -fno-fast-math
  endif
- 
+ endif
+
  ifeq ($(CORE), POWER9)
  ifneq ($(C_COMPILER), PGI)
 -CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
@@ -21,8 +19,8 @@ index c7e97229..8426e816 100644
  ifeq ($(C_COMPILER), GCC)
  ifneq ($(GCCVERSIONGT4), 1)
  $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
-@@ -49,7 +49,7 @@ endif
- 
+@@ -51,7 +51,7 @@ endif
+
  ifeq ($(CORE), POWER8)
  ifneq ($(C_COMPILER), PGI)
 -CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx  -fno-fast-math
diff --git a/deps/patches/openblas-winexit.patch b/deps/patches/openblas-winexit.patch
index 01085102f331a..33389f34a4074 100644
--- a/deps/patches/openblas-winexit.patch
+++ b/deps/patches/openblas-winexit.patch
@@ -1,13 +1,5 @@
-From f919c3301fabbaa5d965dcc7b1c3d6892a8c730a Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Sat, 14 Mar 2020 12:05:19 +0100
-
----
- driver/others/memory.c | 131 +------------------------------------------------
- 1 file changed, 2 insertions(+), 129 deletions(-)
-
 diff --git a/driver/others/memory.c b/driver/others/memory.c
-index ba2bb55b..bf6b5529 100644
+index 6e654ccf..1d2f9f12 100644
 --- a/driver/others/memory.c
 +++ b/driver/others/memory.c
 @@ -1534,7 +1534,7 @@ void CONSTRUCTOR gotoblas_init(void) {
@@ -19,11 +11,10 @@ index ba2bb55b..bf6b5529 100644
  
    if (gotoblas_initialized == 0) return;
  
-@@ -1571,74 +1571,12 @@ void DESTRUCTOR gotoblas_quit(void) {
- #endif
+@@ -1572,75 +1572,11 @@ void DESTRUCTOR gotoblas_quit(void) {
  }
  
--#if defined(_MSC_VER) && !defined(__clang__)
+ #if defined(_MSC_VER) && !defined(__clang__)
 -BOOL APIENTRY DllMain(HMODULE hModule, DWORD  ul_reason_for_call, LPVOID lpReserved)
 -{
 -  switch (ul_reason_for_call)
@@ -65,16 +56,18 @@ index ba2bb55b..bf6b5529 100644
  #else
  #pragma comment(linker, "/INCLUDE:__tls_used")
  #endif
- 
+-
 -#ifdef _WIN64
 -#pragma const_seg(".CRT$XLB")
 -#else
 -#pragma data_seg(".CRT$XLB")
 -#endif
--static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
+-
 -#ifdef _WIN64
+-static const PIMAGE_TLS_CALLBACK dll_callback(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
 -#pragma const_seg()
 -#else
+-static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
 -#pragma data_seg()
 -#endif
 -
@@ -83,18 +76,18 @@ index ba2bb55b..bf6b5529 100644
 -#else
 -#pragma data_seg(".CRT$XTU")
 -#endif
--static int(*p_process_term)(void) = on_process_term;
+-
 -#ifdef _WIN64
+-static const int(*p_process_term)(void) = on_process_term;
 -#pragma const_seg()
 -#else
+-static int(*p_process_term)(void) = on_process_term;
 -#pragma data_seg()
 -#endif
--#endif
--
+ #endif
+ 
  #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
- /* Don't call me; this is just work around for PGI / Sun bug */
- void gotoblas_dummy_for_PGI(void) {
-@@ -3136,7 +3074,7 @@ void CONSTRUCTOR gotoblas_init(void) {
+@@ -3146,7 +3082,7 @@ void CONSTRUCTOR gotoblas_init(void) {
  
  }
  
@@ -103,7 +96,7 @@ index ba2bb55b..bf6b5529 100644
  
    if (gotoblas_initialized == 0) return;
  
-@@ -3165,71 +3103,6 @@ void DESTRUCTOR gotoblas_quit(void) {
+@@ -3175,71 +3111,6 @@ void DESTRUCTOR gotoblas_quit(void) {
  #endif
  }
  
diff --git a/deps/tools/common.mk b/deps/tools/common.mk
index aacae86139ee6..642528376d457 100644
--- a/deps/tools/common.mk
+++ b/deps/tools/common.mk
@@ -4,14 +4,12 @@
 # it will make its way into the LLVM build flags, and LLVM is picky about RPATH (though
 # apparently not on FreeBSD). Ref PR #22352
 
-CONFIGURE_COMMON := --prefix=$(abspath $(build_prefix)) --build=$(BUILD_MACHINE) --libdir=$(abspath $(build_libdir)) --bindir=$(abspath $(build_depsbindir)) $(CUSTOM_LD_LIBRARY_PATH)
+CONFIGURE_COMMON = --prefix=$(abspath $(build_prefix)) --build=$(BUILD_MACHINE) --libdir=$(abspath $(build_libdir)) --bindir=$(abspath $(build_depsbindir)) $(CUSTOM_LD_LIBRARY_PATH)
 ifneq ($(XC_HOST),)
 CONFIGURE_COMMON += --host=$(XC_HOST)
 endif
 ifeq ($(OS),WINNT)
-ifneq ($(USEMSVC), 1)
 CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) -Wl,--stack,8388608"
-endif
 else
 CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN)"
 endif
diff --git a/deps/unwind.mk b/deps/unwind.mk
index 5db0ee2f99b85..c20ce0aa91a23 100644
--- a/deps/unwind.mk
+++ b/deps/unwind.mk
@@ -13,7 +13,7 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UN
 	touch -c $(SRCCACHE)/libunwind-$(UNWIND_VER)/configure # old target
 	echo 1 > $@
 
-checksum-libunwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
+checksum-unwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
 	$(JLCHECKSUM) $<
 
 $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted
@@ -28,7 +28,11 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied: $(SRCCACHE)
 	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u < $(SRCDIR)/patches/libunwind-cfa-rsp.patch
 	echo 1 > $@
 
-$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-dwarf-table.patch
+	echo 1 > $@
+
+$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests
diff --git a/deps/utf8proc.mk b/deps/utf8proc.mk
index b368e1c12b0da..52775a2dff5e6 100644
--- a/deps/utf8proc.mk
+++ b/deps/utf8proc.mk
@@ -1,5 +1,5 @@
 ## UTF8PROC ##
-UTF8PROC_GIT_URL := git://github.com/JuliaLang/utf8proc.git
+UTF8PROC_GIT_URL := https://github.com/JuliaLang/utf8proc.git
 UTF8PROC_TAR_URL = https://api.github.com/repos/JuliaLang/utf8proc/tarball/$1
 $(eval $(call git-external,utf8proc,UTF8PROC,,,$(BUILDDIR)))
 
diff --git a/deps/zlib.mk b/deps/zlib.mk
index abc626fd2f79f..b31ab425ccfc3 100644
--- a/deps/zlib.mk
+++ b/deps/zlib.mk
@@ -1,6 +1,6 @@
 ## Zlib ##
 ifneq ($(USE_BINARYBUILDER_ZLIB), 1)
-ZLIB_GIT_URL := git://github.com/madler/zlib.git
+ZLIB_GIT_URL := https://github.com/madler/zlib.git
 ZLIB_TAR_URL = https://api.github.com/repos/madler/zlib/tarball/$1
 $(eval $(call git-external,zlib,ZLIB,,,$(SRCCACHE)))
 
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
index 468cb3bfbc3cb..3d45af2116c50 100644
--- a/doc/Manifest.toml
+++ b/doc/Manifest.toml
@@ -1,6 +1,13 @@
 # This file is machine-generated - editing it directly is not advised
 
+julia_version = "1.8.0-DEV.1110"
 manifest_format = "2.0"
+project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f"
+
+[[deps.ANSIColoredPrinters]]
+git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
+uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9"
+version = "0.0.1"
 
 [[deps.Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
@@ -11,15 +18,15 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
-git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
+git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b"
 uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.8.5"
+version = "0.8.6"
 
 [[deps.Documenter]]
-deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
-git-tree-sha1 = "621850838b3e74dd6dd047b5432d2e976877104e"
+deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
+git-tree-sha1 = "f425293f7e0acaf9144de6d731772de156676233"
 uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-version = "0.27.2"
+version = "0.27.10"
 
 [[deps.IOCapture]]
 deps = ["Logging", "Random"]
@@ -33,9 +40,9 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 
 [[deps.JSON]]
 deps = ["Dates", "Mmap", "Parsers", "Unicode"]
-git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
+git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37"
 uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-version = "0.21.1"
+version = "0.21.2"
 
 [[deps.LibGit2]]
 deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
@@ -53,12 +60,13 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 
 [[deps.NetworkOptions]]
 uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
 
 [[deps.Parsers]]
 deps = ["Dates"]
-git-tree-sha1 = "c8abc88faa3f7a3950832ac5d6e690881590d6dc"
+git-tree-sha1 = "ae4bbcadb2906ccc085cf52ac286dc1377dceccc"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "1.1.0"
+version = "2.1.2"
 
 [[deps.Printf]]
 deps = ["Unicode"]
@@ -69,11 +77,12 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 
 [[deps.Random]]
-deps = ["Serialization"]
+deps = ["SHA", "Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [[deps.SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
 
 [[deps.Serialization]]
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/doc/build/README.md b/doc/build/README.md
deleted file mode 100644
index 8dbef4232e5ad..0000000000000
--- a/doc/build/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-This directory contains various details related to building Julia:
-
-* [Detailed build instructions](build.md)
-
-Notes for various OSes:
-
-* [Linux](linux.md)
-* [macOS](macos.md)
-* [Windows](windows.md)
-* [FreeBSD](freebsd.md)
-
-Notes for various architectures:
-
-* [ARM](arm.md)
-
-Notes for building Julia for distribution:
-
-* [Distribution Notes](distributing.md)
diff --git a/doc/make.jl b/doc/make.jl
index c58b3eebed716..8be3b807400d1 100644
--- a/doc/make.jl
+++ b/doc/make.jl
@@ -155,6 +155,16 @@ DevDocs = [
         "devdocs/debuggingtips.md",
         "devdocs/valgrind.md",
         "devdocs/sanitizers.md",
+        "devdocs/probes.md",
+    ],
+    "Building Julia" => [
+        "devdocs/build/build.md",
+        "devdocs/build/linux.md",
+        "devdocs/build/macos.md",
+        "devdocs/build/windows.md",
+        "devdocs/build/freebsd.md",
+        "devdocs/build/arm.md",
+        "devdocs/build/distributing.md",
     ]
 ]
 
@@ -284,8 +294,9 @@ else
     )
 end
 
+const output_path = joinpath(buildroot, "doc", "_build", (render_pdf ? "pdf" : "html"), "en")
 makedocs(
-    build     = joinpath(buildroot, "doc", "_build", (render_pdf ? "pdf" : "html"), "en"),
+    build     = output_path,
     modules   = [Main, Base, Core, [Base.root_module(Base, stdlib.stdlib) for stdlib in STDLIB_DOCS]...],
     clean     = true,
     doctest   = ("doctest=fix" in ARGS) ? (:fix) : ("doctest=only" in ARGS) ? (:only) : ("doctest=true" in ARGS) ? true : false,
@@ -299,6 +310,32 @@ makedocs(
     pages     = PAGES,
 )
 
+# Update URLs to external stdlibs (JuliaLang/julia#43199)
+for (root, _, files) in walkdir(output_path), file in joinpath.(root, files)
+    endswith(file, ".html") || continue
+    local str
+    str = read(file, String)
+    # Index page links, update
+    #   https://github.com/JuliaLang/julia/blob/master/stdlib/${STDLIB_NAME}-${STDLIB_COMMIT}/path/to.md
+    # to
+    #   https://github.com/JuliaLang/${STDLIB_NAME}.jl/blob/master/path/to.md
+    str = replace(str, r"https://github.com/JuliaLang/julia/blob/master/stdlib/(.*)-\w{40}/(.*\.md)" =>
+                       s"https://github.com/JuliaLang/\1.jl/blob/master/\2")
+    # Links to source code, update
+    #   https://github.com/JuliaLang/julia/blob/${JULIA_COMMIT}/stdlib/${STDLIB_NAME}-${STDLIB_COMMIT}/path/to.jl#${LINES}
+    # to
+    #   https://github.com/JuliaLang/${STDLIB_NAME}.jl/blob/${STDLIB_COMMIT}/path/to.jl#${LINES}
+    str = replace(str, r"https://github\.com/JuliaLang/julia/blob/\w{40}/stdlib/(.*)-(\w{40})/(.*\.jl#L\d+(?:-L\d+)?)" =>
+                       s"https://github.com/JuliaLang/\1.jl/blob/\2/\3")
+    # Some stdlibs are not hosted by JuliaLang
+    str = replace(str, r"(https://github\.com)/JuliaLang/(ArgTools\.jl/blob)" => s"\1/JuliaIO/\2")
+    str = replace(str, r"(https://github\.com)/JuliaLang/(LibCURL\.jl/blob)" => s"\1/JuliaWeb/\2")
+    str = replace(str, r"(https://github\.com)/JuliaLang/(SHA\.jl/blob)" => s"\1/JuliaCrypto/\2")
+    str = replace(str, r"(https://github\.com)/JuliaLang/(Tar\.jl/blob)" => s"\1/JuliaIO/\2")
+    # Write back to the file
+    write(file, str)
+end
+
 # Define our own DeployConfig
 struct BuildBotConfig <: Documenter.DeployConfig end
 function Documenter.deploy_folder(::BuildBotConfig; devurl, repo, branch, kwargs...)
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index 76277e39bc079..0b008619014e1 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -25,7 +25,7 @@
 
 .\" from the front page of https://julialang.org/
 .SH NAME
-julia - high-level, high-performance dynamic programming language for technical computing
+julia - a high-level, high-performance dynamic programming language for technical computing
 
 .SH SYNOPSIS
 julia [option] [program] [args..]
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index 1e419460bef53..f35e49bffcfaf 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -39,9 +39,10 @@ Base.methods
 Base.@show
 ans
 Base.active_project
+Base.set_active_project
 ```
 
-## Keywords
+## [Keywords](@id Keywords)
 
 This is the list of reserved keywords in Julia:
 `baremodule`, `begin`, `break`, `catch`, `const`, `continue`, `do`,
@@ -320,6 +321,7 @@ Base.Libc.getpid
 Base.Libc.time()
 Base.time_ns
 Base.@time
+Base.@showtime
 Base.@timev
 Base.@timed
 Base.@elapsed
@@ -443,6 +445,7 @@ Base.@macroexpand1
 Base.code_lowered
 Base.code_typed
 Base.precompile
+Base.jit_total_bytes
 ```
 
 ## Meta
diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md
index 84e5702e0e396..d329ce6ef6119 100644
--- a/doc/src/base/collections.md
+++ b/doc/src/base/collections.md
@@ -119,8 +119,6 @@ Base.all(::Any)
 Base.all(::AbstractArray, ::Any)
 Base.all!
 Base.count
-Base.any(::Any, ::Any)
-Base.all(::Any, ::Any)
 Base.foreach
 Base.map
 Base.map!
diff --git a/doc/src/base/file.md b/doc/src/base/file.md
index 93b5be617ad4b..86a1f2bab5dcd 100644
--- a/doc/src/base/file.md
+++ b/doc/src/base/file.md
@@ -8,12 +8,14 @@ Base.Filesystem.readdir
 Base.Filesystem.walkdir
 Base.Filesystem.mkdir
 Base.Filesystem.mkpath
+Base.Filesystem.hardlink
 Base.Filesystem.symlink
 Base.Filesystem.readlink
 Base.Filesystem.chmod
 Base.Filesystem.chown
 Base.RawFD
 Base.stat
+Base.Filesystem.diskstat
 Base.Filesystem.lstat
 Base.Filesystem.ctime
 Base.Filesystem.mtime
diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md
index 2d6a462400813..acee9b5dac60b 100644
--- a/doc/src/base/io-network.md
+++ b/doc/src/base/io-network.md
@@ -13,6 +13,7 @@ Base.take!(::Base.GenericIOBuffer)
 Base.fdio
 Base.flush
 Base.close
+Base.closewrite
 Base.write
 Base.read
 Base.read!
@@ -73,7 +74,7 @@ Base.eachline
 Base.displaysize
 ```
 
-## Multimedia I/O
+## [Multimedia I/O](@id Multimedia-I/O)
 
 Just as text output is performed by [`print`](@ref) and user-defined types can indicate their textual
 representation by overloading [`show`](@ref), Julia provides a standardized mechanism for rich multimedia
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index 177324abccfaa..bdf91c991183f 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -156,10 +156,10 @@ Base.copysign
 Base.sign
 Base.signbit
 Base.flipsign
-Base.sqrt(::Real)
+Base.sqrt(::Number)
 Base.isqrt
 Base.Math.cbrt
-Base.real(::Complex)
+Base.real
 Base.imag
 Base.reim
 Base.conj
diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md
index cb8ad06488f1f..6760d3f25f5d4 100644
--- a/doc/src/base/multi-threading.md
+++ b/doc/src/base/multi-threading.md
@@ -8,14 +8,7 @@ Base.Threads.threadid
 Base.Threads.nthreads
 ```
 
-## Synchronization
-
-```@docs
-Base.Threads.Condition
-Base.Threads.Event
-```
-
-See also [Synchronization](@ref lib-task-sync).
+See also [Multi-Threading](@ref man-multithreading).
 
 ## Atomic operations
 
diff --git a/doc/src/base/numbers.md b/doc/src/base/numbers.md
index 47a33b4447264..b92bf6a1d8768 100644
--- a/doc/src/base/numbers.md
+++ b/doc/src/base/numbers.md
@@ -111,7 +111,7 @@ Base.@int128_str
 Base.@uint128_str
 ```
 
-## BigFloats and BigInts
+## [BigFloats and BigInts](@id BigFloats-and-BigInts)
 
 The [`BigFloat`](@ref) and [`BigInt`](@ref) types implements
 arbitrary-precision floating point and integer arithmetic, respectively. For
@@ -122,7 +122,6 @@ and for [`BigInt`](@ref) the [GNU Multiple Precision Arithmetic Library (GMP)]
 ```@docs
 Base.MPFR.BigFloat(::Any, rounding::RoundingMode)
 Base.precision
-Base.MPFR.precision(::Type{BigFloat})
 Base.MPFR.setprecision
 Base.GMP.BigInt(::Any)
 Base.@big_str
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index a508603a2d220..f687fe0f83622 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -26,6 +26,8 @@ Base.schedule
 
 ## [Synchronization](@id lib-task-sync)
 
+## Synchronization
+
 ```@docs
 Base.errormonitor
 Base.@sync
@@ -34,6 +36,8 @@ Base.fetch(t::Task)
 Base.timedwait
 
 Base.Condition
+Base.Threads.Condition
+Base.Threads.Event
 Base.notify
 
 Base.Semaphore
diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md
index 69b72e467e999..526f11d831127 100644
--- a/doc/src/base/punctuation.md
+++ b/doc/src/base/punctuation.md
@@ -56,5 +56,5 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | [`===`](@ref) | triple equals sign is programmatically identical equality comparison                      |
 | [`=>`](@ref Pair) | right arrow using an equals sign defines a [`Pair`](@ref) typically used to populate [dictionaries](@ref Dictionaries) |
 | `->`        | right arrow using a hyphen defines an [anonymous function](@ref man-anonymous-functions) on a single line |
-| `\|>`        | pipe operator passes output from the left argument to input of the right argument, usually a [function](@ref Function-composition-and-piping) |
+| [`\|>`](@ref)       | pipe operator passes output from the left argument to input of the right argument, usually a [function](@ref Function-composition-and-piping) |
 | `∘`         | function composition operator (typed with \circ{tab}) combines two functions as though they are a single larger [function](@ref Function-composition-and-piping) |
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
index a7e9a8ee4eeee..45ff90b6d908d 100644
--- a/doc/src/base/strings.md
+++ b/doc/src/base/strings.md
@@ -48,7 +48,7 @@ Base.findlast(::AbstractChar, ::AbstractString)
 Base.findprev(::AbstractString, ::AbstractString, ::Integer)
 Base.occursin
 Base.reverse(::Union{String,SubString{String}})
-Base.replace(s::AbstractString, ::Pair)
+Base.replace(s::AbstractString, ::Pair...)
 Base.split
 Base.rsplit
 Base.strip
@@ -66,6 +66,8 @@ Base.uppercasefirst
 Base.lowercasefirst
 Base.join
 Base.chop
+Base.chopprefix
+Base.chopsuffix
 Base.chomp
 Base.thisind
 Base.nextind
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index 8a2c93e71ced8..83f8c1cb2b695 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -53,8 +53,6 @@ call. Finally, chains of comparisons have their own special expression structure
 | `a&&b`      | `(&& a b)`                |
 | `x += 1`    | `(+= x 1)`                |
 | `a ? 1 : 2` | `(if a 1 2)`              |
-| `a:b`       | `(: a b)`                 |
-| `a:b:c`     | `(: a b c)`               |
 | `a,b`       | `(tuple a b)`             |
 | `a==b`      | `(call == a b)`           |
 | `1<i<=n`    | `(comparison 1 < i <= n)` |
@@ -78,10 +76,10 @@ call. Finally, chains of comparisons have their own special expression structure
 | `[x y]`                  | `(hcat x y)`                                      |
 | `[x y; z t]`             | `(vcat (row x y) (row z t))`                      |
 | `[x;y;; z;t;;;]`         | `(ncat 3 (nrow 2 (nrow 1 x y) (nrow 1 z t)))`     |
-| `[x for y in z, a in b]` | `(comprehension x (= y z) (= a b))`               |
-| `T[x for y in z]`        | `(typed_comprehension T x (= y z))`               |
+| `[x for y in z, a in b]` | `(comprehension (generator x (= y z) (= a b)))`   |
+| `T[x for y in z]`        | `(typed_comprehension T (generator x (= y z)))`   |
 | `(a, b, c)`              | `(tuple a b c)`                                   |
-| `(a; b; c)`              | `(block a (block b c))`                           |
+| `(a; b; c)`              | `(block a b c)`                                   |
 
 ### Macros
 
@@ -130,11 +128,11 @@ instead of `:import`.
 Julia supports more number types than many scheme implementations, so not all numbers are represented
 directly as scheme numbers in the AST.
 
-| Input                   | AST                                                     |
-|:----------------------- |:------------------------------------------------------- |
-| `11111111111111111111`  | `(macrocall @int128_str (null) "11111111111111111111")` |
-| `0xfffffffffffffffff`   | `(macrocall @uint128_str (null) "0xfffffffffffffffff")` |
-| `1111...many digits...` | `(macrocall @big_str (null) "1111....")`                |
+| Input                   | AST                                                      |
+|:----------------------- |:-------------------------------------------------------- |
+| `11111111111111111111`  | `(macrocall @int128_str nothing "11111111111111111111")` |
+| `0xfffffffffffffffff`   | `(macrocall @uint128_str nothing "0xfffffffffffffffff")` |
+| `1111...many digits...` | `(macrocall @big_str nothing "1111....")`                |
 
 ### Block forms
 
@@ -157,7 +155,7 @@ parses as:
 ```
 (if a (block (line 2) b)
     (elseif (block (line 3) c) (block (line 4) d)
-            (block (line 5 e))))
+            (block (line 6 e))))
 ```
 
 A `while` loop parses as `(while condition body)`.
@@ -438,6 +436,10 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
     Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`.
 
+  * `undefcheck`
+
+    Temporary node inserted by the compiler that will be processed in `type_lift_pass!`.
+
   * `enter`
 
     Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on
@@ -507,15 +509,46 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
         The calling convention for the call.
 
-      * `args[6:6+length(args[3])]` : arguments
+      * `args[6:5+length(args[3])]` : arguments
 
         The values for all the arguments (with types of each given in args[3]).
 
-      * `args[6+(length(args[3])+1):end]` : gc-roots
+      * `args[6+length(args[3]):end]` : gc-roots
 
         The additional objects that may need to be gc-rooted for the duration of the call.
         See [Working with LLVM](@ref Working-with-LLVM) for where these are derived from and how they get handled.
 
+  * `new_opaque_closure`
+
+    Constructs a new opaque closure. The fields are:
+
+      * `args[1]` : signature
+
+        The function signature of the opaque closure. Opaque closures don't participate in dispatch, but the input types can be restricted.
+
+      * `args[2]` : isva
+
+        Indicates whether the closure accepts varargs.
+
+      * `args[3]` : lb
+
+        Lower bound on the output type. (Defaults to `Union{}`)
+
+      * `args[4]` : ub
+
+        Upper bound on the output type. (Defaults to `Any`)
+
+      * `args[5]` : method
+
+        The actual method as an `opaque_closure_method` expression.
+
+      * `args[6:end]` : captures
+
+        The values captured by the opaque closure.
+
+    !!! compat "Julia 1.7"
+        Opaque closures were added in Julia 1.7
+
 
 ### [Method](@id ast-lowered-method)
 
@@ -559,7 +592,8 @@ A unique'd container describing the shared metadata for a single method.
 
 ### MethodInstance
 
-A unique'd container describing a single callable signature for a Method. See especially [Proper maintenance and care of multi-threading locks](@ref)
+A unique'd container describing a single callable signature for a Method.
+See especially [Proper maintenance and care of multi-threading locks](@ref Proper-maintenance-and-care-of-multi-threading-locks)
 for important details on how to modify these fields safely.
 
   * `specTypes`
diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md
index 4f9c247db8c80..f840a0283ea15 100644
--- a/doc/src/devdocs/boundscheck.md
+++ b/doc/src/devdocs/boundscheck.md
@@ -36,6 +36,37 @@ your function contains multiple layers of inlining, only `@boundscheck` blocks a
 of inlining deeper are eliminated. The rule prevents unintended changes in program behavior from
 code further up the stack.
 
+### Caution!
+
+It is easy to accidentally expose unsafe operations with `@inbounds`. You might be tempted
+to write the above example as
+
+```julia
+function sum(A::AbstractArray)
+    r = zero(eltype(A))
+    for i in 1:length(A)
+        @inbounds r += A[i]
+    end
+    return r
+end
+```
+
+Which quietly assumes 1-based indexing and therefore exposes unsafe memory access when used
+with [`OffsetArrays`](@ref man-custom-indices):
+
+```julia-repl
+julia> using OffsetArrays
+
+julia> sum(OffsetArray([1,2,3], -10))
+9164911648 # inconsistent results or segfault
+```
+
+While the original source of the error here is `1:length(A)`, the use of `@inbounds`
+increases the consequences from a bounds error to a less easily caught and debugged unsafe
+memory access. It is often difficult or impossible to prove that a method which uses
+`@inbounds` is safe, so one must weigh the benefits of performance improvements against the
+risk of segfaults and silent misbehavior, especially in public facing APIs.
+
 ## Propagating inbounds
 
 There may be certain scenarios where for code-organization reasons you want more than one layer
diff --git a/doc/build/arm.md b/doc/src/devdocs/build/arm.md
similarity index 100%
rename from doc/build/arm.md
rename to doc/src/devdocs/build/arm.md
diff --git a/doc/build/build.md b/doc/src/devdocs/build/build.md
similarity index 95%
rename from doc/build/build.md
rename to doc/src/devdocs/build/build.md
index 89b293a57df30..b071801260cdc 100644
--- a/doc/build/build.md
+++ b/doc/src/devdocs/build/build.md
@@ -66,7 +66,7 @@ Now you should be able to run Julia like this:
 
 If you are building a Julia package for distribution on Linux, macOS,
 or Windows, take a look at the detailed notes in
-[distributing.md](https://github.com/JuliaLang/julia/blob/master/doc/build/distributing.md).
+[distributing.md](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/distributing.md).
 
 ## Updating an existing source tree
 
@@ -130,14 +130,14 @@ latest version.
 
 Notes for various operating systems:
 
-* [Linux](https://github.com/JuliaLang/julia/blob/master/doc/build/linux.md)
-* [macOS](https://github.com/JuliaLang/julia/blob/master/doc/build/macos.md)
-* [Windows](https://github.com/JuliaLang/julia/blob/master/doc/build/windows.md)
-* [FreeBSD](https://github.com/JuliaLang/julia/blob/master/doc/build/freebsd.md)
+* [Linux](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/linux.md)
+* [macOS](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/macos.md)
+* [Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md)
+* [FreeBSD](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/freebsd.md)
 
 Notes for various architectures:
 
-* [ARM](https://github.com/JuliaLang/julia/blob/master/doc/build/arm.md)
+* [ARM](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/arm.md)
 
 ## Required Build Tools and External Libraries
 
@@ -202,11 +202,11 @@ uses are listed in [`deps/Versions.make`](https://github.com/JuliaLang/julia/blo
 [perl]:         https://www.perl.org
 [cmake]:        https://www.cmake.org
 [OpenLibm]:     https://github.com/JuliaLang/openlibm
-[DSFMT]:        http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/#dSFMT
+[DSFMT]:        https://github.com/MersenneTwister-Lab/dSFMT
 [OpenBLAS]:     https://github.com/xianyi/OpenBLAS
 [LAPACK]:       https://www.netlib.org/lapack
 [MKL]:          https://software.intel.com/en-us/articles/intel-mkl
-[SuiteSparse]:  http://faculty.cse.tamu.edu/davis/suitesparse.html
+[SuiteSparse]:  https://people.engr.tamu.edu/davis/suitesparse.html
 [PCRE]:         https://www.pcre.org
 [LLVM]:         https://www.llvm.org
 [LLVM libunwind]: https://github.com/llvm/llvm-project/tree/main/libunwind
diff --git a/doc/build/distributing.md b/doc/src/devdocs/build/distributing.md
similarity index 97%
rename from doc/build/distributing.md
rename to doc/src/devdocs/build/distributing.md
index 8e74568c54b59..71d780ac408ea 100644
--- a/doc/build/distributing.md
+++ b/doc/src/devdocs/build/distributing.md
@@ -1,4 +1,4 @@
-Notes for building binary distributions
+Binary distributions
 =======================================
 
 These notes are for those wishing to compile a binary distribution of Julia
@@ -108,14 +108,8 @@ Alternatively, Julia may be built as a framework by invoking `make` with the
 Windows
 -------
 
-The best supported method of creating a Julia distribution on Windows
-is to cross-compile from a Linux distribution such as Ubuntu. In-depth
-compilation instructions [are
-available](https://github.com/JuliaLang/julia/blob/master/doc/build/windows.md).
-However the important steps for redistribution are to ensure to `make
-win-extras` in between `make` and `make binary-dist`.  After that process is
-completed, the `.zip` file created in the head Julia directory will
-hold a completely self-contained Julia.
+Instructions for creating a Julia distribution on Windows are described in the
+[build devdocs for Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md).
 
 Notes on BLAS and LAPACK
 ------------------------
@@ -157,6 +151,9 @@ set `USE_SYSTEM_BLAS=1` and `USE_SYSTEM_LAPACK=1`, you should also set
 `LIBLAPACK=-l$(YOURBLAS)` and `LIBLAPACKNAME=lib$(YOURBLAS)`. Else, the
 reference LAPACK will be used and performance will typically be much lower.
 
+Starting with Julia 1.7, Julia uses [libblastrampoline](https://github.com/JuliaLinearAlgebra/libblastrampoline)
+to pick a different BLAS at runtime.
+
 # Point releasing 101
 
 Creating a point/patch release consists of several distinct steps.
diff --git a/doc/build/freebsd.md b/doc/src/devdocs/build/freebsd.md
similarity index 99%
rename from doc/build/freebsd.md
rename to doc/src/devdocs/build/freebsd.md
index 51b16d58b2959..12012cd745428 100644
--- a/doc/build/freebsd.md
+++ b/doc/src/devdocs/build/freebsd.md
@@ -1,4 +1,4 @@
-## FreeBSD
+# FreeBSD
 
 Clang is the default compiler on FreeBSD 11.0-RELEASE and above.
 The remaining build tools are available from the Ports Collection, and can be installed using
diff --git a/doc/build/linux.md b/doc/src/devdocs/build/linux.md
similarity index 97%
rename from doc/build/linux.md
rename to doc/src/devdocs/build/linux.md
index 8ef67230bdaed..4e596ef73341b 100644
--- a/doc/build/linux.md
+++ b/doc/src/devdocs/build/linux.md
@@ -1,4 +1,4 @@
-## Linux
+# Linux
 
 * GCC version 4.7 or later is required to build Julia.
 * To use external shared libraries not in the system library search path, set `USE_SYSTEM_XXX=1` and `LDFLAGS=-Wl,-rpath,/path/to/dir/contains/libXXX.so` in `Make.user`.
@@ -6,7 +6,7 @@
 * The `USE_SYSTEM_*` flags should be used with caution. These are meant only for troubleshooting, porting, and packaging, where package maintainers work closely with the Julia developers to make sure that Julia is built correctly. Production use cases should use the officially provided binaries. Issues arising from the use of these flags will generally not be accepted.
 * See also the [external dependencies](build.md#required-build-tools-and-external-libraries).
 
-### Architecture Customization
+## Architecture Customization
 
 Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and `JULIA_CPU_TARGET`.
 
@@ -14,7 +14,7 @@ For example, to build for Pentium 4, set `MARCH=pentium4` and install the necess
 
 You can also set `MARCH=native` in `Make.user` for a maximum-performance build customized for the current machine CPU.
 
-### Linux Build Troubleshooting
+## Linux Build Troubleshooting
 
  Problem              | Possible Solution
 ------------------------|---------------------
diff --git a/doc/build/macos.md b/doc/src/devdocs/build/macos.md
similarity index 98%
rename from doc/build/macos.md
rename to doc/src/devdocs/build/macos.md
index bbe3649475668..d78c5f938fc9c 100644
--- a/doc/build/macos.md
+++ b/doc/src/devdocs/build/macos.md
@@ -1,4 +1,4 @@
-## macOS
+# macOS
 
 You need to have the current Xcode command line utilities installed: run `xcode-select --install` in the terminal. You will need to rerun this terminal command after each macOS update, otherwise you may run into errors involving missing libraries or headers.
 
diff --git a/doc/build/windows.md b/doc/src/devdocs/build/windows.md
similarity index 78%
rename from doc/build/windows.md
rename to doc/src/devdocs/build/windows.md
index 23a6c74b60efc..fef4413db7d1a 100644
--- a/doc/build/windows.md
+++ b/doc/src/devdocs/build/windows.md
@@ -1,4 +1,4 @@
-# Julia on Windows
+# Windows
 
 This file describes how to install, or build, and use Julia on Windows.
 
@@ -118,41 +118,44 @@ MinGW-w64 compilers available through Cygwin's package manager.
 
 ### Compiling with MinGW/MSYS2
 
-Compiling Julia from source using [MSYS2](https://msys2.github.io) has worked
-in the past but is not actively supported. Pull requests to restore support
-would be welcome. See a [past version of this file](
-https://github.com/JuliaLang/julia/blob/v0.6.0/README.windows.md)
-for the former instructions for compiling using MSYS2.
+Compiling Julia from source using [MSYS2](https://msys2.github.io) has worked in the past
+but is not actively supported. Pull requests to restore support would be welcome. See a
+[past version of this
+file](https://github.com/JuliaLang/julia/blob/v0.6.0/README.windows.md) for the former
+instructions for compiling using MSYS2.
 
 
-### Cross-compiling from Unix
+### Cross-compiling from Unix (Linux/Mac/WSL)
 
 You can also use MinGW-w64 cross compilers to build a Windows version of Julia from
 Linux, Mac, or the Windows Subsystem for Linux (WSL).
 
 First, you will need to ensure your system has the required dependencies. We
-need wine (>=1.7.5), a system compiler, and some downloaders.
+need wine (>=1.7.5), a system compiler, and some downloaders. Note: a cygwin install might
+interfere with this method if using WSL.
 
 **On Ubuntu** (on other Linux systems the dependency names are likely to be similar):
 ```sh
 apt-get install wine-stable gcc wget p7zip-full winbind mingw-w64 gfortran-mingw-w64
+dpkg --add-architecture i386 && apt-get update && apt-get install wine32 # add sudo to each if needed
 # switch all of the following to their "-posix" variants (interactively):
 for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do sudo update-alternatives --config $pkg; done
 ```
 
-**On Mac**: Install XCode, XCode command line tools, X11 (now [XQuartz](
-https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php)
-or [Homebrew](https://brew.sh/).  Then run `port install wine wget mingw-w64`,
-or `brew install wine wget mingw-w64`, as appropriate.
+**On Mac**: Install XCode, XCode command line tools, X11 (now
+[XQuartz](https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php)
+or [Homebrew](https://brew.sh/).  Then run `port install wine wget mingw-w64`, or `brew
+install wine wget mingw-w64`, as appropriate.
 
-Then run the build:
+**Then run the build:**
 
  1. `git clone https://github.com/JuliaLang/julia.git julia-win32`
- 2. `echo override XC_HOST = i686-w64-mingw32 >> Make.user`
- 3. `make`
- 4. `make win-extras` (Necessary before running `make binary-dist`)
- 5. `make binary-dist` then `make exe` to create the Windows installer.
- 6. move the `julia-*.exe` installer to the target machine
+ 2. `cd julia-win32`
+ 3. `echo override XC_HOST = i686-w64-mingw32 >> Make.user`
+ 4. `make`
+ 5. `make win-extras` (Necessary before running `make binary-dist`)
+ 6. `make binary-dist` then `make exe` to create the Windows installer.
+ 7. move the `julia-*.exe` installer to the target machine
 
 If you are building for 64-bit windows, the steps are essentially the same.
 Just replace `i686` in `XC_HOST` with `x86_64`. (note: on Mac, wine only runs
@@ -161,13 +164,13 @@ in 32-bit mode).
 
 ## Debugging a cross-compiled build under wine
 
-The most effective way to debug a cross-compiled version of Julia on the
-cross-compilation host is to install a windows version of gdb and run it under wine
-as usual. The pre-built packages available [as part of the MSYS2 project](
-https://sourceforge.net/projects/msys2/files/REPOS/MINGW/) are known to work.
-Apart from the GDB package you may also need the python and termcap packages.
-Finally, GDB's prompt may not work when launch from the command line. This can
-be worked around by prepending `wineconsole` to the regular GDB invocation.
+The most effective way to debug a cross-compiled version of Julia on the cross-compilation
+host is to install a windows version of gdb and run it under wine as usual. The pre-built
+packages available [as part of the MSYS2
+project](https://sourceforge.net/projects/msys2/files/REPOS/MINGW/) are known to work. Apart
+from the GDB package you may also need the python and termcap packages. Finally, GDB's
+prompt may not work when launched from the command line. This can be worked around by
+prepending `wineconsole` to the regular GDB invocation.
 
 
 ## After compiling
@@ -183,10 +186,10 @@ yourself using ```make win-extras``` followed by ```make binary-dist``` and ```m
 
 ### GDB hangs with cygwin mintty
 
-- Run gdb under the windows console (cmd) instead. gdb [may not function properly](
-  https://www.cygwin.com/ml/cygwin/2009-02/msg00531.html) under mintty with non-
-  cygwin applications. You can use `cmd /c start` to start the windows console
-  from mintty if necessary.
+- Run gdb under the windows console (cmd) instead. gdb [may not function
+  properly](https://www.cygwin.com/ml/cygwin/2009-02/msg00531.html) under mintty with
+  non-cygwin applications. You can use `cmd /c start` to start the windows console from mintty
+  if necessary.
 
 ### GDB not attaching to the right process
 
@@ -205,11 +208,12 @@ yourself using ```make win-extras``` followed by ```make binary-dist``` and ```m
 
 ### Build process is slow/eats memory/hangs my computer
 
- - Disable the Windows [Superfetch](https://en.wikipedia.org/wiki/Windows_Vista_I/O_technologies#SuperFetch)
-   and [Program Compatibility Assistant](
-   https://blogs.msdn.com/b/cjacks/archive/2011/11/22/managing-the-windows-7-program-compatibility-assistant-pca.aspx)
-   services, as they are known to have [spurious interactions](
-   https://cygwin.com/ml/cygwin/2011-12/msg00058.html) with MinGW/Cygwin.
+ - Disable the Windows
+   [Superfetch](https://en.wikipedia.org/wiki/Windows_Vista_I/O_technologies#SuperFetch) and
+   [Program Compatibility
+   Assistant](https://blogs.msdn.com/b/cjacks/archive/2011/11/22/managing-the-windows-7-program-compatibility-assistant-pca.aspx)
+   services, as they are known to have [spurious
+   interactions](https://cygwin.com/ml/cygwin/2011-12/msg00058.html) with MinGW/Cygwin.
 
    As mentioned in the link above: excessive memory use by `svchost` specifically
    may be investigated in the Task Manager by clicking on the high-memory
diff --git a/doc/src/devdocs/debuggingtips.md b/doc/src/devdocs/debuggingtips.md
index f145931df3ecc..a1f4894919064 100644
--- a/doc/src/devdocs/debuggingtips.md
+++ b/doc/src/devdocs/debuggingtips.md
@@ -1,4 +1,4 @@
-# gdb debugging tips
+# [gdb debugging tips](@id gdb-debugging-tips)
 
 ## Displaying Julia variables
 
@@ -234,7 +234,7 @@ process)
 
 ## Mozilla's Record and Replay Framework (rr)
 
-Julia now works out of the box with [rr](http://rr-project.org/), the lightweight recording and
+Julia now works out of the box with [rr](https://rr-project.org/), the lightweight recording and
 deterministic debugging framework from Mozilla. This allows you to replay the trace of an execution
 deterministically.  The replayed execution's address spaces, register contents, syscall data etc
 are exactly the same in every run.
diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md
index 48b0081f9b2b3..cc5384fc2e41c 100644
--- a/doc/src/devdocs/functions.md
+++ b/doc/src/devdocs/functions.md
@@ -13,7 +13,7 @@ share the same `Complex` type name object.
 All objects in Julia are potentially callable, because every object has a type, which in turn
 has a `TypeName`.
 
-## Function calls
+## [Function calls](@id Function-calls)
 
 Given the call `f(x,y)`, the following steps are performed: first, the method table to use is
 accessed as `typeof(f).name.mt`. Second, an argument tuple type is formed, `Tuple{typeof(f), typeof(x), typeof(y)}`.
diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md
index a9c4ec5c726ed..68d63600f1bb1 100644
--- a/doc/src/devdocs/inference.md
+++ b/doc/src/devdocs/inference.md
@@ -49,16 +49,12 @@ A `CodeInfo` object may be obtained with
 ci = (@code_typed convert(Int, UInt(1)))[1]
 ```
 
-## The inlining algorithm (inline_worthy)
+## The inlining algorithm (`inline_worthy`)
 
-Much of the hardest work for inlining runs in
-`inlining_pass`. However, if your question is "why didn't my function
-inline?" then you will most likely be interested in `isinlineable` and
-its primary callee, `inline_worthy`. `isinlineable` handles a number
-of special cases (e.g., critical functions like `next` and `done`,
-incorporating a bonus for functions that return tuples, etc.). The
-main decision-making happens in `inline_worthy`, which returns `true`
-if the function should be inlined.
+Much of the hardest work for inlining runs in `ssa_inlining_pass!`.
+However, if your question is "why didn't my function inline?"
+then you will most likely be interested in `inline_worthy`,
+which makes a decision to inline the function call or not.
 
 `inline_worthy` implements a cost-model, where "cheap" functions get
 inlined; more specifically, we inline functions if their anticipated
@@ -90,7 +86,7 @@ input and output types were inferred in advance) is assigned a fixed
 cost (currently 20 cycles). In contrast, a `:call` expression, for
 functions other than intrinsics/builtins, indicates that the call will
 require dynamic dispatch, in which case we assign a cost set by
-`Params.inline_nonleaf_penalty` (currently set at 1000). Note
+`Params.inline_nonleaf_penalty` (currently set at `1000`). Note
 that this is not a "first-principles" estimate of the raw cost of
 dynamic dispatch, but a mere heuristic indicating that dynamic
 dispatch is extremely expensive.
diff --git a/doc/src/devdocs/init.md b/doc/src/devdocs/init.md
index 24d0874c196a4..cf954884c57b6 100644
--- a/doc/src/devdocs/init.md
+++ b/doc/src/devdocs/init.md
@@ -10,11 +10,11 @@ which loads a few libraries, eventually calling [`repl_entrypoint()` in `src/jla
 
 `repl_entrypoint()` calls [`libsupport_init()`](https://github.com/JuliaLang/julia/blob/master/src/support/libsupportinit.c)
 to set the C library locale and to initialize the "ios" library (see [`ios_init_stdstreams()`](https://github.com/JuliaLang/julia/blob/master/src/support/ios.c)
-and [Legacy `ios.c` library](@ref)).
+and [Legacy `ios.c` library](@ref Legacy-ios.c-library)).
 
 Next [`jl_parse_opts()`](https://github.com/JuliaLang/julia/blob/master/src/jloptions.c) is called to process
 command line options. Note that `jl_parse_opts()` only deals with options that affect code generation
-or early initialization. Other options are handled later by [`process_options()` in `base/client.jl`](https://github.com/JuliaLang/julia/blob/master/base/client.jl).
+or early initialization. Other options are handled later by [`exec_options()` in `base/client.jl`](https://github.com/JuliaLang/julia/blob/master/base/client.jl).
 
 `jl_parse_opts()` stores command line options in the [global `jl_options` struct](https://github.com/JuliaLang/julia/blob/master/src/julia.h).
 
@@ -29,7 +29,7 @@ by `main()` and calls [`_julia_init()` in `init.c`](https://github.com/JuliaLang
 to zero the signal handler mask.
 
 [`jl_resolve_sysimg_location()`](https://github.com/JuliaLang/julia/blob/master/src/init.c) searches
-configured paths for the base system image. See [Building the Julia system image](@ref).
+configured paths for the base system image. See [Building the Julia system image](@ref Building-the-Julia-system-image).
 
 [`jl_gc_init()`](https://github.com/JuliaLang/julia/blob/master/src/gc.c) sets up allocation pools
 and lists for weak refs, preserved values and finalization.
@@ -55,7 +55,7 @@ jl_int32_type = jl_new_primitivetype(jl_symbol("Int32"), core,
 object; initializes the global `jl_root_task` struct; and sets `jl_current_task` to the root task.
 
 [`jl_init_codegen()`](https://github.com/JuliaLang/julia/blob/master/src/codegen.cpp) initializes
-the [LLVM library](http://llvm.org).
+the [LLVM library](https://llvm.org).
 
 [`jl_init_serializer()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) initializes
 8-bit serialization tags for builtin `jl_value_t` values.
@@ -130,14 +130,14 @@ and `main()` calls `repl_entrypoint(argc, (char**)argv)`.
 
 !!! sidebar "sysimg"
     If there is a sysimg file, it contains a pre-cooked image of the `Core` and `Main` modules (and
-    whatever else is created by `boot.jl`). See [Building the Julia system image](@ref).
+    whatever else is created by `boot.jl`). See [Building the Julia system image](@ref Building-the-Julia-system-image).
 
     [`jl_restore_system_image()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) deserializes
     the saved sysimg into the current Julia runtime environment and initialization continues after
     `jl_init_box_caches()` below...
 
     Note: [`jl_restore_system_image()` (and `staticdata.c` in general)](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c)
-    uses the [Legacy `ios.c` library](@ref).
+    uses the [Legacy `ios.c` library](@ref Legacy-ios.c-library).
 
 ## `repl_entrypoint()`
 
@@ -156,28 +156,25 @@ executes it.
 
 ## `Base._start`
 
-[`Base._start`](https://github.com/JuliaLang/julia/blob/master/base/client.jl) calls [`Base.process_options`](https://github.com/JuliaLang/julia/blob/master/base/client.jl)
+[`Base._start`](https://github.com/JuliaLang/julia/blob/master/base/client.jl) calls [`Base.exec_options`](https://github.com/JuliaLang/julia/blob/master/base/client.jl)
 which calls [`jl_parse_input_line("println("Hello World!")")`](https://github.com/JuliaLang/julia/blob/master/src/ast.c)
-to create an expression object and [`Base.eval()`](@ref eval) to execute it.
+to create an expression object and [`Core.eval(Main, ex)`](@ref Core.eval) to execute the parsed expression `ex` in the module context of `Main`.
 
-## `Base.eval`
+## `Core.eval`
 
-[`Base.eval()`](@ref eval) was [mapped to `jl_f_top_eval`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c)
-by `jl_init_primitives()`.
-
-[`jl_f_top_eval()`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c) calls [`jl_toplevel_eval_in(jl_main_module, ex)`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c),
-where `ex` is the parsed expression `println("Hello World!")`.
-
-[`jl_toplevel_eval_in()`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c) calls
-[`jl_toplevel_eval_flex()`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c) which
-calls [`eval()` in `interpreter.c`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c).
+[`Core.eval(Main, ex)`](@ref Core.eval) calls [`jl_toplevel_eval_in(m, ex)`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c),
+which calls [`jl_toplevel_eval_flex`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c).
+`jl_toplevel_eval_flex` implements a simple heuristic to decide whether to compile a given code thunk or run it in the interpreter.
+When given `println("Hello World!")`, it would usually decide to run the code in the interpreter, in which case it calls
+[`jl_interpret_toplevel_thunk`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c), which then calls
+[`eval_body`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c).
 
 The stack dump below shows how the interpreter works its way through various methods of [`Base.println()`](@ref)
 and [`Base.print()`](@ref) before arriving at [`write(s::IO, a::Array{T}) where T`](https://github.com/JuliaLang/julia/blob/master/base/stream.jl)
  which does `ccall(jl_uv_write())`.
 
 [`jl_uv_write()`](https://github.com/JuliaLang/julia/blob/master/src/jl_uv.c) calls `uv_write()`
-to write "Hello World!" to `JL_STDOUT`. See [Libuv wrappers for stdio](@ref).:
+to write "Hello World!" to `JL_STDOUT` (see [Libuv wrappers for stdio](@ref Libuv-wrappers-for-stdio)):
 
 ```
 Hello World!
@@ -209,12 +206,11 @@ Hello World!
 | `jl_apply_generic()`           | `gf.c`          | `Base.println(String,)`                              |
 | `jl_apply()`                   | `julia.h`       |                                                      |
 | `do_call()`                    | `interpreter.c` |                                                      |
-| `eval()`                       | `interpreter.c` |                                                      |
-| `jl_interpret_toplevel_expr()` | `interpreter.c` |                                                      |
-| `jl_toplevel_eval_flex()`      | `toplevel.c`    |                                                      |
-| `jl_toplevel_eval()`           | `toplevel.c`    |                                                      |
-| `jl_toplevel_eval_in()`        | `builtins.c`    |                                                      |
-| `jl_f_top_eval()`              | `builtins.c`    |                                                      |
+| `eval_body()`                  | `interpreter.c` |                                                      |
+| `jl_interpret_toplevel_thunk`  | `interpreter.c` |                                                      |
+| `jl_toplevel_eval_flex`        | `toplevel.c`    |                                                      |
+| `jl_toplevel_eval_in`          | `toplevel.c`    |                                                      |
+| `Core.eval`                    | `boot.jl`       |                                                      |
 
 Since our example has just one function call, which has done its job of printing "Hello World!",
 the stack now rapidly unwinds back to `main()`.
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 7b8334e27ae59..5659d79ada24c 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -30,11 +30,11 @@ and therefore its arguments must be statically typed.
 
 ### Alias Analysis
 
-Julia currently uses LLVM's [Type Based Alias Analysis](http://llvm.org/docs/LangRef.html#tbaa-metadata).
+Julia currently uses LLVM's [Type Based Alias Analysis](https://llvm.org/docs/LangRef.html#tbaa-metadata).
 To find the comments that document the inclusion relationships, look for `static MDNode*` in
 `src/codegen.cpp`.
 
-The `-O` option enables LLVM's [Basic Alias Analysis](http://llvm.org/docs/AliasAnalysis.html#the-basicaa-pass).
+The `-O` option enables LLVM's [Basic Alias Analysis](https://llvm.org/docs/AliasAnalysis.html#the-basic-aa-pass).
 
 ## Building Julia with a different version of LLVM
 
@@ -42,11 +42,11 @@ The default version of LLVM is specified in `deps/Versions.make`. You can overri
 a file called `Make.user` in the top-level directory and adding a line to it such as:
 
 ```
-LLVM_VER = 12.0.0
+LLVM_VER = 12.0.1
 ```
 
-Besides the LLVM release numerals, you can also use `LLVM_VER = svn` to build against the latest
-development version of LLVM.
+Besides the LLVM release numerals, you can also use `DEPS_GIT = llvm` in combination with
+`USE_BINARYBUILDER_LLVM = 0` to build against the latest development version of LLVM.
 
 You can also specify to build a debug version of LLVM, by setting either `LLVM_DEBUG = 1` or
 `LLVM_DEBUG = Release` in your `Make.user` file. The former will be a fully unoptimized build
@@ -102,7 +102,7 @@ above.
 Improving LLVM code generation usually involves either changing Julia lowering to be more friendly
 to LLVM's passes, or improving a pass.
 
-If you are planning to improve a pass, be sure to read the [LLVM developer policy](http://llvm.org/docs/DeveloperPolicy.html).
+If you are planning to improve a pass, be sure to read the [LLVM developer policy](https://llvm.org/docs/DeveloperPolicy.html).
 The best strategy is to create a code example in a form where you can use LLVM's `opt` tool to
 study it and the pass of interest in isolation.
 
diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md
index 7591f0df2d356..f01209cc73e52 100644
--- a/doc/src/devdocs/locks.md
+++ b/doc/src/devdocs/locks.md
@@ -1,4 +1,4 @@
-# Proper maintenance and care of multi-threading locks
+# [Proper maintenance and care of multi-threading locks](@id Proper-maintenance-and-care-of-multi-threading-locks)
 
 The following strategies are used to ensure that the code is dead-lock free (generally by addressing
 the 4th Coffman condition: circular wait).
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
new file mode 100644
index 0000000000000..7e2fb96df5ac8
--- /dev/null
+++ b/doc/src/devdocs/probes.md
@@ -0,0 +1,168 @@
+# Instrumenting Julia with DTrace and bpftrace
+
+DTrace and bpftrace are tools that enable lightweight instrumentation of processes.
+You can turn the instrumentation on and off while the process is running,
+and with instrumentation off the overhead is minimal.
+
+!!! compat "Julia 1.8"
+    Support for probes was added in Julia 1.8.
+
+!!! note
+    This documentation has been written from a Linux perspective, but most of it
+    should also hold on macOS/Darwin and FreeBSD.
+
+## Enabling support
+
+On Linux, install the `systemtap` package, which provides a version of `dtrace`, and build Julia with the following make option set (for example in `Make.user`):
+
+```
+WITH_DTRACE=1
+```
+
+### Verifying
+
+```
+> readelf -n usr/lib/libjulia-internal.so.1
+
+Displaying notes found in: .note.gnu.build-id
+  Owner                Data size 	Description
+  GNU                  0x00000014	NT_GNU_BUILD_ID (unique build ID bitstring)
+    Build ID: 57161002f35548772a87418d2385c284ceb3ead8
+
+Displaying notes found in: .note.stapsdt
+  Owner                Data size 	Description
+  stapsdt              0x00000029	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__begin
+    Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac
+    Arguments:
+  stapsdt              0x00000032	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__stop_the_world
+    Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae
+    Arguments:
+  stapsdt              0x00000027	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__end
+    Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0
+    Arguments:
+  stapsdt              0x0000002d	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__finalizer
+    Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2
+    Arguments:
+```
+
+## Adding probes in libjulia
+
+Probes are declared in `dtrace` format in the file `src/uprobes.d`. The generated
+header file is included in `src/julia_internal.h` and if you add probes you should
+provide a noop implementation there.
+
+The header will contain a semaphore `*_ENABLED` and the actual call to the probe.
+If the probe arguments are expensive to compute you should first check if the
+probe is enabled and then compute the arguments and call the probe.
+
+```c
+  if (JL_PROBE_{PROBE}_ENABLED())
+    auto expensive_arg = ...;
+    JL_PROBE_{PROBE}(expensive_arg);
+```
+
+If your probe has no arguments, it is preferable not to include the semaphore check.
+With USDT probes enabled, the cost of a semaphore check is a memory load, regardless of
+whether the probe is enabled or not.
+
+```c
+#define JL_PROBE_GC_BEGIN_ENABLED() __builtin_expect (julia_gc__begin_semaphore, 0)
+__extension__ extern unsigned short julia_gc__begin_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+```
+
+Whereas the probe itself is a noop sled that will be patched to a trampoline to
+the probe handler.
+
+## Available probes
+
+### GC probes
+
+1. `julia:gc__begin`: GC begins running on one thread and triggers stop-the-world.
+2. `julia:gc__stop_the_world`: All threads have reached a safepoint and GC runs.
+3. `julia:gc__mark__begin`: Beginning the mark phase
+4. `julia:gc__mark__end(scanned_bytes, perm_scanned)`: Mark phase ended
+5. `julia:gc__sweep__begin(full)`: Starting sweep
+6. `julia:gc__sweep__end()`: Sweep phase finished
+7. `julia:gc__end`: GC is finished, other threads continue work
+8. `julia:gc__finalizer`: Initial GC thread has finished running finalizers
+
+#### GC stop-the-world latency
+
+An example `bpftrace` script is given in `contrib/bpftrace/gc_stop_the_world_latency.bt`
+and it creates a histogram of the latency for all threads to reach a safepoint.
+
+Running this Julia code, with `julia -t 2`
+
+```
+using Base.Threads
+
+fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2)
+
+beaver = @spawn begin
+    while true
+        fib(30)
+        # This safepoint is necessary until #41616, since otherwise this
+        # loop will never yield to GC.
+        GC.safepoint()
+    end
+end
+
+allocator = @spawn begin
+    while true
+        zeros(1024)
+    end
+end
+
+wait(allocator)
+```
+
+and in a second terminal
+
+```
+> sudo contrib/bpftrace/gc_stop_the_world_latency.bt
+Attaching 4 probes...
+Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.
+^C
+
+
+@usecs[1743412]:
+[4, 8)               971 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
+[8, 16)              837 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@        |
+[16, 32)             129 |@@@@@@                                              |
+[32, 64)              10 |                                                    |
+[64, 128)              1 |                                                    |
+```
+
+We can see the latency distribution of the stop-the-world phase in the executed Julia process.
+
+## Notes on using `bpftrace`
+
+An example probe in the bpftrace format looks like:
+
+```
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+	@start[pid] = nsecs;
+}
+```
+
+The probe declaration takes the kind `usdt`, then either the
+path to the library or the PID, then the provider name `julia`,
+and finally the probe name `gc__begin`. Note that this example uses a
+relative path to `libjulia-internal.so`; this might
+need to be an absolute path on a production system.
+
+## Useful references:
+
+- [Julia Evans blog on Linux tracing systems](https://jvns.ca/blog/2017/07/05/linux-tracing-systems)
+- [LWN article on USDT and BPF](https://lwn.net/Articles/753601/)
+- [GDB support for probes](https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html)
+- [Brendan Gregg -- Linux Performance](https://www.brendangregg.com/linuxperf.html)
diff --git a/doc/src/devdocs/reflection.md b/doc/src/devdocs/reflection.md
index 1a99f040a4493..ec307012c17d5 100644
--- a/doc/src/devdocs/reflection.md
+++ b/doc/src/devdocs/reflection.md
@@ -65,7 +65,7 @@ recursive application of [`subtypes`](@ref) may be used to inspect the full type
 ## DataType layout
 
 The internal representation of a `DataType` is critically important when interfacing with C code
-and several functions are available to inspect these details. [`isbits(T::DataType)`](@ref) returns
+and several functions are available to inspect these details. [`isbitstype(T::DataType)`](@ref) returns
 true if `T` is stored with C-compatible alignment. [`fieldoffset(T::DataType, i::Integer)`](@ref)
 returns the (byte) offset for field *i* relative to the start of the type.
 
@@ -122,11 +122,12 @@ calls and expand argument types automatically:
 
 ```julia-repl
 julia> @code_llvm +(1,1)
-
-define i64 @"julia_+_130862"(i64, i64) {
+;  @ int.jl:87 within `+`
+; Function Attrs: sspstrong uwtable
+define i64 @"julia_+_476"(i64 signext %0, i64 signext %1) #0 {
 top:
-    %2 = add i64 %1, %0
-    ret i64 %2
+  %2 = add i64 %1, %0
+  ret i64 %2
 }
 ```
 
@@ -138,7 +139,7 @@ For more informations see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@cod
 The aforementioned functions and macros take the keyword argument `debuginfo` that controls the level
 debug information printed.
 
-```
+```julia-repl
 julia> @code_typed debuginfo=:source +(1,1)
 CodeInfo(
     @ int.jl:53 within `+'
@@ -147,7 +148,7 @@ CodeInfo(
 ) => Int64
 ```
 
-Possible values for `debuginfo` are: `:none`, `:source`, and`:default`.
+Possible values for `debuginfo` are: `:none`, `:source`, and `:default`.
 Per default debug information is not printed, but that can be changed
 by setting `Base.IRShow.default_debuginfo[] = :source`.
 
diff --git a/doc/src/devdocs/sanitizers.md b/doc/src/devdocs/sanitizers.md
index 87a62a50ffd94..e0dfaea64b3d3 100644
--- a/doc/src/devdocs/sanitizers.md
+++ b/doc/src/devdocs/sanitizers.md
@@ -35,7 +35,7 @@ look like this, plus one or more of the `SANITIZE_*` flags listed below:
 
 ## Address Sanitizer (ASAN)
 
-For detecting or debugging memory bugs, you can use Clang's [address sanitizer (ASAN)](http://clang.llvm.org/docs/AddressSanitizer.html).
+For detecting or debugging memory bugs, you can use Clang's [address sanitizer (ASAN)](https://clang.llvm.org/docs/AddressSanitizer.html).
 By compiling with `SANITIZE_ADDRESS=1` you enable ASAN for the Julia compiler and its generated code.
 In addition, you can specify `LLVM_SANITIZE=1` to sanitize the LLVM library as well. Note that
 these options incur a high performance and memory cost. For example, using ASAN for Julia and
@@ -116,7 +116,7 @@ to build `julia-debug` with ASAN.
 
 ## Memory Sanitizer (MSAN)
 
-For detecting use of uninitialized memory, you can use Clang's [memory sanitizer (MSAN)](http://clang.llvm.org/docs/MemorySanitizer.html)
+For detecting use of uninitialized memory, you can use Clang's [memory sanitizer (MSAN)](https://clang.llvm.org/docs/MemorySanitizer.html)
 by compiling with `SANITIZE_MEMORY=1`.
 
 ## Thread Sanitizer (TSAN)
diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md
index d0ad27b833301..46d33c177f469 100644
--- a/doc/src/devdocs/ssair.md
+++ b/doc/src/devdocs/ssair.md
@@ -3,12 +3,12 @@
 ## Background
 
 Beginning in Julia 0.7, parts of the compiler use a new [SSA-form](https://en.wikipedia.org/wiki/Static_single_assignment_form)
-intermediate representation. Historically, the compiler used to directly generate LLVM IR, from a lowered form of the Julia
+intermediate representation. Historically, the compiler would directly generate LLVM IR from a lowered form of the Julia
 AST. This form had most syntactic abstractions removed, but still looked a lot like an abstract syntax tree.
 Over time, in order to facilitate optimizations, SSA values were introduced to this IR and the IR was
-linearized (i.e. a form where function arguments may only be SSA values or constants). However, non-SSA values
+linearized (i.e. turned into a form where function arguments could only be SSA values or constants). However, non-SSA values
 (slots) remained in the IR due to the lack of Phi nodes in the IR (necessary for back-edges and re-merging of
-conditional control flow), negating much of the usefulness of the SSA form representation to perform
+conditional control flow). This negated much of the usefulness of SSA form representation when performing
 middle end optimizations. Some heroic effort was put into making these optimizations work without a complete SSA
 form representation, but the lack of such a representation ultimately proved prohibitive.
 
@@ -74,7 +74,7 @@ that is generally done for most optimizations that care about these conditions a
 
 Exception handling complicates the SSA story moderately, because exception handling
 introduces additional control flow edges into the IR across which values must be tracked.
-One approach to do so, which is followed by LLVM is to make calls which may throw exceptions
+One approach to do so, which is followed by LLVM, is to make calls which may throw exceptions
 into basic block terminators and add an explicit control flow edge to the catch handler:
 
 ```
@@ -87,16 +87,16 @@ catch:
 # Exceptions go here
 ```
 
-However, this is problematic in a language like julia where at the start of the optimization
+However, this is problematic in a language like Julia, where at the start of the optimization
 pipeline, we do not know which calls throw. We would have to conservatively assume that every
-call (which in julia is every statement) throws. This would have several negative effects.
+call (which in Julia is every statement) throws. This would have several negative effects.
 On the one hand, it would essentially reduce the scope of every basic block to a single call,
 defeating the purpose of having operations be performed at the basic block level. On the other
 hand, every catch basic block would have `n*m` phi node arguments (`n`, the number of statements
-in the critical region, `m` the number of live values through the catch block). To work around
-this, we use a combination of `Upsilon` and `PhiC` (the C standing for `catch`,
-written `φᶜ` in the IR pretty printer, because
-unicode subscript c is not available) nodes. There are several ways to think of these nodes, but
+in the critical region, `m` the number of live values through the catch block).
+
+To work around this, we use a combination of `Upsilon` and `PhiC` nodes (the C standing for `catch`,
+written `φᶜ` in the IR pretty printer, because unicode subscript c is not available). There are several ways to think of these nodes, but
 perhaps the easiest is to think of each `PhiC` as a load from a unique store-many, read-once slot,
 with `Upsilon` being the corresponding store operation. The `PhiC` has an operand list of all the
 upsilon nodes that store to its implicit slot. The `Upsilon` nodes however, do not record which `PhiC`
diff --git a/doc/src/devdocs/stdio.md b/doc/src/devdocs/stdio.md
index 879c3d048343b..5ee4f0206ee0b 100644
--- a/doc/src/devdocs/stdio.md
+++ b/doc/src/devdocs/stdio.md
@@ -1,8 +1,8 @@
 # printf() and stdio in the Julia runtime
 
-## Libuv wrappers for stdio
+## [Libuv wrappers for stdio](@id Libuv-wrappers-for-stdio)
 
-`julia.h` defines [libuv](http://docs.libuv.org) wrappers for the `stdio.h` streams:
+`julia.h` defines [libuv](https://docs.libuv.org) wrappers for the `stdio.h` streams:
 
 ```c
 uv_stream_t *JL_STDIN;
@@ -74,7 +74,7 @@ In `jl_uv.c` the `jl_uv_puts()` function checks its `uv_stream_t* stream` argume
 This allows for uniform use of `jl_printf()` throughout the runtime regardless of whether or not
 any particular piece of code is reachable before initialization is complete.
 
-## Legacy `ios.c` library
+## [Legacy `ios.c` library](@id Legacy-ios.c-library)
 
 The `src/support/ios.c` library is inherited from [femtolisp](https://github.com/JeffBezanson/femtolisp).
 It provides cross-platform buffered file IO and in-memory temporary buffers.
diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md
index dee9547fb1efd..cec7a64a65245 100644
--- a/doc/src/devdocs/subarrays.md
+++ b/doc/src/devdocs/subarrays.md
@@ -19,14 +19,14 @@ julia> A = rand(2,3,4);
 
 julia> S1 = view(A, :, 1, 2:3)
 2×2 view(::Array{Float64, 3}, :, 1, 2:3) with eltype Float64:
- 0.166507  0.97397
- 0.754392  0.831383
+ 0.839622  0.711389
+ 0.967143  0.103929
 
 julia> S2 = view(A, 1, :, 2:3)
 3×2 view(::Array{Float64, 3}, 1, :, 2:3) with eltype Float64:
- 0.166507  0.97397
- 0.518957  0.0705793
- 0.503714  0.825124
+ 0.839622  0.711389
+ 0.789764  0.806704
+ 0.566704  0.962715
 ```
 ```@meta
 DocTestSetup = nothing
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index 734c8a5ca4659..5c976875846d3 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -1,6 +1,6 @@
 # System Image Building
 
-## Building the Julia system image
+## [Building the Julia system image](@id Building-the-Julia-system-image)
 
 Julia ships with a preparsed system image containing the contents of the `Base` module, named
 `sys.ji`.  This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on
diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md
index d1a29dc19d160..003574f99c182 100644
--- a/doc/src/devdocs/types.md
+++ b/doc/src/devdocs/types.md
@@ -82,7 +82,7 @@ f3(A::Array{T}) where {T<:Any} = 3
 f4(A::Array{Any}) = 4
 ```
 
-The signature - as described in [Function calls](@ref) - of `f3` is a `UnionAll` type wrapping a tuple type: `Tuple{typeof(f3), Array{T}} where T`.
+The signature - as described in [Function calls](@ref Function-calls) - of `f3` is a `UnionAll` type wrapping a tuple type: `Tuple{typeof(f3), Array{T}} where T`.
 All but `f4` can be called with `a = [1,2]`; all but `f2` can be called with `b = Any[1,2]`.
 
 Let's look at these types a little more closely:
@@ -478,7 +478,7 @@ We have not yet worked out a complete algorithm for this.
 Most operations for dealing with types are found in the files `jltypes.c` and `subtype.c`.
 A good way to start is to watch subtyping in action.
 Build Julia with `make debug` and fire up Julia within a debugger.
-[gdb debugging tips](@ref) has some tips which may be useful.
+[gdb debugging tips](@ref gdb-debugging-tips) has some tips which may be useful.
 
 Because the subtyping code is used heavily in the REPL itself -- and hence breakpoints in this
 code get triggered often -- it will be easiest if you make the following definition:
diff --git a/doc/src/devdocs/valgrind.md b/doc/src/devdocs/valgrind.md
index 5b25762133b07..8a11cb411a6fd 100644
--- a/doc/src/devdocs/valgrind.md
+++ b/doc/src/devdocs/valgrind.md
@@ -1,6 +1,6 @@
 # Using Valgrind with Julia
 
-[Valgrind](http://valgrind.org/) is a tool for memory debugging, memory leak detection, and profiling.
+[Valgrind](https://valgrind.org/) is a tool for memory debugging, memory leak detection, and profiling.
  This section describes things to keep in mind when using Valgrind to debug memory issues with
 Julia.
 
@@ -24,10 +24,13 @@ Another thing to note: if your program uses multiple workers processes, it is li
 want all such worker processes to run under Valgrind, not just the parent process.  To do this,
 pass `--trace-children=yes` to `valgrind`.
 
+Yet another thing to note: if running under `valgrind` fails with the error `Unable to find compatible target in system image`,
+try rebuilding the sysimage with target `generic`, or rebuilding Julia with `JULIA_CPU_TARGET=generic`.
+
 ## Suppressions
 
 Valgrind will typically display spurious warnings as it runs.  To reduce the number of such warnings,
-it helps to provide a [suppressions file](http://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
+it helps to provide a [suppressions file](https://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
 to Valgrind.  A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
 
 The suppressions file can be used from the `julia/` source directory as follows:
diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md
index dcdc853ede125..f6e4350726269 100644
--- a/doc/src/manual/arrays.md
+++ b/doc/src/manual/arrays.md
@@ -223,7 +223,7 @@ julia> [1:2; 4;; 1; 3:4]
 
 Just as `;` and `;;` concatenate in the first and second dimension, using more semicolons
 extends this same general scheme. The number of semicolons in the separator specifies the
-particular dimension, so `;;;` concetenates in the third dimension, `;;;;` in the 4th, and
+particular dimension, so `;;;` concatenates in the third dimension, `;;;;` in the 4th, and
 so on. Fewer semicolons take precedence, so the lower dimensions are generally concatenated
 first.
 
@@ -875,7 +875,7 @@ full set of cartesian indices to do their lookup (see [`IndexStyle`](@ref) to
 introspect which is which). As such, when iterating over an entire array, it's
 much better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`.
 Not only will the former be much faster in cases where `A` is `IndexCartesian`,
-but it will also support OffsetArrays, too.
+but it will also support [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl), too.
 
 #### Omitted and extra indices
 
diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md
index c0181e775bd05..4eee0fccf7da2 100644
--- a/doc/src/manual/asynchronous-programming.md
+++ b/doc/src/manual/asynchronous-programming.md
@@ -211,7 +211,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     julia> close(c);
 
     julia> put!(c, 2) # `put!` on a closed channel throws an exception.
-    ERROR: InvalidStateException("Channel is closed.",:closed)
+    ERROR: InvalidStateException: Channel is closed.
     Stacktrace:
     [...]
     ```
@@ -230,7 +230,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     1
 
     julia> take!(c) # No more data available on a closed channel.
-    ERROR: InvalidStateException("Channel is closed.",:closed)
+    ERROR: InvalidStateException: Channel is closed.
     Stacktrace:
     [...]
     ```
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 89d535fd54344..5529018217c1a 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -186,6 +186,10 @@ Julia function. The arguments to [`@cfunction`](@ref) are:
     function on 32-bit Windows, but can be used on WIN64 (where `stdcall` is unified with the
     C calling convention).
 
+!!! note
+    Callback functions exposed via `@cfunction` should not throw errors, as that will
+    return control to the Julia runtime unexpectedly and may leave the program in an undefined state.
+
 A classic example is the standard C library `qsort` function, declared as:
 
 ```c
@@ -721,7 +725,8 @@ For translating a C return type to Julia:
           * `Ptr{T}`, where `T` is the Julia type corresponding to `T`
   * `T (*)(...)` (e.g. a pointer to a function)
 
-      * `Ptr{Cvoid}` (you may need to use [`@cfunction`](@ref) explicitly to create this pointer)
+      * `Ptr{Cvoid}`. To call this directly from Julia, you will need to pass it as the first argument to [`ccall`](@ref).
+        See [Indirect Calls](@ref).
 
 ### Passing Pointers for Modifying Inputs
 
@@ -840,7 +845,7 @@ the Julia pointer to a Julia array data structure into a form understandable by
 
 ## Fortran Wrapper Example
 
-The following example utilizes ccall to call a function in a common Fortran library (libBLAS) to
+The following example utilizes `ccall` to call a function in a common Fortran library (libBLAS) to
 computes a dot product. Notice that the argument mapping is a bit different here than above, as
 we need to map from Julia to Fortran.  On every argument type, we specify `Ref` or `Ptr`. This
 mangling convention may be specific to your fortran compiler and operating system, and is likely
@@ -1003,12 +1008,12 @@ hn = Vector{UInt8}(undef, 256)
 err = ccall(:gethostname, stdcall, Int32, (Ptr{UInt8}, UInt32), hn, length(hn))
 ```
 
-For more information, please see the [LLVM Language Reference](http://llvm.org/docs/LangRef.html#calling-conventions).
+For more information, please see the [LLVM Language Reference](https://llvm.org/docs/LangRef.html#calling-conventions).
 
 There is one additional special calling convention [`llvmcall`](@ref Base.llvmcall),
 which allows inserting calls to LLVM intrinsics directly.
 This can be especially useful when targeting unusual platforms such as GPGPUs.
-For example, for [CUDA](http://llvm.org/docs/NVPTXUsage.html), we need to be able to read the thread index:
+For example, for [CUDA](https://llvm.org/docs/NVPTXUsage.html), we need to be able to read the thread index:
 
 ```julia
 ccall("llvm.nvvm.read.ptx.sreg.tid.x", llvmcall, Int32, ())
diff --git a/doc/src/manual/command-line-options.md b/doc/src/manual/command-line-options.md
index a0fe1849cc7b1..17763fb434e94 100644
--- a/doc/src/manual/command-line-options.md
+++ b/doc/src/manual/command-line-options.md
@@ -6,7 +6,7 @@ The following is a complete list of command-line switches available when launchi
 |:---                                   |:---|
 |`-v`, `--version`                      |Display version information|
 |`-h`, `--help`                         |Print command-line options (this message).|
-|`--project[={<dir>\|@.}]`              |Set <dir> as the home project/environment. The default @. option will search through parent directories until a Project.toml or JuliaProject.toml file is found.|
+|`--project[={<dir>\|@.}]`              |Set `<dir>` as the home project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.|
 |`-J`, `--sysimage <file>`              |Start up with the given system image file|
 |`-H`, `--home <dir>`                   |Set location of `julia` executable|
 |`--startup-file={yes\|no}`             |Load `~/.julia/config/startup.jl`|
@@ -29,10 +29,10 @@ The following is a complete list of command-line switches available when launchi
 |`-C`, `--cpu-target <target>`          |Limit usage of CPU features up to `<target>`; set to `help` to see the available options|
 |`-O`, `--optimize={0,1,2,3}`           |Set the optimization level (default level is 2 if unspecified or 3 if used without a level)|
 |`--min-optlevel={0,1,2,3}`             |Set the lower bound on per-module optimization (default is 0)|
-|`-g`, `-g <level>`                     |Enable / Set the level of debug info generation (default level is 1 if unspecified or 2 if used without a level)|
+|`-g`, `-g <level>`                     |Enable or set the level of debug info generation (default level is 1 if unspecified or 2 if used without a level)|
 |`--inline={yes\|no}`                   |Control whether inlining is permitted, including overriding `@inline` declarations|
-|`--check-bounds={yes\|no\|auto}`       |Emit bounds checks always, never, or respect @inbounds declarations|
-|`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)|
+|`--check-bounds={yes\|no\|auto}`       |Emit bounds checks always, never, or respect `@inbounds` declarations|
+|`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)|
 |`--code-coverage={none\|user\|all}`    |Count executions of source lines|
 |`--code-coverage`                      |equivalent to `--code-coverage=user`|
 |`--track-allocation={none\|user\|all}` |Count bytes allocated by each source line|
diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
index ac48e5b420f5e..94ad70982bbae 100644
--- a/doc/src/manual/complex-and-rational-numbers.md
+++ b/doc/src/manual/complex-and-rational-numbers.md
@@ -36,7 +36,7 @@ julia> (-1 + 2im)^2
 -3 - 4im
 
 julia> (-1 + 2im)^2.5
-2.729624464784009 - 6.9606644595719im
+2.7296244647840084 - 6.960664459571898im
 
 julia> (-1 + 2im)^(1 + 1im)
 -0.27910381075826657 + 0.08708053414102428im
diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md
index a8d8bf61dc54b..63ae37660cff4 100644
--- a/doc/src/manual/conversion-and-promotion.md
+++ b/doc/src/manual/conversion-and-promotion.md
@@ -321,7 +321,7 @@ Int64
 
 Note that we do **not** overload `promote_type` directly: we overload `promote_rule` instead.
 `promote_type` uses `promote_rule`, and adds the symmetry.
-Overloading it directly can cause ambiguity errrors.
+Overloading it directly can cause ambiguity errors.
 We overload `promote_rule` to define how things should be promoted, and we use `promote_type`
 to query that.
 
diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md
index e0f373233c723..abaf47a53b39c 100644
--- a/doc/src/manual/distributed-computing.md
+++ b/doc/src/manual/distributed-computing.md
@@ -705,11 +705,11 @@ Num Unique objects : 3
 ```
 
 As can be seen, [`put!`](@ref) on a locally owned [`RemoteChannel`](@ref) with the same
-object `v` modifed between calls results in the same single object instance stored. As
+object `v` modified between calls results in the same single object instance stored. As
 opposed to copies of `v` being created when the node owning `rc` is a different node.
 
 It is to be noted that this is generally not an issue. It is something to be factored in only
-if the object is both being stored locally and modifed post the call. In such cases it may be
+if the object is both being stored locally and modified post the call. In such cases it may be
 appropriate to store a `deepcopy` of the object.
 
 This is also true for remotecalls on the local node as seen in the following example:
@@ -1409,4 +1409,4 @@ mpirun -np 4 ./julia example.jl
     patterns. For additional information on the latest MPI standard, see <https://mpi-forum.org/docs>.
 
 [^2]:
-    [Julia GPU man pages](http://juliagpu.github.io/CUDAnative.jl/stable/man/usage.html#Julia-support-1)
+    [Julia GPU man pages](https://juliagpu.github.io/CUDAnative.jl/stable/man/usage.html#Julia-support-1)
diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md
index 77a3e78dfd970..f537d191dd055 100644
--- a/doc/src/manual/documentation.md
+++ b/doc/src/manual/documentation.md
@@ -214,7 +214,7 @@ by typing `?` followed by the name of a function or macro, and pressing `Enter`.
 ```
 
 will show documentation for the relevant function, macro or string macro respectively. In
-[Juno](http://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object
+[Juno](https://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object
 under the cursor.
 
 ## Functions & Methods
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index efdd0c56895a1..08d00f3a8cae4 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -239,7 +239,7 @@ affinitized. Otherwise, Julia lets the operating system handle thread policy.
 
 Environment variables that determine how REPL output should be formatted at the
 terminal. Generally, these variables should be set to [ANSI terminal escape
-sequences](http://ascii-table.com/ansi-escape-sequences.php). Julia provides
+sequences](https://en.wikipedia.org/wiki/ANSI_escape_code). Julia provides
 a high-level interface with much of the same functionality; see the section on
 [The Julia REPL](@ref).
 
@@ -334,7 +334,7 @@ event listener for just-in-time (JIT) profiling.
     profiling support, using either
     * Intel's [VTune™ Amplifier](https://software.intel.com/en-us/vtune)
       (`USE_INTEL_JITEVENTS` set to `1` in the build configuration), or
-    * [OProfile](http://oprofile.sourceforge.net/news/) (`USE_OPROFILE_JITEVENTS` set to `1`
+    * [OProfile](https://oprofile.sourceforge.io/news/) (`USE_OPROFILE_JITEVENTS` set to `1`
       in the build configuration).
     * [Perf](https://perf.wiki.kernel.org) (`USE_PERF_JITEVENTS` set to `1`
       in the build configuration). This integration is enabled by default.
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index b7ccf92939a5e..813587359810e 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -834,9 +834,10 @@ no values and no subtypes (except itself). You will generally not need to use th
 
 ### Why does `x += y` allocate memory when `x` and `y` are arrays?
 
-In Julia, `x += y` gets replaced during parsing by `x = x + y`. For arrays, this has the consequence
+In Julia, `x += y` gets replaced during lowering by `x = x + y`. For arrays, this has the consequence
 that, rather than storing the result in the same location in memory as `x`, it allocates a new
-array to store the result.
+array to store the result. If you prefer to mutate `x`, use `x .+= y` to update each element
+individually.
 
 While this behavior might surprise some, the choice is deliberate. The main reason is the presence
 of immutable objects within Julia, which cannot change their value once created.  Indeed, a
@@ -869,8 +870,8 @@ After a call like `x = 5; y = power_by_squaring(x, 4)`, you would get the expect
     `x`, after the call you'd have (in general) `y != x`, but for mutable `x` you'd have `y == x`.
 
 Because supporting generic programming is deemed more important than potential performance optimizations
-that can be achieved by other means (e.g., using explicit loops), operators like `+=` and `*=`
-work by rebinding new values.
+that can be achieved by other means (e.g., using broadcasting or explicit loops), operators like `+=` and
+`*=` work by rebinding new values.
 
 ## [Asynchronous IO and concurrent synchronous writes](@id faq-async-io)
 
diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md
index 5fbca52bbfaad..a6a42d05d44b5 100644
--- a/doc/src/manual/functions.md
+++ b/doc/src/manual/functions.md
@@ -74,11 +74,11 @@ and the `::Integer` specification means that it will only be callable when `n` i
 
 Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller.   For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called.  (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).)  The most common reasons to declare argument types in Julia are, instead:
 
-* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments.  For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet's_formula) to extend it to non-integer values.
+* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments.  For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values.
 * **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types.  For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`.
 * **Clarity:** Type declarations can serve as a form of documentation about the expected arguments.
 
-However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate.    For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)).  If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason.   In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**.  You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
+However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate.    For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)).  If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason.   In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**.  You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
 
 ## The `return` Keyword
 
@@ -481,7 +481,7 @@ Instead of destructuring based on iteration, the right side of assignments can a
 This follows the syntax for NamedTuples, and works by assigning to each variable on the left a
 property of the right side of the assignment with the same name using `getproperty`:
 
-```julia
+```jldoctest
 julia> (; b, a) = (a=1, b=2, c=3)
 (a = 1, b = 2, c = 3)
 
@@ -498,7 +498,7 @@ The destructuring feature can also be used within a function argument.
 If a function argument name is written as a tuple (e.g. `(x, y)`) instead of just
 a symbol, then an assignment `(x, y) = argument` will be inserted for you:
 
-```julia
+```julia-repl
 julia> minmax(x, y) = (y < x) ? (y, x) : (x, y)
 
 julia> gap((min, max)) = max - min
@@ -512,7 +512,7 @@ would be a two-argument function, and this example would not work.
 
 Similarly, property destructuring can also be used for function arguments:
 
-```julia
+```julia-repl
 julia> foo((; x, y)) = x + y
 foo (generic function with 1 method)
 
diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md
index da8f3f985335f..75f630091b6c1 100644
--- a/doc/src/manual/getting-started.md
+++ b/doc/src/manual/getting-started.md
@@ -102,6 +102,21 @@ command-line-options).
 
 ## Resources
 
-A curated list of useful learning resources to help new users get started can be found on the [learning](https://julialang.org/learning/) page of the main Julia web site.
+A curated list of useful learning resources to help new users get started can be found on the [learning](https://julialang.org/learning/) page of the main Julia website.
+
+You can use the REPL as a learning resource by switching into the help mode.
+Switch to help mode by pressing `?` at an empty `julia> ` prompt, before typing
+anything else. Typing a keyword in help mode will fetch the documentation for
+it, along with examples. The same works for most functions or other objects
+you might encounter!
+
+```
+help?> begin
+search: begin disable_sigint reenable_sigint
+
+  begin
+
+  begin...end denotes a block of code.
+```
 
 If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips).
diff --git a/doc/src/manual/handling-operating-system-variation.md b/doc/src/manual/handling-operating-system-variation.md
index 026d7df26cedd..d8dc3abd93d7f 100644
--- a/doc/src/manual/handling-operating-system-variation.md
+++ b/doc/src/manual/handling-operating-system-variation.md
@@ -27,13 +27,15 @@ Complex blocks:
 ```julia
 @static if Sys.islinux()
     linux_specific_thing(a)
+elseif Sys.isapple()
+    apple_specific_thing(a)
 else
     generic_thing(a)
 end
 ```
 
-When chaining conditionals (including `if`/`elseif`/`end`), the `@static` must be repeated for
-each level (parentheses optional, but recommended for readability):
+When nesting conditionals, the `@static` must be repeated for each level
+(parentheses optional, but recommended for readability):
 
 ```julia
 @static Sys.iswindows() ? :a : (@static Sys.isapple() ? :b : :c)
diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md
index 8ba962da27184..24c7a8c5a0eeb 100644
--- a/doc/src/manual/integers-and-floating-point-numbers.md
+++ b/doc/src/manual/integers-and-floating-point-numbers.md
@@ -173,8 +173,18 @@ UInt128
 As for hexadecimal literals, binary and octal literals produce unsigned integer types. The size
 of the binary data item is the minimal needed size, if the leading digit of the literal is not
 `0`. In the case of leading zeros, the size is determined by the minimal needed size for a
-literal, which has the same length but leading digit `1`. That allows the user to control
-the size.
+literal, which has the same length but leading digit `1`. This means that:
+
+- `0x1` and `0x12` are `UInt8` literals,
+- `0x123` and `0x1234` are `UInt16` literals,
+- `0x12345` and `0x12345678` are `UInt32` literals,
+- `0x123456789` and `0x1234567890abcdef` are `UInt64` literals, etc.
+
+Even if there are leading zero digits which don't contribute to the value, they count for
+determining the storage size of a literal. So `0x01` is a `UInt8` while `0x0001` is a `UInt16`.
+
+That allows the user to control the size.
+
 Values which cannot be stored in `UInt128` cannot be written as such literals.
 
 Binary, octal, and hexadecimal literals may be signed by a `-` immediately preceding the
@@ -534,7 +544,7 @@ most books on scientific computation, and also in the following references:
     abstraction of real numbers.
   * Also recommended is Bruce Dawson's [series of blog posts on floating-point numbers](https://randomascii.wordpress.com/2012/05/20/thats-not-normalthe-performance-of-odd-floats/).
   * For an excellent, in-depth discussion of floating-point numbers and issues of numerical accuracy
-    encountered when computing with them, see David Goldberg's paper [What Every Computer Scientist Should Know About Floating-Point Arithmetic](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.22.6768&rep=rep1&type=pdf).
+    encountered when computing with them, see David Goldberg's paper [What Every Computer Scientist Should Know About Floating-Point Arithmetic](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.22.6768&rep=rep1&type=pdf).
   * For even more extensive documentation of the history of, rationale for, and issues with floating-point
     numbers, as well as discussion of many other topics in numerical computing, see the [collected writings](https://people.eecs.berkeley.edu/~wkahan/)
     of [William Kahan](https://en.wikipedia.org/wiki/William_Kahan), commonly known as the "Father
diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md
index 7333434b87afb..b2ac237dab1a4 100644
--- a/doc/src/manual/interfaces.md
+++ b/doc/src/manual/interfaces.md
@@ -17,6 +17,7 @@ to generically build upon those behaviors.
 | `eltype(IterType)`             | `Any`                  | The type of the first entry of the tuple returned by `iterate()`                      |
 | `length(iter)`                 | (*undefined*)          | The number of items, if known                                                         |
 | `size(iter, [dim])`            | (*undefined*)          | The number of items in each dimension, if known                                       |
+| `Base.isdone(iter[, state])`   | `missing`              | Fast-path hint for iterator completion. Should be defined for mutable iterators, or else `isempty(iter)` will call `iterate(iter[, state])` and may mutate the iterator. |
 
 | Value returned by `IteratorSize(IterType)` | Required Methods                           |
 |:------------------------------------------ |:------------------------------------------ |
@@ -238,7 +239,7 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref
 | `similar(A, dims::Dims)`                        | `similar(A, eltype(A), dims)`          | Return a mutable array with the same element type and size *dims*                     |
 | `similar(A, ::Type{S}, dims::Dims)`             | `Array{S}(undef, dims)`                | Return a mutable array with the specified element type and size                       |
 | **Non-traditional indices**                     | **Default definition**                 | **Brief description**                                                                 |
-| `axes(A)`                                    | `map(OneTo, size(A))`                  | Return the a tuple of `AbstractUnitRange{<:Integer}` of valid indices                    |
+| `axes(A)`                                    | `map(OneTo, size(A))`                  | Return a tuple of `AbstractUnitRange{<:Integer}` of valid indices                    |
 | `similar(A, ::Type{S}, inds)`              | `similar(A, S, Base.to_shape(inds))`   | Return a mutable array with the specified indices `inds` (see below)                  |
 | `similar(T::Union{Type,Function}, inds)`   | `T(Base.to_shape(inds))`               | Return an array similar to `T` with the specified indices `inds` (see below)          |
 
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index a94d674501e04..1985292d66611 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -41,7 +41,7 @@ for structuring and organizing programs.
     it can be omitted altogether, writing just `meth(arg1,arg2)`, with `this` implied as the receiving
     object.
 !!! note
-    All the examples in this chapter assume that you are defining modules for a function in the *same*
+    All the examples in this chapter assume that you are defining methods for a function in the *same*
     module. If you want to add methods to a function in *another* module, you have to `import` it or
     use the name qualified with module names. See the section on [namespace management](@ref
     namespace-management).
diff --git a/doc/src/manual/missing.md b/doc/src/manual/missing.md
index d2046c5ede9c3..4c6d36c7381b2 100644
--- a/doc/src/manual/missing.md
+++ b/doc/src/manual/missing.md
@@ -1,7 +1,7 @@
 # [Missing Values](@id missing)
 
-Julia provides support for representing missing values in the statistical sense,
-that is for situations where no value is available for a variable in an observation,
+Julia provides support for representing missing values in the statistical sense.
+This is for situations where no value is available for a variable in an observation,
 but a valid value theoretically exists.
 Missing values are represented via the [`missing`](@ref) object, which is the
 singleton instance of the type [`Missing`](@ref). `missing` is equivalent to
@@ -15,7 +15,7 @@ and behaves like them in most situations.
 operators and functions.
 For these functions, uncertainty about the value of one of the operands
 induces uncertainty about the result. In practice, this means a math operation
-involving a `missing` value generally returns `missing`
+involving a `missing` value generally returns `missing`:
 ```jldoctest
 julia> missing + 1
 missing
@@ -27,14 +27,16 @@ julia> abs(missing)
 missing
 ```
 
-As `missing` is a normal Julia object, this propagation rule only works
+Since `missing` is a normal Julia object, this propagation rule only works
 for functions which have opted in to implement this behavior. This can be
-achieved either via a specific method defined for arguments of type `Missing`,
-or simply by accepting arguments of this type, and passing them to functions
-which propagate them (like standard math operators). Packages should consider
+achieved by:
+ - adding a specific method defined for arguments of type `Missing`, or
+ - accepting arguments of this type, and passing them to functions which propagate them (like standard math operators).
+
+Packages should consider
 whether it makes sense to propagate missing values when defining new functions,
-and define methods appropriately if that is the case. Passing a `missing` value
-to a function for which no method accepting arguments of type `Missing` is defined
+and define methods appropriately if this is the case. Passing a `missing` value
+to a function which does not have a method accepting arguments of type `Missing`
 throws a [`MethodError`](@ref), just like for any other type.
 
 Functions that do not propagate `missing` values can be made to do so by wrapping
@@ -46,7 +48,8 @@ For example, `f(x)` becomes `passmissing(f)(x)`.
 
 Standard equality and comparison operators follow the propagation rule presented
 above: if any of the operands is `missing`, the result is `missing`.
-Here are a few examples
+Here are a few examples:
+
 ```jldoctest
 julia> missing == 1
 missing
@@ -66,9 +69,10 @@ be used to test whether a value is missing. To test whether `x` is `missing`,
 use [`ismissing(x)`](@ref).
 
 Special comparison operators [`isequal`](@ref) and [`===`](@ref) are exceptions
-to the propagation rule: they always return a `Bool` value, even in the presence
+to the propagation rule. They will always return a `Bool` value, even in the presence
 of `missing` values, considering `missing` as equal to `missing` and as different
-from any other value. They can therefore be used to test whether a value is `missing`
+from any other value. They can therefore be used to test whether a value is `missing`:
+
 ```jldoctest
 julia> missing === 1
 false
@@ -85,7 +89,8 @@ true
 
 The [`isless`](@ref) operator is another exception: `missing` is considered
 as greater than any other value. This operator is used by [`sort`](@ref),
-which therefore places `missing` values after all other values.
+which therefore places `missing` values after all other values:
+
 ```jldoctest
 julia> isless(1, missing)
 true
@@ -100,18 +105,19 @@ false
 ## Logical operators
 
 Logical (or boolean) operators [`|`](@ref), [`&`](@ref) and [`xor`](@ref) are
-another special case, as they only propagate `missing` values when it is logically
-required. For these operators, whether or not the result is uncertain depends
-on the particular operation, following the well-established rules of
+another special case since they only propagate `missing` values when it is logically
+required. For these operators, whether or not the result is uncertain depends
+on the particular operation. This follows the well-established rules of
 [*three-valued logic*](https://en.wikipedia.org/wiki/Three-valued_logic) which are
-also implemented by `NULL` in SQL and `NA` in R. This abstract definition actually
+implemented by e.g. `NULL` in SQL and `NA` in R. This abstract definition
 corresponds to a relatively natural behavior which is best explained
 via concrete examples.
 
 Let us illustrate this principle with the logical "or" operator [`|`](@ref).
 Following the rules of boolean logic, if one of the operands is `true`,
 the value of the other operand does not have an influence on the result,
-which will always be `true`
+which will always be `true`:
+
 ```jldoctest
 julia> true | true
 true
@@ -123,12 +129,13 @@ julia> false | true
 true
 ```
 
-Based on this observation, we can conclude that if one of the operands is `true`
+Based on this observation, we can conclude that if one of the operands is `true`
 and the other `missing`, we know that the result is `true` in spite of the
 uncertainty about the actual value of one of the operands. If we had
 been able to observe the actual value of the second operand, it could only be
 `true` or `false`, and in both cases the result would be `true`. Therefore,
-in this particular case, missingness does *not* propagate
+in this particular case, missingness does *not* propagate:
+
 ```jldoctest
 julia> true | missing
 true
@@ -139,7 +146,8 @@ true
 
 On the contrary, if one of the operands is `false`, the result could be either
 `true` or `false` depending on the value of the other operand. Therefore,
-if that operand is `missing`, the result has to be `missing` too
+if that operand is `missing`, the result has to be `missing` too:
+
 ```jldoctest
 julia> false | true
 true
@@ -160,7 +168,8 @@ missing
 The behavior of the logical "and" operator [`&`](@ref) is similar to that of the
 `|` operator, with the difference that missingness does not propagate when
 one of the operands is `false`. For example, when that is the case of the first
-operand
+operand:
+
 ```jldoctest
 julia> false & false
 false
@@ -173,7 +182,8 @@ false
 ```
 
 On the other hand, missingness propagates when one of the operands is `true`,
-for example the first one
+for example the first one:
+
 ```jldoctest
 julia> true & true
 true
@@ -188,16 +198,17 @@ missing
 Finally, the "exclusive or" logical operator [`xor`](@ref) always propagates
 `missing` values, since both operands always have an effect on the result.
 Also note that the negation operator [`!`](@ref) returns `missing` when the
-operand is `missing` just like other unary operators.
+operand is `missing`, just like other unary operators.
 
 ## Control Flow and Short-Circuiting Operators
 
 Control flow operators including [`if`](@ref), [`while`](@ref) and the
 [ternary operator](@ref man-conditional-evaluation) `x ? y : z`
 do not allow for missing values. This is because of the uncertainty about whether
-the actual value would be `true` or `false` if we could observe it,
-which implies that we do not know how the program should behave. A [`TypeError`](@ref)
-is thrown as soon as a `missing` value is encountered in this context
+the actual value would be `true` or `false` if we could observe it.
+This implies that we do not know how the program should behave. A [`TypeError`](@ref)
+is thrown as soon as a `missing` value is encountered in this context:
+
 ```jldoctest
 julia> if missing
            println("here")
@@ -208,7 +219,8 @@ ERROR: TypeError: non-boolean (Missing) used in boolean context
 For the same reason, contrary to logical operators presented above,
 the short-circuiting boolean operators [`&&`](@ref) and [`||`](@ref) do not
 allow for `missing` values in situations where the value of the operand
-determines whether the next operand is evaluated or not. For example
+determines whether the next operand is evaluated or not. For example:
+
 ```jldoctest
 julia> missing || false
 ERROR: TypeError: non-boolean (Missing) used in boolean context
@@ -220,10 +232,11 @@ julia> true && missing && false
 ERROR: TypeError: non-boolean (Missing) used in boolean context
 ```
 
-On the other hand, no error is thrown when the result can be determined without
+In contrast, there is no error thrown when the result can be determined without
 the `missing` values. This is the case when the code short-circuits
 before evaluating the `missing` operand, and when the `missing` operand is the
-last one
+last one:
+
 ```jldoctest
 julia> true && missing
 missing
@@ -234,7 +247,8 @@ false
 
 ## Arrays With Missing Values
 
-Arrays containing missing values can be created like other arrays
+Arrays containing missing values can be created like other arrays:
+
 ```jldoctest
 julia> [1, missing]
 2-element Vector{Union{Missing, Int64}}:
@@ -243,7 +257,7 @@ julia> [1, missing]
 ```
 
 As this example shows, the element type of such arrays is `Union{Missing, T}`,
-with `T` the type of the non-missing values. This simply reflects the fact that
+with `T` the type of the non-missing values. This reflects the fact that
 array entries can be either of type `T` (here, `Int64`) or of type `Missing`.
 This kind of array uses an efficient memory storage equivalent to an `Array{T}`
 holding the actual values combined with an `Array{UInt8}` indicating the type
@@ -252,6 +266,7 @@ of the entry (i.e. whether it is `Missing` or `T`).
 Arrays allowing for missing values can be constructed with the standard syntax.
 Use `Array{Union{Missing, T}}(missing, dims)` to create arrays filled with
 missing values:
+
 ```jldoctest
 julia> Array{Union{Missing, String}}(missing, 2, 3)
 2×3 Matrix{Union{Missing, String}}:
@@ -264,10 +279,12 @@ julia> Array{Union{Missing, String}}(missing, 2, 3)
     `missing`, but this is not the correct way to obtain such an array.
     Use a `missing` constructor as shown above instead.
 
-An array allowing for `missing` values but which does not contain any such value
-can be converted back to an array which does not allow for missing values using
+An array with element type allowing `missing` entries (e.g. `Vector{Union{Missing, T}}`)
+which does not contain any `missing` entries can be converted to an array type that does
+not allow for `missing` entries (e.g. `Vector{T}`) using
 [`convert`](@ref). If the array contains `missing` values, a `MethodError` is thrown
-during conversion
+during conversion:
+
 ```jldoctest
 julia> x = Union{Missing, String}["a", "b"]
 2-element Vector{Union{Missing, String}}:
@@ -287,23 +304,27 @@ julia> y = Union{Missing, String}[missing, "b"]
 julia> convert(Array{String}, y)
 ERROR: MethodError: Cannot `convert` an object of type Missing to an object of type String
 ```
+
 ## Skipping Missing Values
 
 Since `missing` values propagate with standard mathematical operators, reduction
-functions return `missing` when called on arrays which contain missing values
+functions return `missing` when called on arrays which contain missing values:
+
 ```jldoctest
 julia> sum([1, missing])
 missing
 ```
 
-In this situation, use the [`skipmissing`](@ref) function to skip missing values
+In this situation, use the [`skipmissing`](@ref) function to skip missing values:
+
 ```jldoctest
 julia> sum(skipmissing([1, missing]))
 1
 ```
 
 This convenience function returns an iterator which filters out `missing` values
-efficiently. It can therefore be used with any function which supports iterators
+efficiently. It can therefore be used with any function which supports iterators:
+
 ```jldoctest skipmissing; setup = :(using Statistics)
 julia> x = skipmissing([3, missing, 2, 1])
 skipmissing(Union{Missing, Int64}[3, missing, 2, 1])
@@ -320,8 +341,9 @@ julia> mapreduce(sqrt, +, x)
 
 Objects created by calling `skipmissing` on an array can be indexed using indices
 from the parent array. Indices corresponding to missing values are not valid for
-these objects and an error is thrown when trying to use them (they are also skipped
-by `keys` and `eachindex`)
+these objects, and an error is thrown when trying to use them (they are also skipped
+by `keys` and `eachindex`):
+
 ```jldoctest skipmissing
 julia> x[1]
 3
@@ -332,9 +354,10 @@ ERROR: MissingException: the value at index (2,) is missing
 ```
 
 This allows functions which operate on indices to work in combination with `skipmissing`.
-This is notably the case for search and find functions, which return indices
-valid for the object returned by `skipmissing` which are also the indices of the
-matching entries *in the parent array*
+This is notably the case for search and find functions. These functions return indices
+that are valid for the object returned by `skipmissing` and are also the indices of the
+matching entries *in the parent array*:
+
 ```jldoctest skipmissing
 julia> findall(==(1), x)
 1-element Vector{Int64}:
@@ -347,7 +370,8 @@ julia> argmax(x)
 1
 ```
 
-Use [`collect`](@ref) to extract non-`missing` values and store them in an array
+Use [`collect`](@ref) to extract non-`missing` values and store them in an array:
+
 ```jldoctest skipmissing
 julia> collect(x)
 3-element Vector{Int64}:
@@ -362,9 +386,10 @@ The three-valued logic described above for logical operators is also used
 by logical functions applied to arrays. Thus, array equality tests using
 the [`==`](@ref) operator return `missing` whenever the result cannot be
 determined without knowing the actual value of the `missing` entry. In practice,
-this means that `missing` is returned if all non-missing values of the compared
+this means `missing` is returned if all non-missing values of the compared
 arrays are equal, but one or both arrays contain missing values (possibly at
-different positions)
+different positions):
+
 ```jldoctest
 julia> [1, missing] == [2, missing]
 false
@@ -377,7 +402,8 @@ missing
 ```
 
 As for single values, use [`isequal`](@ref) to treat `missing` values as equal
-to other `missing` values but different from non-missing values
+to other `missing` values, but different from non-missing values:
+
 ```jldoctest
 julia> isequal([1, missing], [1, missing])
 true
@@ -387,7 +413,8 @@ false
 ```
 
 Functions [`any`](@ref) and [`all`](@ref) also follow the rules of
-three-valued logic, returning `missing` when the result cannot be determined
+three-valued logic. Thus, they return `missing` when the result cannot be determined:
+
 ```jldoctest
 julia> all([true, missing])
 missing
diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md
index 7d3304810a428..d8242d1749f2d 100644
--- a/doc/src/manual/modules.md
+++ b/doc/src/manual/modules.md
@@ -189,7 +189,7 @@ nice(::Cat) = "nice 😸"
 ```
 
 Which one you choose is a matter of style. The first form makes it clear that you are adding a
-method to a function in another module (remember, that the imports and the method defintion may be
+method to a function in another module (remember that the imports and the method definition may be
 in separate files), while the second one is shorter, which is especially convenient if you are
 defining multiple methods.
 
@@ -205,7 +205,7 @@ For example, `Base` exports the function name `read`, but the CSV.jl package als
 If we are going to invoke CSV reading many times, it would be convenient to drop the `CSV.` qualifier.
 But then it is ambiguous whether we are referring to `Base.read` or `CSV.read`:
 
-```julia
+```julia-repl
 julia> read;
 
 julia> import CSV: read
@@ -214,7 +214,7 @@ WARNING: ignoring conflicting import of CSV.read into Main
 
 Renaming provides a solution:
 
-```julia
+```julia-repl
 julia> import CSV: read as rd
 ```
 
@@ -303,7 +303,7 @@ include(p) = Base.include(Mod, p)
 end
 ```
 
-If even `Core` is not wanted, a module that imports nothing at all can be defined with `Module(:YourNameHere, false, false)` and code can be evaluated into it with [`@eval`](@ref) or [`Core.eval`](@ref).
+If even `Core` is not wanted, a module that imports nothing and defines no names at all can be defined with `Module(:YourNameHere, false, false)` and code can be evaluated into it with [`@eval`](@ref) or [`Core.eval`](@ref).
 
 ### Standard modules
 
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index 658bec21bbfb9..16bf35bb42fd6 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -18,10 +18,16 @@ The number of execution threads is controlled either by using the
 [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) environment variable. When both are
 specified, then `-t`/`--threads` takes precedence.
 
+The number of threads can either be specified as an integer (`--threads=4`) or as `auto`
+(`--threads=auto`), where `auto` sets the number of threads to the number of local CPU
+threads.
+
 !!! compat "Julia 1.5"
     The `-t`/`--threads` command line argument requires at least Julia 1.5.
     In older versions you must use the environment variable instead.
 
+!!! compat "Julia 1.7"
+    Using `auto` together with the environment variable `JULIA_NUM_THREADS` requires at least Julia 1.7.
 Lets start Julia with 4 threads:
 
 ```bash
@@ -244,11 +250,14 @@ which will later be published formally.
 
 Any field in a struct declaration can be decorated with `@atomic`, and then any
 write must be marked with `@atomic` also, and must use one of the defined atomic
-orderings (:monotonic, :acquire, :release, :acquire\_release, or
-:sequentially\_consistent). Any read of an atomic field can also be annotated
+orderings (`:monotonic`, `:acquire`, `:release`, `:acquire_release`, or
+`:sequentially_consistent`). Any read of an atomic field can also be annotated
 with an atomic ordering constraint, or will be done with monotonic (relaxed)
 ordering if unspecified.
 
+!!! compat "Julia 1.7"
+    Per-field atomics require at least Julia 1.7.
+
 
 ## Side effects and mutable function arguments
 
diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md
index 0bdef1b338925..fc62632433850 100644
--- a/doc/src/manual/networking-and-streams.md
+++ b/doc/src/manual/networking-and-streams.md
@@ -351,3 +351,68 @@ Finished connection to google.com
 Finished connection to julialang.org
 Finished connection to github.com
 ```
+
+## Multicast
+
+Julia supports [multicast](https://datatracker.ietf.org/doc/html/rfc1112) over IPv4 and IPv6 using the User Datagram Protocol ([UDP](https://datatracker.ietf.org/doc/html/rfc768)) as transport.
+
+Unlike the Transmission Control Protocol ([TCP](https://datatracker.ietf.org/doc/html/rfc793)), UDP makes almost no assumptions about the needs of the application.
+TCP provides flow control (it accelerates and decelerates to maximize throughput), reliability (lost or corrupt packets are automatically retransmitted), sequencing (packets are ordered by the operating system before they are given to the application), segment size, and session setup and teardown.
+UDP provides no such features.
+
+A common use for UDP is in multicast applications.
+TCP is a stateful protocol for communication between exactly two devices.
+UDP can use special multicast addresses to allow simultaneous communication between many devices.
+
+### Receiving IP Multicast Packets
+
+To receive data over UDP multicast, simply `recv` on the socket, and the first packet received will be returned. Note, however, that it may not be the first packet that you sent!
+
+```
+using Sockets
+group = ip"228.5.6.7"
+socket = Sockets.UDPSocket()
+bind(socket, ip"0.0.0.0", 6789)
+join_multicast_group(socket, group)
+println(String(recv(socket)))
+leave_multicast_group(socket, group)
+close(socket)
+```
+
+### Sending IP Multicast Packets
+
+To transmit data over UDP multicast, simply `send` to the socket.
+Notice that it is not necessary for a sender to join the multicast group.
+
+```
+using Sockets
+group = ip"228.5.6.7"
+socket = Sockets.UDPSocket()
+send(socket, group, 6789, "Hello over IPv4")
+close(socket)
+```
+
+### IPv6 Example
+
+This example gives the same functionality as the previous program, but uses IPv6 as the network-layer protocol.
+
+Listener:
+```
+using Sockets
+group = Sockets.IPv6("ff05::5:6:7")
+socket = Sockets.UDPSocket()
+bind(socket, Sockets.IPv6("::"), 6789)
+join_multicast_group(socket, group)
+println(String(recv(socket)))
+leave_multicast_group(socket, group)
+close(socket)
+```
+
+Sender:
+```
+using Sockets
+group = Sockets.IPv6("ff05::5:6:7")
+socket = Sockets.UDPSocket()
+send(socket, group, 6789, "Hello over IPv6")
+close(socket)
+```
diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md
index 270489ed7a9ee..9825c34cc62b9 100644
--- a/doc/src/manual/noteworthy-differences.md
+++ b/doc/src/manual/noteworthy-differences.md
@@ -163,6 +163,9 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, a range like `a:b` is not shorthand for a vector like in R, but is a specialized `AbstractRange`
     object that is used for iteration. To convert a range into a vector, use
     [`collect(a:b)`](@ref).
+  * The `:` operator has a different precedence in R and Julia. In particular, in Julia arithmetic operators
+    have higher precedence than the `:` operator, whereas the reverse is true in R. For example, `1:n-1` in
+    Julia is equivalent to `1:(n-1)` in R.
   * Julia's [`max`](@ref) and [`min`](@ref) are the equivalent of `pmax` and `pmin` respectively
     in R, but both arguments need to have the same dimensions.  While [`maximum`](@ref) and [`minimum`](@ref)
     replace `max` and `min` in R, there are important differences.
@@ -207,10 +210,13 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, indexing of arrays, strings, etc. is 1-based not 0-based.
   * Julia's slice indexing includes the last element, unlike in Python. `a[2:3]` in Julia is `a[1:3]`
     in Python.
-  * Julia does not support negative indices. In particular, the last element of a list or array is
-    indexed with `end` in Julia, not `-1` as in Python.
+  * Unlike Python, Julia allows [AbstractArrays with arbitrary indexes](https://julialang.org/blog/2017/04/offset-arrays/).
+    Python's special interpretation of negative indexing, `a[-1]` and `a[-2]`, should be written
+    `a[end]` and `a[end-1]` in Julia.
   * Julia requires `end` for indexing until the last element. `x[1:]` in Python is equivalent to `x[2:end]` in Julia.
   * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia.
+  * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax used
+    in array indexing.  The `range` function is also supported.
   * In Julia, indexing a matrix with arrays like `X[[1,2], [1,3]]` refers to a sub-matrix that contains the intersections of the first and second rows with the first and third columns. In Python, `X[[1,2], [1,3]]` refers to a vector that contains the values of cell `[1,1]` and `[2,3]` in the matrix. `X[[1,2], [1,3]]` in Julia is equivalent with `X[np.ix_([0,1],[0,2])]` in Python. `X[[0,1], [0,2]]` in Python is equivalent with `X[[CartesianIndex(1,1), CartesianIndex(2,3)]]` in Julia.
   * Julia has no line continuation syntax: if, at the end of a line, the input so far is a complete
     expression, it is considered done; otherwise the input continues. One way to force an expression
@@ -246,10 +252,12 @@ For users coming to Julia from R, these are some noteworthy differences:
   * The logical Julia program structure (Packages and Modules) is independent of the file structure (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules).
   * The ternary operator `x > 0 ? 1 : -1` in Julia corresponds to a conditional expression in Python `1 if x > 0 else -1`.
   * In Julia the `@` symbol refers to a macro, whereas in Python it refers to a decorator.
-  * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia due to performance reasons.
+  * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia (compared with Python, Julia is faster at ordinary control flow but slower at exception-catching).
   * In Julia loops are fast, there is no need to write "vectorized" code for performance reasons.
   * Be careful with non-constant global variables in Julia, especially in tight loops. Since you can write close-to-metal code in Julia (unlike Python), the effect of globals can be drastic (see [Performance Tips](@ref man-performance-tips)).
-  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`.
+  * In Julia, rounding and truncation are explicit. Python's `int(3.7)` should be `floor(Int, 3.7)` or `Int(floor(3.7))` and is distinguished from `round(Int, 3.7)`. `floor(x)` and `round(x)` on their own return an integer value of the same type as `x` rather than always returning `Int`.
+  * In Julia, parsing is explicit. Python's `float("3.7")` would be `parse(Float64, "3.7")` in Julia.
+  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`.  Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`.
   * In Julia, a new local scope is introduced by most code blocks, including loops and `try` — `catch` — `finally`. Note that comprehensions (list, generator, etc.) introduce a new local scope both in Python and Julia, whereas `if` blocks do not introduce a new local scope in both languages.
 
 ## Noteworthy differences from C/C++
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index da9bf8fd1bf46..7fa52d5c546e6 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -13,10 +13,9 @@ The functions should take arguments, instead of operating directly on global var
 
 ## Avoid global variables
 
-A global variable might have its value, and therefore its type, change at any point. This makes
-it difficult for the compiler to optimize code using global variables. Variables should be local,
-or passed as arguments to functions, whenever possible.
-
+A global variable might have its value, and therefore possibly its type, changed at any point. This makes
+it difficult for the compiler to optimize code using global variables. This also applies to type-valued variables,
+i.e. type aliases on the global level. Variables should be local, or passed as arguments to functions, whenever possible.
 
 We find that global names are frequently constants, and declaring them as such greatly improves
 performance:
@@ -77,12 +76,12 @@ julia> function sum_global()
        end;
 
 julia> @time sum_global()
-  0.026328 seconds (9.30 k allocations: 416.747 KiB, 36.50% gc time, 99.48% compilation time)
-508.39048990953665
+  0.011539 seconds (9.08 k allocations: 373.386 KiB, 98.69% compilation time)
+523.0007221951678
 
 julia> @time sum_global()
-  0.000075 seconds (3.49 k allocations: 70.156 KiB)
-508.39048990953665
+  0.000091 seconds (3.49 k allocations: 70.156 KiB)
+523.0007221951678
 ```
 
 On the first call (`@time sum_global()`) the function gets compiled. (If you've not yet used [`@time`](@ref)
@@ -113,12 +112,12 @@ julia> function sum_arg(x)
        end;
 
 julia> @time sum_arg(x)
-  0.010298 seconds (4.23 k allocations: 226.021 KiB, 99.81% compilation time)
-508.39048990953665
+  0.007551 seconds (3.98 k allocations: 200.548 KiB, 99.77% compilation time)
+523.0007221951678
 
 julia> @time sum_arg(x)
-  0.000005 seconds (1 allocation: 16 bytes)
-508.39048990953665
+  0.000006 seconds (1 allocation: 16 bytes)
+523.0007221951678
 ```
 
 The 1 allocation seen is from running the `@time` macro itself in global scope. If we instead run
@@ -128,8 +127,8 @@ the timing in a function, we can see that indeed no allocations are performed:
 julia> time_sum(x) = @time sum_arg(x);
 
 julia> time_sum(x)
-  0.000001 seconds
-508.39048990953665
+  0.000002 seconds
+523.0007221951678
 ```
 
 In some situations, your function may need to allocate memory as part of its operation, and this
@@ -325,7 +324,7 @@ Float32
 
 For all practical purposes, such objects behave identically to those of `MyStillAmbiguousType`.
 
-It's quite instructive to compare the sheer amount code generated for a simple function
+It's quite instructive to compare the sheer amount of code generated for a simple function
 
 ```julia
 func(m::MyType) = m.a+1
@@ -342,6 +341,14 @@ For reasons of length the results are not shown here, but you may wish to try th
 the type is fully-specified in the first case, the compiler doesn't need to generate any code
 to resolve the type at run-time. This results in shorter and faster code.
 
+One should also keep in mind that not-fully-parameterized types behave like abstract types. For example, even though a fully specified `Array{T,n}` is concrete, `Array` itself with no parameters given is not concrete:
+
+```jldoctest myambig3
+julia> !isconcretetype(Array), !isabstracttype(Array), isstructtype(Array), !isconcretetype(Array{Int}), isconcretetype(Array{Int,1})
+(true, true, true, true, true)
+```
+In this case, it would be better to avoid declaring `MyType` with a field `a::Array` and instead declare the field as `a::Array{T,N}` or as `a::A`, where `{T,N}` or `A` are parameters of `MyType`.
+
 ### Avoid fields with abstract containers
 
 The same best practices also work for container types:
@@ -354,6 +361,10 @@ julia> struct MySimpleContainer{A<:AbstractVector}
 julia> struct MyAmbiguousContainer{T}
            a::AbstractVector{T}
        end
+
+julia> struct MyAlsoAmbiguousContainer
+           a::Array
+       end
 ```
 
 For example:
@@ -378,6 +389,17 @@ julia> b = MyAmbiguousContainer([1:3;]);
 
 julia> typeof(b)
 MyAmbiguousContainer{Int64}
+
+julia> d = MyAlsoAmbiguousContainer(1:3);
+
+julia> typeof(d), typeof(d.a)
+(MyAlsoAmbiguousContainer, Vector{Int64})
+
+julia> d = MyAlsoAmbiguousContainer(1:1.0:3);
+
+julia> typeof(d), typeof(d.a)
+(MyAlsoAmbiguousContainer, Vector{Float64})
+
 ```
 
 For `MySimpleContainer`, the object is fully-specified by its type and parameters, so the compiler
@@ -648,10 +670,10 @@ julia> function strange_twos(n)
        end;
 
 julia> strange_twos(3)
-3-element Vector{Float64}:
- 2.0
- 2.0
- 2.0
+3-element Vector{Int64}:
+ 2
+ 2
+ 2
 ```
 
 This should be written as:
@@ -670,10 +692,10 @@ julia> function strange_twos(n)
        end;
 
 julia> strange_twos(3)
-3-element Vector{Float64}:
- 2.0
- 2.0
- 2.0
+3-element Vector{Int64}:
+ 2
+ 2
+ 2
 ```
 
 Julia's compiler specializes code for argument types at function boundaries, so in the original
diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md
index b736c46f90282..424ea3eff8ef7 100644
--- a/doc/src/manual/profile.md
+++ b/doc/src/manual/profile.md
@@ -338,7 +338,7 @@ For example with `OProfile` you can try a simple recording :
 >opreport -l `which ./julia`
 ```
 
-Or similary with `perf` :
+Or similarly with `perf` :
 
 ```
 $ ENABLE_JITPROFILING=1 perf record -o /tmp/perf.data --call-graph dwarf -k 1 ./julia /test/fastmath.jl
@@ -347,7 +347,7 @@ $ perf report --call-graph -G -i /tmp/perf-jit.data
 ```
 
 There are many more interesting things that you can measure about your program, to get a comprehensive list
-please read the [Linux perf examples page](http://www.brendangregg.com/perf.html).
+please read the [Linux perf examples page](https://www.brendangregg.com/perf.html).
 
 Remember that perf saves for each execution a `perf.data` file that, even for small programs, can get
 quite large. Also the perf LLVM module saves temporarily debug objects in `~/.debug/jit`, remember
diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md
index 16dc779318d51..e643ffff3ee61 100644
--- a/doc/src/manual/running-external-programs.md
+++ b/doc/src/manual/running-external-programs.md
@@ -399,7 +399,7 @@ julia> run(Cmd(`sh -c "echo foo \$HOWLONG"`, env=("HOWLONG" => "ever!",)));
 foo ever!
 ```
 
-See `[`Cmd`](@ref)` for additional keyword arguments. The [`setenv`](@ref) and [`addenv`](@ref) commands
+See [`Cmd`](@ref) for additional keyword arguments. The [`setenv`](@ref) and [`addenv`](@ref) commands
 provide another means for replacing or adding to the `Cmd` execution environment variables, respectively:
 
 ```jldoctest
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index 56a5a20c1cef4..df486190938d6 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -482,17 +482,17 @@ of the concatenated strings, e.g.:
 julia> a, b = "\xe2\x88", "\x80"
 ("\xe2\x88", "\x80")
 
-julia> c = a*b
+julia> c = string(a, b)
 "∀"
 
 julia> collect.([a, b, c])
-3-element Array{Array{Char,1},1}:
+3-element Vector{Vector{Char}}:
  ['\xe2\x88']
  ['\x80']
  ['∀']
 
 julia> length.([a, b, c])
-3-element Array{Int64,1}:
+3-element Vector{Int64}:
  1
  1
  1
@@ -767,8 +767,8 @@ Further documentation is given in the [Metaprogramming](@ref meta-non-standard-s
 
 ## [Regular Expressions](@id man-regex-literals)
 
-Julia has Perl-compatible regular expressions (regexes), as provided by the [PCRE](http://www.pcre.org/)
-library (a description of the syntax can be found [here](http://www.pcre.org/current/doc/html/pcre2syntax.html)). Regular expressions are related to strings in two ways: the obvious connection is that
+Julia has Perl-compatible regular expressions (regexes), as provided by the [PCRE](https://www.pcre.org/)
+library (a description of the syntax can be found [here](https://www.pcre.org/current/doc/html/pcre2syntax.html)). Regular expressions are related to strings in two ways: the obvious connection is that
 regular expressions are used to find regular patterns in strings; the other connection is that
 regular expressions are themselves input as strings, which are parsed into a state machine that
 can be used to efficiently search for patterns in strings. In Julia, regular expressions are input
@@ -899,7 +899,7 @@ julia> m.offsets
 ```
 
 It is convenient to have captures returned as an array so that one can use destructuring syntax
-to bind them to local variables. As a convinience, the `RegexMatch` object implements iterator methods that pass through to the `captures` field, so you can destructure the match object directly:
+to bind them to local variables. As a convenience, the `RegexMatch` object implements iterator methods that pass through to the `captures` field, so you can destructure the match object directly:
 
 ```jldoctest acdmatch
 julia> first, second, third = m; first
@@ -939,7 +939,7 @@ julia> replace("a", r"." => s"\g<0>1")
 
 You can modify the behavior of regular expressions by some combination of the flags `i`, `m`,
 `s`, and `x` after the closing double quote mark. These flags have the same meaning as they do
-in Perl, as explained in this excerpt from the [perlre manpage](http://perldoc.perl.org/perlre.html#Modifiers):
+in Perl, as explained in this excerpt from the [perlre manpage](https://perldoc.perl.org/perlre#Modifiers):
 
 ```
 i   Do case-insensitive pattern matching.
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md
index 3a94f263e9e65..cbe7e9b94eefc 100644
--- a/doc/src/manual/style-guide.md
+++ b/doc/src/manual/style-guide.md
@@ -144,7 +144,7 @@ is stated to be the API. This has several benefits:
 - Package developers are freer to change the implementation without breaking
   user code.
 - Methods can be passed to higher-order constructs like [`map`](@ref) (e.g.
-  `map(imag, zs))` rather than `[z.im for z in zs]`).
+  `map(imag, zs)`) rather than `[z.im for z in zs]`).
 - Methods can be defined on abstract types.
 - Methods can describe a conceptual operation that can be shared across
   disparate types (e.g. `real(z)` works on Complex numbers or Quaternions).
@@ -345,8 +345,7 @@ to behave in a certain way, and overly customizing its behavior can make it hard
 ## Avoid type piracy
 
 "Type piracy" refers to the practice of extending or redefining methods in Base
-or other packages on types that you have not defined. In some cases, you can get away with
-type piracy with little ill effect. In extreme cases, however, you can even crash Julia
+or other packages on types that you have not defined. In extreme cases, you can crash Julia
 (e.g. if your method extension or redefinition causes invalid input to be passed to a
 `ccall`). Type piracy can complicate reasoning about code, and may introduce
 incompatibilities that are hard to predict and diagnose.
diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md
index 0be372d4fa612..c44df95f89658 100644
--- a/doc/src/manual/types.md
+++ b/doc/src/manual/types.md
@@ -171,11 +171,11 @@ Let's consider some of the abstract types that make up Julia's numerical hierarc
 
 ```julia
 abstract type Number end
-abstract type Real     <: Number end
+abstract type Real          <: Number end
 abstract type AbstractFloat <: Real end
-abstract type Integer  <: Real end
-abstract type Signed   <: Integer end
-abstract type Unsigned <: Integer end
+abstract type Integer       <: Real end
+abstract type Signed        <: Integer end
+abstract type Unsigned      <: Integer end
 ```
 
 The [`Number`](@ref) type is a direct child type of `Any`, and [`Real`](@ref) is its child.
@@ -1108,6 +1108,50 @@ julia> NoFieldsParam{Int}() === NoFieldsParam{Int}()
 true
 ```
 
+## Types of functions
+
+Each function has its own type, which is a subtype of `Function`.
+
+```jldoctest foo41
+julia> foo41(x) = x + 1
+foo41 (generic function with 1 method)
+
+julia> typeof(foo41)
+typeof(foo41) (singleton type of function foo41, subtype of Function)
+```
+
+Note how `typeof(foo41)` prints as itself. This is merely a convention for printing, as it is a first-class object that can be used like any other value:
+
+```jldoctest foo41
+julia> T = typeof(foo41)
+typeof(foo41) (singleton type of function foo41, subtype of Function)
+
+julia> T <: Function
+true
+```
+
+Types of functions defined at top-level are singletons. When necessary, you can compare them with [`===`](@ref).
+
+[Closures](@ref man-anonymous-functions) also have their own type, which is usually printed with names that end in `#<number>`. Names and types for functions defined at different locations are distinct, but not guaranteed to be printed the same way across sessions.
+
+```jldoctest; filter = r"[0-9\.]+"
+julia> typeof(x -> x + 1)
+var"#9#10"
+```
+
+Types of closures are not necessarily singletons.
+
+```jldoctest
+julia> addy(y) = x -> x + y
+addy (generic function with 1 method)
+
+julia> Base.issingletontype(addy(1))
+false
+
+julia> addy(1) === addy(2)
+false
+```
+
 ## [`Type{T}` type selectors](@id man-typet-type)
 
 For each type `T`, `Type{T}` is an abstract parametric type whose only instance is the
@@ -1182,7 +1226,7 @@ While `Type` is part of Julia's type hierarchy like any other abstract parametri
 is not commonly used outside method signatures except in some special cases. Another
 important use case for `Type` is sharpening field types which would otherwise be captured
 less precisely, e.g. as [`DataType`](@ref man-declared-types) in the example below where the
-default constuctor could lead to performance problems in code relying on the precise wrapped
+default constructor could lead to performance problems in code relying on the precise wrapped
 type (similarly to [abstract type parameters](@ref man-performance-abstract-container)).
 
 ```jldoctest
@@ -1342,7 +1386,7 @@ REPL and other interactive environments, and also a more compact single-line for
 [`print`](@ref) or for displaying the object as part of another object (e.g. in an array). Although
 by default the `show(io, z)` function is called in both cases, you can define a *different* multi-line
 format for displaying an object by overloading a three-argument form of `show` that takes the
-`text/plain` MIME type as its second argument (see [Multimedia I/O](@ref)), for example:
+`text/plain` MIME type as its second argument (see [Multimedia I/O](@ref Multimedia-I/O)), for example:
 
 ```jldoctest polartype
 julia> Base.show(io::IO, ::MIME"text/plain", z::Polar{T}) where{T} =
@@ -1365,7 +1409,7 @@ julia> [Polar(3, 4.0), Polar(4.0,5.3)]
 where the single-line `show(io, z)` form is still used for an array of `Polar` values.   Technically,
 the REPL calls `display(z)` to display the result of executing a line, which defaults to `show(stdout, MIME("text/plain"), z)`,
 which in turn defaults to `show(stdout, z)`, but you should *not* define new [`display`](@ref)
-methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref)).
+methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref Multimedia-I/O)).
 
 Moreover, you can also define `show` methods for other MIME types in order to enable richer display
 (HTML, images, etcetera) of objects in environments that support this (e.g. IJulia).   For example,
diff --git a/doc/src/manual/unicode-input.md b/doc/src/manual/unicode-input.md
index 489b256cbdea2..7539e75bb4f24 100644
--- a/doc/src/manual/unicode-input.md
+++ b/doc/src/manual/unicode-input.md
@@ -46,7 +46,7 @@ end
 
 # Surround combining characters with no-break spaces (i.e '\u00A0'). Follows the same format
 # for how unicode is displayed on the unicode.org website:
-# http://unicode.org/cldr/utility/character.jsp?a=0300
+# https://util.unicode.org/UnicodeJsps/character.jsp?a=0300
 function fix_combining_chars(char)
     cat = Base.Unicode.category_code(char)
     return cat == 6 || cat == 8 ? "$NBSP$char$NBSP" : "$char"
diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md
index 442943806a3b8..8df036f8c06aa 100644
--- a/doc/src/manual/variables-and-scoping.md
+++ b/doc/src/manual/variables-and-scoping.md
@@ -130,7 +130,7 @@ _always_ updates that existing local: you can only shadow a local by explicitly
 declaring a new local in a nested scope with the `local` keyword. In particular,
 this applies to variables assigned in inner functions, which may surprise users
 coming from Python where assignment in an inner function creates a new local
-unless the variable is explictly declared to be non-local.
+unless the variable is explicitly declared to be non-local.
 
 Mostly this is pretty intuitive, but as with many things that behave
 intuitively, the details are more subtle than one might naïvely imagine.
@@ -526,11 +526,20 @@ file, if it behaves differently than it did in the REPL, then you will get a war
 ### Let Blocks
 
 `let` statements create a new *hard scope* block (see above) and introduce new variable
-bindings each time they run. Whereas assignments might reassign a new value to an existing value location,
-`let` always creates a new location.
-This difference is usually not important, and is only detectable in the case of variables that
-outlive their scope via closures. The `let` syntax accepts a comma-separated series of assignments
-and variable names:
+bindings each time they run. The variable need not be immediately assigned:
+```jldoctest
+julia> var1 = let x
+           for i in 1:5
+               (i == 4) && (x = i; break)
+           end
+           x
+       end
+4
+```
+Whereas assignments might reassign a new value to an existing value location, `let` always creates a
+new location. This difference is usually not important, and is only detectable in the case of
+variables that outlive their scope via closures. The `let` syntax accepts a comma-separated series of
+assignments and variable names:
 
 ```jldoctest
 julia> x, y, z = -1, -1, -1;
diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md
index 004efb92dc0e5..f61503d99a67c 100644
--- a/doc/src/manual/variables.md
+++ b/doc/src/manual/variables.md
@@ -93,7 +93,7 @@ ERROR: cannot assign a value to variable Base.sqrt from module Main
 ## [Allowed Variable Names](@id man-allowed-variable-names)
 
 Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code
-points greater than 00A0; in particular, [Unicode character categories](http://www.fileformat.info/info/unicode/category/index.htm)
+points greater than 00A0; in particular, [Unicode character categories](https://www.fileformat.info/info/unicode/category/index.htm)
 Lu/Ll/Lt/Lm/Lo/Nl (letters), Sc/So (currency and other symbols), and a few other letter-like characters
 (e.g. a subset of the Sm math symbols) are allowed. Subsequent characters may also include ! and
 digits (0-9 and other characters in categories Nd/No), as well as other Unicode code points: diacritics
@@ -123,7 +123,7 @@ julia> y = ___
 ERROR: syntax: all-underscore identifier used as rvalue
 ```
 
-The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref):
+The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref Keywords):
 
 ```julia-repl
 julia> else = false
diff --git a/doc/src/manual/workflow-tips.md b/doc/src/manual/workflow-tips.md
index 2f7abf5a6a033..7ee4b6aefba77 100644
--- a/doc/src/manual/workflow-tips.md
+++ b/doc/src/manual/workflow-tips.md
@@ -104,7 +104,7 @@ the following modifications:
 
      Navigate to your temporary directory and launch Julia, then do the following:
 
-     ```julia
+     ```julia-repl
      pkg> generate MyPkg            # type ] to enter pkg mode
      julia> push!(LOAD_PATH, pwd())   # hit backspace to exit pkg mode
      ```
@@ -123,7 +123,7 @@ the following modifications:
    Then navigate to the directory containing your test file (here
    assumed to be `"runtests.jl"`) and do the following:
 
-   ```julia
+   ```julia-repl
    julia> using MyPkg
 
    julia> include("runtests.jl")
diff --git a/src/.gitignore b/src/.gitignore
index 3b845e647b02c..388e971d4f12d 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -10,6 +10,7 @@
 /julia_flisp.boot
 /julia_flisp.boot.inc
 /flisp.boot.inc
+/jl_internal_funcs.inc
 
 /libjulia-debug.a
 /libjulia-debug.so
diff --git a/src/Makefile b/src/Makefile
index bb38a848ab12f..3646f14c75ff5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -17,15 +17,17 @@ JLDFLAGS += $(LDFLAGS)
 FLAGS := \
 	-D_GNU_SOURCE -I$(BUILDDIR) -I$(SRCDIR) \
 	-I$(SRCDIR)/flisp -I$(SRCDIR)/support \
-	-I$(LIBUV_INC) -I$(build_includedir) -DLIBRARY_EXPORTS \
+	-I$(LIBUV_INC) -I$(build_includedir) \
 	-I$(JULIAHOME)/deps/valgrind
-ifneq ($(USEMSVC), 1)
 FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
 		 -Wno-comment -Wpointer-arith -Wundef
 ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result))
 FLAGS += -Wno-unused-result
 endif
 JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
+
+ifeq ($(USECLANG),1)
+FLAGS += -Wno-return-type-c-linkage
 endif
 
 FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"'
@@ -45,22 +47,26 @@ RUNTIME_SRCS := \
 	simplevector runtime_intrinsics precompile \
 	threading partr stackwalk gc gc-debug gc-pages gc-stacks method \
 	jlapi signal-handling safepoint timing subtype \
-	crc32c APInt-C processor ircode opaque_closure
+	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage
 SRCS := jloptions runtime_ccall rtutils
+ifeq ($(OS),WINNT)
+SRCS += win32_ucontext
+endif
 
-LLVMLINK :=
+RT_LLVMLINK :=
+CG_LLVMLINK :=
 
 ifeq ($(JULIACODEGEN),LLVM)
-SRCS += codegen llvm-ptls
-RUNTIME_SRCS += jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
+CODEGEN_SRCS := codegen llvm-ptls
+RUNTIME_CODEGEN_SRCS := jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
 	llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
 	llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
 	llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-remove-addrspaces \
-	llvm-remove-ni llvm-julia-licm llvm-demote-float16
+	llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
-LLVM_LIBS := all
+CG_LLVM_LIBS := all
 ifeq ($(USE_POLLY),1)
-LLVMLINK += -lPolly -lPollyISL
+CG_LLVMLINK += -lPolly -lPollyISL
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/include
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --obj-root)/tools/polly/include
 FLAGS += -DUSE_POLLY
@@ -68,31 +74,42 @@ ifeq ($(USE_POLLY_OPENMP),1)
 FLAGS += -fopenmp
 endif
 ifeq ($(USE_POLLY_ACC),1)
-LLVMLINK += -lPollyPPCG -lGPURuntime
+CG_LLVMLINK += -lPollyPPCG -lGPURuntime
 FLAGS += -DUSE_POLLY_ACC
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/tools # Required to find GPURuntime/GPUJIT.h
 endif
 endif
 else
-RUNTIME_SRCS += anticodegen
-LLVM_LIBS := support
+# JULIACODEGEN != LLVM
 endif
 
-ifeq ($(USEMSVC), 1)
-SRCS += getopt
-endif
+RT_LLVM_LIBS := support
 
 SRCS += $(RUNTIME_SRCS)
 
+CODEGEN_SRCS += $(RUNTIME_CODEGEN_SRCS)
+
+ifeq ($(WITH_DTRACE),1)
+DTRACE_HEADERS := uprobes.h.gen
+ifneq ($(OS),Darwin)
+SRCS += uprobes
+endif
+else
+DTRACE_HEADERS :=
+endif
 
 # headers are used for dependency tracking, while public headers will be part of the dist
 UV_HEADERS :=
-HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_internal.h options.h timing.h)
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_gcext.h)
 ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+ifeq ($(OS),WINNT)
+PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
+endif
+HEADERS := $(PUBLIC_HEADERS) $(addprefix $(SRCDIR)/,julia_internal.h options.h timing.h) $(addprefix $(BUILDDIR)/,$(DTRACE_HEADERS) jl_internal_funcs.inc)
+PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,julia_gcext.h)
 PUBLIC_HEADER_TARGETS := $(addprefix $(build_includedir)/julia/,$(notdir $(PUBLIC_HEADERS)) $(UV_HEADERS))
 
 LLVM_LDFLAGS := $(shell $(LLVM_CONFIG_HOST) --ldflags)
@@ -100,14 +117,14 @@ LLVM_CXXFLAGS := $(shell $(LLVM_CONFIG_HOST) --cxxflags)
 
 ifeq ($(JULIACODEGEN),LLVM)
 ifneq ($(USE_SYSTEM_LLVM),0)
-LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
+CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
 # HACK: llvm-config doesn't correctly point to shared libs on all platforms
 #       https://github.com/JuliaLang/julia/issues/29981
 else
 ifneq ($(USE_LLVM_SHLIB),1)
-LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(LLVM_LIBS)) $($(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null)
+CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(CG_LLVM_LIBS) --link-static) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null)
 else
-LLVMLINK += $(LLVM_LDFLAGS) -lLLVM
+CG_LLVMLINK += $(LLVM_LDFLAGS) -lLLVM
 endif
 endif
 ifeq ($(USE_LLVM_SHLIB),1)
@@ -115,20 +132,33 @@ FLAGS += -DLLVM_SHLIB
 endif # USE_LLVM_SHLIB == 1
 endif
 
+RT_LLVM_LINK_ARGS := $(shell $(LLVM_CONFIG_HOST) --libs $(RT_LLVM_LIBS) --system-libs --link-static)
+RT_LLVMLINK += $(LLVM_LDFLAGS) $(RT_LLVM_LINK_ARGS)
+ifeq ($(OS), WINNT)
+RT_LLVMLINK += -luuid -lole32
+endif
+
 CLANG_LDFLAGS := $(LLVM_LDFLAGS)
 ifeq ($(OS), Darwin)
 CLANG_LDFLAGS += -Wl,-undefined,dynamic_lookup
 endif
 
 COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
-COMMON_LIBS := $(LIBUV) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(LLVMLINK) $(OSLIBS)
-DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(COMMON_LIBS)
-RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(COMMON_LIBS)
+RT_LIBS := $(LIBUV) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS)
+CG_LIBS := $(NO_WHOLE_ARCHIVE) $(LIBUV) $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS)
+RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
+CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
+RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS)
+CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia -ljulia-internal
 
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
-DEBUGFLAGS += $(FLAGS)
-SHIPFLAGS += $(FLAGS)
+
+CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
+CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
+
+DEBUGFLAGS += $(FLAGS) -DLIBRARY_EXPORTS
+SHIPFLAGS += $(FLAGS) -DLIBRARY_EXPORTS
 
 # if not absolute, then relative to the directory of the julia executable
 SHIPFLAGS  += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\""
@@ -156,13 +186,27 @@ endif
 default: $(JULIA_BUILD_MODE) # contains either "debug" or "release"
 all: debug release
 
-release debug: %: libjulia-internal-%
+release debug: %: libjulia-internal-% libjulia-codegen-%
 
 $(BUILDDIR):
 	mkdir -p $(BUILDDIR)
 
 LLVM_CONFIG_ABSOLUTE := $(shell which $(LLVM_CONFIG))
 
+# Generate the DTrace header file, while also renaming the macros from
+# JULIA_ to JL_PROBE to clearly delineate them.
+$(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -h -s $< -o $@)
+	sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp
+	mv $@.tmp $@
+
+$(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc
+	# Generate `.inc` file that contains a list of `#define` macros to rename functions defined in `libjulia-internal`
+	# to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`.  This avoids
+	# potential confusion with debugging tools, when inspecting a process that has both `libjulia` and `libjulia-internal`
+	# loaded at the same time.
+	grep 'XX(.\+)' $< | sed -E 's/.*XX\((.+)\).*/#define \1 i\1/g' >$@
+
 # source file rules
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
@@ -172,6 +216,10 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONF
 	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(SHIPFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+$(BUILDDIR)/%.o : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
+$(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 
 # public header rules
 $(eval $(call dir_target,$(build_includedir)/julia))
@@ -194,7 +242,7 @@ else
 JULIA_SPLITDEBUG := 0
 endif
 $(build_shlibdir)/libccalltest.$(SHLIB_EXT): $(SRCDIR)/ccalltest.c
-	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JCPPFLAGS) $(DEBUGFLAGS) -O3 $< $(fPIC) -shared -o $@.tmp $(JLDFLAGS))
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@.tmp $(LDFLAGS))
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@.tmp
 ifeq ($(JULIA_SPLITDEBUG),1)
 	@# Create split debug info file for libccalltest stacktraces test
@@ -211,7 +259,7 @@ endif
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@
 
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCXXFLAGS) $(JCPPFLAGS) $(DEBUGFLAGS) -O3 $< $(fPIC) -shared -o $@ $(JLDFLAGS) -L$(build_shlibdir) -L$(build_libdir) $(NO_WHOLE_ARCHIVE) $(LLVMLINK))
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK) -lpthread)
 
 julia_flisp.boot.inc.phony: $(BUILDDIR)/julia_flisp.boot.inc
 
@@ -225,7 +273,7 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
 		$(call cygpath_w,$(SRCDIR)/mk_julia_flisp_boot.scm) $(call cygpath_w,$(dir $<)) $(notdir $<) $(call cygpath_w,$@))
 
 # additional dependency links
-$(BUILDDIR)/anticodegen.o $(BUILDDIR)/anticodegen.dbg.obj: $(SRCDIR)/intrinsics.h
+$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h
 $(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
 $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h
@@ -242,11 +290,12 @@ $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_pro
 $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUILDDIR)/julia_version.h
-$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h
 $(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
 $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h
-$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
-$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
@@ -259,10 +308,10 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
-$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
-$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
 $(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
@@ -290,21 +339,17 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 	@echo "#endif" >> $@.$(JULIA_BUILD_MODE).tmp
 	mv $@.$(JULIA_BUILD_MODE).tmp $@
 
-ifneq ($(USEMSVC), 1)
 CXXLD = $(CXX) -shared
-else
-CXXLD = $(LD) -dll -export:jl_setjmp -export:jl_longjmp
-endif
 
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
 $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
@@ -322,10 +367,36 @@ libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_
 libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
 libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS)
 
+$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
+
+$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-codegen-debug.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
+
+ifneq ($(OS), WINNT)
+$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-codegen%.$(JL_MAJOR_SHLIB_EXT): \
+		$(build_shlibdir)/libjulia-codegen%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+$(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT) $(build_shlibdir)/libjulia-codegen-debug.$(SHLIB_EXT): $(build_shlibdir)/libjulia-codegen%.$(SHLIB_EXT): \
+		$(build_shlibdir)/libjulia-codegen%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT)
+libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-codegen-debug.$(SHLIB_EXT)
+endif
+libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT)
+libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
+libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS)
+
 clean:
-	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
-	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc
-	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a
+	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
+	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
+	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
 	-rm -f $(BUILDDIR)/julia_version.h
 
 clean-flisp:
@@ -336,7 +407,7 @@ clean-support:
 
 cleanall: clean clean-flisp clean-support clean-analyzegc
 
-$(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/GCChecker.cpp $(LLVM_CONFIG_ABSOLUTE)
+$(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG_ABSOLUTE)
 	@$(call PRINT_CC, $(CXX) -g $(fPIC) -shared -o $@ -DCLANG_PLUGIN -I$(build_includedir) -L$(build_libdir) \
 		$(LLVM_CXXFLAGS) $(CLANG_LDFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) $(CXXLDFLAGS) $<)
 
@@ -348,7 +419,7 @@ $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/GCChecker.c
 install-analysis-deps:
 	$(MAKE) -C $(JULIAHOME)/deps install-llvm install-clang install-llvm-tools install-libuv install-utf8proc install-unwind
 
-analyzegc-deps-check: $(BUILDDIR)/julia_version.h $(BUILDDIR)/julia_flisp.boot.inc
+analyzegc-deps-check: $(BUILDDIR)/julia_version.h $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
 ifeq ($(USE_BINARYBUILDER_LLVM),0)
 ifneq ($(BUILD_LLVM_CLANG),1)
 	$(error Clang must be available to use the clang analyzer. Either build it (BUILD_LLVM_CLANG=1) or use BinaryBuilder)
@@ -356,16 +427,58 @@ endif
 endif
 
 clangsa: $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT)
-
-clang-sa-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) | analyzegc-deps-check
-	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS)  -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c $<)
-clang-sa-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) | analyzegc-deps-check
-	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c++ $<)
-
-# Add C files as a target of `analyzegc`
-analyzegc: $(addprefix clang-sa-,$(RUNTIME_SRCS))
+clangsa: $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT)
+# TODO: clangsa: $(build_shlibdir)/libImplicitAtomics2Plugin.$(SHLIB_EXT)
+
+clang-sagc-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \
+		-Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \
+		$(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c $<)
+clang-sagc-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \
+		-Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \
+		$(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c++ $<)
+
+ # optarg is a required_argument for these
+SA_EXCEPTIONS-jloptions.c                   := -Xanalyzer -analyzer-disable-checker=core.NonNullParamChecker,unix.cstring.NullArg
+ # clang doesn't understand that e->vars has the same value in save_env (NULL) and restore_env (assumed non-NULL)
+SA_EXCEPTIONS-subtype.c                     := -Xanalyzer -analyzer-disable-checker=core.uninitialized.Assign,core.UndefinedBinaryOperatorResult
+ # these need to be annotated (and possibly fixed)
+SKIP_IMPLICIT_ATOMICS := dump.c gf.c jitlayers.cpp module.c precompile.c rtutils.c staticdata.c toplevel.c codegen.cpp
+
+clang-sa-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \
+		$(if $(findstring $(notdir $<),$(SKIP_IMPLICIT_ATOMICS)),,-Xclang -load -Xclang $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=julia.ImplicitAtomics) \
+		-Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \
+		 --analyzer-no-default-checks  \
+		$(SA_EXCEPTIONS-$(notdir $<)) \
+		$(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c $<)
+clang-sa-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \
+		$(if $(findstring $(notdir $<),$(SKIP_IMPLICIT_ATOMICS)),,-Xclang -load -Xclang $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=julia.ImplicitAtomics) \
+		-Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \
+		 --analyzer-no-default-checks  \
+		$(SA_EXCEPTIONS-$(notdir $<)) \
+		$(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c++ $<)
+
+clang-tidy-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomics2Plugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \
+		-load $(build_shlibdir)/libImplicitAtomics2Plugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
+		-- $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -fno-caret-diagnostics -x c)
+clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomics2Plugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \
+		-load $(build_shlibdir)/libImplicitAtomics2Plugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
+		-- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++)
+
+
+# Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc`
+tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)), $(SRCS)))
+analyzesrc: $(addprefix clang-sa-,$(SRCS))
+analyzegc: analyzesrc $(addprefix clang-sagc-,$(RUNTIME_SRCS))
 
 clean-analyzegc:
 	rm -f $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT)
+	rm -f $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT)
 
-.PHONY: default all debug release clean cleanall clean-* libccalltest libllvmcalltest julia_flisp.boot.inc.phony analyzegc clang-sa-*
+.FORCE:
+.PHONY: default all debug release clean cleanall clean-* libccalltest libllvmcalltest julia_flisp.boot.inc.phony analyzegc analyzesrc tidysrc .FORCE
diff --git a/src/anticodegen.c b/src/anticodegen.c
deleted file mode 100644
index df2738b0f67d5..0000000000000
--- a/src/anticodegen.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#include "julia.h"
-#include "julia_internal.h"
-
-#include "intrinsics.h"
-
-#define UNAVAILABLE { jl_errorf("%s: not available in this build of Julia", __func__); }
-
-void jl_dump_native(const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, const char *sysimg_data, size_t sysimg_len) UNAVAILABLE
-int32_t jl_get_llvm_gv(jl_value_t *p) UNAVAILABLE
-void jl_write_malloc_log(void) UNAVAILABLE
-void jl_write_coverage_data(void) UNAVAILABLE
-
-JL_DLLEXPORT void jl_clear_malloc_data(void) UNAVAILABLE
-JL_DLLEXPORT int jl_extern_c(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
-JL_DLLEXPORT void *jl_function_ptr(jl_function_t *f, jl_value_t *rt, jl_value_t *argt) UNAVAILABLE
-JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo) UNAVAILABLE
-JL_DLLEXPORT const jl_value_t *jl_dump_function_ir(void *f, uint8_t strip_ir_metadata, uint8_t dump_module, const char *debuginfo) UNAVAILABLE
-JL_DLLEXPORT void *jl_get_llvmf_defn(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
-
-JL_DLLEXPORT void *jl_LLVMCreateDisasm(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE
-JL_DLLEXPORT size_t jl_LLVMDisasmInstruction(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE
-
-int32_t jl_assign_functionID(const char *fname) UNAVAILABLE
-
-void jl_init_codegen(void) { }
-
-int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline)
-{
-    return 0;
-}
-
-void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
-                       jl_method_instance_t **linfos, size_t n)
-{
-    (void)sysimage_base; (void)fptrs; (void)linfos; (void)n;
-}
-
-jl_llvm_functions_t jl_compile_linfo(jl_method_instance_t **pli, jl_code_info_t *src, size_t world, const jl_cgparams_t *params)
-{
-    jl_method_instance_t *li = *pli;
-    jl_llvm_functions_t decls = {};
-
-    if (jl_is_method(li->def.method)) {
-        jl_printf(JL_STDERR, "code missing for ");
-        jl_static_show(JL_STDERR, (jl_value_t*)li);
-        jl_printf(JL_STDERR, " : sysimg may not have been built with --compile=all\n");
-    }
-    else {
-        jl_printf(JL_STDERR, "top level expression cannot be compiled in this build of Julia");
-    }
-    return decls;
-}
-
-jl_value_t *jl_fptr_interpret_call(jl_method_instance_t *lam, jl_value_t **args, uint32_t nargs);
-jl_callptr_t jl_generate_fptr(jl_method_instance_t **pli, jl_llvm_functions_t decls, size_t world)
-{
-    return (jl_callptr_t)&jl_fptr_interpret_call;
-}
-
-JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void)
-{
-    return 0;
-}
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 93683d320e6b9..c6fa58d0ee621 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -5,11 +5,11 @@
 
 // target support
 #include <llvm/ADT/Triple.h>
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+#include <llvm/IR/DataLayout.h>
 #include <llvm/Support/TargetRegistry.h>
 #include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/DataLayout.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
 
 // analysis passes
 #include <llvm/Analysis/Passes.h>
@@ -36,19 +36,11 @@
 #endif
 #endif
 
-// for outputting assembly
+// for outputting code
 #include <llvm/Bitcode/BitcodeWriter.h>
 #include <llvm/Bitcode/BitcodeWriterPass.h>
 #include "llvm/Object/ArchiveWriter.h"
 #include <llvm/IR/IRPrintingPasses.h>
-#include <llvm/CodeGen/AsmPrinter.h>
-#include <llvm/CodeGen/MachineModuleInfo.h>
-#include <llvm/CodeGen/TargetPassConfig.h>
-#include <llvm/MC/MCAsmInfo.h>
-#include <llvm/MC/MCStreamer.h>
-#include <llvm/MC/MCAsmBackend.h>
-#include <llvm/MC/MCCodeEmitter.h>
-#include <llvm/Support/CodeGen.h>
 
 #include <llvm/IR/LegacyPassManagers.h>
 #include <llvm/Transforms/Utils/Cloning.h>
@@ -66,24 +58,11 @@ namespace llvm {
 #include "jitlayers.h"
 #include "julia_assert.h"
 
-// MSVC's link.exe requires each function declaration to have a Comdat section
-// So rather than litter the code with conditionals,
-// all global values that get emitted call this function
-// and it decides whether the definition needs a Comdat section and adds the appropriate declaration
 template<class T> // for GlobalObject's
 static T *addComdat(T *G)
 {
 #if defined(_OS_WINDOWS_)
     if (!G->isDeclaration()) {
-        // Add comdat information to make MSVC link.exe happy
-        // it's valid to emit this for ld.exe too,
-        // but makes it very slow to link for no benefit
-#if defined(_COMPILER_MICROSOFT_)
-        Comdat *jl_Comdat = G->getParent()->getOrInsertComdat(G->getName());
-        // ELF only supports Comdat::Any
-        jl_Comdat->setSelectionKind(Comdat::NoDuplicates);
-        G->setComdat(jl_Comdat);
-#endif
         // add __declspec(dllexport) to everything marked for export
         if (G->getLinkage() == GlobalValue::ExternalLinkage)
             G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
@@ -104,7 +83,7 @@ typedef struct {
 } jl_native_code_desc_t;
 
 extern "C" JL_DLLEXPORT
-void jl_get_function_id(void *native_code, jl_code_instance_t *codeinst,
+void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
         int32_t *func_idx, int32_t *specfunc_idx)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -117,8 +96,8 @@ void jl_get_function_id(void *native_code, jl_code_instance_t *codeinst,
     }
 }
 
-extern "C"
-int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p)
+extern "C" JL_DLLEXPORT
+int32_t jl_get_llvm_gv_impl(void *native_code, jl_value_t *p)
 {
     // map a jl_value_t memory location to a GlobalVariable
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -132,7 +111,7 @@ int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p)
 }
 
 extern "C" JL_DLLEXPORT
-Module* jl_get_llvm_module(void *native_code)
+Module* jl_get_llvm_module_impl(void *native_code)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
     if (data)
@@ -142,7 +121,7 @@ Module* jl_get_llvm_module(void *native_code)
 }
 
 extern "C" JL_DLLEXPORT
-GlobalValue* jl_get_llvm_function(void *native_code, uint32_t idx)
+GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
     if (data)
@@ -152,7 +131,7 @@ GlobalValue* jl_get_llvm_function(void *native_code, uint32_t idx)
 }
 
 extern "C" JL_DLLEXPORT
-LLVMContext* jl_get_llvm_context(void *native_code)
+LLVMContext* jl_get_llvm_context_impl(void *native_code)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
     if (data)
@@ -275,19 +254,21 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
 // all reachable & inferrrable functions. The `policy` flag switches between the default
 // mode `0`, the extern mode `1`, and imaging mode `2`.
 extern "C" JL_DLLEXPORT
-void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _policy)
+void *jl_create_native_impl(jl_array_t *methods, const jl_cgparams_t *cgparams, int _policy)
 {
+    if (cgparams == NULL)
+        cgparams = &jl_default_cgparams;
     jl_native_code_desc_t *data = new jl_native_code_desc_t;
     jl_codegen_params_t params;
-    params.params = &cgparams;
+    params.params = cgparams;
     std::map<jl_code_instance_t*, jl_compile_result_t> emitted;
     jl_method_instance_t *mi = NULL;
     jl_code_info_t *src = NULL;
     JL_GC_PUSH1(&src);
-    JL_LOCK(&codegen_lock);
+    JL_LOCK(&jl_codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
 
     CompilationPolicy policy = (CompilationPolicy) _policy;
@@ -296,7 +277,7 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     std::unique_ptr<Module> clone(jl_create_llvm_module("text"));
 
     // compile all methods for the current world and type-inference world
-    size_t compile_for[] = { jl_typeinf_world, jl_world_counter };
+    size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) };
     for (int worlds = 0; worlds < 2; worlds++) {
         params.world = compile_for[worlds];
         if (!params.world)
@@ -322,7 +303,7 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
             if (mi->def.method->primary_world <= params.world && params.world <= mi->def.method->deleted_world) {
                 // find and prepare the source code to compile
                 jl_code_instance_t *codeinst = NULL;
-                jl_ci_cache_lookup(cgparams, mi, params.world, &codeinst, &src);
+                jl_ci_cache_lookup(*cgparams, mi, params.world, &codeinst, &src);
                 if (src && !emitted.count(codeinst)) {
                     // now add it to our compilation results
                     JL_GC_PROMISE_ROOTED(codeinst->rettype);
@@ -415,11 +396,11 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     }
 
     data->M = std::move(clone);
-    if (jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     if (policy == CompilationPolicy::ImagingMode)
         imaging_mode = 0;
-    JL_UNLOCK(&codegen_lock); // Might GC
+    JL_UNLOCK(&jl_codegen_lock); // Might GC
     return (void*)data;
 }
 
@@ -449,8 +430,8 @@ static void reportWriterError(const ErrorInfoBase &E)
 
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup
-extern "C"
-void jl_dump_native(void *native_code,
+extern "C" JL_DLLEXPORT
+void jl_dump_native_impl(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname,
         const char *asm_fname,
         const char *sysimg_data, size_t sysimg_len)
@@ -494,10 +475,10 @@ void jl_dump_native(void *native_code,
     addTargetPasses(&PM, TM.get());
 
     // set up optimization passes
-    SmallVector<char, 128> bc_Buffer;
-    SmallVector<char, 128> obj_Buffer;
-    SmallVector<char, 128> asm_Buffer;
-    SmallVector<char, 128> unopt_bc_Buffer;
+    SmallVector<char, 0> bc_Buffer;
+    SmallVector<char, 0> obj_Buffer;
+    SmallVector<char, 0> asm_Buffer;
+    SmallVector<char, 0> unopt_bc_Buffer;
     raw_svector_ostream bc_OS(bc_Buffer);
     raw_svector_ostream obj_OS(obj_Buffer);
     raw_svector_ostream asm_OS(asm_Buffer);
@@ -512,7 +493,7 @@ void jl_dump_native(void *native_code,
         PM.add(createBitcodeWriterPass(unopt_bc_OS));
     if (bc_fname || obj_fname || asm_fname) {
         addOptimizationPasses(&PM, jl_options.opt_level, true, true);
-        addMachinePasses(&PM, TM.get());
+        addMachinePasses(&PM, TM.get(), jl_options.opt_level);
     }
     if (bc_fname)
         PM.add(createBitcodeWriterPass(bc_OS));
@@ -601,7 +582,6 @@ void jl_dump_native(void *native_code,
     delete data;
 }
 
-
 void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM)
 {
     PM->add(new TargetLibraryInfoWrapperPass(Triple(TM->getTargetTriple())));
@@ -609,11 +589,12 @@ void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM)
 }
 
 
-void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM)
+void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM, int optlevel)
 {
     // TODO: don't do this on CPUs that natively support Float16
     PM->add(createDemoteFloat16Pass());
-    PM->add(createGVNPass());
+    if (optlevel > 1)
+        PM->add(createGVNPass());
 }
 
 
@@ -638,6 +619,13 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
 
     PM->add(createConstantMergePass());
     if (opt_level < 2) {
+        if (!dump_native) {
+            // we won't be multiversioning, so lower CPU feature checks early on
+            // so that we can avoid an additional CFG simplification pass at the end.
+            PM->add(createCPUFeaturesPass());
+            if (opt_level == 1)
+                PM->add(createInstSimplifyLegacyPass());
+        }
         PM->add(createCFGSimplificationPass(simplifyCFGOptions));
         if (opt_level == 1) {
             PM->add(createSROAPass());
@@ -662,15 +650,22 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
             PM->add(createRemoveNIPass());
         }
         PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
-        if (dump_native)
+        if (dump_native) {
             PM->add(createMultiVersioningPass());
-#if defined(JL_ASAN_ENABLED)
+            PM->add(createCPUFeaturesPass());
+            // minimal clean-up to get rid of CPU feature checks
+            if (opt_level == 1) {
+                PM->add(createInstSimplifyLegacyPass());
+                PM->add(createCFGSimplificationPass(simplifyCFGOptions));
+            }
+        }
+#if defined(_COMPILER_ASAN_ENABLED_)
         PM->add(createAddressSanitizerFunctionPass());
 #endif
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
         PM->add(createMemorySanitizerPass(true));
 #endif
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
         PM->add(createThreadSanitizerLegacyPassPass());
 #endif
         return;
@@ -699,6 +694,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createCFGSimplificationPass(simplifyCFGOptions));
     if (dump_native)
         PM->add(createMultiVersioningPass());
+    PM->add(createCPUFeaturesPass());
     PM->add(createSROAPass());
     PM->add(createInstSimplifyLegacyPass());
     PM->add(createJumpThreadingPass());
@@ -730,6 +726,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createLoopUnswitchPass());
     PM->add(createLICMPass());
     PM->add(createJuliaLICMPass());
+    PM->add(createInductiveRangeCheckEliminationPass());
     // Subsequent passes not stripping metadata from terminator
     PM->add(createInstSimplifyLegacyPass());
     PM->add(createIndVarSimplifyPass());
@@ -813,13 +810,13 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     }
     PM->add(createCombineMulAddPass());
     PM->add(createDivRemPairsPass());
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
     PM->add(createAddressSanitizerFunctionPass());
 #endif
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
     PM->add(createMemorySanitizerPass(true));
 #endif
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
     PM->add(createThreadSanitizerLegacyPassPass());
 #endif
 }
@@ -842,7 +839,7 @@ class JuliaPipeline : public Pass {
         TPMAdapter Adapter(TPM);
         addTargetPasses(&Adapter, jl_TargetMachine);
         addOptimizationPasses(&Adapter, OptLevel);
-        addMachinePasses(&Adapter, jl_TargetMachine);
+        addMachinePasses(&Adapter, jl_TargetMachine, OptLevel);
     }
     JuliaPipeline() : Pass(PT_PassManager, ID) {}
     Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override {
@@ -864,10 +861,10 @@ void jl_add_optimization_passes(LLVMPassManagerRef PM, int opt_level, int lower_
 // --- native code info, and dump function to IR and ASM ---
 // Get pointer to llvm::Function instance, compiling if necessary
 // for use in reflection from Julia.
-// this is paired with jl_dump_function_ir, jl_dump_method_asm, jl_dump_llvm_asm in particular ways:
+// this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways:
 // misuse will leak memory or cause read-after-free
 extern "C" JL_DLLEXPORT
-void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
+void *jl_get_llvmf_defn_impl(jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
 {
     if (jl_is_method(mi->def.method) && mi->def.method->source == NULL &&
             mi->def.method->generator == NULL) {
@@ -880,7 +877,7 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
         PM = new legacy::PassManager();
         addTargetPasses(PM, jl_TargetMachine);
         addOptimizationPasses(PM, jl_options.opt_level);
-        addMachinePasses(PM, jl_TargetMachine);
+        addMachinePasses(PM, jl_TargetMachine, jl_options.opt_level);
     }
 
     // get the source code for this function
@@ -914,10 +911,10 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
         output.params = &params;
         std::unique_ptr<Module> m;
         jl_llvm_functions_t decls;
-        JL_LOCK(&codegen_lock);
+        JL_LOCK(&jl_codegen_lock);
         uint64_t compiler_start_time = 0;
-        int tid = jl_threadid();
-        if (jl_measure_compile_time[tid])
+        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+        if (measure_compile_time_enabled)
             compiler_start_time = jl_hrtime();
         std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
 
@@ -942,9 +939,9 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
             m.release(); // the return object `llvmf` will be the owning pointer
         }
         JL_GC_POP();
-        if (jl_measure_compile_time[tid])
-            jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
-        JL_UNLOCK(&codegen_lock); // Might GC
+        if (measure_compile_time_enabled)
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+        JL_UNLOCK(&jl_codegen_lock); // Might GC
         if (F)
             return F;
     }
@@ -952,74 +949,3 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
     const char *mname = name_from_method_instance(mi);
     jl_errorf("unable to compile source for function %s", mname);
 }
-
-/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *
-addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
-    TargetPassConfig *PassConfig = TM->createPassConfig(PM);
-    PassConfig->setDisableVerify(false);
-    PM.add(PassConfig);
-    MachineModuleInfoWrapperPass *MMIWP =
-        new MachineModuleInfoWrapperPass(TM);
-    PM.add(MMIWP);
-    if (PassConfig->addISelPasses())
-        return NULL;
-    PassConfig->addMachinePasses();
-    PassConfig->setInitialized();
-    return &MMIWP->getMMI().getContext();
-}
-
-void jl_strip_llvm_debug(Module *m);
-
-
-// get a native assembly for llvm::Function
-// TODO: implement debuginfo handling
-extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo)
-{
-    // precise printing via IR assembler
-    SmallVector<char, 4096> ObjBufferSV;
-    { // scope block
-        Function *f = (Function*)F;
-        llvm::raw_svector_ostream asmfile(ObjBufferSV);
-        assert(!f->isDeclaration());
-        std::unique_ptr<Module> m(f->getParent());
-        for (auto &f2 : m->functions()) {
-            if (f != &f2 && !f->isDeclaration())
-                f2.deleteBody();
-        }
-        jl_strip_llvm_debug(m.get());
-        legacy::PassManager PM;
-        LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(jl_TargetMachine);
-        MCContext *Context = addPassesToGenerateCode(TM, PM);
-        if (Context) {
-            const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
-            const MCAsmInfo &MAI = *TM->getMCAsmInfo();
-            const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
-            const MCInstrInfo &MII = *TM->getMCInstrInfo();
-            unsigned OutputAsmDialect = MAI.getAssemblerDialect();
-            if (!strcmp(asm_variant, "att"))
-                OutputAsmDialect = 0;
-            if (!strcmp(asm_variant, "intel"))
-                OutputAsmDialect = 1;
-            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
-                TM->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
-             std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
-                STI, MRI, TM->Options.MCOptions));
-            std::unique_ptr<MCCodeEmitter> MCE;
-            auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
-            std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
-                *Context, std::move(FOut), true,
-                true, InstPrinter,
-                std::move(MCE), std::move(MAB),
-                false));
-            std::unique_ptr<AsmPrinter> Printer(
-                TM->getTarget().createAsmPrinter(*TM, std::move(S)));
-            if (Printer) {
-                PM.add(Printer.release());
-                PM.run(*m);
-            }
-        }
-    }
-    return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
-}
diff --git a/src/array.c b/src/array.c
index 778fb6d1e677c..e22089adc00b3 100644
--- a/src/array.c
+++ b/src/array.c
@@ -26,7 +26,7 @@ static inline void arrayassign_safe(int hasptr, jl_value_t *parent, char *dst, c
     assert(nb >= jl_datatype_size(jl_typeof(src))); // nb might move some undefined bits, but we should be okay with that
     if (hasptr) {
         size_t nptr = nb / sizeof(void*);
-        memmove_refs((void**)dst, (void**)src, nptr);
+        memmove_refs((void**)dst, (void* const*)src, nptr);
         jl_gc_multi_wb(parent, src);
     }
     else {
@@ -219,51 +219,28 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
                                           jl_value_t *_dims)
 {
     jl_task_t *ct = jl_current_task;
-    jl_array_t *a;
+    assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
+
     size_t ndims = jl_nfields(_dims);
     assert(is_ntuple_long(_dims));
     size_t *dims = (size_t*)_dims;
-    assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
-
     int ndimwords = jl_array_ndimwords(ndims);
     int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*);
-    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
+    jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
+    // copy data (except dims) from the old object
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->flags.ndims = ndims;
     a->offset = 0;
     a->data = NULL;
     a->flags.isaligned = data->flags.isaligned;
-    jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
-    jl_value_t *eltype = jl_tparam0(atype);
-    size_t elsz = 0, align = 0;
-    int isboxed = !jl_islayout_inline(eltype, &elsz, &align);
-    assert(isboxed == data->flags.ptrarray);
-    if (!isboxed) {
-        a->elsize = LLT_ALIGN(elsz, align);
-        jl_value_t *ownerty = jl_typeof(owner);
-        size_t oldelsz = 0, oldalign = 0;
-        if (ownerty == (jl_value_t*)jl_string_type) {
-            oldalign = 1;
-        }
-        else {
-            jl_islayout_inline(jl_tparam0(ownerty), &oldelsz, &oldalign);
-        }
-        if (oldalign < align)
-            jl_exceptionf(jl_argumenterror_type,
-                          "reinterpret from alignment %d bytes to alignment %d bytes not allowed",
-                          (int) oldalign, (int) align);
-        a->flags.ptrarray = 0;
-        a->flags.hasptr = data->flags.hasptr;
-    }
-    else {
-        a->elsize = sizeof(void*);
-        a->flags.ptrarray = 1;
-        a->flags.hasptr = 0;
-    }
+    a->elsize = data->elsize;
+    a->flags.ptrarray = data->flags.ptrarray;
+    a->flags.hasptr = data->flags.hasptr;
 
     // if data is itself a shared wrapper,
     // owner should point back to the original array
+    jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
     jl_array_data_owner(a) = (jl_value_t*)owner;
 
     a->flags.how = 3;
@@ -588,7 +565,7 @@ JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t
 {
     assert(i < jl_array_len(a));
     assert(a->flags.ptrarray);
-    jl_value_t *elt = jl_atomic_load_relaxed(((jl_value_t**)a->data) + i);
+    jl_value_t *elt = jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)a->data) + i);
     if (elt == NULL)
         jl_throw(jl_undefref_exception);
     return elt;
@@ -617,7 +594,7 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
 JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
 {
     if (a->flags.ptrarray) {
-        return jl_atomic_load_relaxed(((jl_value_t**)jl_array_data(a)) + i) != NULL;
+        return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)jl_array_data(a)) + i) != NULL;
     }
     else if (a->flags.hasptr) {
          jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
@@ -656,7 +633,7 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
         arrayassign_safe(hasptr, jl_array_owner(a), &((char*)a->data)[i * a->elsize], rhs, a->elsize);
     }
     else {
-        jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, rhs);
+        jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, rhs);
         jl_gc_wb(jl_array_owner(a), rhs);
     }
 }
@@ -666,7 +643,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
     if (i >= jl_array_len(a))
         jl_bounds_error_int((jl_value_t*)a, i + 1);
     if (a->flags.ptrarray)
-        jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, NULL);
+        jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, NULL);
     else if (a->flags.hasptr) {
         size_t elsize = a->elsize;
         jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
@@ -1119,7 +1096,7 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
         // Move the rest of the data if the offset changed
         if (newoffs != offset) {
             memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
-            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, n - idx);
+            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, a->nrows - idx);
         }
         a->data = newdata;
     }
@@ -1243,9 +1220,11 @@ static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner,
                                                   void **src_p, void **dest_p,
                                                   ssize_t n) JL_NOTSAFEPOINT
 {
+    _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p;
+    _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p;
     for (ssize_t i = 0; i < n; i++) {
-        void *val = jl_atomic_load_relaxed(src_p + i);
-        jl_atomic_store_relaxed(dest_p + i, val);
+        void *val = jl_atomic_load_relaxed(src_pa + i);
+        jl_atomic_store_relaxed(dest_pa + i, val);
         // `val` is young or old-unmarked
         if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
             jl_gc_queue_root(owner);
@@ -1259,9 +1238,11 @@ static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner,
                                                    void **src_p, void **dest_p,
                                                    ssize_t n) JL_NOTSAFEPOINT
 {
+    _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p;
+    _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p;
     for (ssize_t i = 0; i < n; i++) {
-        void *val = jl_atomic_load_relaxed(src_p + n - i - 1);
-        jl_atomic_store_relaxed(dest_p + n - i - 1, val);
+        void *val = jl_atomic_load_relaxed(src_pa + n - i - 1);
+        jl_atomic_store_relaxed(dest_pa + n - i - 1, val);
         // `val` is young or old-unmarked
         if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
             jl_gc_queue_root(owner);
diff --git a/src/ast.c b/src/ast.c
index 94bbf48dde17e..bdc891ebd3e10 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -19,93 +19,103 @@
 extern "C" {
 #endif
 
-// MSVC complains about "julia_flisp.boot.inc : error C4335: Mac file format
-// detected: please convert the source file to either DOS or UNIX format"
-#ifdef _MSC_VER
-#pragma warning(disable:4335)
-#endif
-
-
 // head symbols for each expression type
-jl_sym_t *call_sym;    jl_sym_t *invoke_sym;
-jl_sym_t *empty_sym;   jl_sym_t *top_sym;
-jl_sym_t *module_sym;  jl_sym_t *slot_sym;
-jl_sym_t *export_sym;  jl_sym_t *import_sym;
-jl_sym_t *toplevel_sym; jl_sym_t *quote_sym;
-jl_sym_t *line_sym;    jl_sym_t *incomplete_sym;
-jl_sym_t *goto_sym;    jl_sym_t *goto_ifnot_sym;
-jl_sym_t *return_sym;  jl_sym_t *lineinfo_sym;
-jl_sym_t *lambda_sym;  jl_sym_t *assign_sym;
-jl_sym_t *globalref_sym; jl_sym_t *do_sym;
-jl_sym_t *method_sym;  jl_sym_t *core_sym;
-jl_sym_t *enter_sym;   jl_sym_t *leave_sym;
-jl_sym_t *pop_exception_sym;
-jl_sym_t *exc_sym;     jl_sym_t *error_sym;
-jl_sym_t *new_sym;     jl_sym_t *using_sym;
-jl_sym_t *splatnew_sym; jl_sym_t *block_sym;
-jl_sym_t *new_opaque_closure_sym;
-jl_sym_t *opaque_closure_method_sym;
-jl_sym_t *const_sym;   jl_sym_t *thunk_sym;
-jl_sym_t *foreigncall_sym; jl_sym_t *as_sym;
-jl_sym_t *global_sym; jl_sym_t *list_sym;
-jl_sym_t *dot_sym;    jl_sym_t *newvar_sym;
-jl_sym_t *boundscheck_sym; jl_sym_t *inbounds_sym;
-jl_sym_t *copyast_sym; jl_sym_t *cfunction_sym;
-jl_sym_t *pure_sym; jl_sym_t *loopinfo_sym;
-jl_sym_t *meta_sym; jl_sym_t *inert_sym;
-jl_sym_t *polly_sym; jl_sym_t *unused_sym;
-jl_sym_t *static_parameter_sym; jl_sym_t *inline_sym;
-jl_sym_t *noinline_sym; jl_sym_t *generated_sym;
-jl_sym_t *generated_only_sym; jl_sym_t *isdefined_sym;
-jl_sym_t *propagate_inbounds_sym; jl_sym_t *specialize_sym;
-jl_sym_t *aggressive_constprop_sym;
-jl_sym_t *nospecialize_sym; jl_sym_t *macrocall_sym;
-jl_sym_t *colon_sym; jl_sym_t *hygienicscope_sym;
-jl_sym_t *throw_undef_if_not_sym; jl_sym_t *getfield_undefref_sym;
-jl_sym_t *gc_preserve_begin_sym; jl_sym_t *gc_preserve_end_sym;
-jl_sym_t *coverageeffect_sym; jl_sym_t *escape_sym;
-jl_sym_t *aliasscope_sym; jl_sym_t *popaliasscope_sym;
-jl_sym_t *optlevel_sym; jl_sym_t *thismodule_sym;
-jl_sym_t *atom_sym; jl_sym_t *statement_sym; jl_sym_t *all_sym;
-jl_sym_t *compile_sym; jl_sym_t *infer_sym;
-
-jl_sym_t *atomic_sym;
-jl_sym_t *not_atomic_sym;
-jl_sym_t *unordered_sym;
-jl_sym_t *monotonic_sym;
-jl_sym_t *acquire_sym;
-jl_sym_t *release_sym;
-jl_sym_t *acquire_release_sym;
-jl_sym_t *sequentially_consistent_sym;
-
-
-static uint8_t flisp_system_image[] = {
+JL_DLLEXPORT jl_sym_t *jl_call_sym;
+JL_DLLEXPORT jl_sym_t *jl_invoke_sym;
+JL_DLLEXPORT jl_sym_t *jl_invoke_modify_sym;
+JL_DLLEXPORT jl_sym_t *jl_empty_sym;
+JL_DLLEXPORT jl_sym_t *jl_top_sym;
+JL_DLLEXPORT jl_sym_t *jl_module_sym;
+JL_DLLEXPORT jl_sym_t *jl_slot_sym;
+JL_DLLEXPORT jl_sym_t *jl_export_sym;
+JL_DLLEXPORT jl_sym_t *jl_import_sym;
+JL_DLLEXPORT jl_sym_t *jl_toplevel_sym;
+JL_DLLEXPORT jl_sym_t *jl_quote_sym;
+JL_DLLEXPORT jl_sym_t *jl_line_sym;
+JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
+JL_DLLEXPORT jl_sym_t *jl_goto_sym;
+JL_DLLEXPORT jl_sym_t *jl_goto_ifnot_sym;
+JL_DLLEXPORT jl_sym_t *jl_return_sym;
+JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym;
+JL_DLLEXPORT jl_sym_t *jl_lambda_sym;
+JL_DLLEXPORT jl_sym_t *jl_assign_sym;
+JL_DLLEXPORT jl_sym_t *jl_globalref_sym;
+JL_DLLEXPORT jl_sym_t *jl_do_sym;
+JL_DLLEXPORT jl_sym_t *jl_method_sym;
+JL_DLLEXPORT jl_sym_t *jl_core_sym;
+JL_DLLEXPORT jl_sym_t *jl_enter_sym;
+JL_DLLEXPORT jl_sym_t *jl_leave_sym;
+JL_DLLEXPORT jl_sym_t *jl_pop_exception_sym;
+JL_DLLEXPORT jl_sym_t *jl_exc_sym;
+JL_DLLEXPORT jl_sym_t *jl_error_sym;
+JL_DLLEXPORT jl_sym_t *jl_new_sym;
+JL_DLLEXPORT jl_sym_t *jl_using_sym;
+JL_DLLEXPORT jl_sym_t *jl_splatnew_sym;
+JL_DLLEXPORT jl_sym_t *jl_block_sym;
+JL_DLLEXPORT jl_sym_t *jl_new_opaque_closure_sym;
+JL_DLLEXPORT jl_sym_t *jl_opaque_closure_method_sym;
+JL_DLLEXPORT jl_sym_t *jl_const_sym;
+JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
+JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
+JL_DLLEXPORT jl_sym_t *jl_as_sym;
+JL_DLLEXPORT jl_sym_t *jl_global_sym;
+JL_DLLEXPORT jl_sym_t *jl_list_sym;
+JL_DLLEXPORT jl_sym_t *jl_dot_sym;
+JL_DLLEXPORT jl_sym_t *jl_newvar_sym;
+JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
+JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
+JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
+JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
+JL_DLLEXPORT jl_sym_t *jl_pure_sym;
+JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
+JL_DLLEXPORT jl_sym_t *jl_meta_sym;
+JL_DLLEXPORT jl_sym_t *jl_inert_sym;
+JL_DLLEXPORT jl_sym_t *jl_polly_sym;
+JL_DLLEXPORT jl_sym_t *jl_unused_sym;
+JL_DLLEXPORT jl_sym_t *jl_static_parameter_sym;
+JL_DLLEXPORT jl_sym_t *jl_inline_sym;
+JL_DLLEXPORT jl_sym_t *jl_noinline_sym;
+JL_DLLEXPORT jl_sym_t *jl_generated_sym;
+JL_DLLEXPORT jl_sym_t *jl_generated_only_sym;
+JL_DLLEXPORT jl_sym_t *jl_isdefined_sym;
+JL_DLLEXPORT jl_sym_t *jl_propagate_inbounds_sym;
+JL_DLLEXPORT jl_sym_t *jl_specialize_sym;
+JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym;
+JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym;
+JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym;
+JL_DLLEXPORT jl_sym_t *jl_macrocall_sym;
+JL_DLLEXPORT jl_sym_t *jl_colon_sym;
+JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym;
+JL_DLLEXPORT jl_sym_t *jl_throw_undef_if_not_sym;
+JL_DLLEXPORT jl_sym_t *jl_getfield_undefref_sym;
+JL_DLLEXPORT jl_sym_t *jl_gc_preserve_begin_sym;
+JL_DLLEXPORT jl_sym_t *jl_gc_preserve_end_sym;
+JL_DLLEXPORT jl_sym_t *jl_coverageeffect_sym;
+JL_DLLEXPORT jl_sym_t *jl_escape_sym;
+JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym;
+JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym;
+JL_DLLEXPORT jl_sym_t *jl_optlevel_sym;
+JL_DLLEXPORT jl_sym_t *jl_thismodule_sym;
+JL_DLLEXPORT jl_sym_t *jl_atom_sym;
+JL_DLLEXPORT jl_sym_t *jl_statement_sym;
+JL_DLLEXPORT jl_sym_t *jl_all_sym;
+JL_DLLEXPORT jl_sym_t *jl_compile_sym;
+JL_DLLEXPORT jl_sym_t *jl_force_compile_sym;
+JL_DLLEXPORT jl_sym_t *jl_infer_sym;
+JL_DLLEXPORT jl_sym_t *jl_atomic_sym;
+JL_DLLEXPORT jl_sym_t *jl_not_atomic_sym;
+JL_DLLEXPORT jl_sym_t *jl_unordered_sym;
+JL_DLLEXPORT jl_sym_t *jl_monotonic_sym;
+JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
+JL_DLLEXPORT jl_sym_t *jl_release_sym;
+JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
+JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
+
+
+static const uint8_t flisp_system_image[] = {
 #include <julia_flisp.boot.inc>
 };
 
-typedef struct _jl_ast_context_list_t {
-    struct _jl_ast_context_list_t *next;
-    struct _jl_ast_context_list_t **prev;
-} jl_ast_context_list_t;
-
-STATIC_INLINE void jl_ast_context_list_insert(jl_ast_context_list_t **head,
-                                              jl_ast_context_list_t *node) JL_NOTSAFEPOINT
-{
-    jl_ast_context_list_t *next = *head;
-    if (next)
-        next->prev = &node->next;
-    node->next = next;
-    node->prev = head;
-    *head = node;
-}
-
-STATIC_INLINE void jl_ast_context_list_delete(jl_ast_context_list_t *node) JL_NOTSAFEPOINT
-{
-    if (node->next)
-        node->next->prev = node->prev;
-    *node->prev = node->next;
-}
-
 typedef struct _jl_ast_context_t {
     fl_context_t fl;
     fltype_t *jvtype;
@@ -116,33 +126,23 @@ typedef struct _jl_ast_context_t {
     value_t null_sym;
     value_t ssavalue_sym;
     value_t slot_sym;
-    jl_ast_context_list_t list;
-    int ref;
-    jl_task_t *task; // the current owner (user) of this jl_ast_context_t
     jl_module_t *module; // context module for `current-julia-module-counter`
+    struct _jl_ast_context_t *next; // invasive list pointer for getting free contexts
 } jl_ast_context_t;
 
 static jl_ast_context_t jl_ast_main_ctx;
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 jl_ast_context_t *jl_ast_ctx(fl_context_t *fl) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #else
 #define jl_ast_ctx(fl_ctx) container_of(fl_ctx, jl_ast_context_t, fl)
 #endif
-#define jl_ast_context_list_item(node)          \
-    container_of(node, jl_ast_context_t, list)
 
 struct macroctx_stack {
     jl_module_t *m;
     struct macroctx_stack *parent;
 };
 
-#define JL_AST_PRESERVE_PUSH(ctx, old, inmodule)  \
-    jl_module_t *(old) = ctx->module;           \
-    ctx->module = (inmodule)
-#define JL_AST_PRESERVE_POP(ctx, old)           \
-    ctx->module = (old)
-
 static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod);
 static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v);
 static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error);
@@ -158,19 +158,19 @@ static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint
     return (b != NULL && b->owner == ctx->module) ? fl_ctx->T : fl_ctx->F;
 }
 
-static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
 {
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
     assert(ctx->module);
     return fixnum(jl_module_next_counter(ctx->module));
 }
 
-static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
 {
     return symbol(fl_ctx, jl_filename);
 }
 
-static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
 {
     return fixnum(jl_lineno);
 }
@@ -193,59 +193,18 @@ static value_t fl_julia_scalar(fl_context_t *fl_ctx, value_t *args, uint32_t nar
 
 static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *mod);
 
-static value_t fl_julia_logmsg(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
-{
-    int kwargs_len = (int)nargs - 6;
-    if (nargs < 6 || kwargs_len % 2 != 0) {
-        lerror(fl_ctx, fl_ctx->ArgError, "julia-logmsg: bad argument list - expected "
-               "level (symbol) group (symbol) id file line msg . kwargs");
-    }
-    value_t arg_level = args[0];
-    value_t arg_group = args[1];
-    value_t arg_id    = args[2];
-    value_t arg_file  = args[3];
-    value_t arg_line  = args[4];
-    value_t arg_msg   = args[5];
-    value_t *arg_kwargs = args + 6;
-    if (!isfixnum(arg_level) || !issymbol(arg_group) || !issymbol(arg_id) ||
-        !issymbol(arg_file) || !isfixnum(arg_line) || !fl_isstring(fl_ctx, arg_msg)) {
-        lerror(fl_ctx, fl_ctx->ArgError,
-               "julia-logmsg: Unexpected type in argument list");
-    }
-
-    // Abuse scm_to_julia here to convert arguments.  This is meant for `Expr`s
-    // but should be good enough provided we're only passing simple numbers,
-    // symbols and strings.
-    jl_value_t *group=NULL, *id=NULL, *file=NULL, *line=NULL, *msg=NULL;
-    jl_array_t *kwargs=NULL;
-    JL_GC_PUSH6(&group, &id, &file, &line, &msg, &kwargs);
-    group = scm_to_julia(fl_ctx, arg_group, NULL);
-    id    = scm_to_julia(fl_ctx, arg_id, NULL);
-    file  = scm_to_julia(fl_ctx, arg_file, NULL);
-    line  = scm_to_julia(fl_ctx, arg_line, NULL);
-    msg   = scm_to_julia(fl_ctx, arg_msg, NULL);
-    kwargs = jl_alloc_vec_any(kwargs_len);
-    for (int i = 0; i < kwargs_len; ++i) {
-        jl_array_ptr_set(kwargs, i, scm_to_julia(fl_ctx, arg_kwargs[i], NULL));
-    }
-    jl_log(numval(arg_level), NULL, group, id, file, line, (jl_value_t*)kwargs, msg);
-    JL_GC_POP();
-    return fl_ctx->T;
-}
-
 static const builtinspec_t julia_flisp_ast_ext[] = {
-    { "defined-julia-global", fl_defined_julia_global },
+    { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint?
     { "current-julia-module-counter", fl_current_module_counter },
-    { "julia-scalar?", fl_julia_scalar },
-    { "julia-logmsg", fl_julia_logmsg },
+    { "julia-scalar?", fl_julia_scalar }, // TODO: can we kill this safepoint? (from jl_isa)
     { "julia-current-file", fl_julia_current_file },
     { "julia-current-line", fl_julia_current_line },
     { NULL, NULL }
 };
 
-static void jl_init_ast_ctx(jl_ast_context_t *ast_ctx) JL_NOTSAFEPOINT
+static void jl_init_ast_ctx(jl_ast_context_t *ctx) JL_NOTSAFEPOINT
 {
-    fl_context_t *fl_ctx = &ast_ctx->fl;
+    fl_context_t *fl_ctx = &ctx->fl;
     fl_init(fl_ctx, 4*1024*1024);
 
     if (fl_load_system_image_str(fl_ctx, (char*)flisp_system_image,
@@ -255,7 +214,6 @@ static void jl_init_ast_ctx(jl_ast_context_t *ast_ctx) JL_NOTSAFEPOINT
 
     fl_applyn(fl_ctx, 0, symbol_value(symbol(fl_ctx, "__init_globals")));
 
-    jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
     ctx->jvtype = define_opaque_type(fl_ctx->jl_sym, sizeof(void*), NULL, NULL);
     assign_global_builtins(fl_ctx, julia_flisp_ast_ext);
     ctx->true_sym = symbol(fl_ctx, "true");
@@ -264,76 +222,48 @@ static void jl_init_ast_ctx(jl_ast_context_t *ast_ctx) JL_NOTSAFEPOINT
     ctx->null_sym = symbol(fl_ctx, "null");
     ctx->ssavalue_sym = symbol(fl_ctx, "ssavalue");
     ctx->slot_sym = symbol(fl_ctx, "slot");
-    ctx->task = NULL;
     ctx->module = NULL;
-    set(symbol(fl_ctx, "*depwarn-opt*"), fixnum(jl_options.depwarn));
     set(symbol(fl_ctx, "*scopewarn-opt*"), fixnum(jl_options.warn_scope));
 }
 
 // There should be no GC allocation while holding this lock
-static jl_mutex_t flisp_lock;
-static jl_ast_context_list_t *jl_ast_ctx_using = NULL;
-static jl_ast_context_list_t *jl_ast_ctx_freed = NULL;
+static uv_mutex_t flisp_lock;
+static jl_ast_context_t *jl_ast_ctx_freed = NULL;
 
-static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
+static jl_ast_context_t *jl_ast_ctx_enter(jl_module_t *m) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
     JL_SIGATOMIC_BEGIN();
-    JL_LOCK_NOGC(&flisp_lock);
-    jl_ast_context_list_t *node;
-    jl_ast_context_t *ctx;
-    // First check if the current task is using one of the contexts
-    for (node = jl_ast_ctx_using;node;(node = node->next)) {
-        ctx = jl_ast_context_list_item(node);
-        if (ctx->task == ct) {
-            ctx->ref++;
-            JL_UNLOCK_NOGC(&flisp_lock);
-            return ctx;
-        }
+    uv_mutex_lock(&flisp_lock); // flisp_lock guards the jl_ast_ctx_freed list
+    jl_ast_context_t *ctx = jl_ast_ctx_freed; // head of the free list, NULL if it is empty
+    if (ctx != NULL) {
+        jl_ast_ctx_freed = ctx->next; // unlink the head from the free list
+        ctx->next = NULL;
     }
-    // If not, grab one from the free list
-    if ((node = jl_ast_ctx_freed)) {
-        jl_ast_context_list_delete(node);
-        jl_ast_context_list_insert(&jl_ast_ctx_using, node);
-        ctx = jl_ast_context_list_item(node);
-        ctx->ref = 1;
-        ctx->task = ct;
-        ctx->module = NULL;
-        JL_UNLOCK_NOGC(&flisp_lock);
-        return ctx;
+    uv_mutex_unlock(&flisp_lock); // released before a new context is allocated and initialized
+    if (ctx == NULL) {
+        // Construct a new one if we can't find any
+        ctx = (jl_ast_context_t*)calloc(1, sizeof(jl_ast_context_t)); // NOTE(review): result is not checked for NULL before use
+        jl_init_ast_ctx(ctx);
     }
-    // Construct a new one if we can't find any
-    ctx = (jl_ast_context_t*)calloc(1, sizeof(jl_ast_context_t));
-    ctx->ref = 1;
-    ctx->task = ct;
-    node = &ctx->list;
-    jl_ast_context_list_insert(&jl_ast_ctx_using, node);
-    JL_UNLOCK_NOGC(&flisp_lock);
-    jl_init_ast_ctx(ctx);
+    ctx->module = m; // context module read by the flisp builtins (e.g. current-julia-module-counter)
     return ctx;
 }
 
 static void jl_ast_ctx_leave(jl_ast_context_t *ctx)
 {
+    uv_mutex_lock(&flisp_lock); // flisp_lock guards the jl_ast_ctx_freed list
+    ctx->module = NULL; // clear the module assigned in jl_ast_ctx_enter
+    ctx->next = jl_ast_ctx_freed; // push ctx onto the head of the free list for reuse
+    jl_ast_ctx_freed = ctx;
+    uv_mutex_unlock(&flisp_lock);
     JL_SIGATOMIC_END();
-    if (--ctx->ref)
-        return;
-    JL_LOCK_NOGC(&flisp_lock);
-    ctx->task = NULL;
-    jl_ast_context_list_t *node = &ctx->list;
-    jl_ast_context_list_delete(node);
-    jl_ast_context_list_insert(&jl_ast_ctx_freed, node);
-    JL_UNLOCK_NOGC(&flisp_lock);
 }
 
 void jl_init_flisp(void)
 {
-    jl_task_t *ct = jl_current_task;
-    if (jl_ast_ctx_using || jl_ast_ctx_freed)
+    if (jl_ast_ctx_freed)
         return;
-    jl_ast_main_ctx.ref = 1;
-    jl_ast_main_ctx.task = ct;
-    jl_ast_context_list_insert(&jl_ast_ctx_using, &jl_ast_main_ctx.list);
+    uv_mutex_init(&flisp_lock);
     jl_init_ast_ctx(&jl_ast_main_ctx);
     // To match the one in jl_ast_ctx_leave
     JL_SIGATOMIC_BEGIN();
@@ -342,93 +272,96 @@ void jl_init_flisp(void)
 
 void jl_init_common_symbols(void)
 {
-    empty_sym = jl_symbol("");
-    call_sym = jl_symbol("call");
-    invoke_sym = jl_symbol("invoke");
-    foreigncall_sym = jl_symbol("foreigncall");
-    cfunction_sym = jl_symbol("cfunction");
-    quote_sym = jl_symbol("quote");
-    inert_sym = jl_symbol("inert");
-    top_sym = jl_symbol("top");
-    core_sym = jl_symbol("core");
-    globalref_sym = jl_symbol("globalref");
-    line_sym = jl_symbol("line");
-    lineinfo_sym = jl_symbol("lineinfo");
-    incomplete_sym = jl_symbol("incomplete");
-    error_sym = jl_symbol("error");
-    goto_sym = jl_symbol("goto");
-    goto_ifnot_sym = jl_symbol("gotoifnot");
-    return_sym = jl_symbol("return");
-    lambda_sym = jl_symbol("lambda");
-    module_sym = jl_symbol("module");
-    export_sym = jl_symbol("export");
-    import_sym = jl_symbol("import");
-    using_sym = jl_symbol("using");
-    assign_sym = jl_symbol("=");
-    method_sym = jl_symbol("method");
-    exc_sym = jl_symbol("the_exception");
-    enter_sym = jl_symbol("enter");
-    leave_sym = jl_symbol("leave");
-    pop_exception_sym = jl_symbol("pop_exception");
-    new_sym = jl_symbol("new");
-    splatnew_sym = jl_symbol("splatnew");
-    new_opaque_closure_sym = jl_symbol("new_opaque_closure");
-    opaque_closure_method_sym = jl_symbol("opaque_closure_method");
-    const_sym = jl_symbol("const");
-    global_sym = jl_symbol("global");
-    thunk_sym = jl_symbol("thunk");
-    toplevel_sym = jl_symbol("toplevel");
-    dot_sym = jl_symbol(".");
-    as_sym = jl_symbol("as");
-    colon_sym = jl_symbol(":");
-    boundscheck_sym = jl_symbol("boundscheck");
-    inbounds_sym = jl_symbol("inbounds");
-    newvar_sym = jl_symbol("newvar");
-    copyast_sym = jl_symbol("copyast");
-    loopinfo_sym = jl_symbol("loopinfo");
-    pure_sym = jl_symbol("pure");
-    meta_sym = jl_symbol("meta");
-    list_sym = jl_symbol("list");
-    unused_sym = jl_symbol("#unused#");
-    slot_sym = jl_symbol("slot");
-    static_parameter_sym = jl_symbol("static_parameter");
-    inline_sym = jl_symbol("inline");
-    noinline_sym = jl_symbol("noinline");
-    polly_sym = jl_symbol("polly");
-    propagate_inbounds_sym = jl_symbol("propagate_inbounds");
-    aggressive_constprop_sym = jl_symbol("aggressive_constprop");
-    isdefined_sym = jl_symbol("isdefined");
-    nospecialize_sym = jl_symbol("nospecialize");
-    specialize_sym = jl_symbol("specialize");
-    optlevel_sym = jl_symbol("optlevel");
-    compile_sym = jl_symbol("compile");
-    infer_sym = jl_symbol("infer");
-    macrocall_sym = jl_symbol("macrocall");
-    escape_sym = jl_symbol("escape");
-    hygienicscope_sym = jl_symbol("hygienic-scope");
-    gc_preserve_begin_sym = jl_symbol("gc_preserve_begin");
-    gc_preserve_end_sym = jl_symbol("gc_preserve_end");
-    generated_sym = jl_symbol("generated");
-    generated_only_sym = jl_symbol("generated_only");
-    throw_undef_if_not_sym = jl_symbol("throw_undef_if_not");
-    getfield_undefref_sym = jl_symbol("##getfield##");
-    do_sym = jl_symbol("do");
-    coverageeffect_sym = jl_symbol("code_coverage_effect");
-    aliasscope_sym = jl_symbol("aliasscope");
-    popaliasscope_sym = jl_symbol("popaliasscope");
-    thismodule_sym = jl_symbol("thismodule");
-    block_sym = jl_symbol("block");
-    atom_sym = jl_symbol("atom");
-    statement_sym = jl_symbol("statement");
-    all_sym = jl_symbol("all");
-    atomic_sym = jl_symbol("atomic");
-    not_atomic_sym = jl_symbol("not_atomic");
-    unordered_sym = jl_symbol("unordered");
-    monotonic_sym = jl_symbol("monotonic");
-    acquire_sym = jl_symbol("acquire");
-    release_sym = jl_symbol("release");
-    acquire_release_sym = jl_symbol("acquire_release");
-    sequentially_consistent_sym = jl_symbol("sequentially_consistent");
+    jl_empty_sym = jl_symbol("");
+    jl_call_sym = jl_symbol("call");
+    jl_invoke_sym = jl_symbol("invoke");
+    jl_invoke_modify_sym = jl_symbol("invoke_modify");
+    jl_foreigncall_sym = jl_symbol("foreigncall");
+    jl_cfunction_sym = jl_symbol("cfunction");
+    jl_quote_sym = jl_symbol("quote");
+    jl_inert_sym = jl_symbol("inert");
+    jl_top_sym = jl_symbol("top");
+    jl_core_sym = jl_symbol("core");
+    jl_globalref_sym = jl_symbol("globalref");
+    jl_line_sym = jl_symbol("line");
+    jl_lineinfo_sym = jl_symbol("lineinfo");
+    jl_incomplete_sym = jl_symbol("incomplete");
+    jl_error_sym = jl_symbol("error");
+    jl_goto_sym = jl_symbol("goto");
+    jl_goto_ifnot_sym = jl_symbol("gotoifnot");
+    jl_return_sym = jl_symbol("return");
+    jl_lambda_sym = jl_symbol("lambda");
+    jl_module_sym = jl_symbol("module");
+    jl_export_sym = jl_symbol("export");
+    jl_import_sym = jl_symbol("import");
+    jl_using_sym = jl_symbol("using");
+    jl_assign_sym = jl_symbol("=");
+    jl_method_sym = jl_symbol("method");
+    jl_exc_sym = jl_symbol("the_exception");
+    jl_enter_sym = jl_symbol("enter");
+    jl_leave_sym = jl_symbol("leave");
+    jl_pop_exception_sym = jl_symbol("pop_exception");
+    jl_new_sym = jl_symbol("new");
+    jl_splatnew_sym = jl_symbol("splatnew");
+    jl_new_opaque_closure_sym = jl_symbol("new_opaque_closure");
+    jl_opaque_closure_method_sym = jl_symbol("opaque_closure_method");
+    jl_const_sym = jl_symbol("const");
+    jl_global_sym = jl_symbol("global");
+    jl_thunk_sym = jl_symbol("thunk");
+    jl_toplevel_sym = jl_symbol("toplevel");
+    jl_dot_sym = jl_symbol(".");
+    jl_as_sym = jl_symbol("as");
+    jl_colon_sym = jl_symbol(":");
+    jl_boundscheck_sym = jl_symbol("boundscheck");
+    jl_inbounds_sym = jl_symbol("inbounds");
+    jl_newvar_sym = jl_symbol("newvar");
+    jl_copyast_sym = jl_symbol("copyast");
+    jl_loopinfo_sym = jl_symbol("loopinfo");
+    jl_pure_sym = jl_symbol("pure");
+    jl_meta_sym = jl_symbol("meta");
+    jl_list_sym = jl_symbol("list");
+    jl_unused_sym = jl_symbol("#unused#");
+    jl_slot_sym = jl_symbol("slot");
+    jl_static_parameter_sym = jl_symbol("static_parameter");
+    jl_inline_sym = jl_symbol("inline");
+    jl_noinline_sym = jl_symbol("noinline");
+    jl_polly_sym = jl_symbol("polly");
+    jl_propagate_inbounds_sym = jl_symbol("propagate_inbounds");
+    jl_aggressive_constprop_sym = jl_symbol("aggressive_constprop");
+    jl_no_constprop_sym = jl_symbol("no_constprop");
+    jl_isdefined_sym = jl_symbol("isdefined");
+    jl_nospecialize_sym = jl_symbol("nospecialize");
+    jl_specialize_sym = jl_symbol("specialize");
+    jl_optlevel_sym = jl_symbol("optlevel");
+    jl_compile_sym = jl_symbol("compile");
+    jl_force_compile_sym = jl_symbol("force_compile");
+    jl_infer_sym = jl_symbol("infer");
+    jl_macrocall_sym = jl_symbol("macrocall");
+    jl_escape_sym = jl_symbol("escape");
+    jl_hygienicscope_sym = jl_symbol("hygienic-scope");
+    jl_gc_preserve_begin_sym = jl_symbol("gc_preserve_begin");
+    jl_gc_preserve_end_sym = jl_symbol("gc_preserve_end");
+    jl_generated_sym = jl_symbol("generated");
+    jl_generated_only_sym = jl_symbol("generated_only");
+    jl_throw_undef_if_not_sym = jl_symbol("throw_undef_if_not");
+    jl_getfield_undefref_sym = jl_symbol("##getfield##");
+    jl_do_sym = jl_symbol("do");
+    jl_coverageeffect_sym = jl_symbol("code_coverage_effect");
+    jl_aliasscope_sym = jl_symbol("aliasscope");
+    jl_popaliasscope_sym = jl_symbol("popaliasscope");
+    jl_thismodule_sym = jl_symbol("thismodule");
+    jl_block_sym = jl_symbol("block");
+    jl_atom_sym = jl_symbol("atom");
+    jl_statement_sym = jl_symbol("statement");
+    jl_all_sym = jl_symbol("all");
+    jl_atomic_sym = jl_symbol("atomic");
+    jl_not_atomic_sym = jl_symbol("not_atomic");
+    jl_unordered_sym = jl_symbol("unordered");
+    jl_monotonic_sym = jl_symbol("monotonic");
+    jl_acquire_sym = jl_symbol("acquire");
+    jl_release_sym = jl_symbol("release");
+    jl_acquire_release_sym = jl_symbol("acquire_release");
+    jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent");
 }
 
 JL_DLLEXPORT void jl_lisp_prompt(void)
@@ -437,17 +370,15 @@ JL_DLLEXPORT void jl_lisp_prompt(void)
     // We don't have our signal handler registered in that case anyway...
     JL_SIGATOMIC_BEGIN();
     jl_init_flisp();
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, jl_main_module);
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(jl_main_module);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "__start")), fl_cons(fl_ctx, fl_ctx->NIL,fl_ctx->NIL));
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
 }
 
 JL_DLLEXPORT void fl_show_profile(void)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 0, symbol_value(symbol(fl_ctx, "show-profiles")));
     jl_ast_ctx_leave(ctx);
@@ -455,7 +386,7 @@ JL_DLLEXPORT void fl_show_profile(void)
 
 JL_DLLEXPORT void fl_clear_profile(void)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 0, symbol_value(symbol(fl_ctx, "clear-profiles")));
     jl_ast_ctx_leave(ctx);
@@ -463,7 +394,7 @@ JL_DLLEXPORT void fl_clear_profile(void)
 
 JL_DLLEXPORT void fl_profile(const char *fname)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "profile-e")), symbol(fl_ctx, fname));
     jl_ast_ctx_leave(ctx);
@@ -492,7 +423,7 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo
     }
     JL_CATCH {
         // if expression cannot be converted, replace with error expr
-        jl_expr_t *ex = jl_exprn(error_sym, 1);
+        jl_expr_t *ex = jl_exprn(jl_error_sym, 1);
         v = (jl_value_t*)ex;
         jl_array_ptr_set(ex->args, 0, jl_cstr_to_string("invalid AST"));
     }
@@ -566,7 +497,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
         if (issymbol(hd))
             sym = scmsym_to_julia(fl_ctx, hd);
         else
-            sym = list_sym;
+            sym = jl_list_sym;
         size_t n = llength(e)-1;
         if (issymbol(hd))
             e = cdr_(e);
@@ -574,7 +505,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             n++;
         // nodes with special representations
         jl_value_t *ex = NULL, *temp = NULL;
-        if (sym == line_sym && (n == 1 || n == 2)) {
+        if (sym == jl_line_sym && (n == 1 || n == 2)) {
             jl_value_t *linenum = scm_to_julia_(fl_ctx, car_(e), mod);
             jl_value_t *file = jl_nothing;
             JL_GC_PUSH2(&linenum, &file);
@@ -584,7 +515,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             JL_GC_POP();
             return temp;
         }
-        else if (sym == lineinfo_sym && n == 5) {
+        else if (sym == jl_lineinfo_sym && n == 5) {
             jl_value_t *modu=NULL, *name=NULL, *file=NULL, *linenum=NULL, *inlinedat=NULL;
             JL_GC_PUSH5(&modu, &name, &file, &linenum, &inlinedat);
             value_t lst = e;
@@ -602,41 +533,41 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             return temp;
         }
         JL_GC_PUSH2(&ex, &temp);
-        if (sym == goto_sym) {
+        if (sym == jl_goto_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_gotonode_type, ex);
         }
-        else if (sym == goto_ifnot_sym) {
+        else if (sym == jl_goto_ifnot_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = scm_to_julia(fl_ctx, car_(cdr_(e)), mod);
             temp = jl_new_struct(jl_gotoifnot_type, ex, temp);
         }
-        else if (sym == newvar_sym) {
+        else if (sym == jl_newvar_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_newvarnode_type, ex);
         }
-        else if (sym == globalref_sym) {
+        else if (sym == jl_globalref_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = scm_to_julia_(fl_ctx, car_(cdr_(e)), mod);
             assert(jl_is_module(ex));
             assert(jl_is_symbol(temp));
             temp = jl_module_globalref((jl_module_t*)ex, (jl_sym_t*)temp);
         }
-        else if (sym == top_sym) {
+        else if (sym == jl_top_sym) {
             assert(mod && "top should not be generated by the parser");
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             assert(jl_is_symbol(ex));
             temp = jl_module_globalref(jl_base_relative_to(mod), (jl_sym_t*)ex);
         }
-        else if (sym == core_sym) {
+        else if (sym == jl_core_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             assert(jl_is_symbol(ex));
             temp = jl_module_globalref(jl_core_module, (jl_sym_t*)ex);
         }
-        else if (sym == thismodule_sym) {
+        else if (sym == jl_thismodule_sym) {
             temp = (jl_value_t*)mod;
         }
-        else if (iscons(e) && (sym == inert_sym || (sym == quote_sym && (!iscons(car_(e)))))) {
+        else if (iscons(e) && (sym == jl_inert_sym || (sym == jl_quote_sym && (!iscons(car_(e)))))) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_quotenode_type, ex);
         }
@@ -651,10 +582,10 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             jl_array_ptr_set(((jl_expr_t*)ex)->args, i, scm_to_julia_(fl_ctx, car_(e), mod));
             e = cdr_(e);
         }
-        if (sym == lambda_sym)
+        if (sym == jl_lambda_sym)
             ex = (jl_value_t*)jl_new_code_info_from_ir((jl_expr_t*)ex);
         JL_GC_POP();
-        if (sym == list_sym)
+        if (sym == jl_list_sym)
             return (jl_value_t*)((jl_expr_t*)ex)->args;
         return (jl_value_t*)ex;
     }
@@ -778,11 +709,11 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali
         jl_expr_t *ex = (jl_expr_t*)v;
         value_t args = fl_ctx->NIL;
         fl_gc_handle(fl_ctx, &args);
-        if (jl_expr_nargs(ex) > 520000 && ex->head != block_sym)
+        if (jl_expr_nargs(ex) > 520000 && ex->head != jl_block_sym)
             lerror(fl_ctx, symbol(fl_ctx, "error"), "expression too large");
         array_to_list(fl_ctx, ex->args, &args, check_valid);
         value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)ex->head, check_valid);
-        if (ex->head == lambda_sym && jl_expr_nargs(ex)>0 && jl_is_array(jl_exprarg(ex,0))) {
+        if (ex->head == jl_lambda_sym && jl_expr_nargs(ex)>0 && jl_is_array(jl_exprarg(ex,0))) {
             value_t llist = fl_ctx->NIL;
             fl_gc_handle(fl_ctx, &llist);
             array_to_list(fl_ctx, (jl_array_t*)jl_exprarg(ex,0), &llist, check_valid);
@@ -801,26 +732,26 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali
         jl_value_t *line = jl_fieldref(v,0);
         value_t args = julia_to_list2_noalloc(fl_ctx, line, file, check_valid);
         fl_gc_handle(fl_ctx, &args);
-        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)line_sym, check_valid);
+        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)jl_line_sym, check_valid);
         value_t scmv = fl_cons(fl_ctx, hd, args);
         fl_free_gc_handles(fl_ctx, 1);
         return scmv;
     }
     if (jl_typeis(v, jl_gotonode_type))
-        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)goto_sym, jl_fieldref(v,0), check_valid);
+        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_goto_sym, jl_fieldref(v,0), check_valid);
     if (jl_typeis(v, jl_quotenode_type))
-        return julia_to_list2(fl_ctx, (jl_value_t*)inert_sym, jl_fieldref_noalloc(v,0), 0);
+        return julia_to_list2(fl_ctx, (jl_value_t*)jl_inert_sym, jl_fieldref_noalloc(v,0), 0);
     if (jl_typeis(v, jl_newvarnode_type))
-        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)newvar_sym, jl_fieldref(v,0), check_valid);
+        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_newvar_sym, jl_fieldref(v,0), check_valid);
     if (jl_typeis(v, jl_globalref_type)) {
         jl_module_t *m = jl_globalref_mod(v);
         jl_sym_t *sym = jl_globalref_name(v);
         if (m == jl_core_module)
-            return julia_to_list2(fl_ctx, (jl_value_t*)core_sym,
+            return julia_to_list2(fl_ctx, (jl_value_t*)jl_core_sym,
                                   (jl_value_t*)sym, check_valid);
         value_t args = julia_to_list2(fl_ctx, (jl_value_t*)m, (jl_value_t*)sym, check_valid);
         fl_gc_handle(fl_ctx, &args);
-        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)globalref_sym, check_valid);
+        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)jl_globalref_sym, check_valid);
         value_t scmv = fl_cons(fl_ctx, hd, args);
         fl_free_gc_handles(fl_ctx, 1);
         return scmv;
@@ -841,14 +772,14 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
         jl_bounds_error(textstr, jl_box_long(offset+1));
     }
     jl_sym_t *rule = (jl_sym_t*)options;
-    if (rule != atom_sym && rule != statement_sym && rule != all_sym) {
+    if (rule != jl_atom_sym && rule != jl_statement_sym && rule != jl_all_sym) {
         jl_error("jl_fl_parse: unrecognized parse options");
     }
-    if (offset != 0 && rule == all_sym) {
+    if (offset != 0 && rule == jl_all_sym) {
         jl_error("Parse `all`: offset not supported");
     }
 
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     value_t fl_text = cvalue_static_cstrn(fl_ctx, text, text_len);
     fl_gc_handle(fl_ctx, &fl_text);
@@ -857,14 +788,14 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
     fl_gc_handle(fl_ctx, &fl_filename);
     value_t fl_expr;
     size_t offset1 = 0;
-    if (rule == all_sym) {
+    if (rule == jl_all_sym) {
         value_t e = fl_applyn(fl_ctx, 2, symbol_value(symbol(fl_ctx, "jl-parse-all")),
                               fl_text, fl_filename);
         fl_expr = e;
         offset1 = e == fl_ctx->FL_EOF ? text_len : 0;
     }
     else {
-        value_t greedy = rule == statement_sym ? fl_ctx->T : fl_ctx->F;
+        value_t greedy = rule == jl_statement_sym ? fl_ctx->T : fl_ctx->F;
         value_t p = fl_applyn(fl_ctx, 4, symbol_value(symbol(fl_ctx, "jl-parse-one")),
                               fl_text, fl_filename, fixnum(offset), greedy);
         fl_expr = car_(p);
@@ -886,14 +817,12 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
 // returns either an expression or a thunk
 jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
     value_t arg = julia_to_scm(fl_ctx, expr);
     value_t e = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, funcname)), arg);
     jl_value_t *result = scm_to_julia(fl_ctx, e, inmodule);
     JL_GC_PUSH1(&result);
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
     JL_GC_POP();
     return result;
@@ -902,15 +831,13 @@ jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module
 static jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr,
                                               jl_module_t *inmodule, const char *file, int line)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
     value_t arg = julia_to_scm(fl_ctx, expr);
     value_t e = fl_applyn(fl_ctx, 3, symbol_value(symbol(fl_ctx, funcname)), arg,
                           symbol(fl_ctx, file), fixnum(line));
     jl_value_t *result = scm_to_julia(fl_ctx, e, inmodule);
     JL_GC_PUSH1(&result);
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
     JL_GC_POP();
     return result;
@@ -994,7 +921,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
 
 JL_DLLEXPORT int jl_is_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "operator?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
@@ -1003,7 +930,7 @@ JL_DLLEXPORT int jl_is_operator(char *sym)
 
 JL_DLLEXPORT int jl_is_unary_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "unary-op?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
@@ -1012,7 +939,7 @@ JL_DLLEXPORT int jl_is_unary_operator(char *sym)
 
 JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "unary-and-binary-op?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
@@ -1021,7 +948,7 @@ JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
 
 JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "syntactic-op?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
@@ -1030,7 +957,7 @@ JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
 
 JL_DLLEXPORT int jl_operator_precedence(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = numval(fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "operator-precedence")), symbol(fl_ctx, sym)));
     jl_ast_ctx_leave(ctx);
@@ -1042,7 +969,7 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
     size_t i, l = jl_array_len(body);
     for (i = 0; i < l; i++) {
         jl_expr_t *stmt = (jl_expr_t*)jl_array_ptr_ref(body, i);
-        if (jl_is_expr((jl_value_t*)stmt) && stmt->head == meta_sym) {
+        if (jl_is_expr((jl_value_t*)stmt) && stmt->head == jl_meta_sym) {
             size_t i, l = jl_array_len(stmt->args);
             for (i = 0; i < l; i++)
                 if (jl_array_ptr_ref(stmt->args, i) == (jl_value_t*)sym)
@@ -1073,7 +1000,9 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
         margs[i] = jl_array_ptr_ref(args, i - 1);
 
     size_t last_age = ct->world_age;
-    ct->world_age = world < jl_world_counter ? world : jl_world_counter;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    if (ct->world_age > world)
+        ct->world_age = world;
     jl_value_t *result;
     JL_TRY {
         margs[0] = jl_toplevel_eval(*ctx, margs[0]);
@@ -1112,20 +1041,20 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     if (!expr || !jl_is_expr(expr))
         return expr;
     jl_expr_t *e = (jl_expr_t*)expr;
-    if (e->head == inert_sym ||
-        e->head == module_sym ||
-        //e->head == toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed
-        e->head == meta_sym) {
+    if (e->head == jl_inert_sym ||
+        e->head == jl_module_sym ||
+        //e->head == jl_toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed
+        e->head == jl_meta_sym) {
         return expr;
     }
-    if (e->head == quote_sym && jl_expr_nargs(e) == 1) {
+    if (e->head == jl_quote_sym && jl_expr_nargs(e) == 1) {
         expr = jl_call_scm_on_ast("julia-bq-macro", jl_exprarg(e, 0), inmodule);
         JL_GC_PUSH1(&expr);
         expr = jl_expand_macros(expr, inmodule, macroctx, onelevel, world, throw_load_error);
         JL_GC_POP();
         return expr;
     }
-    if (e->head == hygienicscope_sym && jl_expr_nargs(e) == 2) {
+    if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) == 2) {
         struct macroctx_stack newctx;
         newctx.m = (jl_module_t*)jl_exprarg(e, 1);
         JL_TYPECHK(hygienic-scope, module, (jl_value_t*)newctx.m);
@@ -1136,7 +1065,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
             jl_array_ptr_set(e->args, 0, a2);
         return expr;
     }
-    if (e->head == macrocall_sym) {
+    if (e->head == jl_macrocall_sym) {
         struct macroctx_stack newctx;
         newctx.m = macroctx ? macroctx->m : inmodule;
         newctx.parent = macroctx;
@@ -1144,10 +1073,10 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         jl_value_t *wrap = NULL;
         JL_GC_PUSH3(&result, &wrap, &newctx.m);
         // copy and wrap the result in `(hygienic-scope ,result ,newctx)
-        if (jl_is_expr(result) && ((jl_expr_t*)result)->head == escape_sym)
+        if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym)
             result = jl_exprarg(result, 0);
         else
-            wrap = (jl_value_t*)jl_exprn(hygienicscope_sym, 2);
+            wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 2);
         result = jl_copy_ast(result);
         if (!onelevel)
             result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel, world, throw_load_error);
@@ -1159,11 +1088,11 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         JL_GC_POP();
         return result;
     }
-    if (e->head == do_sym && jl_expr_nargs(e) == 2 && jl_is_expr(jl_exprarg(e, 0)) &&
-        ((jl_expr_t*)jl_exprarg(e, 0))->head == macrocall_sym) {
+    if (e->head == jl_do_sym && jl_expr_nargs(e) == 2 && jl_is_expr(jl_exprarg(e, 0)) &&
+        ((jl_expr_t*)jl_exprarg(e, 0))->head == jl_macrocall_sym) {
         jl_expr_t *mc = (jl_expr_t*)jl_exprarg(e, 0);
         size_t nm = jl_expr_nargs(mc);
-        jl_expr_t *mc2 = jl_exprn(macrocall_sym, nm+1);
+        jl_expr_t *mc2 = jl_exprn(jl_macrocall_sym, nm+1);
         JL_GC_PUSH1(&mc2);
         jl_exprargset(mc2, 0, jl_exprarg(mc, 0));  // macro name
         jl_exprargset(mc2, 1, jl_exprarg(mc, 1));  // location
@@ -1176,7 +1105,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         JL_GC_POP();
         return ret;
     }
-    if (e->head == escape_sym && macroctx) {
+    if (e->head == jl_escape_sym && macroctx) {
         macroctx = macroctx->parent;
     }
 
@@ -1195,7 +1124,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_world_counter, 0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_atomic_load_acquire(&jl_world_counter), 0);
     expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
@@ -1206,7 +1135,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_world_counter, 0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_atomic_load_acquire(&jl_world_counter), 0);
     expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
@@ -1243,18 +1172,45 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *
                                                  const char *file, int line)
 {
     JL_TIMING(LOWERING);
-    JL_GC_PUSH1(&expr);
+    jl_array_t *kwargs = NULL;
+    JL_GC_PUSH2(&expr, &kwargs);
     expr = jl_copy_ast(expr);
     expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
     value_t arg = julia_to_scm(fl_ctx, expr);
     value_t e = fl_applyn(fl_ctx, 4, symbol_value(symbol(fl_ctx, "jl-expand-to-thunk-warn")), arg,
                           symbol(fl_ctx, file), fixnum(line), fl_ctx->F);
     expr = scm_to_julia(fl_ctx, e, inmodule);
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
+    jl_sym_t *warn_sym = jl_symbol("warn");
+    if (jl_is_expr(expr) && ((jl_expr_t*)expr)->head == warn_sym) {
+        size_t nargs = jl_expr_nargs(expr);
+        for (int i = 0; i < nargs - 1; i++) {
+            jl_value_t *warning = jl_exprarg(expr, i);
+            size_t nargs = 0;
+            if (jl_is_expr(warning) && ((jl_expr_t*)warning)->head == warn_sym)
+                 nargs = jl_expr_nargs(warning);
+            int kwargs_len = (int)nargs - 6;
+            if (nargs < 6 || kwargs_len % 2 != 0) {
+                jl_error("julia-logmsg: bad argument list - expected "
+                         ":warn level (symbol) group (symbol) id file line msg . kwargs");
+            }
+            jl_value_t *level = jl_exprarg(warning, 0);
+            jl_value_t *group = jl_exprarg(warning, 1);
+            jl_value_t *id = jl_exprarg(warning, 2);
+            jl_value_t *file = jl_exprarg(warning, 3);
+            jl_value_t *line = jl_exprarg(warning, 4);
+            jl_value_t *msg = jl_exprarg(warning, 5);
+            kwargs = jl_alloc_vec_any(kwargs_len);
+            for (int i = 0; i < kwargs_len; ++i) {
+                jl_array_ptr_set(kwargs, i, jl_exprarg(warning, i + 6));
+            }
+            JL_TYPECHK(logmsg, long, level);
+            jl_log(jl_unbox_long(level), NULL, group, id, file, line, (jl_value_t*)kwargs, msg);
+        }
+        expr = jl_exprarg(expr, nargs - 1);
+    }
     JL_GC_POP();
     return expr;
 }
@@ -1307,7 +1263,7 @@ JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t
     args[4] = options;
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
-    ct->world_age = jl_world_counter;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     jl_value_t *result = jl_apply(args, 5);
     ct->world_age = last_age;
     args[0] = result; // root during error checks below
@@ -1326,7 +1282,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
 {
     jl_value_t *fname = jl_pchar_to_string(filename, filename_len);
     JL_GC_PUSH1(&fname);
-    jl_value_t *p = jl_parse(text, text_len, fname, 0, (jl_value_t*)all_sym);
+    jl_value_t *p = jl_parse(text, text_len, fname, 0, (jl_value_t*)jl_all_sym);
     JL_GC_POP();
     return jl_svecref(p, 0);
 }
@@ -1339,7 +1295,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
     jl_value_t *fname = jl_cstr_to_string("none");
     JL_GC_PUSH1(&fname);
     jl_value_t *result = jl_parse(text, text_len, fname, offset,
-                                  (jl_value_t*)(greedy ? statement_sym : atom_sym));
+                                  (jl_value_t*)(greedy ? jl_statement_sym : jl_atom_sym));
     JL_GC_POP();
     return result;
 }
diff --git a/src/ast.scm b/src/ast.scm
index bc8d847279fc9..a1615cc01e2fe 100644
--- a/src/ast.scm
+++ b/src/ast.scm
@@ -209,6 +209,13 @@
                                 "\n"
                                 (indented-block (cdr (cadddr e)) ilvl))
                         "")
+                    (if (length> e 5)
+                        (let ((els (cadddddr e)))
+                          (if (and (pair? els) (eq? (car els) 'block))
+                              (string (string.rep "    " ilvl) "else\n"
+                                      (indented-block (cdr els) ilvl))
+                              ""))
+                        "")
                     (if (length> e 4)
                         (let ((fin (caddddr e)))
                           (if (and (pair? fin) (eq? (car fin) 'block))
@@ -289,7 +296,7 @@
 ;; predicates and accessors
 
 (define (quoted? e)
-  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds loopinfo)))
+  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo)))
 (define (quotify e) `',e)
 (define (unquote e)
   (if (and (pair? e) (memq (car e) '(quote inert)))
diff --git a/src/atomics.h b/src/atomics.h
deleted file mode 100644
index 4a33368745aa1..0000000000000
--- a/src/atomics.h
+++ /dev/null
@@ -1,163 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#ifndef JL_ATOMICS_H
-#define JL_ATOMICS_H
-
-// Low-level atomic operations
-
-#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
-#  error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
-#endif
-#ifdef _COMPILER_MICROSOFT_
-#  include <intrin.h>
-#  include <type_traits>
-#endif
-#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
-#  include <immintrin.h>
-#endif
-#ifndef _OS_WINDOWS_
-#  include <pthread.h>
-#endif
-#include <signal.h>
-
-enum jl_memory_order {
-    jl_memory_order_unspecified = -2,
-    jl_memory_order_invalid = -1,
-    jl_memory_order_notatomic = 0,
-    jl_memory_order_unordered,
-    jl_memory_order_monotonic,
-    jl_memory_order_consume,
-    jl_memory_order_acquire,
-    jl_memory_order_release,
-    jl_memory_order_acq_rel,
-    jl_memory_order_seq_cst
-};
-
-/**
- * Thread synchronization primitives:
- *
- * These roughly follows the c11/c++11 memory model and the act as memory
- * barriers at both the compiler level and the hardware level.
- * The only exception is the GC safepoint and GC state transitions for which
- * we use only a compiler (signal) barrier and use the signal handler to do the
- * synchronization in order to lower the mutator overhead as much as possible.
- *
- * We use the compiler intrinsics to implement a similar API to the c11/c++11
- * one instead of using it directly because, we need interoperability between
- * code written in different languages. The current c++ standard (c++14) does
- * not allow using c11 atomic functions or types and there's currently no
- * guarantee that the two types are compatible (although most of them probably
- * are). We also need to access these atomic variables from the LLVM JIT code
- * which is very hard unless the layout of the object is fully specified.
- */
-#define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
-#define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
-#define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
-
-
-#  define jl_atomic_fetch_add_relaxed(obj, arg)         \
-    __atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
-#  define jl_atomic_fetch_add(obj, arg)                 \
-    __atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_add_fetch(obj, arg)                 \
-    __atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_fetch_and_relaxed(obj, arg)         \
-    __atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
-#  define jl_atomic_fetch_and(obj, arg)                 \
-    __atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_fetch_or_relaxed(obj, arg)          \
-    __atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
-#  define jl_atomic_fetch_or(obj, arg)                  \
-    __atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_cmpswap(obj, expected, desired)    \
-    __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-#  define jl_atomic_cmpswap_relaxed(obj, expected, desired)    \
-    __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
-// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
-#  define jl_atomic_exchange(obj, desired)              \
-    __atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
-#  define jl_atomic_exchange_relaxed(obj, desired)      \
-    __atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
-#  define jl_atomic_store(obj, val)                     \
-    __atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
-#  define jl_atomic_store_relaxed(obj, val)             \
-    __atomic_store_n(obj, val, __ATOMIC_RELAXED)
-
-#  if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
-    !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
-// ICC and Clang doesn't have this bug...
-#    define jl_atomic_store_release(obj, val)           \
-    __atomic_store_n(obj, val, __ATOMIC_RELEASE)
-#  else
-// Workaround a GCC bug when using store with release order by using the
-// stronger version instead.
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
-// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
-#    define jl_atomic_store_release(obj, val) do {      \
-        jl_signal_fence();                              \
-        __atomic_store_n(obj, val, __ATOMIC_RELEASE);   \
-    } while (0)
-#  endif
-#  define jl_atomic_load(obj)                   \
-    __atomic_load_n(obj, __ATOMIC_SEQ_CST)
-#  define jl_atomic_load_acquire(obj)           \
-    __atomic_load_n(obj, __ATOMIC_ACQUIRE)
-#ifdef JL_TSAN_ENABLED
-// For the sake of tsan, call these loads consume ordering since they will act
-// as such on the processors we support while normally, the compiler would
-// upgrade this to acquire ordering, which is strong (and slower) than we want.
-#  define jl_atomic_load_relaxed(obj)           \
-    __atomic_load_n(obj, __ATOMIC_CONSUME)
-#else
-#  define jl_atomic_load_relaxed(obj)           \
-    __atomic_load_n(obj, __ATOMIC_RELAXED)
-#endif
-
-#ifdef __clang_analyzer__
-// for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties
-// (for the sake of the analyzer, we don't care if it is an exact match for behavior)
-
-#undef jl_atomic_exchange
-#undef jl_atomic_exchange_relaxed
-#define jl_atomic_exchange(obj, desired) \
-    (__extension__({ \
-            __typeof__((obj)) p__analyzer__ = (obj); \
-            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
-            *p__analyzer__ = (desired); \
-            temp__analyzer__; \
-        }))
-#define jl_atomic_exchange_relaxed jl_atomic_exchange
-
-#undef jl_atomic_cmpswap
-#undef jl_atomic_cmpswap_relaxed
-#define jl_atomic_cmpswap(obj, expected, desired) \
-    (__extension__({ \
-            __typeof__((obj)) p__analyzer__ = (obj); \
-            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
-            __typeof__((expected)) x__analyzer__ = (expected); \
-            if (temp__analyzer__ == *x__analyzer__) \
-                *p__analyzer__ = (desired); \
-            else \
-                *x__analyzer__ = temp__analyzer__; \
-            temp__analyzer__ == *x__analyzer__; \
-        }))
-#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
-
-#undef jl_atomic_store
-#undef jl_atomic_store_release
-#undef jl_atomic_store_relaxed
-#define jl_atomic_store(obj, val)         (*(obj) = (val))
-#define jl_atomic_store_release jl_atomic_store
-#define jl_atomic_store_relaxed jl_atomic_store
-
-#undef jl_atomic_load
-#undef jl_atomic_load_acquire
-#undef jl_atomic_load_relaxed
-#define jl_atomic_load(obj)         (*(obj))
-#define jl_atomic_load_acquire jl_atomic_load
-#define jl_atomic_load_relaxed jl_atomic_load
-
-#endif
-
-
-#endif // JL_ATOMICS_H
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index 49d3cd7fe87e1..e0b328e664d6c 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -12,11 +12,13 @@ extern "C" {
 #ifdef DEFINE_BUILTIN_GLOBALS
 #define DECLARE_BUILTIN(name) \
     JL_CALLABLE(jl_f_##name); \
-    jl_value_t *jl_builtin_##name
+    JL_DLLEXPORT jl_value_t *jl_builtin_##name; \
+    JL_DLLEXPORT jl_fptr_args_t jl_f_##name##_addr = &jl_f_##name
 #else
 #define DECLARE_BUILTIN(name) \
     JL_CALLABLE(jl_f_##name); \
-    extern jl_value_t *jl_builtin_##name
+    JL_DLLEXPORT extern jl_value_t *jl_builtin_##name; \
+    JL_DLLEXPORT extern jl_fptr_args_t jl_f_##name##_addr
 #endif
 
 DECLARE_BUILTIN(applicable);
@@ -53,6 +55,11 @@ DECLARE_BUILTIN(typeof);
 DECLARE_BUILTIN(_typevar);
 
 JL_CALLABLE(jl_f_invoke_kwsorter);
+#ifdef DEFINE_BUILTIN_GLOBALS
+JL_DLLEXPORT jl_fptr_args_t jl_f_invoke_kwsorter_addr = &jl_f_invoke_kwsorter;
+#else
+JL_DLLEXPORT extern jl_fptr_args_t jl_f_invoke_kwsorter_addr;
+#endif
 JL_CALLABLE(jl_f__structtype);
 JL_CALLABLE(jl_f__abstracttype);
 JL_CALLABLE(jl_f__primitivetype);
diff --git a/src/builtins.c b/src/builtins.c
index 7ef93faaa6368..cae89319f4052 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -706,7 +706,7 @@ static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iter
     }
     if (arg_heap) {
         // optimization: keep only the first root, free the others
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
         ((void**)roots)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(1);
 #endif
     }
@@ -735,7 +735,7 @@ JL_CALLABLE(jl_f__apply_pure)
         // because, why not :)
         // and `promote` works better this way
         size_t last_age = ct->world_age;
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         ret = do_apply(args, nargs, NULL);
         ct->world_age = last_age;
         ct->ptls->in_pure_callback = last_in;
@@ -753,14 +753,14 @@ JL_CALLABLE(jl_f__call_latest)
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
     if (!ct->ptls->in_pure_callback)
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     jl_value_t *ret = jl_apply(args, nargs);
     ct->world_age = last_age;
     return ret;
 }
 
 // Like call_in_world, but runs in the specified world.
-// If world > jl_world_counter, run in the latest world.
+// If world is greater than the current value of jl_world_counter, run in the latest world.
 JL_CALLABLE(jl_f__call_in_world)
 {
     JL_NARGSV(_apply_in_world, 2);
@@ -768,9 +768,11 @@ JL_CALLABLE(jl_f__call_in_world)
     size_t last_age = ct->world_age;
     JL_TYPECHK(_apply_in_world, ulong, args[0]);
     size_t world = jl_unbox_ulong(args[0]);
-    world = world <= jl_world_counter ? world : jl_world_counter;
-    if (!ct->ptls->in_pure_callback)
-        ct->world_age = world;
+    if (!ct->ptls->in_pure_callback) {
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        if (ct->world_age > world)
+            ct->world_age = world;
+    }
     jl_value_t *ret = jl_apply(&args[1], nargs - 1);
     ct->world_age = last_age;
     return ret;
@@ -810,19 +812,19 @@ JL_CALLABLE(jl_f_svec)
 
 enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing)
 {
-    if (order == not_atomic_sym)
+    if (order == jl_not_atomic_sym)
         return jl_memory_order_notatomic;
-    if (order == unordered_sym && (loading || storing))
+    if (order == jl_unordered_sym && (loading ^ storing))
         return jl_memory_order_unordered;
-    if (order == monotonic_sym && (loading || storing))
+    if (order == jl_monotonic_sym && (loading || storing))
         return jl_memory_order_monotonic;
-    if (order == acquire_sym && loading)
+    if (order == jl_acquire_sym && loading)
         return jl_memory_order_acquire;
-    if (order == release_sym && storing)
+    if (order == jl_release_sym && storing)
         return jl_memory_order_release;
-    if (order == acquire_release_sym && loading && storing)
+    if (order == jl_acquire_release_sym && loading && storing)
         return jl_memory_order_acq_rel;
-    if (order == sequentially_consistent_sym)
+    if (order == jl_sequentially_consistent_sym)
         return jl_memory_order_seq_cst;
     return jl_memory_order_invalid;
 }
@@ -1557,9 +1559,9 @@ JL_CALLABLE(jl_f__typebody)
             // able to compute the layout of the object before needing to
             // publish it, so we must assume it cannot be inlined, if that
             // check passes, then we also still need to check the fields too.
-            if (!dt->name->mutabl && !references_name((jl_value_t*)dt->super, dt->name, 1)) {
+            if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 1))) {
                 int mayinlinealloc = 1;
-                size_t i, nf = jl_svec_len(ft);
+                size_t i;
                 for (i = 0; i < nf; i++) {
                     jl_value_t *fld = jl_svecref(ft, i);
                     if (references_name(fld, dt->name, 1)) {
@@ -1652,14 +1654,13 @@ static unsigned intrinsic_nargs[num_intrinsics];
 
 JL_CALLABLE(jl_f_intrinsic_call)
 {
-    JL_NARGSV(intrinsic_call, 1);
     JL_TYPECHK(intrinsic_call, intrinsic, F);
     enum intrinsic f = (enum intrinsic)*(uint32_t*)jl_data_ptr(F);
     if (f == cglobal && nargs == 1)
         f = cglobal_auto;
     unsigned fargs = intrinsic_nargs[f];
     if (!fargs)
-        jl_error("this intrinsic must be compiled to be called");
+        jl_errorf("`%s` must be compiled to be called", jl_intrinsic_name(f));
     JL_NARGS(intrinsic_call, fargs, fargs);
 
     union {
@@ -1685,7 +1686,7 @@ JL_CALLABLE(jl_f_intrinsic_call)
         default:
             assert(0 && "unexpected number of arguments to an intrinsic function");
     }
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     abort();
 }
 
@@ -1762,7 +1763,9 @@ static void add_builtin(const char *name, jl_value_t *v)
 jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b)
 {
     assert(jl_isa(b, (jl_value_t*)jl_builtin_type));
-    return ((jl_typemap_entry_t*)jl_gf_mtable(b)->cache)->func.linfo->cache->specptr.fptr1;
+    jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&jl_gf_mtable(b)->cache);
+    jl_code_instance_t *ci = jl_atomic_load_relaxed(&entry->func.linfo->cache);
+    return jl_atomic_load_relaxed(&ci->specptr.fptr1);
 }
 
 static jl_value_t *add_builtin_func(const char *name, jl_fptr_args_t fptr)
diff --git a/src/ccall.cpp b/src/ccall.cpp
index b9a6ddc6151a7..426abf6b7dcb1 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -1,13 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // --- the ccall, cglobal, and llvm intrinsics ---
-#include "llvm/Support/Path.h" // for llvm::sys::path
-#include <llvm/Bitcode/BitcodeReader.h>
-#include <llvm/Linker/Linker.h>
-
-#ifdef _OS_WINDOWS_
-extern const char jl_crtdll_basename[];
-#endif
 
 // somewhat unusual variable, in that aotcompile wants to get the address of this for a sanity check
 GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M)
@@ -31,8 +24,12 @@ static bool runtime_sym_gvs(jl_codegen_params_t &emission_context, const char *f
         symMap = &emission_context.symMapExe;
     }
     else if ((intptr_t)f_lib == (intptr_t)JL_LIBJULIA_INTERNAL_DL_LIBNAME) {
+        libptrgv = prepare_global_in(M, jldlli_var);
+        symMap = &emission_context.symMapDlli;
+    }
+    else if ((intptr_t)f_lib == (intptr_t)JL_LIBJULIA_DL_LIBNAME) {
         libptrgv = prepare_global_in(M, jldll_var);
-        symMap = &emission_context.symMapDl;
+        symMap = &emission_context.symMapDll;
     }
     else
 #endif
@@ -530,7 +527,7 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
 
     jl_value_t *ptr = static_eval(ctx, arg);
     if (ptr == NULL) {
-        if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == call_sym && jl_expr_nargs(arg) == 3 &&
+        if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == jl_call_sym && jl_expr_nargs(arg) == 3 &&
             jl_is_globalref(jl_exprarg(arg,0)) && jl_globalref_mod(jl_exprarg(arg,0)) == jl_core_module &&
             jl_globalref_name(jl_exprarg(arg,0)) == jl_symbol("tuple")) {
             // attempt to interpret a non-constant 2-tuple expression as (func_name, lib_name()), where
@@ -573,10 +570,22 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
         if (f_name != NULL) {
             // just symbol, default to JuliaDLHandle
             // will look in process symbol table
+            if (!llvmcall) {
+                void *symaddr;
+                std::string iname("i");
+                iname += f_name;
+                if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0)) {
+#ifdef _OS_WINDOWS_
+                    f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME;
+#endif
+                    f_name = jl_symbol_name(jl_symbol(iname.c_str()));
+                }
 #ifdef _OS_WINDOWS_
-            if (!llvmcall)
-                f_lib = jl_dlfind_win32(f_name);
+                else {
+                    f_lib = jl_dlfind_win32(f_name);
+                }
 #endif
+            }
         }
         else if (jl_is_cpointer_type(jl_typeof(ptr))) {
             fptr = *(void(**)(void))jl_data_ptr(ptr);
@@ -1267,13 +1276,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         return args[6 + i];
     };
 
-    auto _is_libjulia_func = [&] (uintptr_t ptr, const char *name) {
+    auto _is_libjulia_func = [&] (uintptr_t ptr, StringRef name) {
         if ((uintptr_t)fptr == ptr)
             return true;
         if (f_lib) {
 #ifdef _OS_WINDOWS_
             if ((f_lib == JL_EXE_LIBNAME) || // preventing invalid pointer access
                 (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) ||
+                (f_lib == JL_LIBJULIA_DL_LIBNAME) ||
                 (!strcmp(f_lib, jl_crtdll_basename))) {
                 // libjulia-like
             }
@@ -1283,9 +1293,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
             return false;
 #endif
         }
-        return f_name && !strcmp(f_name, name);
+        return f_name && f_name == name;
     };
-#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), #name)
+#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), StringRef(XSTR(name)))
 
     // emit arguments
     jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs);
diff --git a/src/ccalltest.c b/src/ccalltest.c
index 23137adf7b9a8..64a6a3aabfb0b 100644
--- a/src/ccalltest.c
+++ b/src/ccalltest.c
@@ -9,6 +9,18 @@
 #include "../src/support/platform.h"
 #include "../src/support/dtypes.h"
 
+// Export macro used by the functions in this file; definition borrowed from `support/dtypes.h`
+#ifdef _OS_WINDOWS_
+#  define DLLEXPORT __declspec(dllexport)
+#else // !_OS_WINDOWS_
+# if defined(_OS_LINUX_)
+#  define DLLEXPORT __attribute__ ((visibility("protected")))
+# else // non-Windows, non-Linux
+#  define DLLEXPORT __attribute__ ((visibility("default")))
+# endif // _OS_LINUX_
+#endif // _OS_WINDOWS_
+
+
 #ifdef _P64
 #define jint int64_t
 #define PRIjint PRId64
@@ -28,12 +40,8 @@ int c_int = 0;
 int xs[300] = {0,0,0,1,0};
 
 //int testUcharX(unsigned char x);
-#ifdef _COMPILER_MICROSOFT_
-int __declspec(noinline)
-#else
 int __attribute__((noinline))
-#endif
-JL_DLLEXPORT testUcharX(unsigned char x) {
+DLLEXPORT testUcharX(unsigned char x) { // returns xs[x]; marked noinline by the attribute on the line above
     return xs[x];
 }
 
@@ -47,41 +55,41 @@ typedef struct {
     jint imag;
 } complex_t;
 
-JL_DLLEXPORT complex_t ctest(complex_t a) {
+DLLEXPORT complex_t ctest(complex_t a) {
     a.real += 1;
     a.imag -= 2;
     return a;
 }
 
-JL_DLLEXPORT complex double cgtest(complex double a) {
+DLLEXPORT complex double cgtest(complex double a) {
     //Unpack a ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(a), cimag(a));
     a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex double *cgptest(complex double *a) {
+DLLEXPORT complex double *cgptest(complex double *a) {
     //Unpack a ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(*a), cimag(*a));
     *a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex float cftest(complex float a) {
+DLLEXPORT complex float cftest(complex float a) {
     //Unpack a ComplexPair{Float32} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(a), cimag(a));
     a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex float *cfptest(complex float *a) {
+DLLEXPORT complex float *cfptest(complex float *a) {
-    //Unpack a ComplexPair{Float64} struct
+    //Unpack a ComplexPair{Float32} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(*a), cimag(*a));
     *a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex_t *cptest(complex_t *a) {
+DLLEXPORT complex_t *cptest(complex_t *a) {
     //Unpack a ComplexPair{Int} struct pointer
     if (verbose) fprintf(stderr,"%" PRIjint " + %" PRIjint " i\n", a->real, a->imag);
     a->real += 1;
@@ -89,7 +97,7 @@ JL_DLLEXPORT complex_t *cptest(complex_t *a) {
     return a;
 }
 
-JL_DLLEXPORT complex_t *cptest_static(complex_t *a) {
+DLLEXPORT complex_t *cptest_static(complex_t *a) {
     if (verbose) fprintf(stderr,"%" PRIjint " + %" PRIjint " i\n", a->real, a->imag);
     complex_t *b = (complex_t*)malloc_s(sizeof(complex_t));
     b->real = a->real;
@@ -331,7 +339,7 @@ typedef struct {
 #endif // _COMPILER_INTEL_
 
 
-JL_DLLEXPORT struct1 test_1(struct1 a, float b) {
+DLLEXPORT struct1 test_1(struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", a.x, a.y, b);
     a.x += b * 1;
@@ -339,7 +347,7 @@ JL_DLLEXPORT struct1 test_1(struct1 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct1 test_1long_a(jint x1, jint x2, jint x3, struct1 a, float b) {
+DLLEXPORT struct1 test_1long_a(jint x1, jint x2, jint x3, struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %" PRIjint ", %" PRIjint ") & %g + %g i & %g\n", x1, x2, x3, a.x, a.y, b);
     a.x += b + x1 + x2 + x3;
@@ -347,7 +355,7 @@ JL_DLLEXPORT struct1 test_1long_a(jint x1, jint x2, jint x3, struct1 a, float b)
     return a;
 }
 
-JL_DLLEXPORT struct1 test_1long_b(jint x1, double x2, jint x3, struct1 a, float b) {
+DLLEXPORT struct1 test_1long_b(jint x1, double x2, jint x3, struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %g, %" PRIjint ") & %g + %g i & %g\n", x1, x2, x3, a.x, a.y, b);
     a.x += b + x1 + x2 + x3;
@@ -355,7 +363,7 @@ JL_DLLEXPORT struct1 test_1long_b(jint x1, double x2, jint x3, struct1 a, float
     return a;
 }
 
-JL_DLLEXPORT struct1 test_1long_c(jint x1, double x2, jint x3, jint x4, struct1 a, float b) {
+DLLEXPORT struct1 test_1long_c(jint x1, double x2, jint x3, jint x4, struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %g, %" PRIjint ", %" PRIjint ") & %g + %g i & %g\n", x1, x2, x3, x4, a.x, a.y, b);
     a.x += b + x1 + x2 + x3 + x4;
@@ -363,7 +371,7 @@ JL_DLLEXPORT struct1 test_1long_c(jint x1, double x2, jint x3, jint x4, struct1
     return a;
 }
 
-JL_DLLEXPORT struct2a test_2a(struct2a a, int32_t b) {
+DLLEXPORT struct2a test_2a(struct2a a, int32_t b) {
     //Unpack a ComplexPair{Int32} struct
     if (verbose) fprintf(stderr,"%" PRId32 " + %" PRId32 " i & %" PRId32 "\n", a.x.x, a.y.y, b);
     a.x.x += b*1;
@@ -371,7 +379,7 @@ JL_DLLEXPORT struct2a test_2a(struct2a a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct2b test_2b(struct2b a, int32_t b) {
+DLLEXPORT struct2b test_2b(struct2b a, int32_t b) {
     //Unpack a ComplexPair{Int32} struct
     if (verbose) fprintf(stderr,"%" PRId32 " + %" PRId32 " i & %" PRId32 "\n", a.x, a.y, b);
     a.x += b*1;
@@ -379,7 +387,7 @@ JL_DLLEXPORT struct2b test_2b(struct2b a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct3a test_3a(struct3a a, int64_t b) {
+DLLEXPORT struct3a test_3a(struct3a a, int64_t b) {
     //Unpack a ComplexPair{Int64} struct
     if (verbose) fprintf(stderr,"%" PRId64 " + %" PRId64 " i & %" PRId64 "\n", a.x.x, a.y.y, b);
     a.x.x += b*1;
@@ -387,7 +395,7 @@ JL_DLLEXPORT struct3a test_3a(struct3a a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct3b test_3b(struct3b a, int64_t b) {
+DLLEXPORT struct3b test_3b(struct3b a, int64_t b) {
     //Unpack a ComplexPair{Int64} struct
     if (verbose) fprintf(stderr,"%" PRId64 " + %" PRId64 " i & %" PRId64 "\n", a.x, a.y, b);
     a.x += b*1;
@@ -395,7 +403,7 @@ JL_DLLEXPORT struct3b test_3b(struct3b a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct4 test_4(struct4 a, int32_t b) {
+DLLEXPORT struct4 test_4(struct4 a, int32_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId32 ",%" PRId32 " & %" PRId32 "\n", a.x, a.y, a.z, b);
     a.x += b*1;
     a.y -= b*2;
@@ -403,7 +411,7 @@ JL_DLLEXPORT struct4 test_4(struct4 a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct5 test_5(struct5 a, int32_t b) {
+DLLEXPORT struct5 test_5(struct5 a, int32_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId32 ",%" PRId32 ",%" PRId32 " & %" PRId32 "\n", a.x, a.y, a.z, a.a, b);
     a.x += b*1;
     a.y -= b*2;
@@ -413,7 +421,7 @@ JL_DLLEXPORT struct5 test_5(struct5 a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct6 test_6(struct6 a, int64_t b) {
+DLLEXPORT struct6 test_6(struct6 a, int64_t b) {
     if (verbose) fprintf(stderr,"%" PRId64 ",%" PRId64 ",%" PRId64 " & %" PRId64 "\n", a.x, a.y, a.z, b);
     a.x += b*1;
     a.y -= b*2;
@@ -421,28 +429,28 @@ JL_DLLEXPORT struct6 test_6(struct6 a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct7 test_7(struct7 a, int8_t b) {
+DLLEXPORT struct7 test_7(struct7 a, int8_t b) {
     if (verbose) fprintf(stderr,"%" PRId64 ",%" PRId8 " & %" PRId8 "\n", a.x, a.y, b);
     a.x += b*1;
     a.y -= b*2;
     return a;
 }
 
-JL_DLLEXPORT struct8 test_8(struct8 a, int8_t b) {
+DLLEXPORT struct8 test_8(struct8 a, int8_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId8 " & %" PRId8 "\n", a.x, a.y, b);
     a.x += b*1;
     a.y -= b*2;
     return a;
 }
 
-JL_DLLEXPORT struct9 test_9(struct9 a, int16_t b) {
+DLLEXPORT struct9 test_9(struct9 a, int16_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId16 " & %" PRId16 "\n", a.x, a.y, b);
     a.x += b*1;
     a.y -= b*2;
     return a;
 }
 
-JL_DLLEXPORT struct10 test_10(struct10 a, int8_t b) {
+DLLEXPORT struct10 test_10(struct10 a, int8_t b) {
     if (verbose) fprintf(stderr,"%" PRId8 ",%" PRId8 ",%" PRId8 ",%" PRId8 " & %" PRId8 "\n", a.x, a.y, a.z, a.a, b);
     a.x += b*1;
     a.y -= b*2;
@@ -452,14 +460,14 @@ JL_DLLEXPORT struct10 test_10(struct10 a, int8_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct11 test_11(struct11 a, float b) {
+DLLEXPORT struct11 test_11(struct11 a, float b) {
     //Unpack a nested ComplexPair{Float32} struct
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", creal(a.x), cimag(a.x), b);
     a.x += b*1 - (b*2.0*I);
     return a;
 }
 
-JL_DLLEXPORT struct12 test_12(struct12 a, float b) {
+DLLEXPORT struct12 test_12(struct12 a, float b) {
     //Unpack two nested ComplexPair{Float32} structs
     if (verbose) fprintf(stderr,"%g + %g i & %g + %g i & %g\n",
                          creal(a.x), cimag(a.x), creal(a.y), cimag(a.y), b);
@@ -468,14 +476,14 @@ JL_DLLEXPORT struct12 test_12(struct12 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct13 test_13(struct13 a, double b) {
+DLLEXPORT struct13 test_13(struct13 a, double b) {
     //Unpack a nested ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", creal(a.x), cimag(a.x), b);
     a.x += b*1 - (b*2.0*I);
     return a;
 }
 
-JL_DLLEXPORT struct14 test_14(struct14 a, float b) {
+DLLEXPORT struct14 test_14(struct14 a, float b) {
     //The C equivalent of a  ComplexPair{Float32} struct (but without special complex ABI)
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", a.x, a.y, b);
     a.x += b*1;
@@ -483,7 +491,7 @@ JL_DLLEXPORT struct14 test_14(struct14 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct15 test_15(struct15 a, double b) {
+DLLEXPORT struct15 test_15(struct15 a, double b) {
     //The C equivalent of a  ComplexPair{Float64} struct (but without special complex ABI)
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", a.x, a.y, b);
     a.x += b*1;
@@ -491,7 +499,7 @@ JL_DLLEXPORT struct15 test_15(struct15 a, double b) {
     return a;
 }
 
-JL_DLLEXPORT struct16 test_16(struct16 a, float b) {
+DLLEXPORT struct16 test_16(struct16 a, float b) {
     //Unpack a struct with non-obvious packing requirements
     if (verbose) fprintf(stderr,"%g %g %g %g %g %g & %g\n", a.x, a.y, a.z, a.a, a.b, a.c, b);
     a.x += b*1;
@@ -503,7 +511,7 @@ JL_DLLEXPORT struct16 test_16(struct16 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
+DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
     //Unpack a struct with non-obvious packing requirements
     if (verbose) fprintf(stderr,"%d %d & %d\n", (int)a.a, (int)a.b, (int)b);
     a.a += b*1;
@@ -511,7 +519,7 @@ JL_DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
+DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
     //Unpack a struct with non-obvious packing requirements
     if (verbose) fprintf(stderr,"%d %d %d & %d\n",
                          (int)a.a, (int)a.b, (int)a.c, (int)b);
@@ -526,7 +534,7 @@ JL_DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
 // However, it happens to have the same calling convention with `[2 x i64]`
 // when used as first argument or return value.
 #define int128_t struct3b
-JL_DLLEXPORT int128_t test_128(int128_t a, int64_t b) {
+DLLEXPORT int128_t test_128(int128_t a, int64_t b) {
     //Unpack a Int128
     if (verbose) fprintf(stderr,"0x%016" PRIx64 "%016" PRIx64 " & %" PRId64 "\n", a.y, a.x, b);
     a.x += b*1;
@@ -535,7 +543,7 @@ JL_DLLEXPORT int128_t test_128(int128_t a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct_big test_big(struct_big a) {
+DLLEXPORT struct_big test_big(struct_big a) {
     //Unpack a "big" struct { int, int, char }
     if (verbose) fprintf(stderr,"%" PRIjint " %" PRIjint " %c\n", a.x, a.y, a.z);
     a.x += 1;
@@ -544,7 +552,7 @@ JL_DLLEXPORT struct_big test_big(struct_big a) {
     return a;
 }
 
-JL_DLLEXPORT struct_big test_big_long(jint x1, jint x2, jint x3, struct_big a) {
+DLLEXPORT struct_big test_big_long(jint x1, jint x2, jint x3, struct_big a) {
     //Unpack a "big" struct { int, int, char }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %" PRIjint ", %" PRIjint ") %" PRIjint " %" PRIjint " %c\n", x1, x2, x3, a.x, a.y, a.z);
     a.x += 1 + x1 + x2 + x3;
@@ -554,7 +562,7 @@ JL_DLLEXPORT struct_big test_big_long(jint x1, jint x2, jint x3, struct_big a) {
 }
 
 #define test_huge(suffix, reg) \
-JL_DLLEXPORT struct_huge##suffix test_huge##suffix(char a, struct_huge##suffix b, char c) { \
+DLLEXPORT struct_huge##suffix test_huge##suffix(char a, struct_huge##suffix b, char c) { \
     if (verbose) fprintf(stderr,"%c-%c\n", a, c); \
     b.reg *= 39; \
     return b; \
@@ -577,7 +585,7 @@ test_huge(5b, r1);
 
 // Enough arguments for architectures that uses registers for integer or
 // floating point arguments to spill.
-JL_DLLEXPORT int test_long_args_intp(int *a1, int *a2, int *a3, int *a4,
+DLLEXPORT int test_long_args_intp(int *a1, int *a2, int *a3, int *a4,
                                      int *a5, int *a6, int *a7, int *a8,
                                      int *a9, int *a10, int *a11, int *a12,
                                      int *a13, int *a14)
@@ -586,7 +594,7 @@ JL_DLLEXPORT int test_long_args_intp(int *a1, int *a2, int *a3, int *a4,
             *a11 + *a12 + *a13 + *a14);
 }
 
-JL_DLLEXPORT int test_long_args_int(int a1, int a2, int a3, int a4,
+DLLEXPORT int test_long_args_int(int a1, int a2, int a3, int a4,
                                     int a5, int a6, int a7, int a8,
                                     int a9, int a10, int a11, int a12,
                                     int a13, int a14)
@@ -595,7 +603,7 @@ JL_DLLEXPORT int test_long_args_int(int a1, int a2, int a3, int a4,
             a11 + a12 + a13 + a14);
 }
 
-JL_DLLEXPORT float test_long_args_float(float a1, float a2, float a3,
+DLLEXPORT float test_long_args_float(float a1, float a2, float a3,
                                         float a4, float a5, float a6,
                                         float a7, float a8, float a9,
                                         float a10, float a11, float a12,
@@ -605,7 +613,7 @@ JL_DLLEXPORT float test_long_args_float(float a1, float a2, float a3,
             a11 + a12 + a13 + a14);
 }
 
-JL_DLLEXPORT double test_long_args_double(double a1, double a2, double a3,
+DLLEXPORT double test_long_args_double(double a1, double a2, double a3,
                                           double a4, double a5, double a6,
                                           double a7, double a8, double a9,
                                           double a10, double a11, double a12,
@@ -620,59 +628,59 @@ typedef struct {
     int *b;
 } struct_spill_pint;
 
-JL_DLLEXPORT int test_spill_int1(int *v1, struct_spill_pint s)
+DLLEXPORT int test_spill_int1(int *v1, struct_spill_pint s)
 {
     return *v1 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int2(int *v1, int *v2, struct_spill_pint s)
+DLLEXPORT int test_spill_int2(int *v1, int *v2, struct_spill_pint s)
 {
     return *v1 + *v2 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int3(int *v1, int *v2, int *v3, struct_spill_pint s)
+DLLEXPORT int test_spill_int3(int *v1, int *v2, int *v3, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int4(int *v1, int *v2, int *v3, int *v4,
+DLLEXPORT int test_spill_int4(int *v1, int *v2, int *v3, int *v4,
                                  struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int5(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int5(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int6(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int6(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int7(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int7(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, int *v7, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *v7 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int8(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int8(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, int *v7, int *v8, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *v7 + *v8 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int9(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int9(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, int *v7, int *v8, int *v9,
                                  struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *v7 + *v8 + *v9 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int10(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int10(int *v1, int *v2, int *v3, int *v4, int *v5,
                                   int *v6, int *v7, int *v8, int *v9, int *v10,
                                   struct_spill_pint s)
 {
@@ -685,79 +693,79 @@ typedef struct {
     float b;
 } struct_spill_float;
 
-JL_DLLEXPORT float test_spill_float1(float v1, struct_spill_float s)
+DLLEXPORT float test_spill_float1(float v1, struct_spill_float s)
 {
     return v1 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float2(float v1, float v2, struct_spill_float s)
+DLLEXPORT float test_spill_float2(float v1, float v2, struct_spill_float s)
 {
     return v1 + v2 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float3(float v1, float v2, float v3,
+DLLEXPORT float test_spill_float3(float v1, float v2, float v3,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float4(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float4(float v1, float v2, float v3, float v4,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float5(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float5(float v1, float v2, float v3, float v4,
                                      float v5, struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float6(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float6(float v1, float v2, float v3, float v4,
                                      float v5, float v6, struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float7(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float7(float v1, float v2, float v3, float v4,
                                      float v5, float v6, float v7,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + v7 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float8(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float8(float v1, float v2, float v3, float v4,
                                      float v5, float v6, float v7, float v8,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float9(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float9(float v1, float v2, float v3, float v4,
                                      float v5, float v6, float v7, float v8,
                                      float v9, struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float10(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float10(float v1, float v2, float v3, float v4,
                                       float v5, float v6, float v7, float v8,
                                       float v9, float v10, struct_spill_float s)
 {
     return (v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9 + v10 + s.a + s.b);
 }
 
-JL_DLLEXPORT int get_c_int(void)
+DLLEXPORT int get_c_int(void)
 {
     return c_int;
 }
 
-JL_DLLEXPORT void set_c_int(int i)
+DLLEXPORT void set_c_int(int i)
 {
     c_int = i;
 }
 
-JL_DLLEXPORT void finalizer_cptr(void* v)
+DLLEXPORT void finalizer_cptr(void* v) // ignores v; sets c_int to -1 through set_c_int
 {
     set_c_int(-1);
 }
@@ -766,7 +774,7 @@ JL_DLLEXPORT void finalizer_cptr(void* v)
 //////////////////////////////////
 // Turn off verbose for automated tests, leave on for debugging
 
-JL_DLLEXPORT void set_verbose(int level) {
+DLLEXPORT void set_verbose(int level) {
     verbose = level;
 }
 
@@ -774,7 +782,7 @@ JL_DLLEXPORT void set_verbose(int level) {
 //////////////////////////////////
 // Other tests
 
-JL_DLLEXPORT void *test_echo_p(void *p) {
+DLLEXPORT void *test_echo_p(void *p) {
     return p;
 }
 
@@ -782,7 +790,7 @@ JL_DLLEXPORT void *test_echo_p(void *p) {
 
 #include <xmmintrin.h>
 
-JL_DLLEXPORT __m128i test_m128i(__m128i a, __m128i b, __m128i c, __m128i d)
+DLLEXPORT __m128i test_m128i(__m128i a, __m128i b, __m128i c, __m128i d)
 {
     // 64-bit x86 has only level 2 SSE, which does not have a <4 x int32> multiplication,
     // so we use floating-point instead, and assume caller knows about the hack.
@@ -791,7 +799,7 @@ JL_DLLEXPORT __m128i test_m128i(__m128i a, __m128i b, __m128i c, __m128i d)
                                                     _mm_cvtepi32_ps(_mm_sub_epi32(c,d)))));
 }
 
-JL_DLLEXPORT __m128 test_m128(__m128 a, __m128 b, __m128 c, __m128 d)
+DLLEXPORT __m128 test_m128(__m128 a, __m128 b, __m128 c, __m128 d) // returns a + b * (c - d), element-wise on packed single-precision floats
 {
     return _mm_add_ps(a, _mm_mul_ps(b, _mm_sub_ps(c, d)));
 }
@@ -800,7 +808,7 @@ JL_DLLEXPORT __m128 test_m128(__m128 a, __m128 b, __m128 c, __m128 d)
 
 #ifdef _CPU_AARCH64_
 
-JL_DLLEXPORT __int128 test_aa64_i128_1(int64_t v1, __int128 v2)
+DLLEXPORT __int128 test_aa64_i128_1(int64_t v1, __int128 v2)
 {
     return v1 * 2 - v2;
 }
@@ -810,7 +818,7 @@ typedef struct {
     __int128 v2;
 } struct_aa64_1;
 
-JL_DLLEXPORT struct_aa64_1 test_aa64_i128_2(int64_t v1, __int128 v2,
+DLLEXPORT struct_aa64_1 test_aa64_i128_2(int64_t v1, __int128 v2,
                                             struct_aa64_1 v3)
 {
     struct_aa64_1 x = {(int32_t)v1 / 2 + 1 - v3.v1, v2 * 2 - 1 - v3.v2};
@@ -822,12 +830,12 @@ typedef struct {
     double v2;
 } struct_aa64_2;
 
-JL_DLLEXPORT __fp16 test_aa64_fp16_1(int v1, float v2, double v3, __fp16 v4)
+DLLEXPORT __fp16 test_aa64_fp16_1(int v1, float v2, double v3, __fp16 v4)
 {
     return (__fp16)(v1 + v2 * 2 + v3 * 3 + v4 * 4);
 }
 
-JL_DLLEXPORT struct_aa64_2 test_aa64_fp16_2(int v1, float v2,
+DLLEXPORT struct_aa64_2 test_aa64_fp16_2(int v1, float v2,
                                             double v3, __fp16 v4)
 {
     struct_aa64_2 x = {v4 / 2 + 1, v1 * 2 + v2 * 4 - v3};
@@ -836,7 +844,7 @@ JL_DLLEXPORT struct_aa64_2 test_aa64_fp16_2(int v1, float v2,
 
 #include <arm_neon.h>
 
-JL_DLLEXPORT int64x2_t test_aa64_vec_1(int32x2_t v1, float _v2, int32x2_t v3)
+DLLEXPORT int64x2_t test_aa64_vec_1(int32x2_t v1, float _v2, int32x2_t v3)
 {
     int v2 = (int)_v2;
     return vmovl_s32(v1 * v2 + v3);
@@ -854,7 +862,7 @@ typedef struct {
     int16x8_t v1;
 } struct_aa64_4;
 
-JL_DLLEXPORT struct_aa64_3 test_aa64_vec_2(struct_aa64_3 v1, struct_aa64_4 v2)
+DLLEXPORT struct_aa64_3 test_aa64_vec_2(struct_aa64_3 v1, struct_aa64_4 v2)
 {
     // The cast below is to workaround GCC issue.
     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96990
@@ -945,21 +953,21 @@ test_huge(3_ppc64_hva, vf1[0]);
 test_huge(4_ppc64_hva, v1[0]);
 test_huge(5_ppc64_hva, v1[0]);
 
-JL_DLLEXPORT int64_t test_ppc64_vec1long(
+DLLEXPORT int64_t test_ppc64_vec1long(
         int64_t d1, int64_t d2, int64_t d3, int64_t d4, int64_t d5, int64_t d6,
         int64_t d7, int64_t d8, int64_t d9, struct_huge1_ppc64 vs)
 {
     return d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + vs.m + vs.v[0] + vs.v[1] + vs.v[2] + vs.v[3];
 }
 
-JL_DLLEXPORT int64_t test_ppc64_vec1long_vec(
+DLLEXPORT int64_t test_ppc64_vec1long_vec(
         int64_t d1, int64_t d2, int64_t d3, int64_t d4, int64_t d5, int64_t d6,
         int64_t d7, int64_t d8, int64_t d9, float32x4_t vs)
 {
     return d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + vs[0] + vs[1] + vs[2] + vs[3];
 }
 
-JL_DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
+DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
                                          float32x4_t e, float32x4_t f, float32x4_t g, float32x4_t h, float32x4_t i,
                                          float32x4_t j, float32x4_t k, float32x4_t l, float32x4_t m, float32x4_t n)
 {
@@ -973,13 +981,13 @@ JL_DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t
 
 #endif
 
-JL_DLLEXPORT int threadcall_args(int a, int b) {
+DLLEXPORT int threadcall_args(int a, int b) {
     return a + b;
 }
 
-JL_DLLEXPORT void c_exit_finalizer(void* v) {
+DLLEXPORT void c_exit_finalizer(void* v) { // prints the int that v points to, then the low bit of the pointer value itself
     printf("c_exit_finalizer: %d, %u", *(int*)v, (unsigned)((uintptr_t)v & (uintptr_t)1));
 }
 
 // global variable for cglobal testing
-JL_DLLEXPORT const int global_var = 1;
+DLLEXPORT const int global_var = 1;
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index 23d8b7437b823..fabeb8b62e747 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -205,12 +205,12 @@ static intptr_t get_anon_hdl(void)
     return -1;
 }
 
-static size_t map_offset = 0;
+static _Atomic(size_t) map_offset{0}; // atomic; reset with jl_atomic_store_relaxed in init_shared_map
 // Multiple of 128MB.
 // Hopefully no one will set a ulimit for this to be a problem...
 static constexpr size_t map_size_inc_default = 128 * 1024 * 1024;
 static size_t map_size = 0;
-static jl_mutex_t shared_map_lock;
+static uv_mutex_t shared_map_lock; // uv_mutex_init'd in _init_self_mem; held while alloc_shared_page grows map_size. NOTE(review): confirm init always runs before the first lock
 
 static size_t get_map_size_inc()
 {
@@ -239,7 +239,7 @@ static intptr_t init_shared_map()
     anon_hdl = get_anon_hdl();
     if (anon_hdl == -1)
         return -1;
-    map_offset = 0;
+    jl_atomic_store_relaxed(&map_offset, 0);
     map_size = get_map_size_inc();
     int ret = ftruncate(anon_hdl, map_size);
     if (ret != 0) {
@@ -256,7 +256,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
     *id = off;
     size_t map_size_inc = get_map_size_inc();
     if (__unlikely(off + size > map_size)) {
-        JL_LOCK_NOGC(&shared_map_lock);
+        uv_mutex_lock(&shared_map_lock);
         size_t old_size = map_size;
         while (off + size > map_size)
             map_size += map_size_inc;
@@ -267,7 +267,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
                 abort();
             }
         }
-        JL_UNLOCK_NOGC(&shared_map_lock);
+        uv_mutex_unlock(&shared_map_lock);
     }
     return create_shared_map(size, off);
 }
@@ -305,6 +305,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr)
 // Use `get_self_mem_fd` which has a guard to call this only once.
 static int _init_self_mem()
 {
+    uv_mutex_init(&shared_map_lock);
     struct utsname kernel;
     uname(&kernel);
     int major, minor;
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 8c7bdad2a5903..c3519fc553d6b 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -2,14 +2,6 @@
 
 // utility procedures used in code generation
 
-static Instruction *tbaa_decorate(MDNode *md, Instruction *inst)
-{
-    inst->setMetadata(llvm::LLVMContext::MD_tbaa, md);
-    if (isa<LoadInst>(inst) && md == tbaa_const)
-        inst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(md->getContext(), None));
-    return inst;
-}
-
 static Value *track_pjlvalue(jl_codectx_t &ctx, Value *V)
 {
     assert(V->getType() == T_pjlvalue);
@@ -136,7 +128,7 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_value_t *jt, DIBui
         size_t ntypes = jl_datatype_nfields(jdt);
         std::vector<llvm::Metadata*> Elements(ntypes);
         for (unsigned i = 0; i < ntypes; i++) {
-            jl_value_t *el = jl_svecref(jdt->types, i);
+            jl_value_t *el = jl_field_type_concrete(jdt, i);
             DIType *di;
             if (jl_field_isptr(jdt, i))
                 di = jl_pvalue_dillvmt;
@@ -234,7 +226,7 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
                                 false, GlobalVariable::PrivateLinkage,
                                 NULL, localname);
     // LLVM passes sometimes strip metadata when moving load around
-    // since the load at the new location satisfy the same condition as the origional one.
+    // since the load at the new location satisfies the same condition as the original one.
     // Mark the global as constant to LLVM code using our own metadata
     // which is much less likely to be striped.
     gv->setMetadata("julia.constgv", MDNode::get(gv->getContext(), None));
@@ -517,7 +509,7 @@ static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed
 }
 
 extern "C" JL_DLLEXPORT
-Type *jl_type_to_llvm(jl_value_t *jt, bool *isboxed)
+Type *jl_type_to_llvm_impl(jl_value_t *jt, bool *isboxed)
 {
     return _julia_type_to_llvm(NULL, jt, isboxed);
 }
@@ -1407,7 +1399,11 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
     return im1;
 }
 
-static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, MDNode *tbaa_dest = nullptr, bool isVolatile = false);
+static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest, MDNode *tbaa_dest, bool isVolatile = false);
+static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt)
+{
+    return emit_unbox(ctx, to, x, jt, nullptr, nullptr, false);
+}
 static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef<Value*>);
 static void emit_write_barrier(jl_codectx_t&, Value*, Value*);
 static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*);
@@ -1542,17 +1538,54 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
 }
 
 static jl_cgval_t typed_store(jl_codectx_t &ctx,
-        Value *ptr, Value *idx_0based, const jl_cgval_t &rhs, const jl_cgval_t &cmp,
+        Value *ptr, Value *idx_0based, jl_cgval_t rhs, jl_cgval_t cmp,
         jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope,
         Value *parent,  // for the write barrier, NULL if no barrier needed
         bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment,
-        bool needlock, bool issetfield, bool isreplacefield, bool maybe_null_if_boxed)
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname)
 {
+    auto newval = [&](const jl_cgval_t &lhs) {
+        const jl_cgval_t argv[3] = { cmp, lhs, rhs };
+        jl_cgval_t ret;
+        if (modifyop) {
+            ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+        }
+        else {
+            Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+            ret = mark_julia_type(ctx, callval, true, jl_any_type);
+        }
+        if (!jl_subtype(ret.typ, jltype)) {
+            emit_typecheck(ctx, ret, jltype, fname);
+            ret = update_julia_type(ctx, ret, jltype);
+        }
+        return ret;
+    };
     assert(!needlock || parent != nullptr);
-    jl_cgval_t oldval = rhs;
     Type *elty = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, jltype);
-    if (type_is_ghost(elty))
-        return oldval;
+    if (type_is_ghost(elty)) {
+        if (isStrongerThanMonotonic(Order))
+            ctx.builder.CreateFence(Order);
+        if (issetfield) {
+            return rhs;
+        }
+        else if (isreplacefield) {
+            Value *Success = emit_f_is(ctx, cmp, ghostValue(jltype));
+            Success = ctx.builder.CreateZExt(Success, T_int8);
+            const jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        else if (isswapfield) {
+            return ghostValue(jltype);
+        }
+        else { // modifyfield
+            jl_cgval_t oldval = ghostValue(jltype);
+            const jl_cgval_t argv[2] = { oldval, newval(oldval) };
+            jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+    }
     Value *intcast = nullptr;
     if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
         const DataLayout &DL = jl_data_layout;
@@ -1568,13 +1601,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         if (nb != nb2)
             elty = Type::getIntNTy(jl_LLVMContext, nb2);
     }
-    Value *r;
-    if (!isboxed)
-        r = emit_unbox(ctx, realelty, rhs, jltype);
-    else
-        r = boxed(ctx, rhs);
-    if (realelty != elty)
-        r = ctx.builder.CreateZExt(r, elty);
+    Value *r = nullptr;
+    if (issetfield || isswapfield || isreplacefield)  {
+        if (!isboxed)
+            r = emit_unbox(ctx, realelty, rhs, jltype);
+        else
+            r = boxed(ctx, rhs);
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     if (ptr->getType() != ptrty)
         ptr = ctx.builder.CreateBitCast(ptr, ptrty);
@@ -1584,32 +1619,22 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         alignment = sizeof(void*);
     else if (!alignment)
         alignment = julia_alignment(jltype);
-    Instruction *instr = nullptr;
+    Value *instr = nullptr;
     Value *Compare = nullptr;
     Value *Success = nullptr;
-    BasicBlock *DoneBB = issetfield || (!isreplacefield && !isboxed) ? nullptr : BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
+    BasicBlock *DoneBB = nullptr;
     if (needlock)
         emit_lockstate_value(ctx, parent, true);
-    if (issetfield || Order == AtomicOrdering::NotAtomic) {
-        if (!issetfield) {
-            instr = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+    jl_cgval_t oldval = rhs;
+    if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) {
+        if (isswapfield) {
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
             if (aliasscope)
-                instr->setMetadata("noalias", aliasscope);
+                load->setMetadata("noalias", aliasscope);
             if (tbaa)
-                tbaa_decorate(tbaa, instr);
+                tbaa_decorate(tbaa, load);
             assert(realelty == elty);
-            if (isreplacefield) {
-                oldval = mark_julia_type(ctx, instr, isboxed, jltype);
-                Value *first_ptr = nullptr;
-                if (maybe_null_if_boxed)
-                    first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
-                Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
-                    return emit_f_is(ctx, oldval, cmp);
-                });
-                BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
-                ctx.builder.CreateCondBr(Success, BB, DoneBB);
-                ctx.builder.SetInsertPoint(BB);
-            }
+            instr = load;
         }
         StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
         store->setOrdering(Order);
@@ -1617,20 +1642,38 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             store->setMetadata("noalias", aliasscope);
         if (tbaa)
             tbaa_decorate(tbaa, store);
-        if (DoneBB)
-            ctx.builder.CreateBr(DoneBB);
     }
-    else if (isboxed || isreplacefield) {
-        // we have to handle isboxed here as a workaround for really bad LLVM design issue: plain Xchg only works with integers
+    else if (isswapfield && !isboxed) {
+        // isboxed can't be handled here because of a really bad LLVM
+        // design issue: plain Xchg only works with integers
+#if JL_LLVM_VERSION >= 130000
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
+#else
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
+        store->setAlignment(Align(alignment));
+#endif
+        if (aliasscope)
+            store->setMetadata("noalias", aliasscope);
+        if (tbaa)
+            tbaa_decorate(tbaa, store);
+        instr = store;
+    }
+    else {
+        // replacefield, modifyfield, or swapfield (isboxed && atomic)
+        DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
         bool needloop;
         PHINode *Succ = nullptr, *Current = nullptr;
         if (isreplacefield) {
-            if (!isboxed) {
+            if (Order == AtomicOrdering::NotAtomic) {
+                needloop = false;
+            }
+            else if (!isboxed) {
+                assert(jl_is_concrete_type(jltype));
                 needloop = ((jl_datatype_t*)jltype)->layout->haspadding;
                 Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first;
                 if (SameType != ConstantInt::getTrue(jl_LLVMContext)) {
                     BasicBlock *SkipBB = BasicBlock::Create(jl_LLVMContext, "skip_xchg", ctx.f);
-                    BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+                    BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "ok_xchg", ctx.f);
                     ctx.builder.CreateCondBr(SameType, BB, SkipBB);
                     ctx.builder.SetInsertPoint(SkipBB);
                     LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
@@ -1643,7 +1686,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                     ctx.builder.CreateBr(DoneBB);
                     ctx.builder.SetInsertPoint(DoneBB);
                     Succ = ctx.builder.CreatePHI(T_int1, 2);
-                    Succ->addIncoming(ConstantInt::get(T_int1, 0), SkipBB);
+                    Succ->addIncoming(ConstantInt::get(T_int1, false), SkipBB);
                     Current = ctx.builder.CreatePHI(instr->getType(), 2);
                     Current->addIncoming(instr, SkipBB);
                     ctx.builder.SetInsertPoint(BB);
@@ -1652,59 +1695,123 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 if (realelty != elty)
                     Compare = ctx.builder.CreateZExt(Compare, elty);
             }
-            else if (cmp.isboxed) {
+            else if (cmp.isboxed || cmp.constant || jl_pointer_egal(jltype)) {
                 Compare = boxed(ctx, cmp);
-                needloop = !jl_is_mutable_datatype(jltype);
+                needloop = !jl_pointer_egal(jltype) && !jl_pointer_egal(cmp.typ);
+                if (needloop && !cmp.isboxed) // try to use the same box in the compare now and later
+                    cmp = mark_julia_type(ctx, Compare, true, cmp.typ);
             }
             else {
-                Compare = V_rnull;
+                Compare = V_rnull; // TODO: does this need to be an invalid bit pattern?
                 needloop = true;
             }
         }
-        else {
+        else { // swap or modify
             LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
-            Current->setOrdering(AtomicOrdering::Monotonic);
+            Current->setOrdering(Order == AtomicOrdering::NotAtomic ? Order : AtomicOrdering::Monotonic);
             if (aliasscope)
                 Current->setMetadata("noalias", aliasscope);
             if (tbaa)
                 tbaa_decorate(tbaa, Current);
             Compare = Current;
-            needloop = true;
+            needloop = !isswapfield || Order != AtomicOrdering::NotAtomic;
         }
-        BasicBlock *BB;
+        BasicBlock *BB = NULL;
+        PHINode *CmpPhi = NULL;
         if (needloop) {
             BasicBlock *From = ctx.builder.GetInsertBlock();
             BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
             ctx.builder.CreateBr(BB);
             ctx.builder.SetInsertPoint(BB);
-            PHINode *Cmp = ctx.builder.CreatePHI(r->getType(), 2);
-            Cmp->addIncoming(Compare, From);
-            Compare = Cmp;
-        }
-        if (Order == AtomicOrdering::Unordered)
-            Order = AtomicOrdering::Monotonic;
-        if (!isreplacefield)
-            FailOrder = AtomicOrdering::Monotonic;
-        else if (FailOrder == AtomicOrdering::Unordered)
-            FailOrder = AtomicOrdering::Monotonic;
+            CmpPhi = ctx.builder.CreatePHI(elty, 2);
+            CmpPhi->addIncoming(Compare, From);
+            Compare = CmpPhi;
+        }
+        if (ismodifyfield) {
+            if (needlock)
+                emit_lockstate_value(ctx, parent, false);
+            Value *realCompare = Compare;
+            if (realelty != elty)
+                realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
+            if (intcast) {
+                ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo()));
+                if (maybe_null_if_boxed)
+                    realCompare = ctx.builder.CreateLoad(intcast);
+            }
+            if (maybe_null_if_boxed) {
+                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
+                if (first_ptr)
+                    null_pointer_check(ctx, first_ptr, nullptr);
+            }
+            if (intcast)
+                oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
+            else
+                oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
+            rhs = newval(oldval);
+            if (!isboxed)
+                r = emit_unbox(ctx, realelty, rhs, jltype);
+            else
+                r = boxed(ctx, rhs);
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
+            if (needlock)
+                emit_lockstate_value(ctx, parent, true);
+            cmp = oldval;
+        }
+        Value *Done;
+        if (Order == AtomicOrdering::NotAtomic) {
+            // modifyfield or replacefield
+            assert(elty == realelty && !intcast);
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            if (aliasscope)
+                load->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, load);
+            Value *first_ptr = nullptr;
+            if (maybe_null_if_boxed && !ismodifyfield)
+                first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            oldval = mark_julia_type(ctx, load, isboxed, jltype);
+            Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
+                return emit_f_is(ctx, oldval, cmp);
+            });
+            if (needloop && ismodifyfield)
+                CmpPhi->addIncoming(load, ctx.builder.GetInsertBlock());
+            assert(Succ == nullptr);
+            BasicBlock *XchgBB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+            ctx.builder.CreateCondBr(Success, XchgBB, needloop && ismodifyfield ? BB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
+            auto *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            ctx.builder.CreateBr(DoneBB);
+            instr = load;
+        }
+        else {
+            if (Order == AtomicOrdering::Unordered)
+                Order = AtomicOrdering::Monotonic;
+            if (!isreplacefield)
+                FailOrder = AtomicOrdering::Monotonic;
+            else if (FailOrder == AtomicOrdering::Unordered)
+                FailOrder = AtomicOrdering::Monotonic;
 #if JL_LLVM_VERSION >= 130000
-        auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
 #else
-        auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
-        store->setAlignment(Align(alignment));
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
+            store->setAlignment(Align(alignment));
 #endif
-        if (aliasscope)
-            store->setMetadata("noalias", aliasscope);
-        if (tbaa)
-            tbaa_decorate(tbaa, store);
-        instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
-        Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
-        Value *Done = Success;
-        if (needloop) {
-            if (isreplacefield) {
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
+            Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
+            Done = Success;
+            if (isreplacefield && needloop) {
                 Value *realinstr = instr;
                 if (realelty != elty)
-                    realinstr = ctx.builder.CreateTrunc(instr, realelty);
+                    realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
                 if (intcast) {
                     ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
                     oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
@@ -1724,7 +1831,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 });
                 Done = ctx.builder.CreateNot(Done);
             }
-            cast<PHINode>(Compare)->addIncoming(instr, ctx.builder.GetInsertBlock());
+            if (needloop)
+                ctx.builder.CreateCondBr(Done, DoneBB, BB);
+            else
+                ctx.builder.CreateBr(DoneBB);
+            if (needloop)
+                CmpPhi->addIncoming(instr, ctx.builder.GetInsertBlock());
         }
         if (Succ != nullptr) {
             Current->addIncoming(instr, ctx.builder.GetInsertBlock());
@@ -1732,31 +1844,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             Succ->addIncoming(Success, ctx.builder.GetInsertBlock());
             Success = Succ;
         }
-        if (needloop)
-            ctx.builder.CreateCondBr(Done, DoneBB, BB);
-        else
-            ctx.builder.CreateBr(DoneBB);
-    }
-    else {
-#if JL_LLVM_VERSION >= 130000
-        instr = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
-#else
-        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
-        store->setAlignment(Align(alignment));
-        instr = store;
-#endif
-        if (aliasscope)
-            instr->setMetadata("noalias", aliasscope);
-        if (tbaa)
-            tbaa_decorate(tbaa, instr);
-        assert(DoneBB == nullptr);
     }
     if (DoneBB)
         ctx.builder.SetInsertPoint(DoneBB);
     if (needlock)
         emit_lockstate_value(ctx, parent, false);
     if (parent != NULL) {
-        BasicBlock *DoneBB;
         if (isreplacefield) {
             // TOOD: avoid this branch if we aren't making a write barrier
             BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg_wb", ctx.f);
@@ -1773,7 +1866,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             ctx.builder.SetInsertPoint(DoneBB);
         }
     }
-    if (!issetfield) {
+    if (ismodifyfield) {
+        const jl_cgval_t argv[2] = { oldval, rhs };
+        jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+        oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+    }
+    else if (!issetfield) { // swapfield or replacefield
         if (realelty != elty)
             instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
         if (intcast) {
@@ -1787,10 +1885,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
         oldval = mark_julia_type(ctx, instr, isboxed, jltype);
         if (isreplacefield) {
-            // TODO: do better here
-            jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
-            instr = emit_jlcall(ctx, jltuple_func, V_rnull, argv, 2, JLCALL_F_CC);
-            oldval = mark_julia_type(ctx, instr, true, jl_any_type);
+            Success = ctx.builder.CreateZExt(Success, T_int8);
+            const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
     }
     return oldval;
@@ -1936,9 +2034,10 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     if (!strct.ispointer()) { // unboxed
         assert(jl_is_concrete_immutable((jl_value_t*)stt));
         bool isboxed = is_datatype_all_pointers(stt);
-        bool issame = is_tupletype_homogeneous(stt->types);
+        jl_svec_t *types = stt->types;
+        bool issame = is_tupletype_homogeneous(types);
         if (issame) {
-            jl_value_t *jft = jl_svecref(stt->types, 0);
+            jl_value_t *jft = jl_svecref(types, 0);
             if (strct.isghost) {
                 (void)idx0();
                 *ret = ghostValue(jft);
@@ -1978,7 +2077,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
                         ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)),
                         fld);
             }
-            jl_value_t *jft = issame ? jl_svecref(stt->types, 0) : (jl_value_t*)jl_any_type;
+            jl_value_t *jft = issame ? jl_svecref(types, 0) : (jl_value_t*)jl_any_type;
             if (isboxed && maybe_null)
                 null_pointer_check(ctx, fld);
             *ret = mark_julia_type(ctx, fld, isboxed, jft);
@@ -2020,9 +2119,9 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             *ret = mark_julia_type(ctx, fld, true, jl_any_type);
             return true;
         }
-        else if (is_tupletype_homogeneous(stt->types)) {
+        else if (is_tupletype_homogeneous(jl_get_fieldtypes(stt))) {
             assert(nfields > 0); // nf == 0 trapped by all_pointers case
-            jl_value_t *jft = jl_svecref(stt->types, 0);
+            jl_value_t *jft = jl_svecref(stt->types, 0); // n.b. jl_get_fieldtypes assigned stt->types for here
             assert(jl_is_concrete_type(jft));
             idx = idx0();
             Value *ptr = maybe_decay_tracked(ctx, data_pointer(ctx, strct));
@@ -2047,22 +2146,25 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     return false;
 }
 
-static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, jl_value_t *jfty, size_t fsz, size_t al, MDNode *tbaa, bool mutabl)
+static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex,
+        jl_value_t *jfty, size_t fsz, size_t al, MDNode *tbaa, bool mutabl,
+        unsigned union_max, MDNode *tbaa_ptindex)
 {
-    Instruction *tindex0 = tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1)));
-    //tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
-    //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
-    //    ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
+    Instruction *tindex0 = tbaa_decorate(tbaa_ptindex, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1)));
+    tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
+        ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
+        ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
     Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex0);
-    if (mutabl) {
+    if (fsz > 0 && mutabl) {
         // move value to an immutable stack slot (excluding tindex)
-        Type *ET = IntegerType::get(jl_LLVMContext, 8 * al);
-        AllocaInst *lv = emit_static_alloca(ctx, ET);
-        lv->setOperand(0, ConstantInt::get(T_int32, (fsz + al - 1) / al));
+        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (fsz + al - 1) / al);
+        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        if (al > 1)
+            lv->setAlignment(Align(al));
         emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al);
         addr = lv;
     }
-    return mark_julia_slot(addr, jfty, tindex, tbaa);
+    return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa);
 }
 
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
@@ -2136,7 +2238,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         }
         else if (jl_is_uniontype(jfty)) {
             size_t fsz = 0, al = 0;
-            bool isptr = !jl_islayout_inline(jfty, &fsz, &al);
+            int union_max = jl_islayout_inline(jfty, &fsz, &al);
+            bool isptr = (union_max == 0);
             assert(!isptr && fsz == jl_field_size(jt, idx) - 1); (void)isptr;
             Value *ptindex;
             if (isboxed) {
@@ -2146,7 +2249,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             else {
                 ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz);
             }
-            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, jt->name->mutabl);
+            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, jt->name->mutabl, union_max, tbaa_unionselbyte);
         }
         assert(jl_is_concrete_type(jfty));
         if (!jt->name->mutabl && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type ||
@@ -2392,7 +2495,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo,
         ctx.builder.CreateStructGEP(jl_array_llvmt,
             emit_bitcast(ctx, t, jl_parray_llvmt),
             0); // index (not offset) of data field in jl_parray_llvmt
-    // Normally allocated array of 0 dimention always have a inline pointer.
+    // Normally allocated arrays of 0 dimensions always have an inline pointer.
     // However, we can't rely on that here since arrays can also be constructed from C pointers.
     MDNode *tbaa = arraytype_constshape(tinfo.typ) ? tbaa_const : tbaa_arrayptr;
     PointerType *PT = cast<PointerType>(addr->getType());
@@ -2658,8 +2761,14 @@ static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
     size_t nargs;
     if (const auto *CC = dyn_cast<ConstantAggregate>(constant))
         nargs = CC->getNumOperands();
-    else if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(constant))
+    else if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(constant)) {
+#if JL_LLVM_VERSION >= 130000
+        // SVE: Elsewhere we use `getKnownMinValue`
+        nargs = CAZ->getElementCount().getFixedValue();
+#else
         nargs = CAZ->getNumElements();
+#endif
+    }
     else if (const auto *CDS = dyn_cast<ConstantDataSequential>(constant))
         nargs = CDS->getNumElements();
     else
@@ -2970,7 +3079,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo)
     if (jt == (jl_value_t*)jl_nothing_type)
         return track_pjlvalue(ctx, literal_pointer_val(ctx, jl_nothing));
     if (vinfo.isboxed) {
-        assert(vinfo.V == vinfo.Vboxed);
+        assert(vinfo.V == vinfo.Vboxed && vinfo.V != nullptr);
         assert(vinfo.V->getType() == T_prjlvalue);
         return vinfo.V;
     }
@@ -3105,9 +3214,9 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
 // allocation for known size object
 static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
 {
-    Value *ptls_ptr = emit_bitcast(ctx, get_current_ptls(ctx), T_pint8);
+    Value *current_task = get_current_task(ctx);
     Function *F = prepare_call(jl_alloc_obj_func);
-    auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)});
+    auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)});
     call->setAttributes(F->getAttributes());
     return call;
 }
@@ -3130,6 +3239,9 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
 
 static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*> ptrs)
 {
+    // if there are no child objects we can skip emission
+    if (ptrs.empty())
+        return;
     SmallVector<Value*, 8> decay_ptrs;
     decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, T_prjlvalue)));
     for (auto ptr : ptrs) {
@@ -3143,9 +3255,10 @@ static void find_perm_offsets(jl_datatype_t *typ, SmallVector<unsigned,4> &res,
     // This is a inlined field at `offset`.
     if (!typ->layout || typ->layout->npointers == 0)
         return;
-    size_t nf = jl_svec_len(typ->types);
+    jl_svec_t *types = jl_get_fieldtypes(typ);
+    size_t nf = jl_svec_len(types);
     for (size_t i = 0; i < nf; i++) {
-        jl_value_t *_fld = jl_svecref(typ->types, i);
+        jl_value_t *_fld = jl_svecref(types, i);
         if (!jl_is_datatype(_fld))
             continue;
         jl_datatype_t *fld = (jl_datatype_t*)_fld;
@@ -3173,12 +3286,13 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
 
 static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0,
-        const jl_cgval_t &rhs, const jl_cgval_t &cmp,
+        jl_cgval_t rhs, jl_cgval_t cmp,
         bool checked, bool wb, AtomicOrdering Order, AtomicOrdering FailOrder,
-        bool needlock, bool issetfield, bool isreplacefield)
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        const jl_cgval_t *modifyop, const std::string &fname)
 {
     if (!sty->name->mutabl && checked) {
-        std::string msg = "setfield!: immutable struct of type "
+        std::string msg = fname + ": immutable struct of type "
             + std::string(jl_symbol_name(sty->name->name))
             + " cannot be changed";
         emit_error(ctx, msg);
@@ -3193,43 +3307,84 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
                 emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8),
                 ConstantInt::get(T_size, byte_offset)); // TODO: use emit_struct_gep
     }
-    jl_value_t *jfty = jl_svecref(sty->types, idx0);
+    jl_value_t *jfty = jl_field_type(sty, idx0);
     if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) {
         size_t fsz = 0, al = 0;
-        bool isptr = !jl_islayout_inline(jfty, &fsz, &al);
+        int union_max = jl_islayout_inline(jfty, &fsz, &al);
+        bool isptr = (union_max == 0);
         assert(!isptr && fsz == jl_field_size(sty, idx0) - 1); (void)isptr;
         // compute tindex from rhs
         jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
         if (rhs_union.typ == jl_bottom_type)
             return jl_cgval_t();
-        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
-        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
         Value *ptindex = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8), ConstantInt::get(T_size, fsz));
         if (needlock)
             emit_lockstate_value(ctx, strct, true);
+        BasicBlock *ModifyBB;
+        if (ismodifyfield) {
+            ModifyBB = BasicBlock::Create(jl_LLVMContext, "modify_xchg", ctx.f);
+            ctx.builder.CreateBr(ModifyBB);
+            ctx.builder.SetInsertPoint(ModifyBB);
+        }
         jl_cgval_t oldval = rhs;
         if (!issetfield)
-            oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true);
-        Value *Success;
-        BasicBlock *DoneBB;
-        if (isreplacefield) {
-            BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+            oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, tbaa_unionselbyte);
+        Value *Success = NULL;
+        BasicBlock *DoneBB = NULL;
+        if (isreplacefield || ismodifyfield) {
+            if (ismodifyfield) {
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, false);
+                const jl_cgval_t argv[3] = { cmp, oldval, rhs };
+                if (modifyop) {
+                    rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+                }
+                else {
+                    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+                    rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+                }
+                if (!jl_subtype(rhs.typ, jfty)) {
+                    emit_typecheck(ctx, rhs, jfty, fname);
+                    rhs = update_julia_type(ctx, rhs, jfty);
+                }
+                rhs_union = convert_julia_type(ctx, rhs, jfty);
+                if (rhs_union.typ == jl_bottom_type)
+                    return jl_cgval_t();
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, true);
+                cmp = oldval;
+                oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, tbaa_unionselbyte);
+            }
+            BasicBlock *XchgBB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
             DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
             Success = emit_f_is(ctx, oldval, cmp);
-            ctx.builder.CreateCondBr(Success, BB, DoneBB);
-            ctx.builder.SetInsertPoint(BB);
+            ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? ModifyBB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
         }
+        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
+        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
         tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
         // copy data
         if (!rhs.isghost) {
             emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
         }
-        if (isreplacefield) {
+        if (isreplacefield || ismodifyfield) {
             ctx.builder.CreateBr(DoneBB);
             ctx.builder.SetInsertPoint(DoneBB);
         }
         if (needlock)
             emit_lockstate_value(ctx, strct, false);
+        if (isreplacefield) {
+            Success = ctx.builder.CreateZExt(Success, T_int8);
+            jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        else if (ismodifyfield) {
+            jl_cgval_t argv[2] = {oldval, rhs};
+            jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
         return oldval;
     }
     else {
@@ -3240,7 +3395,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr,
             wb ? maybe_bitcast(ctx, data_pointer(ctx, strct), T_pjlvalue) : nullptr,
             isboxed, Order, FailOrder, align,
-            needlock, issetfield, isreplacefield, maybe_null);
+            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname);
     }
 }
 
@@ -3283,7 +3438,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             }
 
             for (unsigned i = 0; i < na; i++) {
-                jl_value_t *jtype = jl_svecref(sty->types, i);
+                jl_value_t *jtype = jl_svecref(sty->types, i); // n.b. ty argument must be concrete
                 jl_cgval_t fval_info = argv[i];
                 emit_typecheck(ctx, fval_info, jtype, "new");
                 fval_info = update_julia_type(ctx, fval_info, jtype);
@@ -3418,8 +3573,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 need_wb = !rhs.isboxed;
             else
                 need_wb = false;
-            emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new");
-            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false);
+            emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new"); // n.b. ty argument must be concrete
+            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, nullptr, "");
         }
         return strctinfo;
     }
@@ -3436,16 +3591,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
 
 static void emit_signal_fence(jl_codectx_t &ctx)
 {
-#if defined(_CPU_ARM_) || defined(_CPU_AARCH64_)
-    // LLVM generates very inefficient code (and might include function call)
-    // for signal fence. Fallback to the poor man signal fence with
-    // inline asm instead.
-    // https://llvm.org/bugs/show_bug.cgi?id=27545
-    ctx.builder.CreateCall(InlineAsm::get(FunctionType::get(T_void, false), "",
-                                      "~{memory}", true));
-#else
     ctx.builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread);
-#endif
 }
 
 static Value *emit_defer_signal(jl_codectx_t &ctx)
@@ -3457,7 +3603,7 @@ static Value *emit_defer_signal(jl_codectx_t &ctx)
     return ctx.builder.CreateInBoundsGEP(T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
 }
 
-#ifndef NDEBUG
+#ifndef JL_NDEBUG
 static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
 {
     return
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 675afc3453fba..38bd012ff46fc 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -19,9 +19,6 @@
 
 #if defined(__GNUC__)
 #define USED_FUNC __attribute__((used))
-#elif defined(_COMPILER_MICROSOFT_)
-// Does MSVC have this?
-#define USED_FUNC
 #else
 #define USED_FUNC
 #endif
@@ -708,6 +705,10 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD) {
   return declHasAnnotation(FD, "julia_not_safepoint");
 }
 
+#if LLVM_VERSION_MAJOR >= 13
+#define endswith_lower endswith_insensitive
+#endif
+
 bool GCChecker::isGCTrackedType(QualType QT) {
   return isValueCollection(QT) ||
          isJuliaType(
@@ -744,8 +745,7 @@ bool GCChecker::isGCTrackedType(QualType QT) {
                    Name.endswith_lower("jl_method_match_t") ||
                    Name.endswith_lower("jl_vararg_t") ||
                    Name.endswith_lower("jl_opaque_closure_t") ||
-                   // Probably not technically true for these, but let's allow
-                   // it
+                   // Probably not technically true for these, but let's allow it
                    Name.endswith_lower("typemap_intersection_env") ||
                    Name.endswith_lower("interpreter_state") ||
                    Name.endswith_lower("jl_typeenv_t") ||
@@ -775,7 +775,7 @@ bool GCChecker::isGCTracked(const Expr *E) {
 
 bool GCChecker::isGloballyRootedType(QualType QT) const {
   return isJuliaType(
-      [](StringRef Name) { return Name.endswith_lower("jl_sym_t"); }, QT);
+      [](StringRef Name) { return Name.endswith("jl_sym_t"); }, QT);
 }
 
 bool GCChecker::isSafepoint(const CallEvent &Call) const {
@@ -813,8 +813,8 @@ bool GCChecker::isSafepoint(const CallEvent &Call) const {
       if (FD->getBuiltinID() != 0 || FD->isTrivial())
         isCalleeSafepoint = false;
       else if (FD->getDeclName().isIdentifier() &&
-               (FD->getName().startswith_lower("uv_") ||
-                FD->getName().startswith_lower("unw_") ||
+               (FD->getName().startswith("uv_") ||
+                FD->getName().startswith("unw_") ||
                 FD->getName().startswith("_U")) &&
                FD->getName() != "uv_run")
         isCalleeSafepoint = false;
@@ -952,13 +952,13 @@ bool GCChecker::processAllocationOfResult(const CallEvent &Call,
         // global roots.
         StringRef FDName =
             FD->getDeclName().isIdentifier() ? FD->getName() : "";
-        if (FDName.startswith_lower("jl_box_")) {
+        if (FDName.startswith("jl_box_") || FDName.startswith("ijl_box_")) {
           SVal Arg = Call.getArgSVal(0);
           if (auto CI = Arg.getAs<nonloc::ConcreteInt>()) {
             const llvm::APSInt &Value = CI->getValue();
             bool GloballyRooted = false;
             const int64_t NBOX_C = 1024;
-            if (FDName.startswith_lower("jl_box_u")) {
+            if (FDName.startswith("jl_box_u") || FDName.startswith("ijl_box_u")) {
               if (Value < NBOX_C) {
                 GloballyRooted = true;
               }
@@ -1068,10 +1068,10 @@ void GCChecker::checkDerivingExpr(const Expr *Result, const Expr *Parent,
     // TODO: We may want to refine this. This is to track pointers through the
     // array list in jl_module_t.
     bool ParentIsModule = isJuliaType(
-        [](StringRef Name) { return Name.endswith_lower("jl_module_t"); },
+        [](StringRef Name) { return Name.endswith("jl_module_t"); },
         Parent->getType());
     bool ResultIsArrayList = isJuliaType(
-        [](StringRef Name) { return Name.endswith_lower("arraylist_t"); },
+        [](StringRef Name) { return Name.endswith("arraylist_t"); },
         Result->getType());
     if (!(ParentIsModule && ResultIsArrayList) && isGCTracked(Parent)) {
       ResultTracked = false;
@@ -1428,7 +1428,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
     C.addTransition(
         State->set<GCValueMap>(Sym, ValueState::getRooted(nullptr, -1)));
     return true;
-  } else if (name == "jl_gc_enable") {
+  } else if (name == "jl_gc_enable" || name == "ijl_gc_enable") {
     ProgramStateRef State = C.getState();
     // Check for a literal argument
     SVal Arg = C.getSVal(CE->getArg(0));
@@ -1452,6 +1452,23 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
     C.addTransition(State->BindExpr(CE, C.getLocationContext(), Result));
     return true;
   }
+  else if (name == "uv_mutex_lock") {
+    ProgramStateRef State = C.getState();
+    if (State->get<SafepointDisabledAt>() == (unsigned)-1) {
+      C.addTransition(State->set<SafepointDisabledAt>(C.getStackFrame()->getIndex()));
+      return true;
+    }
+  }
+  else if (name == "uv_mutex_unlock") {
+    ProgramStateRef State = C.getState();
+    const auto *LCtx = C.getLocationContext();
+    const auto *FD = dyn_cast<FunctionDecl>(LCtx->getDecl());
+    if (State->get<SafepointDisabledAt>() == (unsigned)C.getStackFrame()->getIndex() &&
+        !isFDAnnotatedNotSafepoint(FD)) {
+      C.addTransition(State->set<SafepointDisabledAt>(-1));
+      return true;
+    }
+  }
   return false;
 }
 
diff --git a/src/clangsa/ImplicitAtomics.cpp b/src/clangsa/ImplicitAtomics.cpp
new file mode 100644
index 0000000000000..ed4ce6c1944a8
--- /dev/null
+++ b/src/clangsa/ImplicitAtomics.cpp
@@ -0,0 +1,204 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "clang/AST/ExprObjC.h"
+#include "clang/AST/ExprOpenMP.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
+
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class ImplicitAtomicsChecker // Clang Static Analyzer checker; all reports use ImplicitAtomicsBugType
+    : public Checker< check::PreStmt<CastExpr>,
+                      check::PreStmt<BinaryOperator>,
+                      check::PreStmt<UnaryOperator>,
+                      check::PreCall> {
+                      // check::Bind and check::Location are not subscribed; their
+                      // handlers are kept commented out at the end of this file.
+  BugType ImplicitAtomicsBugType{this, "Implicit Atomic seq_cst synchronization", "Atomics"};
+  // Reporting helpers; `desc` is appended to the bug type description.
+  void reportBug(const Stmt *S, CheckerContext &C) const;
+  void reportBug(const Stmt *S, CheckerContext &C, StringRef desc) const;
+  void reportBug(const CallEvent &S, CheckerContext &C, StringRef desc="") const;
+
+public:
+  //void checkLocation(SVal location, bool isLoad, const Stmt* S,
+  //                   CheckerContext &C) const;
+  //void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
+  void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
+  void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
+  void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+};
+} // end anonymous namespace
+
+// True when RD's declaration context is namespace std and RD's identifier
+// matches one of Names. A null RD or a non-identifier name yields false.
+// (An alternative would be ASTContext::getQualifiedTemplateName()->isDerivedFrom().)
+static bool hasStdClassWithName(const CXXRecordDecl *RD,
+                                ArrayRef<llvm::StringLiteral> Names) {
+  if (!RD)
+    return false;
+  if (!RD->getDeclContext()->isStdNamespace())
+    return false;
+  if (!RD->getDeclName().isIdentifier())
+    return false;
+  const StringRef ClassName = RD->getName();
+  return llvm::any_of(Names, [&](StringRef Candidate) { return ClassName == Candidate; });
+}
+
+constexpr llvm::StringLiteral STD_PTR_NAMES[] = {"atomic", "atomic_ref"};
+// True when RD is a class named `atomic` or `atomic_ref` in namespace std (null RD yields false).
+static bool isStdAtomic(const CXXRecordDecl *RD) {
+  return hasStdClassWithName(RD, STD_PTR_NAMES);
+}
+
+static bool isStdAtomicCall(const Expr *E) { // null-safe: is E's type (ignoring implicit nodes) std::atomic / std::atomic_ref?
+  return E && isStdAtomic(E->IgnoreImplicit()->getType()->getAsCXXRecordDecl());
+}
+
+static bool isStdAtomic(const Expr *E) { // true when E's type reports isAtomicType(); E is dereferenced, so it must be non-null
+  return E->getType()->isAtomicType();
+}
+
+void ImplicitAtomicsChecker::reportBug(const CallEvent &S, CheckerContext &C, StringRef desc) const {
+    reportBug(S.getOriginExpr(), C, desc); // forwards the call's origin expression to the (Stmt, C, desc) overload
+}
+
+// Pick the "best" node to attach the report to, so fewer duplicate reports are generated.
+void ImplicitAtomicsChecker::reportBug(const Stmt *S, CheckerContext &C) const {
+  for (;;) {
+    const auto *E = dyn_cast<Expr>(S);
+    if (!E)
+      break;
+    const Expr *Inner = E->IgnoreParenCasts();
+    if (const auto *Unary = dyn_cast<UnaryOperator>(Inner))
+      S = Unary->getSubExpr();
+    else if (const auto *Binary = dyn_cast<BinaryOperator>(Inner)) {
+      const Expr *L = Binary->getLHS(), *R = Binary->getRHS();
+      S = (!isStdAtomic(L) && isStdAtomic(R)) ? R : L;
+    }
+    else
+      break;
+  }
+  reportBug(S, C, "");
+}
+
+void ImplicitAtomicsChecker::reportBug(const Stmt *S, CheckerContext &C, StringRef desc) const {
+  SmallString<100> buf; // holds the report message built below
+  llvm::raw_svector_ostream os(buf);
+  os << ImplicitAtomicsBugType.getDescription() << desc; // message = bug type description + caller-supplied suffix
+  PathDiagnosticLocation N = PathDiagnosticLocation::createBegin(
+    S, C.getSourceManager(), C.getLocationContext()); // location: the beginning of S
+  auto report = std::make_unique<BasicBugReport>(ImplicitAtomicsBugType, buf.str(), N);
+  C.emitReport(std::move(report));
+}
+
+// Report CK_AtomicToNonAtomic casts (disabled alternative: isStdAtomic(CE) != isStdAtomic(CE->getSubExpr())).
+void ImplicitAtomicsChecker::checkPreStmt(const CastExpr *CE, CheckerContext &C) const {
+  if (CE->getCastKind() != CK_AtomicToNonAtomic)
+    return;
+  reportBug(CE, C);
+}
+
+// Report a unary operator when its result or its operand has an atomic type.
+// Address-of (UO_AddrOf) is never reported.
+void ImplicitAtomicsChecker::checkPreStmt(const UnaryOperator *UOp,
+                                          CheckerContext &C) const {
+  if (UOp->getOpcode() != UO_AddrOf &&
+      (isStdAtomic(UOp) || isStdAtomic(UOp->getSubExpr())))
+    reportBug(UOp, C);
+}
+
+// Report a binary operator when either operand or the result has an atomic type.
+void ImplicitAtomicsChecker::checkPreStmt(const BinaryOperator *BOp,
+                                          CheckerContext &C) const {
+  const Expr *Operands[] = {BOp->getLHS(), BOp->getRHS(), BOp};
+  if (llvm::any_of(Operands, [](const Expr *E) { return isStdAtomic(E); }))
+    reportBug(BOp, C);
+}
+
+// Report member calls on std::atomic / std::atomic_ref objects that are assignment
+// operators, ++/--, or conversion operators. Other calls are ignored.
+void ImplicitAtomicsChecker::checkPreCall(const CallEvent &Call,
+                                          CheckerContext &C) const {
+  const auto *MC = dyn_cast<CXXInstanceCall>(&Call);
+  if (!MC || !isStdAtomicCall(MC->getCXXThisExpr()))
+    return;
+  if (const auto *OC = dyn_cast<CXXMemberOperatorCall>(&Call)) {
+    OverloadedOperatorKind OOK = OC->getOverloadedOperator();
+    if (CXXOperatorCallExpr::isAssignmentOp(OOK) || OOK == OO_PlusPlus || OOK == OO_MinusMinus)
+      reportBug(Call, C, " (std::atomic)");
+  }
+  else if (dyn_cast_or_null<CXXConversionDecl>(MC->getDecl())) // getDecl() may be null; no unused binding
+    reportBug(Call, C, " (std::atomic)");
+}
+
+
+//// The following handlers are probably unnecessary and are left disabled:
+//
+//static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false) {
+//  const Expr *E = nullptr;
+//
+//  // Walk through lvalue casts to get the original expression
+//  // that syntactically caused the load.
+//  if (const Expr *expr = dyn_cast<Expr>(S))
+//    E = expr->IgnoreParenLValueCasts();
+//
+//  if (IsBind) {
+//    const VarDecl *VD;
+//    const Expr *Init;
+//    std::tie(VD, Init) = parseAssignment(S);
+//    if (VD && Init)
+//      E = Init;
+//  }
+//  return E;
+//}
+//
+//// load or bare symbol
+//void ImplicitAtomicsChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
+//                                           CheckerContext &C) const {
+//  const Expr *expr = getDereferenceExpr(S);
+//  assert(expr);
+//  if (isStdAtomic(expr))
+//    reportBug(S, C);
+//}
+//
+//// auto &r = *l, or store
+//void ImplicitAtomicsChecker::checkBind(SVal L, SVal V, const Stmt *S,
+//                                       CheckerContext &C) const {
+//  const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true);
+//  assert(expr);
+//  if (isStdAtomic(expr))
+//    reportBug(S, C, " (bind)");
+//}
+
+namespace clang {
+namespace ento {
+void registerImplicitAtomicsChecker(CheckerManager &mgr) { // registers ImplicitAtomicsChecker with the given manager
+  mgr.registerChecker<ImplicitAtomicsChecker>();
+}
+bool shouldRegisterImplicitAtomicsChecker(const CheckerManager &mgr) { // mgr is not consulted
+  return true; // unconditionally allow registration
+}
+} // namespace ento
+} // namespace clang
+
+#ifdef CLANG_PLUGIN // plugin entry points, compiled only when CLANG_PLUGIN is defined
+extern "C" const char clang_analyzerAPIVersionString[] =
+    CLANG_ANALYZER_API_VERSION_STRING;
+extern "C" void clang_registerCheckers(CheckerRegistry &registry) { // adds this checker to the registry
+  registry.addChecker<ImplicitAtomicsChecker>(
+      "julia.ImplicitAtomics", "Flags implicit atomic operations", "" // name, description; third argument presumably the docs URI - confirm
+  );
+}
+#endif
diff --git a/src/clangsa/ImplicitAtomics2.cpp b/src/clangsa/ImplicitAtomics2.cpp
new file mode 100644
index 0000000000000..b9ffc43bc22f8
--- /dev/null
+++ b/src/clangsa/ImplicitAtomics2.cpp
@@ -0,0 +1,155 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang-tidy/ClangTidy.h"
+#include "clang-tidy/ClangTidyCheck.h"
+#include "clang-tidy/ClangTidyModule.h"
+#include "clang-tidy/ClangTidyModuleRegistry.h"
+
+using namespace clang;
+using namespace clang::tidy;
+using namespace clang::ast_matchers;
+
+class ImplicitAtomicsChecker : public ClangTidyCheck { // clang-tidy check; registered as "concurrency-implicit-atomics" by the module below
+  void reportBug(const Stmt *S, StringRef desc=""); // emits the diagnostic; desc is appended to the message
+
+public:
+  ImplicitAtomicsChecker(StringRef Name, ClangTidyContext *Context);
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override; // installs the AST matchers
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override; // handles each match
+
+private: // (no private members are declared after this label)
+};
+
+// True when RD's declaration context is namespace std and RD's identifier
+// matches one of Names. A null RD or a non-identifier name yields false.
+// (An alternative would be ASTContext::getQualifiedTemplateName()->isDerivedFrom().)
+static bool hasStdClassWithName(const CXXRecordDecl *RD,
+                                ArrayRef<llvm::StringLiteral> Names) {
+  if (!RD)
+    return false;
+  if (!RD->getDeclContext()->isStdNamespace())
+    return false;
+  if (!RD->getDeclName().isIdentifier())
+    return false;
+  const StringRef ClassName = RD->getName();
+  return llvm::any_of(Names, [&](StringRef Candidate) { return ClassName == Candidate; });
+}
+
+constexpr llvm::StringLiteral STD_PTR_NAMES[] = {"atomic", "atomic_ref"};
+// True when RD is a class named `atomic` or `atomic_ref` in namespace std (null RD yields false).
+static bool isStdAtomic(const CXXRecordDecl *RD) {
+  return hasStdClassWithName(RD, STD_PTR_NAMES);
+}
+
+static bool isStdAtomicCall(const Expr *E) { // null-safe: is E's type (ignoring implicit nodes) std::atomic / std::atomic_ref?
+  return E && isStdAtomic(E->IgnoreImplicit()->getType()->getAsCXXRecordDecl());
+}
+
+static bool isStdAtomic(const Expr *E) { // true when E's type reports isAtomicType(); E is dereferenced, so it must be non-null
+  return E->getType()->isAtomicType();
+}
+
+// Emit the diagnostic at the "best" node for S, so fewer duplicate reports are generated.
+void ImplicitAtomicsChecker::reportBug(const Stmt *S, StringRef desc) {
+  for (;;) {
+    const auto *E = dyn_cast<Expr>(S);
+    if (!E)
+      break;
+    const Expr *Inner = E->IgnoreParenCasts();
+    if (const auto *Unary = dyn_cast<UnaryOperator>(Inner))
+      S = Unary->getSubExpr();
+    else if (const auto *Binary = dyn_cast<BinaryOperator>(Inner)) {
+      const Expr *L = Binary->getLHS(), *R = Binary->getRHS();
+      S = (!isStdAtomic(L) && isStdAtomic(R)) ? R : L;
+    }
+    else
+      break;
+  }
+  SmallString<100> Message;
+  llvm::raw_svector_ostream Out(Message);
+  Out << "Implicit Atomic seq_cst synchronization" << desc;
+  diag(S->getBeginLoc(), Message.str());
+}
+
+
+ImplicitAtomicsChecker::
+    ImplicitAtomicsChecker(StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context) { // forwards both arguments to the base class; no further setup
+}
+
+// Register the AST matchers; each node is bound under the id that check() looks up.
+void ImplicitAtomicsChecker::registerMatchers(MatchFinder *Finder) {
+  // casts of kind CK_AtomicToNonAtomic
+  const auto AtomicCast = castExpr(hasCastKind(CK_AtomicToNonAtomic)).bind("cast");
+  // every unary operator other than "&"
+  const auto UnaryOp = unaryOperator(unless(hasAnyOperatorName("&"))).bind("unary-op");
+  // every binary operator
+  const auto BinaryOp = binaryOperator().bind("binary-op");
+  // overloaded-operator calls and member calls share the "cxxcall" id
+  const auto OperatorCall = cxxOperatorCallExpr().bind("cxxcall");
+  const auto MemberCall = cxxMemberCallExpr().bind("cxxcall");
+  Finder->addMatcher(AtomicCast, this);
+  Finder->addMatcher(UnaryOp, this);
+  Finder->addMatcher(BinaryOp, this);
+  Finder->addMatcher(OperatorCall, this);
+  Finder->addMatcher(MemberCall, this);
+}
+
+// Match callback: examine whichever node was bound and report implicit atomic operations.
+void ImplicitAtomicsChecker::check(const MatchFinder::MatchResult &Result) {
+  if (const auto *UOp = Result.Nodes.getNodeAs<UnaryOperator>("unary-op")) {
+    const Expr *Sub = UOp->getSubExpr();
+    if (isStdAtomic(UOp) || isStdAtomic(Sub))
+      reportBug(UOp);
+  }
+  if (const auto *BOp = Result.Nodes.getNodeAs<BinaryOperator>("binary-op")) {
+    const Expr *Lhs = BOp->getLHS();
+    const Expr *Rhs = BOp->getRHS();
+    if (isStdAtomic(Lhs) || isStdAtomic(Rhs) || isStdAtomic(BOp))
+      reportBug(BOp);
+  }
+  if (const auto *CE = Result.Nodes.getNodeAs<CastExpr>("cast"))
+    reportBug(CE); // the "cast" matcher already restricts this to CK_AtomicToNonAtomic
+  if (const auto *Call = Result.Nodes.getNodeAs<CallExpr>("cxxcall")) {
+    if (const auto *OC = dyn_cast<CXXOperatorCallExpr>(Call)) {
+      const auto *CXXThisExpr = OC->getArg(0);
+      if (isStdAtomicCall(CXXThisExpr)) {
+        OverloadedOperatorKind OOK = OC->getOperator();
+        if (CXXOperatorCallExpr::isAssignmentOp(OOK) || OOK == OO_PlusPlus || OOK == OO_MinusMinus) {
+          reportBug(CXXThisExpr, " (std::atomic operator)");
+        }
+      }
+    }
+    else if (const auto *MC = dyn_cast<CXXMemberCallExpr>(Call)) {
+      const auto *CXXThisExpr = MC->getImplicitObjectArgument();
+      // getMethodDecl() is null for calls through a pointer to member; isa<> must not see null
+      const CXXMethodDecl *MD = MC->getMethodDecl();
+      if (MD && isa<CXXConversionDecl>(MD) && isStdAtomicCall(CXXThisExpr)) {
+        reportBug(CXXThisExpr, " (std::atomic cast)");
+      }
+    }
+  }
+}
+
+class ImplicitAtomicsCheckerModule : public ClangTidyModule { // clang-tidy module exposing the single check in this file
+public:
+  void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+    CheckFactories.registerCheck<ImplicitAtomicsChecker>("concurrency-implicit-atomics"); // check name as registered
+  }
+};
+
+namespace clang {
+namespace tidy {
+
+// Registers ImplicitAtomicsCheckerModule under the name "concurrency-module" via this statically initialized variable.
+static ClangTidyModuleRegistry::Add<::ImplicitAtomicsCheckerModule>
+    X("concurrency-module", "Adds my concurrency checks.");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the ImplicitAtomicsCheckerModule.
+volatile int ImplicitAtomicsCheckerModuleAnchorSource = 0; // NOTE(review): nothing in this file reads it; presumably referenced elsewhere - confirm
+
+} // namespace tidy
+} // namespace clang
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
new file mode 100644
index 0000000000000..f328906cf2b4b
--- /dev/null
+++ b/src/codegen-stubs.c
@@ -0,0 +1,134 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// This file provides a fallback implementation of the codegen plugin interface,
+// used when libjulia-codegen is not available.
+
+#include "julia.h"
+#include "julia_internal.h"
+
+#include "intrinsics.h"
+
+#define UNAVAILABLE { jl_errorf("%s: not available in this build of Julia", __func__); } // function body that reports the enclosing function's name through jl_errorf
+
+JL_DLLEXPORT void jl_dump_native_fallback(void *native_code,
+        const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
+        const char *sysimg_data, size_t sysimg_len) UNAVAILABLE
+JL_DLLEXPORT int32_t jl_get_llvm_gv_fallback(void *native_code, jl_value_t *p) UNAVAILABLE
+// The stubs below also have UNAVAILABLE as their whole body: calling them only reports the error.
+JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
+JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world,
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE
+JL_DLLEXPORT void *jl_get_llvmf_defn_fallback(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
+// Disassembler wrappers: unavailable as well.
+JL_DLLEXPORT void *jl_LLVMCreateDisasm_fallback(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE
+JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_fallback(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE
+
+JL_DLLEXPORT void jl_init_codegen_fallback(void) { } // no-op
+
+JL_DLLEXPORT int jl_getFunctionInfo_fallback(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline)
+{
+    return 0; // always 0; `frames` is never written
+}
+
+JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
+                       jl_method_instance_t **linfos, size_t n)
+{
+    (void)sysimage_base; (void)fptrs; (void)linfos; (void)n; // no-op: arguments are only marked as used
+}
+
+JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_fallback(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+{
+    return NULL; // never produces a code instance; both arguments are ignored
+}
+
+JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t *unspec)
+{
+    jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call); // release-store the address of jl_fptr_interpret_call into unspec->invoke
+}
+
+JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void)
+{
+    return 0; // constant 0 in the fallback
+}
+
+JL_DLLEXPORT int jl_compile_extern_c_fallback(void *llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
+{
+    return 0; // always 0; all arguments are ignored
+}
+
+JL_DLLEXPORT void jl_teardown_codegen_fallback(void)
+{
+} // no-op
+
+JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void)
+{
+    return 0; // constant 0 in the fallback
+}
+
+JL_DLLEXPORT void jl_lock_profile_fallback(void)
+{
+} // no-op
+
+JL_DLLEXPORT void jl_unlock_profile_fallback(void)
+{
+} // no-op
+
+JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, const jl_cgparams_t *cgparams, int _policy) UNAVAILABLE
+
+JL_DLLEXPORT void jl_dump_compiles_fallback(void *s)
+{
+} // no-op; `s` is ignored
+
+JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+
+JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+
+JL_DLLEXPORT void jl_get_function_id_fallback(void *native_code, jl_code_instance_t *ncode,
+        int32_t *func_idx, int32_t *specfunc_idx) UNAVAILABLE
+// Accessors on `native_code` and the type-lowering query: all report the UNAVAILABLE error.
+JL_DLLEXPORT void *jl_get_llvm_context_fallback(void *native_code) UNAVAILABLE
+
+JL_DLLEXPORT void *jl_get_llvm_function_fallback(void *native_code, uint32_t idx) UNAVAILABLE
+
+JL_DLLEXPORT void *jl_get_llvm_module_fallback(void *native_code) UNAVAILABLE
+
+JL_DLLEXPORT void *jl_type_to_llvm_fallback(jl_value_t *jt, bool_t *isboxed) UNAVAILABLE
+
+JL_DLLEXPORT jl_value_t *jl_get_libllvm_fallback(void) JL_NOTSAFEPOINT
+{
+    return jl_nothing; // always jl_nothing in the fallback
+}
+
+JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr)
+{
+    return 0; // always 0; dwAddr is ignored
+}
+
+JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE
+// Every LLVMExtra*_fallback in this group has UNAVAILABLE as its whole body; PM and the flags are ignored.
+JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraJuliaLICMPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddAllocOptPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_fallback(void *PM, bool_t imaging_mode) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t Strong) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 68300633f293f..2ed36f21fa1e2 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -84,6 +84,10 @@
 #endif
 #include <llvm/Target/TargetMachine.h>
 
+#include "llvm/Support/Path.h" // for llvm::sys::path
+#include <llvm/Bitcode/BitcodeReader.h>
+#include <llvm/Linker/Linker.h>
+
 using namespace llvm;
 
 typedef Instruction TerminatorInst;
@@ -110,19 +114,7 @@ extern "C" {
 
 #include "builtin_proto.h"
 
-#ifdef HAVE_SSP
-extern uintptr_t __stack_chk_guard;
 extern void __stack_chk_fail();
-#else
-JL_DLLEXPORT uintptr_t __stack_chk_guard = (uintptr_t)0xBAD57ACCBAD67ACC; // 0xBADSTACKBADSTACK
-JL_DLLEXPORT void __stack_chk_fail()
-{
-    /* put your panic function or similar in here */
-    fprintf(stderr, "fatal error: stack corruption detected\n");
-    gc_debug_critical_error();
-    abort(); // end with abort, since the compiler destroyed the stack upon entry to this function, there's no going back now
-}
-#endif
 
 #ifdef _OS_WINDOWS_
 #if defined(_CPU_X86_64_)
@@ -145,10 +137,6 @@ extern void _chkstk(void);
 #endif
 }
 
-#if defined(_COMPILER_MICROSOFT_) && !defined(__alignof__)
-#define __alignof__ __alignof
-#endif
-
 // llvm state
 extern JITEventListener *CreateJuliaJITEventListener();
 
@@ -387,43 +375,48 @@ static AttributeList get_attrs_zext(LLVMContext &C)
 
 // global vars
 static const auto jlRTLD_DEFAULT_var = new JuliaVariable{
-    "jl_RTLD_DEFAULT_handle",
+    XSTR(jl_RTLD_DEFAULT_handle), // XSTR presumably stringifies the identifier — confirm against its definition; the variable's LLVM type is T_pint8
     true,
     [](LLVMContext &C) { return T_pint8; },
 };
 #ifdef _OS_WINDOWS_
 static const auto jlexe_var = new JuliaVariable{
-    "jl_exe_handle",
+    XSTR(jl_exe_handle), // XSTR presumably stringifies the identifier — confirm against its definition
     true,
     [](LLVMContext &C) { return T_pint8; },
 };
 static const auto jldll_var = new JuliaVariable{
-    "jl_libjulia_internal_handle",
+    XSTR(jl_libjulia_handle), // jldll_var names jl_libjulia_handle
+    true,
+    [](LLVMContext &C) { return T_pint8; },
+};
+static const auto jldlli_var = new JuliaVariable{
+    XSTR(jl_libjulia_internal_handle), // jldlli_var names jl_libjulia_internal_handle; both handles share the T_pint8 type
     true,
     [](LLVMContext &C) { return T_pint8; },
 };
 #endif //_OS_WINDOWS_
 
 static const auto jlstack_chk_guard_var = new JuliaVariable{
-    "__stack_chk_guard",
+    XSTR(__stack_chk_guard), // XSTR presumably stringifies the identifier — confirm; the LLVM type is supplied by get_pjlvalue
     true,
     get_pjlvalue,
 };
 
 static const auto jlgetworld_global = new JuliaVariable{
-    "jl_world_counter",
+    XSTR(jl_world_counter), // XSTR presumably stringifies the identifier — confirm; typed T_size, and the only variable in this group whose second field is false
     false,
     [](LLVMContext &C) { return (Type*)T_size; },
 };
 
 static const auto jlboxed_int8_cache = new JuliaVariable{
-    "jl_boxed_int8_cache",
+    XSTR(jl_boxed_int8_cache), // XSTR presumably stringifies the identifier — confirm; typed as an array of 256 T_pjlvalue entries
     true,
     [](LLVMContext &C) { return (Type*)ArrayType::get(T_pjlvalue, 256); },
 };
 
 static const auto jlboxed_uint8_cache = new JuliaVariable{
-    "jl_boxed_uint8_cache",
+    XSTR(jl_boxed_uint8_cache), // XSTR presumably stringifies the identifier — confirm; typed as an array of 256 T_pjlvalue entries
     true,
     [](LLVMContext &C) { return (Type*)ArrayType::get(T_pjlvalue, 256); },
 };
@@ -440,96 +433,96 @@ static const auto jlpgcstack_func = new JuliaFunction{
 // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway,
 // because they may come from a gc-rooted location
 static const auto jlnew_func = new JuliaFunction{
-    "jl_new_structv",
+    XSTR(jl_new_structv), // XSTR presumably stringifies the identifier — confirm; type and attributes come from get_func_sig / get_func_attrs
     get_func_sig,
     get_func_attrs,
 };
 static const auto jlsplatnew_func = new JuliaFunction{
-    "jl_new_structt",
+    XSTR(jl_new_structt), // LLVM type: T_prjlvalue (T_prjlvalue, T_prjlvalue); attributes from get_func_attrs
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
             {T_prjlvalue, T_prjlvalue}, false); },
     get_func_attrs,
 };
 static const auto jlthrow_func = new JuliaFunction{
-    "jl_throw",
+    XSTR(jl_throw), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_void (CalleeRooted T_jlvalue*); every descriptor through jluboundserror_func uses get_attrs_noreturn
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlerror_func = new JuliaFunction{
-    "jl_error",
+    XSTR(jl_error), // LLVM type: T_void (T_pint8)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pint8}, false); },
     get_attrs_noreturn,
 };
 static const auto jlatomicerror_func = new JuliaFunction{
-    "jl_atomic_error",
+    XSTR(jl_atomic_error), // LLVM type: T_void (T_pint8)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pint8}, false); },
     get_attrs_noreturn,
 };
 static const auto jltypeerror_func = new JuliaFunction{
-    "jl_type_error",
+    XSTR(jl_type_error), // LLVM type: T_void (T_pint8, T_prjlvalue, CalleeRooted T_jlvalue*)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pint8, T_prjlvalue, PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlundefvarerror_func = new JuliaFunction{
-    "jl_undefined_var_error",
+    XSTR(jl_undefined_var_error), // LLVM type: T_void (CalleeRooted T_jlvalue*)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlboundserrorv_func = new JuliaFunction{
-    "jl_bounds_error_ints",
+    XSTR(jl_bounds_error_ints), // LLVM type: T_void (CalleeRooted T_jlvalue*, T_psize, T_size)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted), T_psize, T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jlboundserror_func = new JuliaFunction{
-    "jl_bounds_error_int",
+    XSTR(jl_bounds_error_int), // LLVM type: T_void (CalleeRooted T_jlvalue*, T_size)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted), T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jlvboundserror_func = new JuliaFunction{
-    "jl_bounds_error_tuple_int",
+    XSTR(jl_bounds_error_tuple_int), // LLVM type: T_void (T_pprjlvalue, T_size, T_size)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pprjlvalue, T_size, T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jluboundserror_func = new JuliaFunction{
-    "jl_bounds_error_unboxed_int",
+    XSTR(jl_bounds_error_unboxed_int), // LLVM type: T_void (Derived T_int8*, T_pjlvalue, T_size)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_int8, AddressSpace::Derived), T_pjlvalue, T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jlcheckassign_func = new JuliaFunction{
-    "jl_checked_assignment",
+    XSTR(jl_checked_assignment), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_void (T_pjlvalue, CalleeRooted T_jlvalue*); third field is nullptr
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pjlvalue, PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
     nullptr,
 };
 static const auto jldeclareconst_func = new JuliaFunction{
-    "jl_declare_constant",
+    XSTR(jl_declare_constant), // LLVM type: T_void (T_pjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jlgetbindingorerror_func = new JuliaFunction{
-    "jl_get_binding_or_error",
+    XSTR(jl_get_binding_or_error), // LLVM type: T_pjlvalue (T_pjlvalue, T_pjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_pjlvalue,
                 {T_pjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jlboundp_func = new JuliaFunction{
-    "jl_boundp",
+    XSTR(jl_boundp), // LLVM type: T_int32 (T_pjlvalue, T_pjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_int32,
                 {T_pjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jltopeval_func = new JuliaFunction{
-    "jl_toplevel_eval",
+    XSTR(jl_toplevel_eval),
     [](LLVMContext &C) { return FunctionType::get(T_pjlvalue,
                 {T_pjlvalue, T_pjlvalue}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -538,7 +531,7 @@ static const auto jltopeval_func = new JuliaFunction{
             None); },
 };
 static const auto jlcopyast_func = new JuliaFunction{
-    "jl_copy_ast",
+    XSTR(jl_copy_ast),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
                 {T_prjlvalue}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -547,7 +540,7 @@ static const auto jlcopyast_func = new JuliaFunction{
             None); },
 };
 //static const auto jlnsvec_func = new JuliaFunction{
-//    "jl_svec",
+//    XSTR(jl_svec),
 //    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
 //                {T_size}, true); },
 //    [](LLVMContext &C) { return AttributeList::get(C,
@@ -556,12 +549,12 @@ static const auto jlcopyast_func = new JuliaFunction{
 //            None); },
 //};
 static const auto jlapplygeneric_func = new JuliaFunction{
-    "jl_apply_generic",
+    XSTR(jl_apply_generic), // XSTR presumably stringifies the identifier — confirm; type and attributes come from get_func_sig / get_func_attrs
     get_func_sig,
     get_func_attrs,
 };
 static const auto jlinvoke_func = new JuliaFunction{
-    "jl_invoke",
+    XSTR(jl_invoke),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
                 {T_prjlvalue, T_pprjlvalue, T_uint32, T_prjlvalue}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -571,19 +564,19 @@ static const auto jlinvoke_func = new JuliaFunction{
              Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); },
 };
 static const auto jlmethod_func = new JuliaFunction{
-    "jl_method_def",
+    XSTR(jl_method_def), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_prjlvalue (T_prjlvalue, T_prjlvalue, T_prjlvalue, T_pjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
                 {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jlgenericfunction_func = new JuliaFunction{
-    "jl_generic_function_def",
+    XSTR(jl_generic_function_def), // LLVM type: T_prjlvalue (T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue, T_pjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
                 {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jllockvalue_func = new JuliaFunction{
-    "jl_lock_value",
+    XSTR(jl_lock_value),
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -592,7 +585,7 @@ static const auto jllockvalue_func = new JuliaFunction{
             {Attributes(C, {Attribute::NoCapture})}); },
 };
 static const auto jlunlockvalue_func = new JuliaFunction{
-    "jl_unlock_value",
+    XSTR(jl_unlock_value),
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -601,35 +594,35 @@ static const auto jlunlockvalue_func = new JuliaFunction{
             {Attributes(C, {Attribute::NoCapture})}); },
 };
 static const auto jlenter_func = new JuliaFunction{
-    "jl_enter_handler",
+    XSTR(jl_enter_handler), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_void (T_pint8)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_pint8}, false); },
     nullptr,
 };
 static const auto jl_current_exception_func = new JuliaFunction{
-    "jl_current_exception",
+    XSTR(jl_current_exception), // LLVM type: T_prjlvalue (), no arguments
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue, false); },
     nullptr,
 };
 static const auto jlleave_func = new JuliaFunction{
-    "jl_pop_handler",
+    XSTR(jl_pop_handler), // note: jlleave_func is bound to the symbol jl_pop_handler; LLVM type: T_void (T_int32)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_int32}, false); },
     nullptr,
 };
 static const auto jl_restore_excstack_func = new JuliaFunction{
-    "jl_restore_excstack",
+    XSTR(jl_restore_excstack), // LLVM type: T_void (T_size)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_size}, false); },
     nullptr,
 };
 static const auto jl_excstack_state_func = new JuliaFunction{
-    "jl_excstack_state",
+    XSTR(jl_excstack_state), // LLVM type: T_size (), no arguments
     [](LLVMContext &C) { return FunctionType::get(T_size, false); },
     nullptr,
 };
 static const auto jlegalx_func = new JuliaFunction{
-    "jl_egal__unboxed",
+    XSTR(jl_egal__unboxed),
     [](LLVMContext &C) {
         Type *T = PointerType::get(T_jlvalue, AddressSpace::Derived);
         return FunctionType::get(T_int32, {T, T, T_prjlvalue}, false); },
@@ -641,14 +634,14 @@ static const auto jlegalx_func = new JuliaFunction{
 static const auto jl_alloc_obj_func = new JuliaFunction{
     "julia.gc_alloc_obj",
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_pint8, T_size, T_prjlvalue}, false); },
+                {T_ppjlvalue, T_size, T_prjlvalue}, false); }, // args: T_ppjlvalue, T_size, T_prjlvalue; argument index 1 (the T_size) is the one the alloc-size attribute below refers to
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet::get(C, makeArrayRef({Attribute::getWithAllocSizeArgs(C, 1, None)})), // returns %1 bytes
             Attributes(C, {Attribute::NoAlias, Attribute::NonNull}),
             None); },
 };
 static const auto jl_newbits_func = new JuliaFunction{
-    "jl_new_bits",
+    XSTR(jl_new_bits),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
                 {T_prjlvalue, T_pint8}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -683,23 +676,23 @@ static const auto jl_write_barrier_func = new JuliaFunction{
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
             AttributeSet(),
-            None); },
+            {Attributes(C, {Attribute::ReadOnly})}); },
 };
 static const auto jlisa_func = new JuliaFunction{
-    "jl_isa",
+    XSTR(jl_isa), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_int32 (T_prjlvalue, T_prjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_int32,
             {T_prjlvalue, T_prjlvalue}, false); },
     nullptr,
 };
 
 static const auto jlsubtype_func = new JuliaFunction{
-    "jl_subtype",
+    XSTR(jl_subtype), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_int32 (T_prjlvalue, T_prjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_int32,
             {T_prjlvalue, T_prjlvalue}, false); },
     nullptr,
 };
 static const auto jlapplytype_func = new JuliaFunction{
-    "jl_instantiate_type_in_env",
+    XSTR(jl_instantiate_type_in_env),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
             {T_pjlvalue, T_pjlvalue, T_pprjlvalue}, false); },
     [](LLVMContext &C) {
@@ -711,7 +704,7 @@ static const auto jlapplytype_func = new JuliaFunction{
     },
 };
 static const auto jl_object_id__func = new JuliaFunction{
-    "jl_object_id_",
+    XSTR(jl_object_id_),
     [](LLVMContext &C) { return FunctionType::get(T_size,
             {T_prjlvalue, PointerType::get(T_int8, AddressSpace::Derived)}, false); },
     nullptr,
@@ -730,7 +723,7 @@ static const auto setjmp_func = new JuliaFunction{
             None); },
 };
 static const auto memcmp_func = new JuliaFunction{
-    "memcmp",
+    XSTR(memcmp),
     [](LLVMContext &C) { return FunctionType::get(T_int32,
             {T_pint8, T_pint8, T_size}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -740,25 +733,25 @@ static const auto memcmp_func = new JuliaFunction{
     // TODO: inferLibFuncAttributes(*memcmp_func, TLI);
 };
 static const auto jldlsym_func = new JuliaFunction{
-    "jl_load_and_lookup",
+    XSTR(jl_load_and_lookup), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_pvoidfunc (T_pint8, T_pint8, pointer-to-T_pint8 in address space 0)
     [](LLVMContext &C) { return FunctionType::get(T_pvoidfunc,
             {T_pint8, T_pint8, PointerType::get(T_pint8, 0)}, false); },
     nullptr,
 };
 static const auto jllazydlsym_func = new JuliaFunction{
-    "jl_lazy_load_and_lookup",
+    XSTR(jl_lazy_load_and_lookup), // LLVM type: T_pvoidfunc (T_prjlvalue, T_pint8)
     [](LLVMContext &C) { return FunctionType::get(T_pvoidfunc,
             {T_prjlvalue, T_pint8}, false); },
     nullptr,
 };
 static const auto jltypeassert_func = new JuliaFunction{
-    "jl_typeassert",
+    XSTR(jl_typeassert), // LLVM type: T_void (T_prjlvalue, T_prjlvalue)
     [](LLVMContext &C) { return FunctionType::get(T_void,
             {T_prjlvalue, T_prjlvalue}, false); },
     nullptr,
 };
 static const auto jlgetnthfieldchecked_func = new JuliaFunction{
-    "jl_get_nth_field_checked",
+    XSTR(jl_get_nth_field_checked),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
             {T_prjlvalue, T_size}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -767,7 +760,7 @@ static const auto jlgetnthfieldchecked_func = new JuliaFunction{
             None); },
 };
 static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{
-    "jl_get_cfunction_trampoline",
+    XSTR(jl_get_cfunction_trampoline),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
             {
                 T_prjlvalue, // f (object)
@@ -784,18 +777,18 @@ static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{
             None); },
 };
 static const auto diff_gc_total_bytes_func = new JuliaFunction{
-    "jl_gc_diff_total_bytes",
+    XSTR(jl_gc_diff_total_bytes), // XSTR presumably stringifies the identifier — confirm; LLVM type: T_int64 (), no arguments
     [](LLVMContext &C) { return FunctionType::get(T_int64, false); },
     nullptr,
 };
 static const auto sync_gc_total_bytes_func = new JuliaFunction{
-    "jl_gc_sync_total_bytes",
+    XSTR(jl_gc_sync_total_bytes), // LLVM type: T_int64 (T_int64)
     [](LLVMContext &C) { return FunctionType::get(T_int64,
             {T_int64}, false); },
     nullptr,
 };
 static const auto jlarray_data_owner_func = new JuliaFunction{
-    "jl_array_data_owner",
+    XSTR(jl_array_data_owner),
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
             {T_prjlvalue}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -805,7 +798,7 @@ static const auto jlarray_data_owner_func = new JuliaFunction{
 };
 #define BOX_FUNC(ct,rt,at,attrs)                                              \
 static const auto box_##ct##_func = new JuliaFunction{                        \
-    "jl_box_"#ct,                                                             \
+    XSTR(jl_box_##ct),                                                        \
     [](LLVMContext &C) { return FunctionType::get(rt,                         \
             {at}, false); },                                                  \
     attrs,                                                                    \
@@ -857,55 +850,22 @@ static const auto pointer_from_objref_func = new JuliaFunction{
             None); },
 };
 
-static const auto jltuple_func = new JuliaFunction{"jl_f_tuple", get_func_sig, get_func_attrs};
-static const std::map<jl_fptr_args_t, JuliaFunction*> builtin_func_map = {
-    { &jl_f_is,                 new JuliaFunction{"jl_f_is", get_func_sig, get_func_attrs} },
-    { &jl_f_typeof,             new JuliaFunction{"jl_f_typeof", get_func_sig, get_func_attrs} },
-    { &jl_f_sizeof,             new JuliaFunction{"jl_f_sizeof", get_func_sig, get_func_attrs} },
-    { &jl_f_issubtype,          new JuliaFunction{"jl_f_issubtype", get_func_sig, get_func_attrs} },
-    { &jl_f_isa,                new JuliaFunction{"jl_f_isa", get_func_sig, get_func_attrs} },
-    { &jl_f_typeassert,         new JuliaFunction{"jl_f_typeassert", get_func_sig, get_func_attrs} },
-    { &jl_f_ifelse,             new JuliaFunction{"jl_f_ifelse", get_func_sig, get_func_attrs} },
-    { &jl_f__apply_iterate,     new JuliaFunction{"jl_f__apply_iterate", get_func_sig, get_func_attrs} },
-    { &jl_f__apply_pure,        new JuliaFunction{"jl_f__apply_pure", get_func_sig, get_func_attrs} },
-    { &jl_f__call_latest,       new JuliaFunction{"jl_f__call_latest", get_func_sig, get_func_attrs} },
-    { &jl_f__call_in_world,     new JuliaFunction{"jl_f__call_in_world", get_func_sig, get_func_attrs} },
-    { &jl_f_throw,              new JuliaFunction{"jl_f_throw", get_func_sig, get_func_attrs} },
-    { &jl_f_tuple,              jltuple_func },
-    { &jl_f_svec,               new JuliaFunction{"jl_f_svec", get_func_sig, get_func_attrs} },
-    { &jl_f_applicable,         new JuliaFunction{"jl_f_applicable", get_func_sig, get_func_attrs} },
-    { &jl_f_invoke,             new JuliaFunction{"jl_f_invoke", get_func_sig, get_func_attrs} },
-    { &jl_f_invoke_kwsorter,    new JuliaFunction{"jl_f_invoke_kwsorter", get_func_sig, get_func_attrs} },
-    { &jl_f_isdefined,          new JuliaFunction{"jl_f_isdefined", get_func_sig, get_func_attrs} },
-    { &jl_f_getfield,           new JuliaFunction{"jl_f_getfield", get_func_sig, get_func_attrs} },
-    { &jl_f_setfield,           new JuliaFunction{"jl_f_setfield", get_func_sig, get_func_attrs} },
-    { &jl_f_swapfield,          new JuliaFunction{"jl_f_swapfield", get_func_sig, get_func_attrs} },
-    { &jl_f_modifyfield,        new JuliaFunction{"jl_f_modifyfield", get_func_sig, get_func_attrs} },
-    { &jl_f_fieldtype,          new JuliaFunction{"jl_f_fieldtype", get_func_sig, get_func_attrs} },
-    { &jl_f_nfields,            new JuliaFunction{"jl_f_nfields", get_func_sig, get_func_attrs} },
-    { &jl_f__expr,              new JuliaFunction{"jl_f__expr", get_func_sig, get_func_attrs} },
-    { &jl_f__typevar,           new JuliaFunction{"jl_f__typevar", get_func_sig, get_func_attrs} },
-    { &jl_f_arrayref,           new JuliaFunction{"jl_f_arrayref", get_func_sig, get_func_attrs} },
-    { &jl_f_const_arrayref,     new JuliaFunction{"jl_f_const_arrayref", get_func_sig, get_func_attrs} },
-    { &jl_f_arrayset,           new JuliaFunction{"jl_f_arrayset", get_func_sig, get_func_attrs} },
-    { &jl_f_arraysize,          new JuliaFunction{"jl_f_arraysize", get_func_sig, get_func_attrs} },
-    { &jl_f_apply_type,         new JuliaFunction{"jl_f_apply_type", get_func_sig, get_func_attrs} },
-};
+static const auto jltuple_func = new JuliaFunction{XSTR(jl_f_tuple), get_func_sig, get_func_attrs}; // XSTR presumably stringifies the identifier — confirm
+static std::map<jl_fptr_args_t, JuliaFunction*> builtin_func_map; // declared non-const and empty here; NOTE(review): presumably filled in at initialization elsewhere — confirm it is populated before first lookup
 
-static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction{"jl_new_opaque_closure_jlcall", get_func_sig, get_func_attrs};
+static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
 
 static int globalUnique = 0;
 
 // --- code generation ---
 extern "C" {
-    int jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
     jl_cgparams_t jl_default_cgparams = {1, 1, 0,
 #ifdef _OS_WINDOWS_
         0,
 #else
         1,
 #endif
-        jl_default_debug_info_kind,
+        (int) DICompileUnit::DebugEmissionKind::FullDebug, // FullDebug emission kind written inline as an int, rather than read from a separate global
         jl_rettype_inferred, NULL };
 }
 
@@ -946,11 +906,13 @@ static bool jl_is_pointerfree(jl_value_t* t)
 
 // these queries are usually related, but we split them out here
 // for convenience and clarity (and because it changes the calling convention)
+// n.b. this must include jl_is_datatype_singleton (ghostType) and primitive types
 static bool deserves_stack(jl_value_t* t)
 {
     if (!jl_is_concrete_immutable(t))
         return false;
-    return jl_datatype_isinlinealloc((jl_datatype_t*)t, 0);
+    jl_datatype_t *dt = (jl_datatype_t*)t; // NOTE(review): the cast assumes anything passing the concrete-immutable guard is a jl_datatype_t — confirm
+    return jl_is_datatype_singleton(dt) || jl_datatype_isinlinealloc(dt, 0); // a singleton is accepted outright; the inline-alloc query runs only for non-singletons
 }
 static bool deserves_argbox(jl_value_t* t)
 {
@@ -1159,15 +1121,17 @@ static Value *get_current_ptls(jl_codectx_t &ctx);
 static Value *get_current_signal_page(jl_codectx_t &ctx);
 static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
-static Instruction *tbaa_decorate(MDNode *md, Instruction *inst);
+Instruction *tbaa_decorate(MDNode *md, Instruction *inst);
 
 static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G)
 {
@@ -1770,7 +1734,7 @@ static void jl_init_function(Function *F)
 #ifdef JL_DISABLE_FPO
     F->addFnAttr("frame-pointer", "all");
 #endif
-#if !defined(JL_ASAN_ENABLED) && !defined(_OS_WINDOWS_)
+#if !defined(_COMPILER_ASAN_ENABLED_) && !defined(_OS_WINDOWS_)
     // ASAN won't like us accessing undefined memory causing spurious issues,
     // and Windows has platform-specific handling which causes it to mishandle
     // this annotation. Other platforms should just ignore this if they don't
@@ -1813,7 +1777,7 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
     // not invalid, consider if specialized signature is worthwhile
     if (prefer_specsig)
         return std::make_pair(true, false);
-    if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype))
+    if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type)
         return std::make_pair(true, false);
     if (jl_is_uniontype(rettype)) {
         bool allunbox;
@@ -1822,6 +1786,8 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
         if (nbytes > 0)
             return std::make_pair(true, false); // some elements of the union could be returned unboxed avoiding allocation
     }
+    if (jl_nparams(sig) <= 3) // few parameters == more efficient to pass directly
+        return std::make_pair(true, false);
     bool allSingleton = true;
     for (size_t i = 0; i < jl_nparams(sig); i++) {
         jl_value_t *sigt = jl_tparam(sig, i);
@@ -1839,28 +1805,12 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
 
 // Logging for code coverage and memory allocation
 
-const int logdata_blocksize = 32; // target getting nearby lines in the same general cache area and reducing calls to malloc by chunking
-typedef uint64_t logdata_block[logdata_blocksize];
-typedef StringMap< std::vector<logdata_block*> > logdata_t;
+JL_DLLEXPORT void jl_coverage_alloc_line(StringRef filename, int line);
+JL_DLLEXPORT uint64_t *jl_coverage_data_pointer(StringRef filename, int line);
+JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line);
 
-static uint64_t *allocLine(std::vector<logdata_block*> &vec, int line)
+static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const char *name)
 {
-    unsigned block = line / logdata_blocksize;
-    line = line % logdata_blocksize;
-    if (vec.size() <= block)
-        vec.resize(block + 1);
-    if (vec[block] == NULL) {
-        vec[block] = (logdata_block*)calloc(1, sizeof(logdata_block));
-    }
-    logdata_block &data = *vec[block];
-    if (data[line] == 0)
-        data[line] = 1;
-    return &data[line];
-}
-
-static void visitLine(jl_codectx_t &ctx, std::vector<logdata_block*> &vec, int line, Value *addend, const char* name)
-{
-    uint64_t *ptr = allocLine(vec, line);
     Value *pv = ConstantExpr::getIntToPtr(
         ConstantInt::get(T_size, (uintptr_t)ptr),
         T_pint64);
@@ -1872,28 +1822,16 @@ static void visitLine(jl_codectx_t &ctx, std::vector<logdata_block*> &vec, int l
 
 // Code coverage
 
-static logdata_t coverageData;
-
 static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line)
 {
     assert(!imaging_mode);
     if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
-    visitLine(ctx, coverageData[filename], line, ConstantInt::get(T_int64, 1), "lcnt");
-}
-
-static void coverageAllocLine(StringRef filename, int line)
-{
-    assert(!imaging_mode);
-    if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
-        return;
-    allocLine(coverageData[filename], line);
+    visitLine(ctx, jl_coverage_data_pointer(filename, line), ConstantInt::get(T_int64, 1), "lcnt"); // hands visitLine the counter address from jl_coverage_data_pointer plus a constant addend of 1
 }
 
 // Memory allocation log (malloc_log)
 
-static logdata_t mallocData;
-
 static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync)
 {
     assert(!imaging_mode);
@@ -1902,143 +1840,7 @@ static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Val
     Value *addend = sync
         ? ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync})
         : ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
-    visitLine(ctx, mallocData[filename], line, addend, "bytecnt");
-}
-
-// Resets the malloc counts.
-extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
-{
-    logdata_t::iterator it = mallocData.begin();
-    for (; it != mallocData.end(); it++) {
-        std::vector<logdata_block*> &bytes = (*it).second;
-        std::vector<logdata_block*>::iterator itb;
-        for (itb = bytes.begin(); itb != bytes.end(); itb++) {
-            if (*itb) {
-                logdata_block &data = **itb;
-                for (int i = 0; i < logdata_blocksize; i++) {
-                    if (data[i] > 0)
-                        data[i] = 1;
-                }
-            }
-        }
-    }
-    jl_gc_sync_total_bytes(0);
-}
-
-static void write_log_data(logdata_t &logData, const char *extension)
-{
-    std::string base = std::string(jl_options.julia_bindir);
-    base = base + "/../share/julia/base/";
-    logdata_t::iterator it = logData.begin();
-    for (; it != logData.end(); it++) {
-        std::string filename(it->first());
-        std::vector<logdata_block*> &values = it->second;
-        if (!values.empty()) {
-            if (!isabspath(filename.c_str()))
-                filename = base + filename;
-            std::ifstream inf(filename.c_str());
-            if (!inf.is_open())
-                continue;
-            std::string outfile = filename + extension;
-            std::ofstream outf(outfile.c_str(), std::ofstream::trunc | std::ofstream::out | std::ofstream::binary);
-            if (outf.is_open()) {
-                inf.exceptions(std::ifstream::badbit);
-                outf.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-                char line[1024];
-                int l = 1;
-                unsigned block = 0;
-                while (!inf.eof()) {
-                    inf.getline(line, sizeof(line));
-                    if (inf.fail()) {
-                        if (inf.eof())
-                            break; // no content on trailing line
-                        // Read through lines longer than sizeof(line)
-                        inf.clear();
-                        inf.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
-                    }
-                    logdata_block *data = NULL;
-                    if (block < values.size()) {
-                        data = values[block];
-                    }
-                    uint64_t value = data ? (*data)[l] : 0;
-                    if (++l >= logdata_blocksize) {
-                        l = 0;
-                        block++;
-                    }
-                    outf.width(9);
-                    if (value == 0)
-                        outf << '-';
-                    else
-                        outf << (value - 1);
-                    outf.width(0);
-                    outf << " " << line << '\n';
-                }
-                outf.close();
-            }
-            inf.close();
-        }
-    }
-}
-
-static void write_lcov_data(logdata_t &logData, const std::string &outfile)
-{
-    std::ofstream outf(outfile.c_str(), std::ofstream::ate | std::ofstream::out | std::ofstream::binary);
-    //std::string base = std::string(jl_options.julia_bindir);
-    //base = base + "/../share/julia/base/";
-    logdata_t::iterator it = logData.begin();
-    for (; it != logData.end(); it++) {
-        StringRef filename = it->first();
-        const std::vector<logdata_block*> &values = it->second;
-        if (!values.empty()) {
-            outf << "SF:" << filename.str() << '\n';
-            size_t n_covered = 0;
-            size_t n_instrumented = 0;
-            size_t lno = 0;
-            for (auto &itv : values) {
-                if (itv) {
-                    logdata_block &data = *itv;
-                    for (int i = 0; i < logdata_blocksize; i++) {
-                        auto cov = data[i];
-                        if (cov > 0) {
-                            n_instrumented++;
-                            if (cov > 1)
-                                n_covered++;
-                            outf << "DA:" << lno << ',' << (cov - 1) << '\n';
-                        }
-                        lno++;
-                    }
-                }
-                else {
-                    lno += logdata_blocksize;
-                }
-            }
-            outf << "LH:" << n_covered << '\n';
-            outf << "LF:" << n_instrumented << '\n';
-            outf << "end_of_record\n";
-        }
-    }
-    outf.close();
-}
-
-extern "C" void jl_write_coverage_data(const char *output)
-{
-    if (output) {
-        StringRef output_pattern(output);
-        if (output_pattern.endswith(".info"))
-            write_lcov_data(coverageData, jl_format_filename(output_pattern));
-    }
-    else {
-        std::string stm;
-        raw_string_ostream(stm) << "." << jl_getpid() << ".cov";
-        write_log_data(coverageData, stm.c_str());
-    }
-}
-
-extern "C" void jl_write_malloc_log(void)
-{
-    std::string stm;
-    raw_string_ostream(stm) << "." << jl_getpid() << ".mem";
-    write_log_data(mallocData, stm.c_str());
+    visitLine(ctx, jl_malloc_data_pointer(filename, line), addend, "bytecnt");
 }
 
 // --- constant determination ---
@@ -2059,6 +1861,7 @@ static void cg_bdw(jl_codectx_t &ctx, jl_binding_t *b)
 
 static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, size_t nargs)
 {
+    assert(nargs > 1);
     jl_value_t **v = (jl_value_t**)alloca(sizeof(jl_value_t*) * nargs);
     for (size_t i = 0; i < nargs; i++) {
         if (!args[i].constant)
@@ -2118,7 +1921,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
     }
     if (jl_is_expr(ex)) {
         jl_expr_t *e = (jl_expr_t*)ex;
-        if (e->head == call_sym) {
+        if (e->head == jl_call_sym) {
             jl_value_t *f = static_eval(ctx, jl_exprarg(e, 0));
             if (f) {
                 if (jl_array_dim0(e->args) == 3 && f == jl_builtin_getfield) {
@@ -2168,7 +1971,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                 }
             }
         }
-        else if (e->head == static_parameter_sym) {
+        else if (e->head == jl_static_parameter_sym) {
             size_t idx = jl_unbox_long(jl_exprarg(e, 0));
             if (idx <= jl_svec_len(ctx.linfo->sparam_vals)) {
                 jl_value_t *e = jl_svecref(ctx.linfo->sparam_vals, idx - 1);
@@ -2225,7 +2028,7 @@ static std::set<int> assigned_in_try(jl_array_t *stmts, int s, long l)
     for(int i=s; i <= l; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts,i);
         if (jl_is_expr(st)) {
-            if (((jl_expr_t*)st)->head == assign_sym) {
+            if (((jl_expr_t*)st)->head == jl_assign_sym) {
                 jl_value_t *ar = jl_exprarg(st, 0);
                 if (jl_is_slot(ar)) {
                     av.insert(jl_slot_number(ar)-1);
@@ -2242,7 +2045,7 @@ static void mark_volatile_vars(jl_array_t *stmts, std::vector<jl_varinfo_t> &slo
     for (int i = 0; i < (int)slength; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts, i);
         if (jl_is_expr(st)) {
-            if (((jl_expr_t*)st)->head == enter_sym) {
+            if (((jl_expr_t*)st)->head == jl_enter_sym) {
                 int last = jl_unbox_long(jl_exprarg(st, 0));
                 std::set<int> as = assigned_in_try(stmts, i + 1, last);
                 for (int j = 0; j < (int)slength; j++) {
@@ -2273,14 +2076,14 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr)
     }
     else if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
-        if (e->head == method_sym) {
+        if (e->head == jl_method_sym) {
             simple_use_analysis(ctx, jl_exprarg(e, 0));
             if (jl_expr_nargs(e) > 1) {
                 simple_use_analysis(ctx, jl_exprarg(e, 1));
                 simple_use_analysis(ctx, jl_exprarg(e, 2));
             }
         }
-        else if (e->head == assign_sym) {
+        else if (e->head == jl_assign_sym) {
             // don't consider assignment LHS as a variable "use"
             simple_use_analysis(ctx, jl_exprarg(e, 1));
         }
@@ -2376,25 +2179,18 @@ static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const
                                Value *nullcheck1, Value *nullcheck2)
 {
     if (jl_pointer_egal(arg1.typ) || jl_pointer_egal(arg2.typ)) {
-        assert((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant) &&
-                "Expected unboxed cases to be handled earlier");
-        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.V;
-        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.V;
-        varg1 = maybe_decay_tracked(ctx, varg1);
-        varg2 = maybe_decay_tracked(ctx, varg2);
-        if (cast<PointerType>(varg1->getType())->getAddressSpace() != cast<PointerType>(varg2->getType())->getAddressSpace()) {
-            varg1 = decay_derived(ctx, varg1);
-            varg2 = decay_derived(ctx, varg2);
-        }
-        return ctx.builder.CreateICmpEQ(emit_bitcast(ctx, varg1, T_pint8),
-                                        emit_bitcast(ctx, varg2, T_pint8));
+        // if we can be certain we won't try to load from the pointer (because
+        // we know boxed is trivial), we can skip the separate null checks
+        // and just do the ICmpEQ test
+        if (!arg1.TIndex && !arg2.TIndex)
+            nullcheck1 = nullcheck2 = nullptr;
     }
-
     return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] {
-        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, value_to_pointer(ctx, arg1).V, T_pjlvalue);
-        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, value_to_pointer(ctx, arg2).V, T_pjlvalue);
-        varg1 = decay_derived(ctx, varg1);
-        varg2 = decay_derived(ctx, varg2);
+        Value *varg1 = decay_derived(ctx, boxed(ctx, arg1));
+        Value *varg2 = decay_derived(ctx, boxed(ctx, arg2));
+        if (jl_pointer_egal(arg1.typ) || jl_pointer_egal(arg2.typ)) {
+            return ctx.builder.CreateICmpEQ(varg1, varg2);
+        }
         Value *neq = ctx.builder.CreateICmpNE(varg1, varg2);
         return emit_guarded_test(ctx, neq, true, [&] {
             Value *dtarg = emit_typeof_boxed(ctx, arg1);
@@ -2632,27 +2428,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
         });
     }
 
-    // If either sides is boxed or can be trivially boxed,
-    // we'll prefer to do a pointer check.
-    // At this point, we know that at least one of the arguments isn't a constant
-    // so a runtime content check will involve at least one load from the
-    // pointer (and likely a type check)
-    // so a pointer comparison should be no worse than that even in imaging mode
-    // when the constant pointer has to be loaded.
-    // Note that we ignore nullcheck, since in the case where it may be set, we
-    // also knew the types of both fields must be the same so there cannot be
-    // any unboxed values on either side.
-    if (jl_pointer_egal(rt1) || jl_pointer_egal(rt2)) {
-        // n.b. Vboxed == isboxed || Tindex
-        if (!(arg1.Vboxed || arg1.constant) || !(arg2.Vboxed || arg2.constant))
-            return ConstantInt::get(T_int1, 0);
-        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, T_pjlvalue);
-        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, T_pjlvalue);
-        return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
-    }
-
-    // TODO: handle the case where arg1.typ != arg2.typ, or when one of these isn't union,
-    //       or when the union can be pointer
+    // TODO: handle the case where arg1.typ is not exactly arg2.typ, or when
+    // one of these isn't union, or when the union can be pointer
     if (arg1.TIndex && arg2.TIndex && jl_egal(arg1.typ, arg2.typ) &&
         jl_is_uniontype(arg1.typ) && is_uniontype_allunboxed(arg1.typ))
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] {
@@ -2662,6 +2439,102 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     return emit_box_compare(ctx, arg1, arg2, nullcheck1, nullcheck2);
 }
 
+static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
+                           const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop)
+{ // Shared codegen for setfield!/replacefield!/swapfield!/modifyfield!: returns true with *ret set when code (or an error) was emitted, false when this call is not handled here.
+    bool issetfield = f == jl_builtin_setfield;
+    bool isreplacefield = f == jl_builtin_replacefield;
+    bool isswapfield = f == jl_builtin_swapfield;
+    bool ismodifyfield = f == jl_builtin_modifyfield;
+    const jl_cgval_t undefval;
+    const jl_cgval_t &obj = argv[1];
+    const jl_cgval_t &fld = argv[2];
+    jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3]; // value operand; held by copy because it may be re-typed after the typecheck below
+    const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval; // argv[3] for replace/modify, otherwise a default-constructed placeholder
+    enum jl_memory_order order = jl_memory_order_notatomic; // default when no ordering argument is passed
+    const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
+    if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) { // an explicit ordering argument is present
+        const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant) // ordering is not a compile-time constant
+            return false;
+        order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
+    }
+    enum jl_memory_order fail_order = order; // failure ordering defaults to the success ordering
+    if (isreplacefield && nargs == 6) { // replacefield! called with a separate failure ordering
+        const jl_cgval_t &ord = argv[6];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant) // failure ordering is not a compile-time constant
+            return false;
+        fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    }
+    if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) { // reject invalid orderings and a failure ordering that exceeds the success ordering
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        *ret = jl_cgval_t(); // unreachable
+        return true;
+    }
+
+    jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
+    if (jl_is_datatype(uty) && jl_struct_try_layout(uty)) { // NOTE(review): presumably requires that a struct layout is available for uty - confirm
+        ssize_t idx = -1; // 0-based field index; stays -1 when the field cannot be resolved from a constant
+        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) { // field given as a constant Symbol
+            idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
+        }
+        else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) { // field given as a constant 1-based integer
+            ssize_t i = jl_unbox_long(fld.constant);
+            if (i > 0 && i <= jl_datatype_nfields(uty)) // only accept in-range indices
+                idx = i - 1;
+        }
+        if (idx != -1) {
+            jl_value_t *ft = jl_field_type(uty, idx);
+            if (!jl_has_free_typevars(ft)) {
+                if (!ismodifyfield && !jl_subtype(val.typ, ft)) { // modifyfield! is excluded from this check on val
+                    emit_typecheck(ctx, val, ft, fname);
+                    val = update_julia_type(ctx, val, ft);
+                }
+                // TODO: attempt better codegen for approximate types
+                bool isboxed = jl_field_isptr(uty, idx);
+                bool isatomic = jl_field_isatomic(uty, idx);
+                bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE; // atomic, non-pointer field whose size exceeds MAX_ATOMIC_SIZE
+                if (isatomic == (order == jl_memory_order_notatomic)) { // field atomicity and requested ordering disagree
+                    emit_atomic_error(ctx,
+                            issetfield ?
+                            (isatomic ? "setfield!: atomic field cannot be written non-atomically"
+                                      : "setfield!: non-atomic field cannot be written atomically") :
+                            isreplacefield ?
+                            (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
+                                      : "replacefield!: non-atomic field cannot be written atomically") :
+                            isswapfield ?
+                            (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
+                                      : "swapfield!: non-atomic field cannot be written atomically") :
+                            (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
+                                      : "modifyfield!: non-atomic field cannot be written atomically"));
+                    *ret = jl_cgval_t();
+                    return true;
+                }
+                if (isatomic == (fail_order == jl_memory_order_notatomic)) { // same check for the failure ordering; it differs from order only for 6-argument replacefield!
+                    emit_atomic_error(ctx,
+                            (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
+                                      : "replacefield!: non-atomic field cannot be accessed atomically"));
+                    *ret = jl_cgval_t();
+                    return true;
+                }
+                *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, true,
+                        (needlock || order <= jl_memory_order_notatomic)
+                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                        : get_llvm_atomic_order(order),
+                        (needlock || fail_order <= jl_memory_order_notatomic)
+                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                        : get_llvm_atomic_order(fail_order),
+                        needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
+                        modifyop, fname);
+                return true;
+            }
+        }
+    }
+    return false; // object type or field not statically resolvable: nothing emitted for the field operation
+}
+
 static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     emit_function(
         jl_method_instance_t *lam,
@@ -2843,28 +2716,28 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     *ret = ghostValue(ety);
                 }
                 else if (!isboxed && jl_is_uniontype(ety)) {
-                    Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
-                    Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
-                    // isbits union selector bytes are stored after a->maxsize
-                    Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
-                    Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
+                    Value *data = emit_arrayptr(ctx, ary, ary_ex);
                     Value *offset = emit_arrayoffset(ctx, ary, nd);
-                    Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
-                    Value *selidx_m = emit_arraylen(ctx, ary);
-                    Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                    Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
+                    Value *ptindex;
+                    if (elsz == 0) {
+                        ptindex = data;
+                    }
+                    else {
+                        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
+                        data = emit_bitcast(ctx, data, AT->getPointerTo());
+                        // isbits union selector bytes are stored after a->maxsize
+                        Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
+                        Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
+                        Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
+                        Value *selidx_m = emit_arraylen(ctx, ary);
+                        Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
+                        ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
+                        data = ctx.builder.CreateInBoundsGEP(AT, data, idx);
+                    }
                     ptindex = emit_bitcast(ctx, ptindex, T_pint8);
                     ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
                     ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
-                    Instruction *tindex = tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1)));
-                    tindex->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
-                        ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
-                        ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
-                    AllocaInst *lv = emit_static_alloca(ctx, AT);
-                    if (al > 1)
-                        lv->setAlignment(Align(al));
-                    emit_memcpy(ctx, lv, tbaa_arraybuf, ctx.builder.CreateInBoundsGEP(AT, data, idx), tbaa_arraybuf, elsz, al, false);
-                    *ret = mark_julia_slot(lv, ety, ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex), tbaa_arraybuf);
+                    *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, tbaa_arraybuf, true, union_max, tbaa_arrayselbyte);
                 }
                 else {
                     MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.aliasscope : nullptr;
@@ -2883,7 +2756,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
     else if (f == jl_builtin_arrayset && nargs >= 4) {
         const jl_cgval_t &ary = argv[2];
-        const jl_cgval_t &val = argv[3];
+        jl_cgval_t val = argv[3];
         bool indices_ok = true;
         for (size_t i = 4; i <= nargs; i++) {
             if (argv[i].typ != (jl_value_t*)jl_long_type) {
@@ -2896,101 +2769,110 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             jl_value_t *ety = jl_tparam0(aty_dt);
             jl_value_t *ndp = jl_tparam1(aty_dt);
             if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) {
-                if (jl_subtype(val.typ, ety)) { // TODO: probably should just convert this to a type-assert
-                    size_t elsz = 0, al = 0;
-                    int union_max = jl_islayout_inline(ety, &elsz, &al);
-                    bool isboxed = (union_max == 0);
-                    if (isboxed)
-                        ety = (jl_value_t*)jl_any_type;
-                    jl_value_t *ary_ex = jl_exprarg(ex, 2);
-                    ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
-                    jl_value_t *boundscheck = argv[1].constant;
-                    emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
-                    Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
-                    if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
-                        // no-op
+                if (!jl_subtype(val.typ, ety)) {
+                    emit_typecheck(ctx, val, ety, "arrayset");
+                    val = update_julia_type(ctx, val, ety);
+                }
+                size_t elsz = 0, al = 0;
+                int union_max = jl_islayout_inline(ety, &elsz, &al);
+                bool isboxed = (union_max == 0);
+                if (isboxed)
+                    ety = (jl_value_t*)jl_any_type;
+                jl_value_t *ary_ex = jl_exprarg(ex, 2);
+                ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
+                jl_value_t *boundscheck = argv[1].constant;
+                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
+                Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
+                if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
+                    // no-op
+                }
+                else {
+                    PHINode *data_owner = NULL; // owner object against which the write barrier must check
+                    if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
+                        Value *aryv = boxed(ctx, ary);
+                        Value *flags = emit_arrayflags(ctx, ary);
+                        // the owner of the data is ary itself except if ary->how == 3
+                        flags = ctx.builder.CreateAnd(flags, 3);
+                        Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
+                        BasicBlock *curBB = ctx.builder.GetInsertBlock();
+                        BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
+                        BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
+                        ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
+                        ctx.builder.SetInsertPoint(ownedBB);
+                        // load owner pointer
+                        Instruction *own_ptr;
+                        if (jl_is_long(ndp)) {
+                            own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
+                                    ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
+                                        emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
+                                        jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
+                                    Align(sizeof(void*)));
+                            tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
+                        }
+                        else {
+                            own_ptr = ctx.builder.CreateCall(
+                                prepare_call(jlarray_data_owner_func),
+                                {aryv});
+                        }
+                        ctx.builder.CreateBr(mergeBB);
+                        ctx.builder.SetInsertPoint(mergeBB);
+                        data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
+                        data_owner->addIncoming(aryv, curBB);
+                        data_owner->addIncoming(own_ptr, ownedBB);
                     }
-                    else {
-                        PHINode *data_owner = NULL; // owner object against which the write barrier must check
-                        if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
-                            Value *aryv = boxed(ctx, ary);
-                            Value *flags = emit_arrayflags(ctx, ary);
-                            // the owner of the data is ary itself except if ary->how == 3
-                            flags = ctx.builder.CreateAnd(flags, 3);
-                            Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
-                            BasicBlock *curBB = ctx.builder.GetInsertBlock();
-                            BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
-                            BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
-                            ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
-                            ctx.builder.SetInsertPoint(ownedBB);
-                            // load owner pointer
-                            Instruction *own_ptr;
-                            if (jl_is_long(ndp)) {
-                                own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
-                                        ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
-                                            emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
-                                            jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
-                                        Align(sizeof(void*)));
-                                tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
-                            }
-                            else {
-                                own_ptr = ctx.builder.CreateCall(
-                                    prepare_call(jlarray_data_owner_func),
-                                    {aryv});
-                            }
-                            ctx.builder.CreateBr(mergeBB);
-                            ctx.builder.SetInsertPoint(mergeBB);
-                            data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
-                            data_owner->addIncoming(aryv, curBB);
-                            data_owner->addIncoming(own_ptr, ownedBB);
+                    if (!isboxed && jl_is_uniontype(ety)) {
+                        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
+                        Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
+                        Value *offset = emit_arrayoffset(ctx, ary, nd);
+                        // compute tindex from val
+                        jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
+                        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
+                        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
+                        Value *ptindex;
+                        if (elsz == 0) {
+                            ptindex = data;
                         }
-                        if (!isboxed && jl_is_uniontype(ety)) {
-                            Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
-                            Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
-                            // compute tindex from val
-                            jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
-                            Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
-                            tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
+                        else {
                             Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
                             Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
-                            Value *offset = emit_arrayoffset(ctx, ary, nd);
                             Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
                             Value *selidx_m = emit_arraylen(ctx, ary);
                             Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                            Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
-                            ptindex = emit_bitcast(ctx, ptindex, T_pint8);
-                            ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
-                            ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
-                            tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
-                            if (jl_is_datatype(val.typ) && jl_datatype_size(val.typ) == 0) {
-                                // no-op
-                            }
-                            else {
-                                // copy data
-                                Value *addr = ctx.builder.CreateInBoundsGEP(AT, data, idx);
-                                emit_unionmove(ctx, addr, tbaa_arraybuf, val, nullptr);
-                            }
+                            ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
+                            data = ctx.builder.CreateInBoundsGEP(AT, data, idx);
                         }
-                        else {
-                            typed_store(ctx,
-                                        emit_arrayptr(ctx, ary, ary_ex, isboxed),
-                                        idx, val, jl_cgval_t(), ety,
-                                        isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
-                                        ctx.aliasscope,
-                                        data_owner,
-                                        isboxed,
-                                        isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                                        isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                                        0,
-                                        false,
-                                        true,
-                                        false,
-                                        false);
+                        ptindex = emit_bitcast(ctx, ptindex, T_pint8);
+                        ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
+                        ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
+                        tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
+                        if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) {
+                            // copy data (if any)
+                            emit_unionmove(ctx, data, tbaa_arraybuf, val, nullptr);
                         }
                     }
-                    *ret = ary;
-                    return true;
+                    else {
+                        typed_store(ctx,
+                                    emit_arrayptr(ctx, ary, ary_ex, isboxed),
+                                    idx, val, jl_cgval_t(), ety,
+                                    isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
+                                    ctx.aliasscope,
+                                    data_owner,
+                                    isboxed,
+                                    isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                                    isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                                    0,
+                                    false,
+                                    true,
+                                    false,
+                                    false,
+                                    false,
+                                    false,
+                                    nullptr,
+                                    "");
+                    }
                 }
+                *ret = ary;
+                return true;
             }
         }
     }
@@ -3128,87 +3010,9 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
     else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
              (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
-             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6))) {
-        bool issetfield = f == jl_builtin_setfield;
-        bool isreplacefield = f == jl_builtin_replacefield;
-        const jl_cgval_t undefval;
-        const jl_cgval_t &obj = argv[1];
-        const jl_cgval_t &fld = argv[2];
-        const jl_cgval_t &val = argv[isreplacefield ? 4 : 3];
-        const jl_cgval_t &cmp = isreplacefield ? argv[3] : undefval;
-        enum jl_memory_order order = jl_memory_order_notatomic;
-        if (nargs >= (isreplacefield ? 5 : 4)) {
-            const jl_cgval_t &ord = argv[isreplacefield ? 5 : 4];
-            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type,
-                    issetfield ? "setfield!" : isreplacefield ? "replacefield!" : "swapfield!");
-            if (!ord.constant)
-                return false;
-            order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
-        }
-        enum jl_memory_order fail_order = order;
-        if (isreplacefield && nargs == 6) {
-            const jl_cgval_t &ord = argv[6];
-            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "replacefield!");
-            if (!ord.constant)
-                return false;
-            fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
-        }
-        if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
-            emit_atomic_error(ctx, "invalid atomic ordering");
-            *ret = jl_cgval_t(); // unreachable
-            return true;
-        }
-
-        jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-        if (jl_is_datatype(uty) && jl_struct_try_layout(uty)) {
-            ssize_t idx = -1;
-            if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
-                idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
-            }
-            else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) {
-                ssize_t i = jl_unbox_long(fld.constant);
-                if (i > 0 && i <= jl_datatype_nfields(uty))
-                    idx = i - 1;
-            }
-            if (idx != -1) {
-                jl_value_t *ft = jl_svecref(uty->types, idx);
-                if (!jl_has_free_typevars(ft) && jl_subtype(val.typ, ft)) {
-                    // TODO: attempt better codegen for approximate types
-                    bool isboxed = jl_field_isptr(uty, idx);
-                    bool isatomic = jl_field_isatomic(uty, idx);
-                    bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE;
-                    if (isatomic == (order == jl_memory_order_notatomic)) {
-                        emit_atomic_error(ctx,
-                                issetfield ?
-                                (isatomic ? "setfield!: atomic field cannot be written non-atomically"
-                                          : "setfield!: non-atomic field cannot be written atomically") :
-                                isreplacefield ?
-                                (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
-                                          : "replacefield!: non-atomic field cannot be written atomically") :
-                                (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
-                                          : "swapfield!: non-atomic field cannot be written atomically"));
-                        *ret = jl_cgval_t();
-                        return true;
-                    }
-                    if (isatomic == (fail_order == jl_memory_order_notatomic)) {
-                        emit_atomic_error(ctx,
-                                (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
-                                          : "replacefield!: non-atomic field cannot be accessed atomically"));
-                        *ret = jl_cgval_t();
-                        return true;
-                    }
-                    *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, true,
-                            (needlock || order <= jl_memory_order_notatomic)
-                            ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                            : get_llvm_atomic_order(order),
-                            (needlock || fail_order <= jl_memory_order_notatomic)
-                            ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                            : get_llvm_atomic_order(fail_order),
-                            needlock, issetfield, isreplacefield);
-                    return true;
-                }
-            }
-        }
+             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) ||
+             (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
+        return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr);
     }
 
     else if (f == jl_builtin_nfields && nargs == 1) {
@@ -3435,7 +3239,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
 // Returns T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     // emit arguments
     SmallVector<Value*, 3> theArgs;
@@ -3459,14 +3263,14 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
 }
 // Returns T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, cc);
 }
 
 
 static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
 {
     // emit specialized call site
     bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
@@ -3546,7 +3350,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     jl_cgval_t retval;
     switch (returninfo.cc) {
         case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, inferred_retty);
+            retval = mark_julia_type(ctx, call, true, jlretty);
             break;
         case jl_returninfo_t::Register:
             retval = mark_julia_type(ctx, call, false, jlretty);
@@ -3576,20 +3380,18 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
             break;
     }
     // see if inference has a different / better type for the call than the lambda
-    if (inferred_retty != retval.typ)
-        retval = update_julia_type(ctx, retval, inferred_retty);
-    return retval;
+    return update_julia_type(ctx, retval, inferred_retty);
 }
 
-static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject,
+                                          const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
 {
     auto theFptr = cast<Function>(
         jl_Module->getOrInsertFunction(specFunctionObject, jl_func_sig).getCallee());
     add_return_attr(theFptr, Attribute::NonNull);
     theFptr->addFnAttr(Thunk);
     Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, JLCALL_F_CC);
-    return mark_julia_type(ctx, ret, true, inferred_retty);
+    return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty); // tag the boxed result as jlretty first, then let update_julia_type reconcile it with inferred_retty
 }
 
 static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
@@ -3606,7 +3408,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
         if (argv[i].typ == jl_bottom_type)
             return jl_cgval_t();
     }
+    return emit_invoke(ctx, lival, argv, nargs, rt);
+}
 
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt)
+{
     bool handled = false;
     jl_cgval_t result;
     if (lival.constant) {
@@ -3618,7 +3424,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
             FunctionType *ft = ctx.f->getFunctionType();
             StringRef protoname = ctx.f->getName();
             if (ft == jl_func_sig) {
-                result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, argv, nargs, rt);
                 handled = true;
             }
             else if (ft != jl_func_sig_sparams) {
@@ -3630,12 +3436,14 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
         else {
             jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
             jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-            if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
-                if (codeinst->invoke == jl_fptr_const_return) {
+            if (ci != jl_nothing) {
+                auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                 // check if we know how to handle this specptr
+                if (invoke == jl_fptr_const_return_addr) {
                     result = mark_julia_const(codeinst->rettype_const);
                     handled = true;
                 }
-                else {
+                else if (invoke != jl_fptr_sparam_addr) {
                     bool specsig, needsparams;
                     std::tie(specsig, needsparams) = uses_specsig(mi, codeinst->rettype, ctx.params->prefer_specsig);
                     std::string name;
@@ -3644,9 +3452,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
                     if (ctx.use_cache) {
                         // optimization: emit the correct name immediately, if we know it
                         // TODO: use `emitted` map here too to try to consolidate names?
-                        if (codeinst->specptr.fptr) {
-                            if (specsig ? codeinst->isspecsig : codeinst->invoke == jl_fptr_args) {
-                                protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
+                        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                        auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+                        if (fptr) {
+                            if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) {
+                                protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                                 need_to_emit = false;
                             }
                         }
@@ -3660,7 +3470,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
                     if (specsig)
                         result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt);
                     else
-                        result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt);
                     handled = true;
                     if (need_to_emit) {
                         Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
@@ -3679,6 +3489,40 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     return result;
 }
 
+static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
+{ // Lowers an Expr whose args are (lival, f, fargs...): uses a specialized modify emitter when f is recognized, otherwise a generic call typed as rt.
+    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    size_t arglen = jl_array_dim0(ex->args);
+    size_t nargs = arglen - 1; // entries after args[0]: f plus its arguments
+    assert(arglen >= 2);
+    jl_cgval_t lival = emit_expr(ctx, args[0]); // only forwarded to the specialized emitters below
+    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    for (size_t i = 0; i < nargs; ++i) {
+        argv[i] = emit_expr(ctx, args[i + 1]);
+        if (argv[i].typ == jl_bottom_type)
+            return jl_cgval_t(); // an argument has bottom type: return a default-constructed cgval without emitting the call
+    }
+    const jl_cgval_t &f = argv[0]; // the callee
+    jl_cgval_t ret;
+    if (f.constant && f.constant == jl_builtin_modifyfield) {
+        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival)) // argv[0] is f, so the argument count passed is nargs - 1
+            return ret;
+        auto it = builtin_func_map.find(jl_f_modifyfield_addr); // emit_f_opfield declined: call the modifyfield builtin directly
+        assert(it != builtin_func_map.end());
+        Value *oldnew = emit_jlcall(ctx, it->second, V_rnull, &argv[1], nargs - 1, JLCALL_F_CC); // skip f itself; pass only its arguments
+        return mark_julia_type(ctx, oldnew, true, rt);
+    }
+    if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) {
+        JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant); // intrinsic id is read from the constant's data as a uint32_t
+        if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1)
+            return emit_atomic_pointerop(ctx, fi, argv, nargs - 1, &lival);
+    }
+
+    // no specialized lowering applied: generic call of f on its arguments (lival is not passed)
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, JLCALL_F_CC);
+    return mark_julia_type(ctx, callval, true, rt);
+}
+
 static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 {
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
@@ -3883,7 +3727,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
         }
     }
     else if (jl_is_expr(sym)) {
-        assert(((jl_expr_t*)sym)->head == static_parameter_sym && "malformed isdefined expression");
+        assert(((jl_expr_t*)sym)->head == jl_static_parameter_sym && "malformed isdefined expression");
         size_t i = jl_unbox_long(jl_exprarg(sym, 0)) - 1;
         if (jl_svec_len(ctx.linfo->sparam_vals) > 0) {
             jl_value_t *e = jl_svecref(ctx.linfo->sparam_vals, i);
@@ -4400,18 +4244,18 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     jl_expr_t *ex = (jl_expr_t*)expr;
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
     jl_sym_t *head = ex->head;
-    if (head == meta_sym || head == inbounds_sym || head == coverageeffect_sym
-            || head == aliasscope_sym || head == popaliasscope_sym) {
+    if (head == jl_meta_sym || head == jl_inbounds_sym || head == jl_coverageeffect_sym
+            || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         // some expression types are metadata and can be ignored
         // in statement position
         return;
     }
-    else if (head == leave_sym) {
+    else if (head == jl_leave_sym) {
         assert(jl_is_long(args[0]));
         ctx.builder.CreateCall(prepare_call(jlleave_func),
                            ConstantInt::get(T_int32, jl_unbox_long(args[0])));
     }
-    else if (head == pop_exception_sym) {
+    else if (head == jl_pop_exception_sym) {
         jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0));
         assert(excstack_state.V && excstack_state.V->getType() == T_size);
         ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V);
@@ -4420,8 +4264,8 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     else {
         if (!jl_is_method(ctx.linfo->def.method) && !ctx.is_opaque_closure) {
             // TODO: inference is invalid if this has any effect (which it often does)
-            Value *world = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
-            // TODO: world->setOrdering(AtomicOrdering::Monotonic);
+            LoadInst *world = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
+            world->setOrdering(AtomicOrdering::Acquire);
             ctx.builder.CreateAlignedStore(world, ctx.world_age_field, Align(sizeof(size_t)));
         }
         assert(ssaval_result != -1);
@@ -4500,17 +4344,20 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
 
     jl_expr_t *ex = (jl_expr_t*)expr;
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    size_t nargs = jl_array_len(ex->args);
     jl_sym_t *head = ex->head;
     // this is object-disoriented.
     // however, this is a good way to do it because it should *not* be easy
     // to add new node types.
-    if (head == isdefined_sym) {
+    if (head == jl_isdefined_sym) {
+        assert(nargs == 1);
         return emit_isdefined(ctx, args[0]);
     }
-    else if (head == throw_undef_if_not_sym) {
+    else if (head == jl_throw_undef_if_not_sym) {
+        assert(nargs == 2);
         jl_sym_t *var = (jl_sym_t*)args[0];
         Value *cond = ctx.builder.CreateTrunc(emit_unbox(ctx, T_int8, emit_expr(ctx, args[1]), (jl_value_t*)jl_bool_type), T_int1);
-        if (var == getfield_undefref_sym) {
+        if (var == jl_getfield_undefref_sym) {
             raise_exception_unless(ctx, cond,
                 literal_pointer_val(ctx, jl_undefref_exception));
         }
@@ -4519,13 +4366,19 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         }
         return ghostValue(jl_nothing_type);
     }
-    else if (head == invoke_sym) {
+    else if (head == jl_invoke_sym) {
         assert(ssaval >= 0);
         jl_value_t *expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type :
             jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
         return emit_invoke(ctx, ex, expr_t);
     }
-    else if (head == call_sym) {
+    else if (head == jl_invoke_modify_sym) {
+        assert(ssaval >= 0);
+        jl_value_t *expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type :
+            jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
+        return emit_invoke_modify(ctx, ex, expr_t);
+    }
+    else if (head == jl_call_sym) {
         jl_value_t *expr_t;
         if (ssaval < 0)
             // TODO: this case is needed for the call to emit_expr in emit_llvmcall
@@ -4541,24 +4394,27 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         }
         return res;
     }
-    else if (head == foreigncall_sym) {
+    else if (head == jl_foreigncall_sym) {
         return emit_ccall(ctx, args, jl_array_dim0(ex->args));
     }
-    else if (head == cfunction_sym) {
+    else if (head == jl_cfunction_sym) {
+        assert(nargs == 5);
         jl_cgval_t fexpr_rt = emit_expr(ctx, args[1]);
         return emit_cfunction(ctx, args[0], fexpr_rt, args[2], (jl_svec_t*)args[3]);
     }
-    else if (head == assign_sym) {
+    else if (head == jl_assign_sym) {
+        assert(nargs == 2);
         emit_assignment(ctx, args[0], args[1], ssaval);
         return ghostValue(jl_nothing_type);
     }
-    else if (head == static_parameter_sym) {
+    else if (head == jl_static_parameter_sym) {
+        assert(nargs == 1);
         return emit_sparam(ctx, jl_unbox_long(args[0]) - 1);
     }
-    else if (head == method_sym) {
-        if (jl_expr_nargs(ex) == 1) {
+    else if (head == jl_method_sym) {
+        if (nargs == 1) {
             jl_value_t *mn = args[0];
-            assert(jl_expr_nargs(ex) != 1 || jl_is_symbol(mn) || jl_is_slot(mn));
+            assert(jl_is_symbol(mn) || jl_is_slot(mn));
 
             Value *bp = NULL, *name, *bp_owner = V_null;
             jl_binding_t *bnd = NULL;
@@ -4607,6 +4463,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             emit_error(ctx, "method: invalid declaration");
             return jl_cgval_t();
         }
+        assert(nargs == 3);
         Value *a1 = boxed(ctx, emit_expr(ctx, args[1]));
         Value *a2 = boxed(ctx, emit_expr(ctx, args[2]));
         Value *mdargs[4] = {
@@ -4622,7 +4479,8 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             jl_method_type);
         return meth;
     }
-    else if (head == const_sym) {
+    else if (head == jl_const_sym) {
+        assert(nargs == 1);
         jl_sym_t *sym = (jl_sym_t*)args[0];
         jl_module_t *mod = ctx.module;
         if (jl_is_globalref(sym)) {
@@ -4636,8 +4494,8 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
                                literal_pointer_val(ctx, bnd));
         }
     }
-    else if (head == new_sym) {
-        size_t nargs = jl_array_len(ex->args);
+    else if (head == jl_new_sym) {
+        assert(nargs > 0);
         jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
@@ -4654,8 +4512,9 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         // it to the inferred type.
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
     }
-    else if (head == splatnew_sym) {
+    else if (head == jl_splatnew_sym) {
         jl_cgval_t argv[2];
+        assert(nargs == 2);
         argv[0] = emit_expr(ctx, args[0]);
         argv[1] = emit_expr(ctx, args[1]);
         Value *typ = boxed(ctx, argv[0]);
@@ -4665,8 +4524,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         // it to the inferred type.
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
     }
-    else if (head == new_opaque_closure_sym) {
-        size_t nargs = jl_array_len(ex->args);
+    else if (head == jl_new_opaque_closure_sym) {
         assert(nargs >= 5 && "Not enough arguments in new_opaque_closure");
         SmallVector<jl_cgval_t, 5> argv(nargs);
         for (size_t i = 0; i < nargs; ++i) {
@@ -4805,12 +4663,14 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
                 emit_jlcall(ctx, jl_new_opaque_closure_jlcall_func, V_rnull, argv.data(), nargs, JLCALL_F_CC),
                 true, jl_any_type);
     }
-    else if (head == exc_sym) {
+    else if (head == jl_exc_sym) {
+        assert(nargs == 0);
         return mark_julia_type(ctx,
                 ctx.builder.CreateCall(prepare_call(jl_current_exception_func)),
                 true, jl_any_type);
     }
-    else if (head == copyast_sym) {
+    else if (head == jl_copyast_sym) {
+        assert(nargs == 1);
         jl_cgval_t ast = emit_expr(ctx, args[0]);
         if (ast.typ != (jl_value_t*)jl_expr_type && ast.typ != (jl_value_t*)jl_any_type) {
             // elide call to jl_copy_ast when possible
@@ -4820,10 +4680,10 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
                 ctx.builder.CreateCall(prepare_call(jlcopyast_func),
                                        boxed(ctx, ast)), true, jl_expr_type);
     }
-    else if (head == loopinfo_sym) {
+    else if (head == jl_loopinfo_sym) {
         // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4))
         SmallVector<Metadata *, 8> MDs;
-        for (int i = 0, ie = jl_expr_nargs(ex); i < ie; ++i) {
+        for (int i = 0, ie = nargs; i < ie; ++i) {
             Metadata *MD = to_md_tree(args[i]);
             if (MD)
                 MDs.push_back(MD);
@@ -4834,16 +4694,15 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         I->setMetadata("julia.loopinfo", MD);
         return jl_cgval_t();
     }
-    else if (head == leave_sym || head == coverageeffect_sym
-            || head == pop_exception_sym || head == enter_sym || head == inbounds_sym
-            || head == aliasscope_sym || head == popaliasscope_sym) {
+    else if (head == jl_leave_sym || head == jl_coverageeffect_sym
+            || head == jl_pop_exception_sym || head == jl_enter_sym || head == jl_inbounds_sym
+            || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         jl_errorf("Expr(:%s) in value position", jl_symbol_name(head));
     }
-    else if (head == boundscheck_sym) {
+    else if (head == jl_boundscheck_sym) {
         return mark_julia_const(bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false);
     }
-    else if (head == gc_preserve_begin_sym) {
-        size_t nargs = jl_array_len(ex->args);
+    else if (head == jl_gc_preserve_begin_sym) {
         jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
@@ -4867,7 +4726,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         jl_cgval_t tok(token, NULL, false, (jl_value_t*)jl_nothing_type, NULL);
         return tok;
     }
-    else if (head == gc_preserve_end_sym) {
+    else if (head == jl_gc_preserve_end_sym) {
         // We only support ssa values as the argument. Everything else will
         // fall back to the default behavior of preserving the argument value
         // until the end of the scope, which is correct, but not optimal.
@@ -4914,26 +4773,14 @@ static Value *get_current_task(jl_codectx_t &ctx)
     const int ptls_offset = offsetof(jl_task_t, gcstack);
     return ctx.builder.CreateInBoundsGEP(
         T_pjlvalue, emit_bitcast(ctx, ctx.pgcstack, T_ppjlvalue),
-        ConstantInt::get(T_size, -ptls_offset / sizeof(void *)),
+        ConstantInt::get(T_size, -(ptls_offset / sizeof(void *))),
         "current_task");
 }
 
 // Get PTLS through current task.
 static Value *get_current_ptls(jl_codectx_t &ctx)
 {
-    const int ptls_offset = offsetof(jl_task_t, ptls);
-    Value *pptls = ctx.builder.CreateInBoundsGEP(
-        T_pjlvalue, get_current_task(ctx),
-        ConstantInt::get(T_size, ptls_offset / sizeof(void *)),
-        "ptls_field");
-    LoadInst *ptls_load = ctx.builder.CreateAlignedLoad(
-        emit_bitcast(ctx, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load");
-    // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
-    tbaa_decorate(tbaa_gcframe, ptls_load);
-    // Using `CastInst::Create` to get an `Instruction*` without explicit cast:
-    auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls");
-    ctx.builder.Insert(ptls);
-    return ptls;
+    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx));
 }
 
 // Store world age at the entry block of the function. This function should be
@@ -4965,7 +4812,7 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     std::string name;
     raw_string_ostream(name) << "tojlinvoke" << globalUnique++;
     Function *f = Function::Create(jl_func_sig,
-            GlobalVariable::PrivateLinkage,
+            GlobalVariable::InternalLinkage,
             name, M);
     jl_init_function(f);
     f->addFnAttr(Thunk);
@@ -4975,8 +4822,9 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     ctx.builder.SetInsertPoint(b0);
     Function *theFunc;
     Value *theFarg;
-    if (params.cache && codeinst->invoke != NULL) {
-        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->invoke, codeinst);
+    auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+    if (params.cache && invoke != NULL) {
+        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
         theFunc = cast<Function>(
             M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(jl_LLVMContext)).getCallee());
         theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
@@ -5110,7 +4958,7 @@ static Function* gen_cfun_wrapper(
     assert(into);
     size_t nargs = sig.nccallargs;
     const char *name = "cfunction";
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     jl_code_instance_t *codeinst = NULL;
     bool nest = (!ff || unionall_env);
     jl_value_t *astrt = (jl_value_t*)jl_any_type;
@@ -5124,11 +4972,11 @@ static Function* gen_cfun_wrapper(
         // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
         codeinst = jl_compile_method_internal(lam, world);
         assert(codeinst->invoke);
-        if (codeinst->invoke == jl_fptr_args) {
+        if (codeinst->invoke == jl_fptr_args_addr) {
             callptr = codeinst->specptr.fptr;
             calltype = 1;
         }
-        else if (codeinst->invoke == jl_fptr_const_return) {
+        else if (codeinst->invoke == jl_fptr_const_return_addr) {
             // don't need the fptr
             callptr = (void*)codeinst->rettype_const;
             calltype = 2;
@@ -5157,9 +5005,50 @@ static Function* gen_cfun_wrapper(
         // add nest parameter (pointer to jl_value_t* data array) after sret arg
         assert(closure_types);
         std::vector<Type*> fargt_sig(sig.fargt_sig);
+
         fargt_sig.insert(fargt_sig.begin() + sig.sret, T_pprjlvalue);
+
+        // Shift LLVM attributes for parameters one to the right, as
+        // we are adding the extra nest parameter after sret arg.
+        std::vector<std::pair<unsigned, AttributeSet>> newAttributes;
+        newAttributes.reserve(attributes.getNumAttrSets() + 1);
+        auto it = attributes.index_begin();
+
+        // Skip past FunctionIndex
+        if (it == AttributeList::AttrIndex::FunctionIndex) {
+            ++it;
+        }
+
+        // Move past ReturnValue and parameter return value
+        for (;it < AttributeList::AttrIndex::FirstArgIndex + sig.sret; ++it) {
+            if (attributes.hasAttributes(it)) {
+                newAttributes.emplace_back(it, attributes.getAttributes(it));
+            }
+        }
+
+        // Add the new nest attribute
+        AttrBuilder attrBuilder;
+        attrBuilder.addAttribute(Attribute::Nest);
+        newAttributes.emplace_back(it, AttributeSet::get(jl_LLVMContext, attrBuilder));
+
+        // Shift forward the rest of the attributes
+        if (attributes.getNumAttrSets() > 0) { // without this check the loop range below is invalid
+            for(;it < attributes.index_end(); ++it) {
+                if (attributes.hasAttributes(it)) {
+                    newAttributes.emplace_back(it + 1, attributes.getAttributes(it));
+                }
+            }
+        }
+
+        // Remember to add back FunctionIndex
+        if (attributes.hasAttributes(AttributeList::AttrIndex::FunctionIndex)) {
+            newAttributes.emplace_back(AttributeList::AttrIndex::FunctionIndex,
+                                       attributes.getAttributes(AttributeList::AttrIndex::FunctionIndex));
+        }
+
+        // Create the new AttributeList
+        attributes = AttributeList::get(jl_LLVMContext, newAttributes);
         functype = FunctionType::get(sig.sret ? T_void : sig.prt, fargt_sig, /*isVa*/false);
-        attributes = attributes.addAttribute(jl_LLVMContext, 1 + sig.sret, Attribute::Nest);
     }
     else {
         functype = sig.functype();
@@ -5190,7 +5079,7 @@ static Function* gen_cfun_wrapper(
     ctx.world_age_field = ctx.builder.CreateSelect(have_tls, ctx.world_age_field, dummy_world);
     Value *last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, Align(sizeof(size_t))));
     Value *world_v = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
-    // TODO: cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Monotonic);
+    cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Acquire);
 
     Value *age_ok = NULL;
     if (calltype) {
@@ -5620,7 +5509,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
     // some sanity checking and check whether there's a vararg
     size_t nargt = jl_svec_len(argt);
     bool isVa = (nargt > 0 && jl_is_vararg(jl_svecref(argt, nargt - 1)));
-    assert(!isVa);
+    assert(!isVa); (void)isVa;
 
     jl_array_t *closure_types = NULL;
     jl_value_t *sigt = NULL; // dispatch-sig = type signature with Ref{} annotations removed and applied to the env
@@ -5691,7 +5580,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         return jl_cgval_t();
     }
 #endif
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
     // try to look up this function for direct invoking
@@ -5784,7 +5673,7 @@ const char *jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t
         function_sig_t sig("cfunction", lcrt, crt, toboxed,
                            argtypes, NULL, false, CallingConv::C, false, &params);
         if (sig.err_msg.empty()) {
-            size_t world = jl_world_counter;
+            size_t world = jl_atomic_load_acquire(&jl_world_counter);
             size_t min_valid = 0;
             size_t max_valid = ~(size_t)0;
             if (sysimg_handle) {
@@ -5940,8 +5829,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
 {
     jl_returninfo_t props = {};
     SmallVector<Type*, 8> fsig;
-    Type *rt;
-    Type *srt;
+    Type *rt = NULL;
+    Type *srt = NULL;
     if (jl_is_structtype(jlrettype) && jl_is_datatype_singleton((jl_datatype_t*)jlrettype)) {
         rt = T_void;
         props.cc = jl_returninfo_t::Register;
@@ -6132,7 +6021,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     ctx.nReqArgs = nreq;
     if (va) {
         jl_sym_t *vn = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, ctx.nargs - 1);
-        if (vn != unused_sym)
+        if (vn != jl_unused_sym)
             ctx.vaSlot = ctx.nargs - 1;
     }
     toplevel = !jl_is_method(lam->def.method);
@@ -6195,7 +6084,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         jl_varinfo_t &varinfo = ctx.slots[i];
         varinfo.isArgument = true;
         jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
-        if (argname == unused_sym)
+        if (argname == jl_unused_sym)
             continue;
         jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i);
         // OpaqueClosure implicitly loads the env
@@ -6313,12 +6202,12 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         f->setDoesNotReturn();
 
 #ifdef USE_POLLY
-    if (!jl_has_meta(stmts, polly_sym) || jl_options.polly == JL_OPTIONS_POLLY_OFF) {
+    if (!jl_has_meta(stmts, jl_polly_sym) || jl_options.polly == JL_OPTIONS_POLLY_OFF) {
         f->addFnAttr(polly::PollySkipFnAttr);
     }
 #endif
 
-    if (jl_has_meta(stmts, noinline_sym)) {
+    if (jl_has_meta(stmts, jl_noinline_sym)) {
         f->addFnAttr(Attribute::NoInline);
     }
 
@@ -6331,7 +6220,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     f->addFnAttr(Attribute::StackProtectStrong);
 #endif
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     // TODO: enable this only when a argument like `-race` is passed to Julia
     //       add a macro for no_sanitize_thread
     f->addFnAttr(llvm::Attribute::SanitizeThread);
@@ -6407,7 +6296,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             // Go over all arguments and local variables and initialize their debug information
             for (i = 0; i < nreq; i++) {
                 jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
-                if (argname == unused_sym)
+                if (argname == jl_unused_sym)
                     continue;
                 jl_varinfo_t &varinfo = ctx.slots[i];
                 varinfo.dinfo = dbuilder.createParameterVariable(
@@ -6435,7 +6324,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             for (i = 0; i < vinfoslen; i++) {
                 jl_sym_t *s = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
                 jl_varinfo_t &varinfo = ctx.slots[i];
-                if (varinfo.isArgument || s == empty_sym || s == unused_sym)
+                if (varinfo.isArgument || s == jl_empty_sym || s == jl_unused_sym)
                     continue;
                 // LLVM 4.0: Assume the variable has default alignment
                 varinfo.dinfo = dbuilder.createAutoVariable(
@@ -6523,8 +6412,11 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed);
             assert(!isboxed);
             assert(!type_is_ghost(vtype) && "constants should already be handled");
-            // CreateAlloca is OK during prologue setup
-            Value *lv = ctx.builder.CreateAlloca(vtype, NULL, jl_symbol_name(s));
+            Value *lv = new AllocaInst(vtype, 0, jl_symbol_name(s), /*InsertBefore*/ctx.pgcstack);
+            if (CountTrackedPointers(vtype).count) {
+                StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*)));
+                SI->insertAfter(ctx.pgcstack);
+            }
             varinfo.value = mark_julia_slot(lv, jt, NULL, tbaa_stack);
             alloc_def_flag(ctx, varinfo);
             if (ctx.debug_enabled && varinfo.dinfo) {
@@ -6564,7 +6456,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     // get pointers for locals stored in the gc frame array (argTemp)
     for (i = 0; i < vinfoslen; i++) {
         jl_sym_t *s = slot_symbol(ctx, i);
-        if (s == unused_sym)
+        if (s == jl_unused_sym)
             continue;
         jl_varinfo_t &varinfo = ctx.slots[i];
         if (!varinfo.used) {
@@ -6636,14 +6528,14 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             jl_nth_slot_type(lam->specTypes, i);
         bool isboxed = deserves_argbox(argType);
         Type *llvmArgType = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, argType);
-        if (s == unused_sym) {
+        if (s == jl_unused_sym) {
             if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
                 ++AI;
             continue;
         }
         jl_varinfo_t &vi = ctx.slots[i];
         jl_cgval_t theArg;
-        if (s == unused_sym || vi.value.constant) {
+        if (s == jl_unused_sym || vi.value.constant) {
             assert(vi.boxroot == NULL);
             if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
                 ++AI;
@@ -6870,13 +6762,13 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
             jl_expr_t *expr = jl_is_expr(stmt) ? (jl_expr_t*)stmt : nullptr;
             if (expr) {
-                if (expr->head == aliasscope_sym) {
+                if (expr->head == jl_aliasscope_sym) {
                     MDNode *scope = mbuilder.createAliasScope("aliasscope", alias_domain);
                     scope_stack.push_back(scope);
                     MDNode *scope_list = MDNode::get(jl_LLVMContext, ArrayRef<Metadata*>(scope_stack));
                     scope_list_stack.push_back(scope_list);
                     current_aliasscope = scope_list;
-                } else if (expr->head == popaliasscope_sym) {
+                } else if (expr->head == jl_popaliasscope_sym) {
                     scope_stack.pop_back();
                     scope_list_stack.pop_back();
                     if (scope_list_stack.empty()) {
@@ -6942,7 +6834,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     };
     std::vector<unsigned> current_lineinfo, new_lineinfo;
     auto coverageVisitStmt = [&] (size_t dbg) {
-        if (dbg == 0)
+        if (dbg == 0 || dbg >= linetable.size())
             return;
         // Compute inlining stack for current line, inner frame first
         while (dbg) {
@@ -6977,7 +6869,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         // record all lines that could be covered
         for (const auto &info : linetable)
             if (do_coverage(info.is_user_code))
-                coverageAllocLine(info.file, info.line);
+                jl_coverage_alloc_line(info.file, info.line);
     }
 
     come_from_bb[0] = ctx.builder.GetInsertBlock();
@@ -7001,7 +6893,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 if (i + 2 <= stmtslen)
                     branch_targets.insert(i + 2);
             } else if (jl_is_expr(stmt)) {
-                if (((jl_expr_t*)stmt)->head == enter_sym) {
+                if (((jl_expr_t*)stmt)->head == jl_enter_sym) {
                     branch_targets.insert(i + 1);
                     if (i + 2 <= stmtslen)
                         branch_targets.insert(i + 2);
@@ -7035,8 +6927,10 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
     { // coverage for the function definition line number
         const auto &topinfo = linetable.at(0);
-        if (topinfo == linetable.at(1))
-            current_lineinfo.push_back(1);
+        if (linetable.size() > 1) {
+            if (topinfo == linetable.at(1))
+                current_lineinfo.push_back(1);
+        }
         if (do_coverage(topinfo.is_user_code))
             coverageVisitLine(ctx, topinfo.file, topinfo.line);
     }
@@ -7188,7 +7082,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             find_next_stmt(cursor + 1);
             continue;
         }
-        else if (expr && expr->head == enter_sym) {
+        else if (expr && expr->head == jl_enter_sym) {
             jl_value_t **args = (jl_value_t**)jl_array_data(expr->args);
 
             assert(jl_is_long(args[0]));
@@ -7271,7 +7165,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
 #endif
                 continue;
             }
-            assert(find(pred_begin(PhiBB), pred_end(PhiBB), FromBB) != pred_end(PhiBB)); // consistency check
+            assert(std::find(pred_begin(PhiBB), pred_end(PhiBB), FromBB) != pred_end(PhiBB)); // consistency check
             TerminatorInst *terminator = FromBB->getTerminator();
             if (!terminator->getParent()->getUniqueSuccessor()) {
                 // Can't use `llvm::SplitCriticalEdge` here because
@@ -7324,7 +7218,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     assert(lty != T_prjlvalue);
                     Value *isvalid = emit_isa(ctx, val, phiType, NULL).first;
                     emit_guarded_test(ctx, isvalid, nullptr, [&] {
-                        (void)emit_unbox(ctx, lty, val, phiType, maybe_decay_tracked(ctx, dest));
+                        (void)emit_unbox(ctx, lty, val, phiType, maybe_decay_tracked(ctx, dest), tbaa_stack);
                         return nullptr;
                     });
                 }
@@ -7352,7 +7246,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                             V = V_rnull;
                         Type *lty = julia_type_to_llvm(ctx, val.typ);
                         if (dest && !type_is_ghost(lty)) // basically, if !ghost union
-                            emit_unbox(ctx, lty, val, val.typ, dest);
+                            emit_unbox(ctx, lty, val, val.typ, dest, tbaa_stack);
                         RTindex = ConstantInt::get(T_int8, tindex);
                     }
                 }
@@ -7527,8 +7421,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         JL_UNLOCK(&m->writelock);
     }
 
-    // link the dependent llvmcall modules, but switch their function's linkage to private
-    // so that they don't show up in the execution engine.
+    // link the dependent llvmcall modules, but switch their functions' linkage to internal
+    // so that they don't conflict when they show up in the execution engine.
     for (auto &Mod : ctx.llvmcall_modules) {
         SmallVector<std::string, 1> Exports;
         for (const auto &F: Mod->functions())
@@ -7538,7 +7432,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             jl_error("Failed to link LLVM bitcode");
         }
         for (auto FN: Exports)
-            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::PrivateLinkage);
+            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
     }
 
     // link in opaque closure modules
@@ -7549,7 +7443,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 Exports.push_back(F.getName().str());
         jl_merge_module(jl_Module, std::move(Mod));
         for (auto FN: Exports)
-            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::PrivateLinkage);
+            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
     }
 
     JL_GC_POP();
@@ -7664,7 +7558,7 @@ jl_compile_result_t jl_emit_codeinst(
                      // and there is something to delete (test this before calling jl_ir_flag_inlineable)
                      codeinst->inferred != jl_nothing &&
                      // don't delete inlineable code, unless it is constant
-                     (codeinst->invoke == jl_fptr_const_return || !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) &&
+                     (codeinst->invoke == jl_fptr_const_return_addr || !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) &&
                      // don't delete code when generating a precompile file
                      !imaging_mode) {
                 // if not inlineable, code won't be needed again
@@ -7697,12 +7591,14 @@ void jl_compile_workqueue(
             "invalid world for code-instance");
         StringRef preal_decl = "";
         bool preal_specsig = false;
-        if (params.cache && codeinst->invoke != NULL) {
-            if (codeinst->invoke == jl_fptr_args) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
+        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        if (params.cache && invoke != NULL) {
+            auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+            if (invoke == jl_fptr_args_addr) {
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
             }
             else if (codeinst->isspecsig) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                 preal_specsig = true;
             }
         }
@@ -7717,7 +7613,7 @@ void jl_compile_workqueue(
                 // method body. See #34993
                 if (policy != CompilationPolicy::Default &&
                     codeinst->inferred && codeinst->inferred == jl_nothing) {
-                    src = jl_type_infer(codeinst->def, jl_world_counter, 0);
+                    src = jl_type_infer(codeinst->def, jl_atomic_load_acquire(&jl_world_counter), 0);
                     if (src)
                         result = jl_emit_code(codeinst->def, src, src->rettype, params);
                 }
@@ -7747,9 +7643,9 @@ void jl_compile_workqueue(
             if (!preal_specsig) {
                 // emit specsig-to-(jl)invoke conversion
                 Function *preal = emit_tojlinvoke(codeinst, mod, params);
-                protodecl->setLinkage(GlobalVariable::PrivateLinkage);
+                protodecl->setLinkage(GlobalVariable::InternalLinkage);
                 //protodecl->setAlwaysInline();
-                protodecl->addFnAttr("no-frame-pointer-elim", "true");
+                jl_init_function(protodecl);
                 size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed
                 // TODO: maybe this can be cached in codeinst->specfptr?
                 emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, nrealargs, params, preal);
@@ -7949,7 +7845,8 @@ static void init_jit_functions(void)
     add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle);
 #ifdef _OS_WINDOWS_
     add_named_global(jlexe_var, &jl_exe_handle);
-    add_named_global(jldll_var, &jl_libjulia_internal_handle);
+    add_named_global(jldll_var, &jl_libjulia_handle);
+    add_named_global(jldlli_var, &jl_libjulia_internal_handle);
 #endif
     global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, get_pjlvalue}, &jl_true);
     global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, get_pjlvalue}, &jl_false);
@@ -8030,7 +7927,7 @@ static void init_jit_functions(void)
 #endif
 #endif
 
-#define BOX_F(ct) add_named_global("jl_box_"#ct, &jl_box_##ct);
+#define BOX_F(ct) add_named_global(XSTR(jl_box_##ct), &jl_box_##ct);
     BOX_F(int8); BOX_F(uint8);
     BOX_F(int16); BOX_F(uint16);
     BOX_F(int32); BOX_F(uint32);
@@ -8040,9 +7937,59 @@ static void init_jit_functions(void)
 #undef BOX_F
 }
 
+char jl_using_gdb_jitevents = 0; // Non-zero if the GDB registration listener should be installed (decided in jl_init_llvm)
+
+#ifdef JL_USE_INTEL_JITEVENTS
+char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier
+#endif
+
+#ifdef JL_USE_OPROFILE_JITEVENTS
+char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile
+#endif
+
+#ifdef JL_USE_PERF_JITEVENTS
+char jl_using_perf_jitevents = 0; // Non-zero if the perf JIT event listener should be installed (decided in jl_init_llvm)
+#endif
+
+void jl_init_debuginfo(void);
+
 extern "C" void jl_init_llvm(void)
 {
-    jl_page_size = jl_getpagesize();
+    builtin_func_map =
+        { { jl_f_is_addr,                 new JuliaFunction{XSTR(jl_f_is), get_func_sig, get_func_attrs} },
+          { jl_f_typeof_addr,             new JuliaFunction{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} },
+          { jl_f_sizeof_addr,             new JuliaFunction{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} },
+          { jl_f_issubtype_addr,          new JuliaFunction{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} },
+          { jl_f_isa_addr,                new JuliaFunction{XSTR(jl_f_isa), get_func_sig, get_func_attrs} },
+          { jl_f_typeassert_addr,         new JuliaFunction{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} },
+          { jl_f_ifelse_addr,             new JuliaFunction{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} },
+          { jl_f__apply_iterate_addr,     new JuliaFunction{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} },
+          { jl_f__apply_pure_addr,        new JuliaFunction{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} },
+          { jl_f__call_latest_addr,       new JuliaFunction{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} },
+          { jl_f__call_in_world_addr,     new JuliaFunction{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} },
+          { jl_f_throw_addr,              new JuliaFunction{XSTR(jl_f_throw), get_func_sig, get_func_attrs} },
+          { jl_f_tuple_addr,              jltuple_func },
+          { jl_f_svec_addr,               new JuliaFunction{XSTR(jl_f_svec), get_func_sig, get_func_attrs} },
+          { jl_f_applicable_addr,         new JuliaFunction{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} },
+          { jl_f_invoke_addr,             new JuliaFunction{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} },
+          { jl_f_invoke_kwsorter_addr,    new JuliaFunction{XSTR(jl_f_invoke_kwsorter), get_func_sig, get_func_attrs} },
+          { jl_f_isdefined_addr,          new JuliaFunction{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} },
+          { jl_f_getfield_addr,           new JuliaFunction{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} },
+          { jl_f_setfield_addr,           new JuliaFunction{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} },
+          { jl_f_swapfield_addr,          new JuliaFunction{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} },
+          { jl_f_modifyfield_addr,        new JuliaFunction{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} },
+          { jl_f_fieldtype_addr,          new JuliaFunction{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} },
+          { jl_f_nfields_addr,            new JuliaFunction{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} },
+          { jl_f__expr_addr,              new JuliaFunction{XSTR(jl_f__expr), get_func_sig, get_func_attrs} },
+          { jl_f__typevar_addr,           new JuliaFunction{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} },
+          { jl_f_arrayref_addr,           new JuliaFunction{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} },
+          { jl_f_const_arrayref_addr,     new JuliaFunction{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} },
+          { jl_f_arrayset_addr,           new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} },
+          { jl_f_arraysize_addr,          new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} },
+          { jl_f_apply_type_addr,         new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
+        };
+
+    jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
     imaging_mode = jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental);
     jl_default_cgparams.generic_context = jl_nothing;
     jl_init_debuginfo();
@@ -8069,42 +8016,20 @@ extern "C" void jl_init_llvm(void)
 #endif
 
     // Parse command line flags after initialization
-    const char *const argv_tailmerge[] = {"", "-enable-tail-merge=0"}; // NOO TOUCHIE; NO TOUCH! See #922
-    cl::ParseCommandLineOptions(sizeof(argv_tailmerge)/sizeof(argv_tailmerge[0]), argv_tailmerge, "disable-tail-merge\n");
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-    const char *const argv_copyprop[] = {"", "-disable-copyprop"}; // llvm bug 21743
-    cl::ParseCommandLineOptions(sizeof(argv_copyprop)/sizeof(argv_copyprop[0]), argv_copyprop, "disable-copyprop\n");
-#endif
-#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
-    const char *const argv_avoidsfb[] = {"", "-x86-disable-avoid-SFB"}; // llvm bug 41629, see https://gist.github.com/vtjnash/192cab72a6cfc00256ff118238163b55
-    cl::ParseCommandLineOptions(sizeof(argv_avoidsfb)/sizeof(argv_avoidsfb[0]), argv_avoidsfb, "disable-avoidsfb\n");
-#endif
-#if JL_LLVM_VERSION >= 120000
-    // https://reviews.llvm.org/rGc068e9c8c123e7f8c8f3feb57245a012ccd09ccf
-    Optional<std::string> envValue = sys::Process::GetEnv("JULIA_LLVM_ARGS");
-    if (envValue) {
-        SmallVector<const char *, 20> newArgv;
-        BumpPtrAllocator A;
-        StringSaver Saver(A);
-        newArgv.push_back(Saver.save("Julia").data());
-
-        // Parse the value of the environment variable into a "command line"
-        // and hand it off to ParseCommandLineOptions().
-        cl::TokenizeGNUCommandLine(*envValue, Saver, newArgv);
-        int newArgc = static_cast<int>(newArgv.size());
-        cl::ParseCommandLineOptions(newArgc, &newArgv[0]);
-    }
-#else
-    cl::ParseEnvironmentOptions("Julia", "JULIA_LLVM_ARGS");
-#endif
-
+    StringMap<cl::Option*> &llvmopts = cl::getRegisteredOptions();
+    const char *const argv[1] = {"julia"};
+    cl::ParseCommandLineOptions(1, argv, "", nullptr, "JULIA_LLVM_ARGS");
+
+    // Set preferred non-default options
+    cl::Option *clopt;
+    clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
+    if (clopt->getNumOccurrences() == 0)
+        cl::ProvidePositionalOption(clopt, "0", 1);
     // if the patch adding this option has been applied, lower its limit to provide
     // better DAGCombiner performance.
-    auto &clOptions = cl::getRegisteredOptions();
-    if (clOptions.find("combiner-store-merge-dependence-limit") != clOptions.end()) {
-        const char *const argv_smdl[] = {"", "-combiner-store-merge-dependence-limit=4"};
-        cl::ParseCommandLineOptions(sizeof(argv_smdl)/sizeof(argv_smdl[0]), argv_smdl);
-    }
+    clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
+    if (clopt && clopt->getNumOccurrences() == 0)
+        cl::ProvidePositionalOption(clopt, "4", 1);
 
     TargetOptions options = TargetOptions();
     //options.PrintMachineCode = true; //Print machine code produced during JIT compiling
@@ -8189,9 +8114,41 @@ extern "C" void jl_init_llvm(void)
     jl_data_layout.reset(DL);
 
     // Register GDB event listener
-    if(jl_using_gdb_jitevents)
+#if defined(JL_DEBUG_BUILD)
+    jl_using_gdb_jitevents = 1;
+# else
+    const char *jit_gdb = getenv("ENABLE_GDBLISTENER");
+    if (jit_gdb && atoi(jit_gdb)) {
+        jl_using_gdb_jitevents = 1;
+    }
+#endif
+    if (jl_using_gdb_jitevents)
         jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
 
+#if defined(JL_USE_INTEL_JITEVENTS) || \
+    defined(JL_USE_OPROFILE_JITEVENTS) || \
+    defined(JL_USE_PERF_JITEVENTS)
+    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
+#endif
+
+#if defined(JL_USE_INTEL_JITEVENTS)
+    if (jit_profiling && atoi(jit_profiling)) {
+        jl_using_intel_jitevents = 1;
+    }
+#endif
+
+#if defined(JL_USE_OPROFILE_JITEVENTS)
+    if (jit_profiling && atoi(jit_profiling)) {
+        jl_using_oprofile_jitevents = 1;
+    }
+#endif
+
+#if defined(JL_USE_PERF_JITEVENTS)
+    if (jit_profiling && atoi(jit_profiling)) {
+        jl_using_perf_jitevents= 1;
+    }
+#endif
+
 #ifdef JL_USE_INTEL_JITEVENTS
     if (jl_using_intel_jitevents)
         jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener());
@@ -8206,9 +8163,11 @@ extern "C" void jl_init_llvm(void)
     if (jl_using_perf_jitevents)
         jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener());
 #endif
+
+    cl::PrintOptionValues();
 }
 
-extern "C" void jl_init_codegen(void)
+extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void)
 {
     jl_init_llvm();
     // Now that the execution engine exists, initialize all modules
@@ -8222,7 +8181,7 @@ extern "C" void jl_init_codegen(void)
     jl_init_intrinsic_functions_codegen();
 }
 
-extern "C" void jl_teardown_codegen()
+extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl()
 {
     // output LLVM timings and statistics
     reportAndResetTimings();
@@ -8288,3 +8247,30 @@ extern void jl_write_bitcode_module(void *M, char *fname) {
     raw_fd_ostream OS(fname, EC, sys::fs::OF_None);
     llvm::WriteBitcodeToFile(*(llvm::Module*)M, OS);
 }
+
+#ifdef _OS_WINDOWS_
+#include <psapi.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include <llvm-c/Core.h>
+// Return the file path of the loaded libLLVM as a Symbol, or jl_nothing when it cannot be determined.
+extern "C" JL_DLLEXPORT jl_value_t *jl_get_libllvm_impl(void) JL_NOTSAFEPOINT
+{
+#if defined(_OS_WINDOWS_)
+    HMODULE mod; // only queried, never passed to FreeLibrary, so the lookup must not take a module reference
+    if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCSTR)&llvm::DebugFlag, &mod))
+        return jl_nothing;
+    char path[MAX_PATH];
+    DWORD len = GetModuleFileNameA(mod, path, sizeof(path));
+    if (len == 0 || len >= sizeof(path)) // 0: call failed; >= buffer size: path was truncated and is unusable
+        return jl_nothing;
+    return (jl_value_t*) jl_symbol(path);
+#else
+    Dl_info dli; // filled in by dladdr with information about the object containing LLVMContextCreate
+    if (!dladdr((void*)LLVMContextCreate, &dli))
+        return jl_nothing;
+    return (jl_value_t*) jl_symbol(dli.dli_fname);
+#endif
+}
diff --git a/src/codegen_shared.h b/src/codegen_shared.h
index f56854d2b4ca5..93c913fd7a766 100644
--- a/src/codegen_shared.h
+++ b/src/codegen_shared.h
@@ -5,6 +5,11 @@
 #include <llvm/Support/Debug.h>
 #include <llvm/IR/DebugLoc.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/MDBuilder.h>
+
+#define STR(csym)           #csym
+#define XSTR(csym)          STR(csym)
+#include "julia.h"
 
 enum AddressSpace {
     Generic = 0,
@@ -16,6 +21,24 @@ enum AddressSpace {
     LastSpecial = Loaded,
 };
 
+namespace JuliaType { // constructors for the LLVM types used to represent Julia values, built in the given LLVMContext
+    static inline llvm::StructType* get_jlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::StructType::get(C); // struct type with no elements
+    }
+
+    static inline llvm::PointerType* get_pjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_jlvalue_ty(C), 0); // pointer to jlvalue in address space 0 (AddressSpace::Generic)
+    }
+
+    static inline llvm::PointerType* get_prjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_jlvalue_ty(C), AddressSpace::Tracked); // pointer to jlvalue in the Tracked address space
+    }
+
+    static inline llvm::PointerType* get_ppjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_pjlvalue_ty(C), 0); // pointer (address space 0) to a pjlvalue
+    }
+}
+
 // JLCALL with API arguments ([extra], arg0, arg1, arg2, ...) has the following ABI calling conventions defined:
 #define JLCALL_F_CC (CallingConv::ID)37     // (jl_value_t *arg0, jl_value_t **argv, uint32_t nargv)
 #define JLCALL_F2_CC (CallingConv::ID)38    // (jl_value_t *arg0, jl_value_t **argv, uint32_t nargv, jl_value_t *extra)
@@ -65,3 +88,66 @@ static inline void llvm_dump(llvm::DebugLoc *dbg)
     dbg->print(llvm::dbgs());
     llvm::dbgs() << "\n";
 }
+
+static inline std::pair<llvm::MDNode*,llvm::MDNode*> tbaa_make_child_with_context(llvm::LLVMContext &ctxt, const char *name, llvm::MDNode *parent=nullptr, bool isConstant=false)
+{ // Build the TBAA scalar type node `name` plus a struct tag node for it; returns the pair (tag node, scalar type node).
+    llvm::MDBuilder mbuilder(ctxt);
+    llvm::MDNode *jtbaa = mbuilder.createTBAARoot("jtbaa");
+    llvm::MDNode *tbaa_root = mbuilder.createTBAAScalarTypeNode("jtbaa", jtbaa);
+    llvm::MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root); // child of tbaa_root unless `parent` is given
+    llvm::MDNode *n = mbuilder.createTBAAStructTagNode(scalar, scalar, 0, isConstant); // `scalar` is passed for both type arguments, with offset 0
+    return std::make_pair(n, scalar);
+}
+
+static inline llvm::MDNode *get_tbaa_gcframe(llvm::LLVMContext &ctxt) { // tag node for "jtbaa_gcframe" under the default root
+    return tbaa_make_child_with_context(ctxt, "jtbaa_gcframe").first;
+}
+static inline llvm::MDNode *get_tbaa_const(llvm::LLVMContext &ctxt) { // tag node for "jtbaa_const" under the default root, created with isConstant=true
+    return tbaa_make_child_with_context(ctxt, "jtbaa_const", nullptr, true).first;
+}
+
+static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instruction *inst)
+{ // Set `md` as the !tbaa metadata of `inst` and return `inst`.
+    inst->setMetadata(llvm::LLVMContext::MD_tbaa, md);
+    if (llvm::isa<llvm::LoadInst>(inst) && md && md == get_tbaa_const(md->getContext())) // NOTE(review): pointer equality presumably relies on LLVM uniquing identical metadata nodes - confirm
+        inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), llvm::None)); // loads tagged jtbaa_const also get an empty !invariant.load node
+    return inst;
+}
+
+// Bitcast `v` to the type `jl_value`; if that is a pointer type in a different address space than `v`, keep `v`'s address space.
+static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value)
+{
+    using namespace llvm;
+    if (isa<PointerType>(jl_value) &&
+        v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
+        // Rebuild the target pointer type with jl_value's element type but v's address space
+        Type *jl_value_addr =
+                PointerType::get(cast<PointerType>(jl_value)->getElementType(),
+                                 v->getType()->getPointerAddressSpace());
+        return builder.CreateBitCast(v, jl_value_addr);
+    }
+    else {
+        return builder.CreateBitCast(v, jl_value); // non-pointer target or same address space: cast directly
+    }
+}
+
+// Get PTLS through current task: emits, via `builder`, a load of the `ptls` field of the task `current_task` and returns it.
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task)
+{
+    using namespace llvm;
+    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
+    auto T_size = builder.GetInsertBlock()->getModule()->getDataLayout().getIntPtrType(builder.getContext()); // pointer-sized integer type of the module being built
+    const int ptls_offset = offsetof(jl_task_t, ptls); // byte offset of the field within jl_task_t
+    llvm::Value *pptls = builder.CreateInBoundsGEP(
+        JuliaType::get_pjlvalue_ty(builder.getContext()), current_task,
+        ConstantInt::get(T_size, ptls_offset / sizeof(void *)), // index is in pointer-sized elements, hence the division
+        "ptls_field");
+    LoadInst *ptls_load = builder.CreateAlignedLoad(
+        emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load");
+    // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
+    tbaa_decorate(get_tbaa_gcframe(builder.getContext()), ptls_load);
+    // Using `CastInst::Create` to get an `Instruction*` without explicit cast:
+    auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls");
+    builder.Insert(ptls); // CastInst::Create does not insert the instruction, so add it at the builder's insertion point
+    return ptls;
+}
\ No newline at end of file
diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc
index d035ab76aa6ad..80038837be0c4 100644
--- a/src/common_symbols1.inc
+++ b/src/common_symbols1.inc
@@ -1,100 +1,100 @@
-jl_symbol("getproperty"),
 jl_symbol("="),
-jl_symbol("Type"),
+jl_symbol("getproperty"),
+jl_symbol("apply_type"),
 jl_symbol("getfield"),
 jl_symbol("getindex"),
-jl_symbol("apply_type"),
 jl_symbol("convert"),
 jl_symbol("==="),
+jl_symbol("iterate"),
 jl_symbol("=="),
 jl_symbol("new"),
 jl_symbol("foreigncall"),
-jl_symbol("ccall"),
 jl_symbol("int.jl"),
-jl_symbol("+"),
-jl_symbol("boot.jl"),
-jl_symbol("not_int"),
+jl_symbol("throw"),
+jl_symbol("nothing"),
 jl_symbol("essentials.jl"),
-jl_symbol("sysimg.jl"),
-jl_symbol("<"),
+jl_symbol("+"),
 jl_symbol("unsafe_convert"),
+jl_symbol("not_int"),
 jl_symbol("-"),
-jl_symbol("iterate"),
+jl_symbol("boot.jl"),
 jl_symbol("number.jl"),
-jl_symbol("throw"),
-jl_symbol("promotion.jl"),
-jl_symbol("static_parameter"),
 jl_symbol("length"),
+jl_symbol("<"),
 jl_symbol("cconvert"),
+jl_symbol("Base.jl"),
+jl_symbol("promotion.jl"),
 jl_symbol("tuple.jl"),
+jl_symbol("static_parameter"),
+jl_symbol("isempty"),
+jl_symbol("<="),
 jl_symbol("array.jl"),
 jl_symbol("operators.jl"),
-jl_symbol("*"),
+jl_symbol("NamedTuple"),
 jl_symbol("bitcast"),
-jl_symbol("slt_int"),
-jl_symbol("isempty"),
-jl_symbol("indexed_iterate"),
-jl_symbol("size"),
 jl_symbol("!"),
-jl_symbol("nothing"),
-jl_symbol("NamedTuple"),
-jl_symbol("<="),
+jl_symbol("indexed_iterate"),
+jl_symbol("sle_int"),
 jl_symbol("bool.jl"),
-jl_symbol("string"),
-jl_symbol("!="),
-jl_symbol("deprecated.jl"),
-jl_symbol("_apply"),
-jl_symbol("none"),
-jl_symbol("meta"),
-jl_symbol("typeof"),
-jl_symbol("ifelse"),
-jl_symbol("name"),
+jl_symbol("Ptr"),
+jl_symbol("size"),
 jl_symbol("add_int"),
-jl_symbol("setindex!"),
+jl_symbol("slt_int"),
+jl_symbol("*"),
 jl_symbol("range.jl"),
+jl_symbol("abstractarray.jl"),
+jl_symbol("!="),
+jl_symbol("isa"),
+jl_symbol("setindex!"),
+jl_symbol("string"),
+jl_symbol("ifelse"),
 jl_symbol(":"),
-jl_symbol("depwarn"),
-jl_symbol("noinline"),
 jl_symbol(">"),
-jl_symbol("UInt8"),
-jl_symbol("abstractarray.jl"),
-jl_symbol("sub_int"),
-jl_symbol("max"),
-jl_symbol("sle_int"),
-jl_symbol("Typeof"),
-jl_symbol("mt"),
+jl_symbol("_apply_iterate"),
+jl_symbol("UInt64"),
 jl_symbol("&"),
-jl_symbol("Ptr"),
-jl_symbol("pointer.jl"),
+jl_symbol("max"),
 jl_symbol("rem"),
+jl_symbol("sub_int"),
 jl_symbol(">="),
-jl_symbol("typeassert"),
-jl_symbol("lshr_int"),
-jl_symbol("toInt64"),
-jl_symbol("trunc_int"),
+jl_symbol("UInt8"),
+jl_symbol("iterators.jl"),
+jl_symbol("Int64"),
 jl_symbol("pairs"),
 jl_symbol("and_int"),
 jl_symbol("last"),
-jl_symbol("iterators.jl"),
-jl_symbol("first"),
-jl_symbol("eq_int"),
-jl_symbol("throw_inexacterror"),
-jl_symbol("map"),
-jl_symbol("UInt64"),
+jl_symbol("typeof"),
 jl_symbol("arrayref"),
-jl_symbol("Int"),
-jl_symbol("reinterpret"),
-jl_symbol("Int64"),
-jl_symbol("setfield!"),
-jl_symbol("kwfunc"),
+jl_symbol("pointer.jl"),
+jl_symbol("toInt64"),
 jl_symbol("arraylen"),
-jl_symbol("axes"),
+jl_symbol("typeassert"),
+jl_symbol("map"),
+jl_symbol("kwfunc"),
 jl_symbol("ArgumentError"),
-jl_symbol("macro expansion"),
+jl_symbol("lshr_int"),
+jl_symbol("axes"),
+jl_symbol("reinterpret"),
+jl_symbol("Array"),
+jl_symbol("first"),
+jl_symbol("trunc_int"),
+jl_symbol("OneTo"),
+jl_symbol("haskey"),
+jl_symbol("Int"),
+jl_symbol("oneto"),
+jl_symbol("eq_int"),
+jl_symbol("throw_inexacterror"),
 jl_symbol("toUInt64"),
-jl_symbol("check_top_bit"),
-jl_symbol("is_top_bit_set"),
-jl_symbol("isa"),
+jl_symbol("arraysize"),
 jl_symbol("UInt"),
-jl_symbol("haskey"),
 jl_symbol("setproperty!"),
+jl_symbol("check_top_bit"),
+jl_symbol("promote"),
+jl_symbol("unsigned"),
+jl_symbol("is_top_bit_set"),
+jl_symbol("structdiff"),
+jl_symbol("undef"),
+jl_symbol("sizeof"),
+jl_symbol("String"),
+jl_symbol("namedtuple.jl"),
+jl_symbol("pop"),
diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc
index d49528920c0e2..a28f1ef50af24 100644
--- a/src/common_symbols2.inc
+++ b/src/common_symbols2.inc
@@ -1,254 +1,254 @@
-jl_symbol("promote"),
-jl_symbol("undef"),
+jl_symbol("inbounds"),
+jl_symbol("strings/string.jl"),
+jl_symbol("Ref"),
 jl_symbol("Vector"),
-jl_symbol("parent"),
+jl_symbol("kwerr"),
 jl_symbol("_promote"),
-jl_symbol("Ref"),
-jl_symbol("push!"),
-jl_symbol("arraysize"),
-jl_symbol("jl_value_ptr"),
-jl_symbol("mutable"),
-jl_symbol("<<"),
-jl_symbol("pointer_from_objref"),
-jl_symbol("promote_typeof"),
-jl_symbol("unsigned"),
-jl_symbol("zext_int"),
-jl_symbol("strings/string.jl"),
+jl_symbol("sext_int"),
 jl_symbol("pointer"),
-jl_symbol("jl_alloc_array_1d"),
-jl_symbol("inbounds"),
+jl_symbol("similar"),
 jl_symbol("arrayset"),
-jl_symbol("data"),
+jl_symbol("axes1"),
+jl_symbol("eachindex"),
 jl_symbol("|"),
-jl_symbol(">>"),
-jl_symbol("pop"),
-jl_symbol("sizeof"),
-jl_symbol("strings/basic.jl"),
-jl_symbol("namedtuple.jl"),
-jl_symbol("structdiff"),
-jl_symbol("print"),
-jl_symbol("bitarray.jl"),
-jl_symbol("oftype"),
-jl_symbol("kwerr"),
-jl_symbol("adjoint"),
 jl_symbol("ult_int"),
-jl_symbol("isdefined"),
-jl_symbol("shl_int"),
 jl_symbol("lastindex"),
-jl_symbol("DimensionMismatch"),
-jl_symbol("abstractdict.jl"),
-jl_symbol("zero"),
+jl_symbol("setfield!"),
+jl_symbol("UnitRange"),
+jl_symbol("push!"),
 jl_symbol("Bool"),
 jl_symbol("Colon"),
-jl_symbol("copy"),
-jl_symbol("Cvoid"),
 jl_symbol("fieldtype"),
-jl_symbol("add_ptr"),
-jl_symbol("isdone"),
-jl_symbol("eachindex"),
-jl_symbol("eltype"),
-jl_symbol("float.jl"),
 jl_symbol("unitrange_last"),
-jl_symbol("strings/io.jl"),
+jl_symbol("bitarray.jl"),
+jl_symbol("<<"),
+jl_symbol("zext_int"),
+jl_symbol("Tuple"),
+jl_symbol("reflection.jl"),
+jl_symbol("TypeError"),
+jl_symbol("print"),
+jl_symbol("eltype"),
+jl_symbol(">>"),
+jl_symbol("strings/basic.jl"),
 jl_symbol("gc_preserve_begin"),
+jl_symbol("require_one_based_indexing"),
 jl_symbol("gc_preserve_end"),
-jl_symbol("tail"),
-jl_symbol("String"),
-jl_symbol("mul_int"),
+jl_symbol("DimensionMismatch"),
 jl_symbol("indices.jl"),
-jl_symbol("in"),
-jl_symbol("BlasInt"),
-jl_symbol("indices1"),
+jl_symbol("Cvoid"),
+jl_symbol("oftype"),
+jl_symbol("zero"),
+jl_symbol("float.jl"),
 jl_symbol("Any"),
-jl_symbol("min"),
-jl_symbol("Tuple"),
-jl_symbol("error"),
-jl_symbol("gcutils.jl"),
-jl_symbol("ptr"),
+jl_symbol("checkbounds"),
 jl_symbol("or_int"),
+jl_symbol("isdefined"),
 jl_symbol("dict.jl"),
+jl_symbol("strings/io.jl"),
+jl_symbol("shl_int"),
+jl_symbol("copy"),
+jl_symbol("macro expansion"),
+jl_symbol("abstractdict.jl"),
+jl_symbol("in"),
+jl_symbol("io.jl"),
+jl_symbol("BlasInt"),
 jl_symbol("Float64"),
-jl_symbol("Array"),
-jl_symbol("reflection.jl"),
-jl_symbol("transpose"),
-jl_symbol("copyto!"),
-jl_symbol("checkbounds"),
-jl_symbol("stride"),
-jl_symbol("unsafe_load"),
-jl_symbol("show"),
-jl_symbol("broadcasted"),
-jl_symbol("chkstride1"),
-jl_symbol("contents"),
-jl_symbol("_growend!"),
-jl_symbol("argtail"),
-jl_symbol("trunc"),
+jl_symbol("mul_int"),
 jl_symbol("UInt32"),
-jl_symbol("refvalue.jl"),
-jl_symbol("io.jl"),
-jl_symbol("jl_array_grow_end"),
-jl_symbol("multidimensional.jl"),
-jl_symbol("real"),
-jl_symbol("pointerref"),
-jl_symbol("jl_array_ptr"),
-jl_symbol("keys"),
-jl_symbol("Int32"),
-jl_symbol("get"),
-jl_symbol("stop"),
-jl_symbol("liblapack"),
-jl_symbol("Enums.jl"),
-jl_symbol("unsafe_length"),
-jl_symbol("one"),
-jl_symbol("broadcast.jl"),
-jl_symbol("BoundsError"),
-jl_symbol("char.jl"),
 jl_symbol("C_NULL"),
-jl_symbol("x"),
+jl_symbol("Integer"),
+jl_symbol("!=="),
+jl_symbol("merge"),
+jl_symbol("BoundsError"),
+jl_symbol("broadcasted"),
+jl_symbol("Cint"),
+jl_symbol("min"),
+jl_symbol("libblastrampoline"),
+jl_symbol("iszero"),
+jl_symbol("refvalue.jl"),
+jl_symbol("stride"),
+jl_symbol("error"),
 jl_symbol("ncodeunits"),
+jl_symbol("LinearIndices"),
+jl_symbol("Clong"),
+jl_symbol("pair.jl"),
+jl_symbol("_growend!"),
+jl_symbol("char.jl"),
+jl_symbol("copyto!"),
+jl_symbol("get"),
+jl_symbol("tail"),
+jl_symbol("real"),
+jl_symbol("Union"),
+jl_symbol("multidimensional.jl"),
 jl_symbol("enter"),
-jl_symbol("Float32"),
-jl_symbol("value"),
-jl_symbol("write"),
 jl_symbol("leave"),
-jl_symbol("isless"),
+jl_symbol("add_ptr"),
+jl_symbol("chkstride1"),
 jl_symbol("Expr"),
-jl_symbol("gmp.jl"),
-jl_symbol("AssertionError"),
+jl_symbol("write"),
+jl_symbol("broadcast.jl"),
+jl_symbol("show.jl"),
+jl_symbol("none"),
+jl_symbol("Generator"),
+jl_symbol("Int32"),
 jl_symbol("materialize"),
-jl_symbol("Union"),
-jl_symbol("Integer"),
-jl_symbol("neg_int"),
-jl_symbol("print_to_string"),
-jl_symbol("chklapackerror"),
-jl_symbol("prod"),
+jl_symbol("show"),
+jl_symbol("lock"),
+jl_symbol("unsafe_load"),
+jl_symbol("gmp.jl"),
+jl_symbol("mpfr.jl"),
+jl_symbol("Symbol"),
+jl_symbol("Pair"),
 jl_symbol("resize!"),
-jl_symbol("ldiv!"),
-jl_symbol("Cint"),
+jl_symbol("neg_int"),
+jl_symbol("strings/substring.jl"),
+jl_symbol("AssertionError"),
+jl_symbol("identity"),
+jl_symbol("one"),
+jl_symbol("reduce.jl"),
+jl_symbol("libcholmod"),
+jl_symbol("isless"),
+jl_symbol("reducedim.jl"),
 jl_symbol("checksquare"),
-jl_symbol("args"),
-jl_symbol("_length"),
-jl_symbol("!=="),
+jl_symbol("sort.jl"),
+jl_symbol("generator.jl"),
+jl_symbol("pointer_from_objref"),
+jl_symbol("Float32"),
+jl_symbol("chklapackerror"),
+jl_symbol("parent"),
+jl_symbol("task.jl"),
+jl_symbol("div"),
+jl_symbol("cholmod_common"),
+jl_symbol("ht_keyindex"),
+jl_symbol("pop_exception"),
+jl_symbol("c.jl"),
+jl_symbol("firstindex"),
+jl_symbol("some.jl"),
+jl_symbol("iobuffer.jl"),
+jl_symbol("sub_ptr"),
+jl_symbol("vect"),
+jl_symbol("unsafe_string"),
+jl_symbol("llvmcall"),
+jl_symbol("checkindex"),
+jl_symbol("_call_latest"),
+jl_symbol("rethrow"),
+jl_symbol("pointerref"),
+jl_symbol("println"),
+jl_symbol("keys"),
+jl_symbol("RefValue"),
 jl_symbol("_expr"),
-jl_symbol("merge"),
-jl_symbol("dims"),
-jl_symbol("the_exception"),
-jl_symbol("Base"),
 jl_symbol("toUInt32"),
-jl_symbol("mpfr.jl"),
-jl_symbol("<:"),
-jl_symbol("div"),
-jl_symbol("start"),
-jl_symbol("pair.jl"),
+jl_symbol("ismissing"),
+jl_symbol("throw_boundserror"),
+jl_symbol("IteratorSize"),
+jl_symbol("iddict.jl"),
+jl_symbol("to_shape"),
+jl_symbol("Csize_t"),
+jl_symbol("~"),
+jl_symbol("argtail"),
+jl_symbol("include"),
+jl_symbol("set.jl"),
+jl_symbol("isequal"),
 jl_symbol("refpointer.jl"),
-jl_symbol("chunks"),
+jl_symbol("=>"),
 jl_symbol("Val"),
-jl_symbol("show.jl"),
-jl_symbol("sort.jl"),
+jl_symbol("Base"),
+jl_symbol("%"),
+jl_symbol("collect"),
+jl_symbol("Type##kw"),
+jl_symbol("typemax"),
 jl_symbol("fill!"),
-jl_symbol("step"),
-jl_symbol("vals"),
-jl_symbol("toInt32"),
-jl_symbol("mul!"),
-jl_symbol("vect"),
-jl_symbol("len"),
-jl_symbol("ashr_int"),
-jl_symbol("~"),
-jl_symbol("count"),
-jl_symbol("ht_keyindex"),
-jl_symbol("iobuffer.jl"),
-jl_symbol("Generator"),
-jl_symbol("eval"),
-jl_symbol("f"),
-jl_symbol("throw_undef_if_not"),
 jl_symbol("ule_int"),
+jl_symbol("atomics.jl"),
 jl_symbol("libgit2"),
-jl_symbol("head"),
-jl_symbol("LinearIndices"),
-jl_symbol("collect"),
-jl_symbol("set.jl"),
-jl_symbol("lmul!"),
-jl_symbol("offset"),
-jl_symbol("abs"),
-jl_symbol("Symbol"),
-jl_symbol("identity"),
-jl_symbol("typemax"),
+jl_symbol("BigFloat"),
+jl_symbol("ashr_int"),
 jl_symbol("boundscheck"),
-jl_symbol("isequal"),
-jl_symbol("id"),
+jl_symbol("abs"),
 jl_symbol("^"),
-jl_symbol("generator.jl"),
-jl_symbol("=>"),
-jl_symbol("c.jl"),
-jl_symbol("fastmath.jl"),
-jl_symbol("copyast"),
-jl_symbol("IteratorSize"),
-jl_symbol("checkindex"),
-jl_symbol("strings/substring.jl"),
-jl_symbol("println"),
-jl_symbol("throw_boundserror"),
-jl_symbol("io"),
-jl_symbol("dict"),
-jl_symbol("Cstring"),
-jl_symbol("codeunit"),
-jl_symbol("unsafe_string"),
-jl_symbol("n"),
-jl_symbol("close"),
-jl_symbol("BigFloat"),
-jl_symbol("%"),
-jl_symbol("read"),
-jl_symbol("checked.jl"),
-jl_symbol("checked_trunc_sint"),
-jl_symbol("math.jl"),
-jl_symbol("round"),
-jl_symbol("iostream.jl"),
+jl_symbol("ensure_initialized"),
+jl_symbol("_array_for"),
+jl_symbol("strings/util.jl"),
+jl_symbol("Dict"),
 jl_symbol("Nothing"),
-jl_symbol("state"),
 jl_symbol("compiler/ssair/ir.jl"),
-jl_symbol("stream.jl"),
-jl_symbol("Box"),
-jl_symbol("missing.jl"),
-jl_symbol("rmul!"),
-jl_symbol("process.jl"),
-jl_symbol("Core"),
-jl_symbol("reduce.jl"),
-jl_symbol("SizeUnknown"),
-jl_symbol("diag"),
-jl_symbol("atomics.jl"),
-jl_symbol("promote_rule"),
-jl_symbol("_mod64"),
-jl_symbol("llvmcall"),
-jl_symbol("reducedim.jl"),
-jl_symbol("to_shape"),
-jl_symbol("ComplexF32"),
-jl_symbol("HasShape"),
-jl_symbol("block"),
-jl_symbol("checked_trunc_uint"),
-jl_symbol("float"),
-jl_symbol("unsafe_trunc"),
-jl_symbol("isnan"),
+jl_symbol("unsafe_write"),
+jl_symbol("util.jl"),
+jl_symbol("toInt32"),
+jl_symbol("loading.jl"),
+jl_symbol("value"),
+jl_symbol("expr.jl"),
+jl_symbol("print_to_string"),
+jl_symbol("the_exception"),
+jl_symbol("nonzeros"),
+jl_symbol("<:"),
+jl_symbol("KeyError"),
 jl_symbol("xor"),
-jl_symbol("task.jl"),
-jl_symbol("complex.jl"),
-jl_symbol(">>>"),
-jl_symbol("bitset.jl"),
 jl_symbol("logging.jl"),
-jl_symbol("s"),
-jl_symbol("libmpfr"),
+jl_symbol("stat.jl"),
+jl_symbol("close"),
+jl_symbol("adjoint"),
+jl_symbol("meta"),
+jl_symbol("path.jl"),
+jl_symbol("round"),
+jl_symbol("Cstring"),
+jl_symbol("SizeUnknown"),
+jl_symbol("esc"),
+jl_symbol("missing.jl"),
+jl_symbol("throw_undef_if_not"),
+jl_symbol("error.jl"),
+jl_symbol("Type"),
+jl_symbol("mul!"),
+jl_symbol("math.jl"),
+jl_symbol("unsafe_trunc"),
 jl_symbol("missing"),
-jl_symbol("nzval"),
-jl_symbol("special/trig.jl"),
-jl_symbol("loading.jl"),
-jl_symbol("KeyError"),
-jl_symbol("cmp"),
-jl_symbol("promote_type"),
 jl_symbol("subarray.jl"),
-jl_symbol("handle"),
+jl_symbol("noinline"),
+jl_symbol("isnan"),
+jl_symbol("ldiv!"),
+jl_symbol("DataType"),
+jl_symbol("codeunit"),
+jl_symbol("condition.jl"),
+jl_symbol("step"),
+jl_symbol("copyast"),
+jl_symbol("bitset.jl"),
+jl_symbol("float"),
+jl_symbol("fastmath.jl"),
+jl_symbol("_mod64"),
 jl_symbol("_div64"),
-jl_symbol("ht"),
-jl_symbol("UInt128"),
-jl_symbol("zeros"),
-jl_symbol("Dict"),
 jl_symbol("all"),
-jl_symbol("ComplexF64"),
+jl_symbol("parse"),
+jl_symbol("joinpath"),
+jl_symbol("nextind"),
+jl_symbol("regex.jl"),
+jl_symbol("Enums.jl"),
+jl_symbol("promote_type"),
+jl_symbol("Cdouble"),
+jl_symbol("ComplexF32"),
+jl_symbol("read"),
+jl_symbol("intfuncs.jl"),
 jl_symbol("Complex"),
-jl_symbol("checked_add"),
-jl_symbol("mod"),
+jl_symbol("_deleteend!"),
+jl_symbol("stat"),
+jl_symbol("UnionAll"),
+jl_symbol("special/trig.jl"),
+jl_symbol("UInt128"),
+jl_symbol("_copyto_impl!"),
+jl_symbol("stream.jl"),
+jl_symbol("lmul!"),
+jl_symbol("repr"),
+jl_symbol("promote_rule"),
+jl_symbol("xor_int"),
+jl_symbol("complex.jl"),
+jl_symbol("transpose"),
+jl_symbol(">>>"),
+jl_symbol("cholmod_sparse"),
+jl_symbol("filemode"),
+jl_symbol("ComplexF64"),
+jl_symbol("SparseMatrixCSC"),
+jl_symbol("view"),
+jl_symbol("GitError"),
+jl_symbol("zeros"),
+jl_symbol("InexactError"),
+jl_symbol("LogLevel"),
+jl_symbol("between"),
diff --git a/src/coverage.cpp b/src/coverage.cpp
new file mode 100644
index 0000000000000..4ce33c105691c
--- /dev/null
+++ b/src/coverage.cpp
@@ -0,0 +1,214 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include <string>
+#include <fstream>
+#include <map>
+#include <vector>
+
+#include "llvm-version.h"
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/StringMap.h>
+#include <llvm/Support/raw_ostream.h>
+
+#include "julia.h"
+#include "julia_internal.h"
+
+using namespace llvm;
+
+static int codegen_imaging_mode(void)
+{   // Nonzero when image codegen is requested, or when generating non-incremental output.
+    return jl_options.image_codegen ? 1 : (jl_generating_output() && !jl_options.incremental);
+}
+
+// Logging for code coverage and memory allocation
+
+const int logdata_blocksize = 32; // target getting nearby lines in the same general cache area and reducing calls to malloc by chunking
+typedef uint64_t logdata_block[logdata_blocksize];
+typedef StringMap< std::vector<logdata_block*> > logdata_t;
+
+static uint64_t *allocLine(std::vector<logdata_block*> &vec, int line)
+{
+    // Return the counter slot for `line`, growing `vec` and allocating the block on demand; a slot still at 0 is set to 1.
+    const unsigned blockidx = line / logdata_blocksize;
+    if (blockidx >= vec.size())
+        vec.resize(blockidx + 1);
+    logdata_block *&blk = vec[blockidx];
+    if (!blk)
+        blk = (logdata_block*)calloc(1, sizeof(logdata_block));
+    uint64_t *counter = &(*blk)[line % logdata_blocksize];
+    if (*counter == 0)
+        *counter = 1;
+    return counter;
+}
+
+// Code coverage
+
+static logdata_t coverageData;
+
+JL_DLLEXPORT void jl_coverage_alloc_line(StringRef filename, int line)
+{
+    assert(!codegen_imaging_mode());
+    // Placeholder file names and negative line numbers never get a slot.
+    if (line >= 0 && filename != "" && filename != "none" && filename != "no file" && filename != "<missing>")
+        allocLine(coverageData[filename], line);
+}
+
+JL_DLLEXPORT uint64_t *jl_coverage_data_pointer(StringRef filename, int line)
+{   // Counter slot for filename:line in coverageData (allocLine creates it on demand); no name/line filtering here.
+    return allocLine(coverageData[filename], line);
+}
+
+extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char *filename_, size_t len_filename, int line)
+{
+    // Increment the counter for filename:line; skipped in imaging mode, for placeholder names and for negative lines.
+    const StringRef filename(filename_, len_filename);
+    const bool placeholder = filename == "" || filename == "none" || filename == "no file" || filename == "<missing>";
+    if (codegen_imaging_mode() || placeholder || line < 0)
+        return;
+    ++*allocLine(coverageData[filename], line);
+}
+
+// Memory allocation log (malloc_log)
+
+static logdata_t mallocData;
+
+JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line)
+{   // Counter slot for filename:line in mallocData (allocLine creates it on demand); no name/line filtering here.
+    return allocLine(mallocData[filename], line);
+}
+
+// Resets the malloc counts: every slot holding a non-zero value is set back
+// to 1 (the value allocLine gives a fresh slot), while slots that are still 0
+// are left untouched.
+extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
+{
+    for (auto &entry : mallocData) {
+        std::vector<logdata_block*> &blocks = entry.second;
+        for (logdata_block *blk : blocks) {
+            if (blk == NULL)
+                continue; // block was never allocated
+            for (uint64_t &count : *blk) {
+                if (count > 0)
+                    count = 1;
+            }
+        }
+    }
+    // Runs after all counters have been reset.
+    jl_gc_sync_total_bytes(0);
+}
+
+// Write an annotated copy of every tracked source file to `<path><extension>`.
+// Each source line is prefixed with its count (stored value minus one), or
+// with '-' when the stored value is 0, followed by a space and the line text.
+static void write_log_data(logdata_t &logData, const char *extension)
+{
+    // Relative names are looked up under `<julia_bindir>/../share/julia/base/`.
+    std::string base = std::string(jl_options.julia_bindir);
+    base = base + "/../share/julia/base/";
+    logdata_t::iterator it = logData.begin();
+    for (; it != logData.end(); it++) {
+        std::string filename(it->first());
+        std::vector<logdata_block*> &values = it->second;
+        if (values.empty())
+            continue;
+        if (!jl_isabspath(filename.c_str()))
+            filename = base + filename;
+        std::ifstream inf(filename.c_str());
+        if (!inf.is_open())
+            continue; // source file cannot be opened: nothing to annotate
+        std::string outfile = filename + extension;
+        std::ofstream outf(outfile.c_str(), std::ofstream::trunc | std::ofstream::out | std::ofstream::binary);
+        if (outf.is_open()) {
+            inf.exceptions(std::ifstream::badbit);
+            outf.exceptions(std::ofstream::failbit | std::ofstream::badbit);
+            // Read with std::getline into a std::string so that source lines of
+            // any length are copied in full (a fixed-size buffer would truncate
+            // them) and no overflow-skipping via std::numeric_limits is needed.
+            std::string line;
+            int l = 1; // source line numbers are 1-based
+            unsigned block = 0;
+            while (std::getline(inf, line)) {
+                logdata_block *data = NULL;
+                if (block < values.size())
+                    data = values[block];
+                uint64_t value = data ? (*data)[l] : 0;
+                // Advance to the next slot, rolling over into the next block.
+                if (++l >= logdata_blocksize) {
+                    l = 0;
+                    block++;
+                }
+                // The count (or '-') is right-aligned in a 9-column field.
+                outf.width(9);
+                if (value == 0)
+                    outf << '-';
+                else
+                    outf << (value - 1);
+                outf.width(0);
+                outf << " " << line << '\n';
+            }
+            outf.close();
+        }
+        inf.close();
+    }
+}
+
+// Write `logData` to `outfile` as LCOV-style records: per file an `SF:` line,
+// one `DA:<line>,<count>` per slot with a non-zero stored value (count is the
+// stored value minus one), then the `LH:`/`LF:` totals and `end_of_record`.
+static void write_lcov_data(logdata_t &logData, const std::string &outfile)
+{
+    std::ofstream outf(outfile.c_str(), std::ofstream::ate | std::ofstream::out | std::ofstream::binary);
+    //std::string base = std::string(jl_options.julia_bindir);
+    //base = base + "/../share/julia/base/";
+    for (auto &entry : logData) {
+        const std::vector<logdata_block*> &blocks = entry.second;
+        if (blocks.empty())
+            continue;
+        outf << "SF:" << entry.first().str() << '\n';
+        size_t hit = 0;   // slots whose stored value exceeds 1
+        size_t found = 0; // slots with any non-zero stored value
+        size_t lno = 0;
+        for (logdata_block *blk : blocks) {
+            if (blk == NULL) {
+                // Unallocated block: skip its whole range of line numbers.
+                lno += logdata_blocksize;
+                continue;
+            }
+            for (uint64_t cov : *blk) {
+                if (cov > 0) {
+                    found++;
+                    if (cov > 1)
+                        hit++;
+                    outf << "DA:" << lno << ',' << (cov - 1) << '\n';
+                }
+                lno++;
+            }
+        }
+        // Per-file summary: `LH` reports `hit`, `LF` reports `found`.
+        outf << "LH:" << hit << '\n';
+        outf << "LF:" << found << '\n';
+        outf << "end_of_record\n";
+    }
+    outf.close();
+}
+
+extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output)
+{
+    if (!output) { // no output path: annotated copies with the suffix ".<pid>.cov"
+        std::string stm;
+        raw_string_ostream(stm) << "." << jl_getpid() << ".cov";
+        write_log_data(coverageData, stm.c_str());
+        return;
+    }
+    // Only paths ending in ".info" are handled; any other path writes nothing.
+    StringRef output_pattern(output);
+    if (output_pattern.endswith(".info"))
+        write_lcov_data(coverageData, jl_format_filename(output_pattern.str().c_str()));
+}
+
+extern "C" JL_DLLEXPORT void jl_write_malloc_log(void)
+{   // Writes mallocData through write_log_data using the suffix ".<pid>.mem".
+    std::string suffix;
+    raw_string_ostream(suffix) << "." << jl_getpid() << ".mem";
+    write_log_data(mallocData, suffix.c_str());
+}
diff --git a/src/crc32c.c b/src/crc32c.c
index 8bc9b0c23381b..1e57d8aef85db 100644
--- a/src/crc32c.c
+++ b/src/crc32c.c
@@ -80,7 +80,7 @@ JL_UNUSED static inline uint32_t crc32c_shift(const uint32_t zeros[][256], uint3
         zeros[2][(crc >> 16) & 0xff] ^ zeros[3][crc >> 24];
 }
 
-#if (defined(_CPU_X86_64_) || defined(_CPU_X86_)) && !defined(_COMPILER_MICROSOFT_)
+#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
 #  ifdef _CPU_X86_64_
 #    define CRC32_PTR "crc32q"
 #  else
diff --git a/src/datatype.c b/src/datatype.c
index 1a3ffa78170ac..3825641b96623 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -48,9 +48,9 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo
                                      jl_methtable_type);
     mt->name = jl_demangle_typename(name);
     mt->module = module;
-    mt->defs = jl_nothing;
-    mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
-    mt->cache = jl_nothing;
+    jl_atomic_store_relaxed(&mt->defs, jl_nothing);
+    jl_atomic_store_relaxed(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&mt->cache, jl_nothing);
     mt->max_args = 0;
     mt->kwsorter = NULL;
     mt->backedges = NULL;
@@ -69,8 +69,8 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
     tn->name = name;
     tn->module = module;
     tn->wrapper = NULL;
-    tn->cache = jl_emptysvec;
-    tn->linearcache = jl_emptysvec;
+    jl_atomic_store_relaxed(&tn->cache, jl_emptysvec);
+    jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec);
     tn->names = NULL;
     tn->hash = bitmix(bitmix(module ? module->build_id : 0, name->hash), 0xa1ada1da);
     tn->abstract = abstract;
@@ -242,14 +242,15 @@ int jl_struct_try_layout(jl_datatype_t *dt)
         return 1;
     else if (!jl_has_fixed_layout(dt))
         return 0;
+    // jl_has_fixed_layout also ensured that dt->types is assigned now
     jl_compute_field_offsets(dt);
     assert(dt->layout);
     return 1;
 }
 
-int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree) JL_NOTSAFEPOINT
+int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree)
 {
-    if (ty->name->mayinlinealloc && (ty->isconcretetype || ((jl_datatype_t*)jl_unwrap_unionall(ty->name->wrapper))->layout)) { // TODO: use jl_struct_try_layout(dt) (but it is a safepoint)
+    if (ty->name->mayinlinealloc && jl_struct_try_layout(ty)) {
         if (ty->layout->npointers > 0) {
             if (pointerfree)
                 return 0;
@@ -263,7 +264,7 @@ int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree) JL_NOTSAFEPOIN
     return 0;
 }
 
-static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbytes, size_t *align, int asfield) JL_NOTSAFEPOINT
+static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbytes, size_t *align, int asfield)
 {
     if (jl_is_uniontype(ty)) {
         unsigned na = union_isinlinable(((jl_uniontype_t*)ty)->a, 1, nbytes, align, asfield);
@@ -289,19 +290,19 @@ static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbyte
     return 0;
 }
 
-int jl_uniontype_size(jl_value_t *ty, size_t *sz) JL_NOTSAFEPOINT
+int jl_uniontype_size(jl_value_t *ty, size_t *sz)
 {
     size_t al = 0;
     return union_isinlinable(ty, 0, sz, &al, 0) != 0;
 }
 
-JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al)
 {
     unsigned countbits = union_isinlinable(eltype, 0, fsz, al, 1);
     return (countbits > 0 && countbits < 127) ? countbits : 0;
 }
 
-JL_DLLEXPORT int jl_stored_inline(jl_value_t *eltype) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_stored_inline(jl_value_t *eltype)
 {
     size_t fsz = 0, al = 0;
     return jl_islayout_inline(eltype, &fsz, &al);
@@ -316,7 +317,7 @@ int jl_pointer_egal(jl_value_t *t)
         return 1;
     if (t == (jl_value_t*)jl_bool_type)
         return 1;
-    if (jl_is_mutable_datatype(t) && // excludes abstract types
+    if (jl_is_mutable_datatype(jl_unwrap_unionall(t)) && // excludes abstract types
         t != (jl_value_t*)jl_string_type && // technically mutable, but compared by contents
         t != (jl_value_t*)jl_simplevector_type &&
         !jl_is_kind(t))
@@ -339,6 +340,10 @@ int jl_pointer_egal(jl_value_t *t)
             return 1;
         }
     }
+    if (jl_is_uniontype(t)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        return jl_pointer_egal(u->a) && jl_pointer_egal(u->b);
+    }
     return 0;
 }
 
@@ -628,7 +633,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
             if (fldn < 1 || fldn > jl_svec_len(fnames))
                 jl_errorf("invalid field attribute %lld", (long long)fldn);
             fldn--;
-            if (attr == atomic_sym) {
+            if (attr == jl_atomic_sym) {
                 if (!mutabl)
                     jl_errorf("invalid field attribute atomic for immutable struct");
                 if (atomicfields == NULL) {
@@ -726,23 +731,23 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
 #if MAX_ATOMIC_SIZE > MAX_POINTERATOMIC_SIZE
 #error MAX_ATOMIC_SIZE too large
 #endif
+#if MAX_ATOMIC_SIZE >= 16 && !defined(_P64)
+#error 12 byte GC pool size alignment unimplemented for 32-bit
+#endif
 #if MAX_POINTERATOMIC_SIZE > 16
 #error MAX_POINTERATOMIC_SIZE too large
 #endif
-#if MAX_POINTERATOMIC_SIZE >= 16
-#ifndef _P64
-#error 12 byte GC pool size not implemented for 32-bit
-#endif
-typedef __uint128_t uint128_t;
-typedef uint128_t jl_uatomicmax_t;
-#else
-typedef uint64_t jl_uatomicmax_t;
-#endif
-
 #if BYTE_ORDER != LITTLE_ENDIAN
 #error using masks for atomics (instead of memcpy like nb == 16) assumes little endian
 #endif
 
+#if MAX_POINTERATOMIC_SIZE >= 16
+typedef struct _jl_uint128_t {
+    uint64_t a;
+    uint64_t b;
+} jl_uint128_t;
+#endif
+
 static inline uint32_t zext_read32(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
 {
     uint32_t y = *(uint32_t*)x;
@@ -768,11 +773,11 @@ static inline uint64_t zext_read64(const jl_value_t *x, size_t nb) JL_NOTSAFEPOI
 #endif
 
 #if MAX_POINTERATOMIC_SIZE >= 16
-static inline uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+static inline jl_uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
 {
-    uint128_t y = 0;
+    jl_uint128_t y = {0};
     if (nb == 16)
-        y = *(uint128_t*)x;
+        y = *(jl_uint128_t*)x;
     else
         memcpy(&y, x, nb);
     return y;
@@ -813,34 +818,34 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
     size_t nb = jl_datatype_size(bt);
     // some types have special pools to minimize allocations
     if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
-    if (bt == jl_bool_type)    return (1 & jl_atomic_load((int8_t*)data)) ? jl_true : jl_false;
-    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_load((uint8_t*)data));
-    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_load((int64_t*)data));
-    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_load((int32_t*)data));
-    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_load((int8_t*)data));
-    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_load((int16_t*)data));
-    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_load((uint64_t*)data));
-    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_load((uint32_t*)data));
-    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_load((uint16_t*)data));
-    if (bt == jl_char_type)    return jl_box_char(jl_atomic_load((uint32_t*)data));
+    if (bt == jl_bool_type)    return (1 & jl_atomic_load((_Atomic(int8_t)*)data)) ? jl_true : jl_false;
+    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_load((_Atomic(uint8_t)*)data));
+    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_load((_Atomic(int64_t)*)data));
+    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_load((_Atomic(int32_t)*)data));
+    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_load((_Atomic(int8_t)*)data));
+    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_load((_Atomic(int16_t)*)data));
+    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_load((_Atomic(uint64_t)*)data));
+    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_load((_Atomic(uint32_t)*)data));
+    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data));
+    if (bt == jl_char_type)    return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data));
 
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
     // data is aligned to the power of two,
     // we will write too much of v, but the padding should exist
     if (nb == 1)
-        *(uint8_t*) v = jl_atomic_load((uint8_t*)data);
+        *(uint8_t*) v = jl_atomic_load((_Atomic(uint8_t)*)data);
     else if (nb <= 2)
-        *(uint16_t*)v = jl_atomic_load((uint16_t*)data);
+        *(uint16_t*)v = jl_atomic_load((_Atomic(uint16_t)*)data);
     else if (nb <= 4)
-        *(uint32_t*)v = jl_atomic_load((uint32_t*)data);
+        *(uint32_t*)v = jl_atomic_load((_Atomic(uint32_t)*)data);
 #if MAX_POINTERATOMIC_SIZE >= 8
     else if (nb <= 8)
-        *(uint64_t*)v = jl_atomic_load((uint64_t*)data);
+        *(uint64_t*)v = jl_atomic_load((_Atomic(uint64_t)*)data);
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16)
-        *(uint128_t*)v = jl_atomic_load((uint128_t*)data);
+        *(jl_uint128_t*)v = jl_atomic_load((_Atomic(jl_uint128_t)*)data);
 #endif
     else
         abort();
@@ -856,18 +861,18 @@ JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb)
     if (nb == 0)
         ;
     else if (nb == 1)
-        jl_atomic_store((uint8_t*)dst, *(uint8_t*)src);
+        jl_atomic_store((_Atomic(uint8_t)*)dst, *(uint8_t*)src);
     else if (nb == 2)
-        jl_atomic_store((uint16_t*)dst, *(uint16_t*)src);
+        jl_atomic_store((_Atomic(uint16_t)*)dst, *(uint16_t*)src);
     else if (nb <= 4)
-        jl_atomic_store((uint32_t*)dst, zext_read32(src, nb));
+        jl_atomic_store((_Atomic(uint32_t)*)dst, zext_read32(src, nb));
 #if MAX_POINTERATOMIC_SIZE >= 8
     else if (nb <= 8)
-        jl_atomic_store((uint64_t*)dst, zext_read64(src, nb));
+        jl_atomic_store((_Atomic(uint64_t)*)dst, zext_read64(src, nb));
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16)
-        jl_atomic_store((uint128_t*)dst, zext_read128(src, nb));
+        jl_atomic_store((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb));
 #endif
     else
         abort();
@@ -880,32 +885,32 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl
     jl_datatype_t *bt = (jl_datatype_t*)dt;
     // some types have special pools to minimize allocations
     if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
-    if (bt == jl_bool_type)    return (1 & jl_atomic_exchange((int8_t*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false;
-    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_exchange((uint8_t*)dst, *(int8_t*)src));
-    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_exchange((int64_t*)dst, *(int64_t*)src));
-    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_exchange((int32_t*)dst, *(int32_t*)src));
-    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_exchange((int8_t*)dst, *(int8_t*)src));
-    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_exchange((int16_t*)dst, *(int16_t*)src));
-    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_exchange((uint64_t*)dst, *(uint64_t*)src));
-    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src));
-    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src));
-    if (bt == jl_char_type)    return jl_box_char(jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src));
+    if (bt == jl_bool_type)    return (1 & jl_atomic_exchange((_Atomic(int8_t)*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false;
+    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(int8_t*)src));
+    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_exchange((_Atomic(int64_t)*)dst, *(int64_t*)src));
+    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_exchange((_Atomic(int32_t)*)dst, *(int32_t*)src));
+    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_exchange((_Atomic(int8_t)*)dst, *(int8_t*)src));
+    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_exchange((_Atomic(int16_t)*)dst, *(int16_t*)src));
+    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_exchange((_Atomic(uint64_t)*)dst, *(uint64_t*)src));
+    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
+    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src));
+    if (bt == jl_char_type)    return jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
 
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt);
     if (nb == 1)
-        *(uint8_t*)v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src);
+        *(uint8_t*)v = jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(uint8_t*)src);
     else if (nb == 2)
-        *(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src);
+        *(uint16_t*)v = jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src);
     else if (nb <= 4)
-        *(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, zext_read32(src, nb));
+        *(uint32_t*)v = jl_atomic_exchange((_Atomic(uint32_t)*)dst, zext_read32(src, nb));
 #if MAX_POINTERATOMIC_SIZE >= 8
     else if (nb <= 8)
-        *(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, zext_read64(src, nb));
+        *(uint64_t*)v = jl_atomic_exchange((_Atomic(uint64_t)*)dst, zext_read64(src, nb));
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16)
-        *(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, zext_read128(src, nb));
+        *(jl_uint128_t*)v = jl_atomic_exchange((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb));
 #endif
     else
         abort();
@@ -922,29 +927,29 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect
     }
     else if (nb == 1) {
         uint8_t y = *(uint8_t*)expected;
-        success = jl_atomic_cmpswap((uint8_t*)dst, &y, *(uint8_t*)src);
+        success = jl_atomic_cmpswap((_Atomic(uint8_t)*)dst, &y, *(uint8_t*)src);
     }
     else if (nb == 2) {
         uint16_t y = *(uint16_t*)expected;
-        success = jl_atomic_cmpswap((uint16_t*)dst, &y, *(uint16_t*)src);
+        success = jl_atomic_cmpswap((_Atomic(uint16_t)*)dst, &y, *(uint16_t*)src);
     }
     else if (nb <= 4) {
         uint32_t y = zext_read32(expected, nb);
         uint32_t z = zext_read32(src, nb);
-        success = jl_atomic_cmpswap((uint32_t*)dst, &y, z);
+        success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, &y, z);
     }
 #if MAX_POINTERATOMIC_SIZE >= 8
     else if (nb <= 8) {
         uint64_t y = zext_read64(expected, nb);
         uint64_t z = zext_read64(src, nb);
-        success = jl_atomic_cmpswap((uint64_t*)dst, &y, z);
+        success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y, z);
     }
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16) {
-        uint128_t y = zext_read128(expected, nb);
-        uint128_t z = zext_read128(src, nb);
-        success = jl_atomic_cmpswap((uint128_t*)dst, &y, z);
+        jl_uint128_t y = zext_read128(expected, nb);
+        jl_uint128_t z = zext_read128(src, nb);
+        success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y, z);
     }
 #endif
     else {
@@ -953,18 +958,13 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect
     return success;
 }
 
-JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettyp, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
 {
     // dst must have the required alignment for an atomic of the given size
     // n.b.: this does not spuriously fail if there are padding bits
-    jl_value_t *params[2];
-    params[0] = (jl_value_t*)dt;
-    params[1] = (jl_value_t*)jl_bool_type;
-    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
-    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
-    int isptr = jl_field_isptr(tuptyp, 0);
     jl_task_t *ct = jl_current_task;
-    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : tuptyp->size, isptr ? dt : tuptyp);
+    int isptr = jl_field_isptr(rettyp, 0);
+    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : rettyp->size, isptr ? dt : rettyp);
     int success;
     jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
     if (nb == 0) {
@@ -976,10 +976,10 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
         if (dt == et) {
             *y8 = *(uint8_t*)expected;
             uint8_t z8 = *(uint8_t*)src;
-            success = jl_atomic_cmpswap((uint8_t*)dst, y8, z8);
+            success = jl_atomic_cmpswap((_Atomic(uint8_t)*)dst, y8, z8);
         }
         else {
-            *y8 = jl_atomic_load((uint8_t*)dst);
+            *y8 = jl_atomic_load((_Atomic(uint8_t)*)dst);
             success = 0;
         }
     }
@@ -989,10 +989,10 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
         if (dt == et) {
             *y16 = *(uint16_t*)expected;
             uint16_t z16 = *(uint16_t*)src;
-            success = jl_atomic_cmpswap((uint16_t*)dst, y16, z16);
+            success = jl_atomic_cmpswap((_Atomic(uint16_t)*)dst, y16, z16);
         }
         else {
-            *y16 = jl_atomic_load((uint16_t*)dst);
+            *y16 = jl_atomic_load((_Atomic(uint16_t)*)dst);
             success = 0;
         }
     }
@@ -1002,13 +1002,13 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
             *y32 = zext_read32(expected, nb);
             uint32_t z32 = zext_read32(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((uint32_t*)dst, y32, z32);
+                success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
                 if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
         else {
-            *y32 = jl_atomic_load((uint32_t*)dst);
+            *y32 = jl_atomic_load((_Atomic(uint32_t)*)dst);
             success = 0;
         }
     }
@@ -1019,31 +1019,31 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
             *y64 = zext_read64(expected, nb);
             uint64_t z64 = zext_read64(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((uint64_t*)dst, y64, z64);
+                success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
                 if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
         else {
-            *y64 = jl_atomic_load((uint64_t*)dst);
+            *y64 = jl_atomic_load((_Atomic(uint64_t)*)dst);
             success = 0;
         }
     }
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16) {
-        uint128_t *y128 = (uint128_t*)y;
+        jl_uint128_t *y128 = (jl_uint128_t*)y;
         if (dt == et) {
             *y128 = zext_read128(expected, nb);
-            uint128_t z128 = zext_read128(src, nb);
+            jl_uint128_t z128 = zext_read128(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((uint128_t*)dst, y128, z128);
+                success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
                 if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
         else {
-            *y128 = jl_atomic_load((uint128_t*)dst);
+            *y128 = jl_atomic_load((_Atomic(jl_uint128_t)*)dst);
             success = 0;
         }
     }
@@ -1053,7 +1053,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
     }
     if (isptr) {
         JL_GC_PUSH1(&y);
-        jl_value_t *z = jl_gc_alloc(ct->ptls, tuptyp->size, tuptyp);
+        jl_value_t *z = jl_gc_alloc(ct->ptls, rettyp->size, rettyp);
         *(jl_value_t**)z = y;
         JL_GC_POP();
         y = z;
@@ -1398,7 +1398,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
         jl_bounds_error_int(v, i + 1);
     size_t offs = jl_field_offset(st, i);
     if (jl_field_isptr(st, i)) {
-        return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
+        return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs));
     }
     jl_value_t *ty = jl_field_type_concrete(st, i);
     int isatomic = jl_field_isatomic(st, i);
@@ -1435,7 +1435,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_RO
     assert(i < jl_datatype_nfields(st));
     size_t offs = jl_field_offset(st,i);
     assert(jl_field_isptr(st,i));
-    return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
+    return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs));
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
@@ -1476,7 +1476,7 @@ void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs,
         return;
     }
     if (jl_field_isptr(st, i)) {
-        jl_atomic_store_relaxed((jl_value_t**)((char*)v + offs), rhs);
+        jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
         jl_gc_wb(v, rhs);
     }
     else {
@@ -1526,9 +1526,9 @@ jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_
     jl_value_t *r;
     if (jl_field_isptr(st, i)) {
         if (isatomic)
-            r = jl_atomic_exchange((jl_value_t**)((char*)v + offs), rhs);
+            r = jl_atomic_exchange((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
         else
-            r = jl_atomic_exchange_relaxed((jl_value_t**)((char*)v + offs), rhs);
+            r = jl_atomic_exchange_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
         jl_gc_wb(v, rhs);
     }
     else {
@@ -1598,7 +1598,7 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu
         if (!jl_isa(y, ty))
             jl_type_error("modifyfield!", ty, y);
         if (jl_field_isptr(st, i)) {
-            jl_value_t **p = (jl_value_t**)((char*)v + offs);
+            _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs);
             if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_relaxed(p, &r, y))
                 break;
         }
@@ -1658,8 +1658,11 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu
         args[0] = r;
         jl_gc_safepoint();
     }
-    // args[0] == r (old); args[1] == y (new)
-    args[0] = jl_f_tuple(NULL, args, 2);
+    // args[0] == r (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
     JL_GC_POP();
     return args[0];
 }
@@ -1671,8 +1674,10 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
         jl_type_error("replacefield!", ty, rhs);
     size_t offs = jl_field_offset(st, i);
     jl_value_t *r = expected;
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
     if (jl_field_isptr(st, i)) {
-        jl_value_t **p = (jl_value_t**)((char*)v + offs);
+        _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs);
         int success;
         while (1) {
             success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_relaxed(p, &r, rhs);
@@ -1683,11 +1688,8 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
             if (success || !jl_egal(r, expected))
                 break;
         }
-        jl_value_t **args;
-        JL_GC_PUSHARGS(args, 2);
-        args[0] = r;
-        args[1] = success ? jl_true : jl_false;
-        r = jl_f_tuple(NULL, args, 2);
+        JL_GC_PUSH1(&r);
+        r = jl_new_struct(rettyp, r, success ? jl_true : jl_false);
         JL_GC_POP();
     }
     else {
@@ -1695,7 +1697,7 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
         int isunion = jl_is_uniontype(ty);
         int needlock;
         jl_value_t *rty = ty;
-        size_t fsz;
+        size_t fsz = jl_field_size(st, i);
         if (isunion) {
             assert(!isatomic);
             hasptr = 0;
@@ -1708,32 +1710,26 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
             needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
         }
         if (isatomic && !needlock) {
-            r = jl_atomic_cmpswap_bits((jl_datatype_t*)rty, (char*)v + offs, r, rhs, fsz);
+            r = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, rettyp, (char*)v + offs, r, rhs, fsz);
             int success = *((uint8_t*)r + fsz);
             if (success && hasptr)
                 jl_gc_multi_wb(v, rhs); // rhs is immutable
         }
         else {
             jl_task_t *ct = jl_current_task;
-            uint8_t *psel;
+            uint8_t *psel = NULL;
             if (isunion) {
-                size_t fsz = jl_field_size(st, i);
                 psel = &((uint8_t*)v)[offs + fsz - 1];
                 rty = jl_nth_union_component(rty, *psel);
             }
-            jl_value_t *params[2];
-            params[0] = rty;
-            params[1] = (jl_value_t*)jl_bool_type;
-            jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
-            JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
-            assert(!jl_field_isptr(tuptyp, 0));
-            r = jl_gc_alloc(ct->ptls, tuptyp->size, (jl_value_t*)tuptyp);
+            assert(!jl_field_isptr(rettyp, 0));
+            r = jl_gc_alloc(ct->ptls, rettyp->size, (jl_value_t*)rettyp);
             int success = (rty == jl_typeof(expected));
             if (needlock)
                 jl_lock_value(v);
-            size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-            memcpy((char*)r, (char*)v + offs, fsz);
+            memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits
             if (success) {
+                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
                 if (((jl_datatype_t*)rty)->layout->haspadding)
                     success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
                 else
@@ -1767,7 +1763,7 @@ JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT
 {
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
     size_t offs = jl_field_offset(st, i);
-    jl_value_t **fld = (jl_value_t**)((char*)v + offs);
+    _Atomic(jl_value_t*) *fld = (_Atomic(jl_value_t*)*)((char*)v + offs);
     if (!jl_field_isptr(st, i)) {
         jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(st, i);
         if (!jl_is_datatype(ft) || ft->layout->first_ptr < 0)
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index ad9ed659cbe0d..956559c179985 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -50,17 +50,17 @@ typedef object::SymbolRef SymRef;
 // and cannot have any interaction with the julia runtime
 static uv_rwlock_t threadsafe;
 
-extern "C" void jl_init_debuginfo(void)
+void jl_init_debuginfo(void)
 {
     uv_rwlock_init(&threadsafe);
 }
 
-extern "C" void jl_lock_profile(void)
+extern "C" JL_DLLEXPORT void jl_lock_profile_impl(void)
 {
     uv_rwlock_rdlock(&threadsafe);
 }
 
-extern "C" void jl_unlock_profile(void)
+extern "C" JL_DLLEXPORT void jl_unlock_profile_impl(void)
 {
     uv_rwlock_rdunlock(&threadsafe);
 }
@@ -136,7 +136,7 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
     mod_size = Size;
 #endif
     if (0) {
-        JL_LOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_lock(&jl_in_stackwalk);
         if (mod_size && !SymLoadModuleEx(GetCurrentProcess(), NULL, NULL, NULL, (DWORD64)Section, mod_size, NULL, SLMFLAG_VIRTUAL)) {
             static int warned = 0;
             if (!warned) {
@@ -156,7 +156,7 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
                 jl_printf(JL_STDERR, "WARNING: failed to insert function name %s into debug info: %lu\n", name, GetLastError());
             }
         }
-        JL_UNLOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_unlock(&jl_in_stackwalk);
     }
 #if defined(_CPU_X86_64_)
     jl_profile_atomic([&]() {
@@ -299,8 +299,8 @@ class JuliaJITEventListener: public JITEventListener
 
 #if defined(_OS_WINDOWS_)
         uint64_t SectionAddrCheck = 0;
-        uint64_t SectionLoadCheck = 0;
-        uint64_t SectionWriteCheck = 0;
+        uint64_t SectionLoadCheck = 0; (void)SectionLoadCheck;
+        uint64_t SectionWriteCheck = 0; (void)SectionWriteCheck;
         uint8_t *UnwindData = NULL;
 #if defined(_CPU_X86_64_)
         uint8_t *catchjmp = NULL;
@@ -426,6 +426,7 @@ JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
     ((JuliaJITEventListener*)Listener)->_NotifyObjectEmitted(Object, L, memmgr);
 }
 
+// TODO: convert the safe names from aotcompile.cpp:makeSafeName back into symbols
 static std::pair<char *, bool> jl_demangle(const char *name) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables since
@@ -709,8 +710,9 @@ static uint64_t jl_sysimage_base;
 static jl_sysimg_fptrs_t sysimg_fptrs;
 static jl_method_instance_t **sysimg_fvars_linfo;
 static size_t sysimg_fvars_n;
-void jl_register_fptrs(uint64_t sysimage_base, const jl_sysimg_fptrs_t *fptrs,
-                       jl_method_instance_t **linfos, size_t n)
+extern "C" JL_DLLEXPORT
+void jl_register_fptrs_impl(uint64_t sysimage_base, const jl_sysimg_fptrs_t *fptrs,
+    jl_method_instance_t **linfos, size_t n)
 {
     jl_sysimage_base = (uintptr_t)sysimage_base;
     sysimg_fptrs = *fptrs;
@@ -782,10 +784,30 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
             if (needs_name) {
                 if (auto name_or_err = sym_found.getName()) {
                     auto nameref = name_or_err.get();
+                    const char globalPrefix = // == DataLayout::getGlobalPrefix
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
+                        '_';
+#elif defined(_OS_DARWIN_)
+                        '_';
+#else
+                        '\0';
+#endif
+                    if (globalPrefix) {
+                        if (nameref[0] == globalPrefix)
+                          nameref = nameref.drop_front();
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
+                        else if (nameref[0] == '@') // X86_VectorCall
+                          nameref = nameref.drop_front();
+#endif
+                        // else VectorCall, Assembly, Internal, etc.
+                    }
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
+                    nameref = nameref.split('@').first;
+#endif
                     size_t len = nameref.size();
                     *name = (char*)realloc_s(*name, len + 1);
-                    (*name)[len] = 0;
                     memcpy(*name, nameref.data(), len);
+                    (*name)[len] = 0;
                     needs_name = false;
                 }
             }
@@ -802,12 +824,12 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
         PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)frame_info_func;
         pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO);
         pSymbol->MaxNameLen = MAX_SYM_NAME;
-        JL_LOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_lock(&jl_in_stackwalk);
         if (SymFromAddr(GetCurrentProcess(), dwAddress, &dwDisplacement64, pSymbol)) {
             // errors are ignored
             jl_copy_str(name, pSymbol->Name);
         }
-        JL_UNLOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_unlock(&jl_in_stackwalk);
     }
 #endif
 }
@@ -1046,10 +1068,10 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
 #ifdef _OS_WINDOWS_
     IMAGEHLP_MODULE64 ModuleInfo;
     ModuleInfo.SizeOfStruct = sizeof(IMAGEHLP_MODULE64);
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     jl_refresh_dbg_module_list();
     bool isvalid = SymGetModuleInfo64(GetCurrentProcess(), (DWORD64)pointer, &ModuleInfo);
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     if (!isvalid)
         return false;
 
@@ -1122,7 +1144,7 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
 #ifdef _OS_WINDOWS_
     static IMAGEHLP_LINE64 frame_info_line;
     DWORD dwDisplacement = 0;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     DWORD64 dwAddress = pointer;
     frame_info_line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
     if (SymGetLineFromAddr64(GetCurrentProcess(), dwAddress, &dwDisplacement, &frame_info_line)) {
@@ -1132,7 +1154,7 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
             jl_copy_str(&frame0->file_name, frame_info_line.FileName);
         frame0->line = frame_info_line.LineNumber;
     }
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
 #endif
     object::SectionRef Section;
     llvm::DIContext *context = NULL;
@@ -1189,7 +1211,7 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
 }
 
 // Set *name and *filename to either NULL or malloc'd string
-int jl_getFunctionInfo(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT
+extern "C" JL_DLLEXPORT int jl_getFunctionInfo_impl(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables if noInline
     // since it can be called from an unmanaged thread on OSX.
@@ -1477,6 +1499,13 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
     jl_profile_atomic([&]() {
         __register_frame(Addr);
     });
+
+    // First count the number of FDEs; if there are none, skip building the unwinder table
+    size_t nentries = 0;
+    processFDEs((char*)Addr, Size, [&](const char*){ nentries++; });
+    if (nentries == 0)
+        return;
+
     // Our unwinder
     unw_dyn_info_t *di = new unw_dyn_info_t;
     // In a shared library, this is set to the address of the PLT.
@@ -1484,13 +1513,10 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
     // not seem to be used on our supported architectures.
     di->gp = 0;
     // I'm not a great fan of the naming of this constant, but it means the
-    // right thing, which is a table of FDEs and ips.
+    // right thing, which is a table of FDEs and IPs.
     di->format = UNW_INFO_FORMAT_IP_OFFSET;
     di->u.rti.name_ptr = 0;
     di->u.rti.segbase = (unw_word_t)Addr;
-    // Now first count the number of FDEs
-    size_t nentries = 0;
-    processFDEs((char*)Addr, Size, [&](const char*){ nentries++; });
 
     uintptr_t start_ip = (uintptr_t)-1;
     uintptr_t end_ip = 0;
@@ -1625,8 +1651,8 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size)
 
 #endif
 
-extern "C"
-uint64_t jl_getUnwindInfo(uint64_t dwAddr)
+extern "C" JL_DLLEXPORT
+uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr)
 {
     // Might be called from unmanaged thread
     uv_rwlock_rdlock(&threadsafe);
diff --git a/src/disasm.cpp b/src/disasm.cpp
index dfc1e32b56eeb..73b394b77d0b2 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -5,11 +5,45 @@
 //
 // Original copyright:
 //
-//                     The LLVM Compiler Infrastructure
+// University of Illinois/NCSA
+// Open Source License
+// Copyright (c) 2003-2016 University of Illinois at Urbana-Champaign.
+// All rights reserved.
 //
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//  Developed by:
 //
+//    LLVM Team
+//
+//    University of Illinois at Urbana-Champaign
+//
+//    http://llvm.org
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+//    * Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimers.
+//
+//    * Redistributions in binary form must reproduce the above copyright notice,
+//      this list of conditions and the following disclaimers in the
+//      documentation and/or other materials provided with the distribution.
+//
+//    * Neither the names of the LLVM Team, University of Illinois at
+//      Urbana-Champaign, nor the names of its contributors may be used to
+//      endorse or promote products derived from this Software without specific
+//      prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
 //===----------------------------------------------------------------------===//
 //
 // This class implements a disassembler of a memory block, given a function
@@ -22,43 +56,53 @@
 #include <string>
 
 #include "llvm-version.h"
-#include <llvm/Object/ObjectFile.h>
-#include <llvm/BinaryFormat/MachO.h>
+
+// for outputting disassembly
+#include <llvm/ADT/Triple.h>
+#include <llvm/AsmParser/Parser.h>
 #include <llvm/BinaryFormat/COFF.h>
-#include <llvm/MC/MCInst.h>
-#include <llvm/MC/MCStreamer.h>
-#include <llvm/MC/MCSubtargetInfo.h>
-#include <llvm/MC/MCObjectFileInfo.h>
-#include <llvm/MC/MCRegisterInfo.h>
-#include <llvm/MC/MCAsmInfo.h>
+#include <llvm/BinaryFormat/MachO.h>
+#include <llvm/DebugInfo/DIContext.h>
+#include <llvm/DebugInfo/DWARF/DWARFContext.h>
+#include <llvm/ExecutionEngine/JITEventListener.h>
+#include <llvm/IR/AssemblyAnnotationWriter.h>
+#include <llvm/IR/DebugInfo.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
 #include <llvm/MC/MCAsmBackend.h>
+#include <llvm/MC/MCAsmInfo.h>
 #include <llvm/MC/MCCodeEmitter.h>
-#include <llvm/MC/MCInstPrinter.h>
-#include <llvm/MC/MCInstrInfo.h>
 #include <llvm/MC/MCContext.h>
+#include <llvm/MC/MCDisassembler/MCDisassembler.h>
+#include <llvm/MC/MCDisassembler/MCExternalSymbolizer.h>
 #include <llvm/MC/MCExpr.h>
+#include <llvm/MC/MCInst.h>
+#include <llvm/MC/MCInstPrinter.h>
 #include <llvm/MC/MCInstrAnalysis.h>
+#include <llvm/MC/MCInstrInfo.h>
+#include <llvm/MC/MCObjectFileInfo.h>
+#include <llvm/MC/MCRegisterInfo.h>
+#include <llvm/MC/MCStreamer.h>
+#include <llvm/MC/MCSubtargetInfo.h>
 #include <llvm/MC/MCSymbol.h>
-#include <llvm/AsmParser/Parser.h>
-#include <llvm/MC/MCDisassembler/MCDisassembler.h>
-#include <llvm/MC/MCDisassembler/MCExternalSymbolizer.h>
-#include <llvm/ADT/Triple.h>
+#include <llvm/Object/ObjectFile.h>
+#include <llvm/Support/FormattedStream.h>
 #include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/NativeFormatting.h>
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Support/TargetRegistry.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/FormattedStream.h>
-#include <llvm/Support/NativeFormatting.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
-#include <llvm/IR/LLVMContext.h>
-#include <llvm/DebugInfo/DIContext.h>
-#include <llvm/DebugInfo/DWARF/DWARFContext.h>
-#include <llvm/IR/DebugInfo.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/AssemblyAnnotationWriter.h>
+
+// for outputting assembly
+#include <llvm/CodeGen/AsmPrinter.h>
+#include <llvm/CodeGen/AsmPrinterHandler.h>
+#include <llvm/CodeGen/MachineModuleInfo.h>
+#include <llvm/CodeGen/Passes.h>
+#include <llvm/CodeGen/TargetPassConfig.h>
+#include <llvm/Support/CodeGen.h>
 #include <llvm/IR/LegacyPassManager.h>
 
 #include "julia.h"
@@ -185,17 +229,21 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
             // if so, drop all existing calls to it from the top of the context
             // AND check if instead the context was previously printed that way
             // but now has removed the recursive frames
-            StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';');
+            StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';'); // last matching frame
             if ((nctx < nframes && StringRef(DI.at(nframes - nctx - 1).FunctionName).rtrim(';') == method) ||
                 (nctx < context.size() && StringRef(context.at(nctx).FunctionName).rtrim(';') == method)) {
                 update_line_only = true;
-                while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method) {
+                // transform nctx to exclude the combined frames
+                while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method)
                     nctx -= 1;
-                }
             }
         }
-        else if (context.size() > 0) {
-            update_line_only = true;
+        if (!update_line_only && nctx < context.size() && nctx < nframes) {
+            // look at the first non-matching element to see if we are only changing the line number
+            const DILineInfo &CtxLine = context.at(nctx);
+            const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
+            if (StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';'))
+                update_line_only = true;
         }
     }
     else if (nctx < context.size() && nctx < nframes) {
@@ -279,20 +327,26 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
 
 // adaptor class for printing line numbers before llvm IR lines
 class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter {
-    DILocation *InstrLoc = nullptr;
-    DILineInfoPrinter LinePrinter{"; ", false};
+    const DILocation *InstrLoc = nullptr;
+    DILineInfoPrinter LinePrinter;
     DenseMap<const Instruction *, DILocation *> DebugLoc;
     DenseMap<const Function *, DISubprogram *> Subprogram;
 public:
-    LineNumberAnnotatedWriter(const char *debuginfo)
-    {
+    LineNumberAnnotatedWriter(const char *LineStart, bool bracket_outer, const char *debuginfo)
+      : LinePrinter(LineStart, bracket_outer) {
         LinePrinter.SetVerbosity(debuginfo);
     }
     virtual void emitFunctionAnnot(const Function *, formatted_raw_ostream &);
     virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &);
+    virtual void emitInstructionAnnot(const DILocation *, formatted_raw_ostream &);
     virtual void emitBasicBlockEndAnnot(const BasicBlock *, formatted_raw_ostream &);
     // virtual void printInfoComment(const Value &, formatted_raw_ostream &) {}
 
+    void emitEnd(formatted_raw_ostream &Out) {
+        LinePrinter.emit_finish(Out);
+        InstrLoc = nullptr;
+    }
+
     void addSubprogram(const Function *F, DISubprogram *SP)
     {
         Subprogram[F] = SP;
@@ -327,12 +381,19 @@ void LineNumberAnnotatedWriter::emitFunctionAnnot(
 void LineNumberAnnotatedWriter::emitInstructionAnnot(
       const Instruction *I, formatted_raw_ostream &Out)
 {
-    DILocation *NewInstrLoc = I->getDebugLoc();
+    const DILocation *NewInstrLoc = I->getDebugLoc();
     if (!NewInstrLoc) {
         auto Loc = DebugLoc.find(I);
         if (Loc != DebugLoc.end())
             NewInstrLoc = Loc->second;
     }
+    emitInstructionAnnot(NewInstrLoc, Out);
+    Out << LinePrinter.inlining_indent(" ");
+}
+
+void LineNumberAnnotatedWriter::emitInstructionAnnot(
+      const DILocation *NewInstrLoc, formatted_raw_ostream &Out)
+{
     if (NewInstrLoc && NewInstrLoc != InstrLoc) {
         InstrLoc = NewInstrLoc;
         std::vector<DILineInfo> DIvec;
@@ -348,14 +409,13 @@ void LineNumberAnnotatedWriter::emitInstructionAnnot(
         } while (NewInstrLoc);
         LinePrinter.emit_lineinfo(Out, DIvec);
     }
-    Out << LinePrinter.inlining_indent(" ");
 }
 
 void LineNumberAnnotatedWriter::emitBasicBlockEndAnnot(
         const BasicBlock *BB, formatted_raw_ostream &Out)
 {
     if (BB == &BB->getParent()->back())
-        LinePrinter.emit_finish(Out);
+        emitEnd(Out);
 }
 
 static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWriter *AAW)
@@ -424,7 +484,7 @@ void jl_strip_llvm_addrspaces(Module *m)
 // print an llvm IR acquired from jl_get_llvmf
 // warning: this takes ownership of, and destroys, f->getParent()
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo)
+jl_value_t *jl_dump_function_ir_impl(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo)
 {
     std::string code;
     raw_string_ostream stream(code);
@@ -434,8 +494,8 @@ jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_modul
         if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent()))
             jl_error("jl_dump_function_ir: Expected Function* in a temporary Module");
 
-        JL_LOCK(&codegen_lock); // Might GC
-        LineNumberAnnotatedWriter AAW{debuginfo};
+        JL_LOCK(&jl_codegen_lock); // Might GC
+        LineNumberAnnotatedWriter AAW{"; ", false, debuginfo};
         if (!llvmf->getParent()) {
             // print the function declaration as-is
             llvmf->print(stream, &AAW);
@@ -458,7 +518,7 @@ jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_modul
             }
             delete m;
         }
-        JL_UNLOCK(&codegen_lock); // Might GC
+        JL_UNLOCK(&jl_codegen_lock); // Might GC
     }
 
     return jl_pchar_to_string(stream.str().data(), stream.str().size());
@@ -507,7 +567,7 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
 
 // print a native disassembly for the function starting at fptr
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo, char binary)
+jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
 {
     assert(fptr != 0);
     std::string code;
@@ -801,8 +861,16 @@ static void jl_dump_asm_internal(
     assert(MRI && "Unable to create target register info!");
 
     std::unique_ptr<MCObjectFileInfo> MOFI(new MCObjectFileInfo());
+#if JL_LLVM_VERSION >= 130000
+    MCSubtargetInfo *MSTI = TheTarget->createMCSubtargetInfo(TheTriple.str(), cpu, features);
+    assert(MSTI && "Unable to create subtarget info!");
+
+    MCContext Ctx(TheTriple, MAI.get(), MRI.get(), MSTI, &SrcMgr);
+    MOFI->initMCObjectFileInfo(Ctx, /* PIC */ false, /* LargeCodeModel */ false);
+#else
     MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr);
     MOFI->InitMCObjectFileInfo(TheTriple, /* PIC */ false, Ctx);
+#endif
 
     // Set up Subtarget and Disassembler
     std::unique_ptr<MCSubtargetInfo>
@@ -1046,8 +1114,140 @@ static void jl_dump_asm_internal(
     }
 }
 
+/// Counterpart of LLVM's addPassesToX helpers: creates and initializes the TargetPassConfig on PM.
+/// Returns the MCContext of the MachineModuleInfo pass added to PM, or nullptr if addISelPasses() returns true.
+static MCContext *
+addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
+    auto *Config = TM->createPassConfig(PM);
+    Config->setDisableVerify(false);
+    PM.add(Config);
+    auto *ModuleInfoPass = new MachineModuleInfoWrapperPass(TM);
+    PM.add(ModuleInfoPass);
+    if (Config->addISelPasses())
+        return nullptr;
+    Config->addMachinePasses();
+    Config->setInitialized();
+    return &ModuleInfoPass->getMMI().getContext();
+}
+
+// AsmPrinterHandler that interleaves line-number annotations with the emitted
+// assembly: annotations are rendered into a string buffer and then handed to
+// the AsmPrinter's output streamer as raw text.
+class LineNumberPrinterHandler : public AsmPrinterHandler {
+    MCStreamer &Streamer;
+    LineNumberAnnotatedWriter Annotator;
+    std::string Pending; // annotation text not yet handed to Streamer
+    llvm::raw_string_ostream PendingOS;
+    llvm::formatted_raw_ostream FormattedOS;
+
+public:
+    LineNumberPrinterHandler(AsmPrinter &Printer, const char *debuginfo)
+        : Streamer(*Printer.OutStreamer),
+          Annotator("; ", true, debuginfo),
+          PendingOS(Pending),
+          FormattedOS(PendingOS) {}
+
+    // Flush the annotation streams and forward any buffered text to the streamer.
+    void emitAndReset() {
+        FormattedOS.flush();
+        PendingOS.flush();
+        if (!Pending.empty()) {
+            Streamer.emitRawText(Pending);
+            Pending.clear();
+        }
+    }
+
+    // Not overridden here: beginModule, markFunctionEnd, beginFragment,
+    // endFragment, beginFunclet, endFunclet.
+    void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+    void endModule() override {}
+    /// note that some AsmPrinter implementations may not call beginFunction at all
+    void beginFunction(const MachineFunction *MF) override {
+        Annotator.emitFunctionAnnot(&MF->getFunction(), FormattedOS);
+        emitAndReset();
+    }
+    // Finish the line-info printer state at the end of each function.
+    void endFunction(const MachineFunction *MF) override {
+        Annotator.emitEnd(FormattedOS);
+        emitAndReset();
+    }
+    // Emit a line annotation when the instruction's debug location changes.
+    void beginInstruction(const MachineInstr *MI) override {
+        Annotator.emitInstructionAnnot(MI->getDebugLoc(), FormattedOS);
+        emitAndReset();
+    }
+    void endInstruction() override {}
+};
+
+// Get the native assembly for an llvm::Function definition `F` (or, when `raw_mc`
+// is set, its raw object code). Takes ownership of, and destroys, F's parent Module.
+// Returns an empty string if the code generation passes cannot be set up.
+extern "C" JL_DLLEXPORT
+jl_value_t *jl_dump_function_asm_impl(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
+{
+    // precise printing via IR assembler
+    SmallVector<char, 4096> ObjBufferSV;
+    { // scope block
+        Function *f = (Function*)F;
+        llvm::raw_svector_ostream asmfile(ObjBufferSV);
+        assert(!f->isDeclaration());
+        std::unique_ptr<Module> m(f->getParent());
+        // only generate code for `f`: strip the body of every other definition
+        for (auto &f2 : m->functions())
+            if (f != &f2 && !f2.isDeclaration())
+                f2.deleteBody();
+        LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(jl_TargetMachine);
+        legacy::PassManager PM;
+        addTargetPasses(&PM, TM);
+        if (raw_mc) {
+            raw_svector_ostream obj_OS(ObjBufferSV);
+            if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
+                return jl_an_empty_string;
+            PM.run(*m);
+        }
+        else {
+            MCContext *Context = addPassesToGenerateCode(TM, PM);
+            if (!Context)
+                return jl_an_empty_string;
+            Context->setGenDwarfForAssembly(false);
+            // Duplicate LLVMTargetMachine::addAsmPrinter here so we can set the asm dialect and add the custom annotation printer
+            const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
+            const MCAsmInfo &MAI = *TM->getMCAsmInfo();
+            const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
+            const MCInstrInfo &MII = *TM->getMCInstrInfo();
+            unsigned OutputAsmDialect = MAI.getAssemblerDialect(); // target default unless overridden below
+            if (!strcmp(asm_variant, "att"))
+                OutputAsmDialect = 0;
+            if (!strcmp(asm_variant, "intel"))
+                OutputAsmDialect = 1;
+            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
+                TM->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
+            std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
+                STI, MRI, TM->Options.MCOptions));
+            std::unique_ptr<MCCodeEmitter> MCE;
+            if (binary) // enable MCAsmStreamer::AddEncodingComment printing
+                MCE.reset(TM->getTarget().createMCCodeEmitter(MII, MRI, *Context));
+            auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
+            std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
+                *Context, std::move(FOut), /*isVerboseAsm*/ true, /*useDwarfDirectory*/ true,
+                InstPrinter, std::move(MCE), std::move(MAB), /*ShowInst*/ false));
+            std::unique_ptr<AsmPrinter> Printer(
+                TM->getTarget().createAsmPrinter(*TM, std::move(S)));
+            if (!Printer) // must be checked before the printer is dereferenced below
+                return jl_an_empty_string;
+            Printer->addAsmPrinterHandler(AsmPrinter::HandlerInfo(
+                        std::unique_ptr<AsmPrinterHandler>(new LineNumberPrinterHandler(*Printer, debuginfo)),
+                        "emit", "Debug Info Emission", "Julia", "Julia::LineNumberPrinterHandler Markup"));
+            PM.add(Printer.release());
+            PM.add(createFreeMachineFunctionPass());
+            PM.run(*m);
+        }
+    }
+    return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
+}
+
 extern "C" JL_DLLEXPORT
-LLVMDisasmContextRef jl_LLVMCreateDisasm(
+LLVMDisasmContextRef jl_LLVMCreateDisasm_impl(
         const char *TripleName, void *DisInfo, int TagType,
         LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp)
 {
@@ -1055,7 +1255,7 @@ LLVMDisasmContextRef jl_LLVMCreateDisasm(
 }
 
 extern "C" JL_DLLEXPORT
-JL_DLLEXPORT size_t jl_LLVMDisasmInstruction(
+JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_impl(
         LLVMDisasmContextRef DC, uint8_t *Bytes, uint64_t BytesSize,
         uint64_t PC, char *OutString, size_t OutStringSize)
 {
diff --git a/src/dlload.c b/src/dlload.c
index df03d9d8e900f..33afe62acad90 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -9,7 +9,6 @@
 #include "julia.h"
 #include "julia_internal.h"
 #ifdef _OS_WINDOWS_
-#include <windows.h>
 #include <direct.h>
 #else
 #include <unistd.h>
@@ -58,20 +57,10 @@ static int endswith_extension(const char *path) JL_NOTSAFEPOINT
 }
 
 #ifdef _OS_WINDOWS_
-#ifdef _MSC_VER
-#if (_MSC_VER >= 1930) || (_MSC_VER < 1800)
-#error This version of MSVC has not been tested.
-#elif _MSC_VER >= 1900 // VC++ 2015 / 2017 / 2019
-#define CRTDLL_BASENAME "vcruntime140"
-#elif _MSC_VER >= 1800 // VC++ 2013
-#define CRTDLL_BASENAME "msvcr120"
-#endif
-#else
 #define CRTDLL_BASENAME "msvcrt"
-#endif
 
-const char jl_crtdll_basename[] = CRTDLL_BASENAME;
-const char jl_crtdll_name[] = CRTDLL_BASENAME ".dll";
+JL_DLLEXPORT const char *jl_crtdll_basename = CRTDLL_BASENAME;
+const char *jl_crtdll_name = CRTDLL_BASENAME ".dll";
 
 #undef CRTDLL_BASENAME
 #endif
@@ -130,7 +119,7 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
 #ifdef RTLD_NOLOAD
                   | JL_RTLD(flags, NOLOAD)
 #endif
-#if defined(RTLD_DEEPBIND) && !(defined(JL_ASAN_ENABLED) || defined(JL_TSAN_ENABLED) || defined(JL_MSAN_ENABLED))
+#if defined(RTLD_DEEPBIND) && !(defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_))
                   | JL_RTLD(flags, DEEPBIND)
 #endif
 #ifdef RTLD_FIRST
@@ -166,6 +155,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     uv_stat_t stbuf;
     void *handle;
     int abspath;
+    int is_atpath;
     // number of extensions to try — if modname already ends with the
     // standard extension, then we don't try adding additional extensions
     int n_extensions = endswith_extension(modname) ? 1 : N_EXTENSIONS;
@@ -190,19 +180,33 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
         goto done;
     }
 
-    abspath = isabspath(modname);
+    abspath = jl_isabspath(modname);
+    is_atpath = 0;
+
+    // Detect if our `modname` is something like `@rpath/libfoo.dylib`
+#ifdef _OS_DARWIN_
+    size_t nameLen = strlen(modname);
+    const char *const atPaths[] = {"@executable_path/", "@loader_path/", "@rpath/"};
+    for (i = 0; i < sizeof(atPaths)/sizeof(char*); ++i) {
+        size_t atLen = strlen(atPaths[i]);
+        if (nameLen >= atLen && 0 == strncmp(modname, atPaths[i], atLen)) {
+            is_atpath = 1;
+        }
+    }
+#endif
 
     /*
       this branch permutes all base paths in DL_LOAD_PATH with all extensions
       note: skip when !jl_base_module to avoid UndefVarError(:DL_LOAD_PATH),
             and also skip for absolute paths
+            and also skip for `@`-paths on macOS
       We also do simple string replacement here for elements starting with `@executable_path/`.
       While these exist as OS concepts on Darwin, we want to use them on other platforms
       such as Windows, so we emulate them here.
     */
-    if (!abspath && jl_base_module != NULL) {
+    if (!abspath && !is_atpath && jl_base_module != NULL) {
         jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"));
-        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? b->value : NULL);
+        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL);
         if (DL_LOAD_PATH != NULL) {
             size_t j;
             for (j = 0; j < jl_array_len(DL_LOAD_PATH); j++) {
@@ -309,7 +313,7 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
         char err[256];
         win32_formatmessage(GetLastError(), err, sizeof(err));
 #endif
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
         // Hide the error throwing from the analyser since there isn't a way to express
         // "safepoint only when throwing error" currently.
         jl_errorf("could not load symbol \"%s\":\n%s", symbol, err);
@@ -320,7 +324,7 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
 
 #ifdef _OS_WINDOWS_
 //Look for symbols in win32 libraries
-const char *jl_dlfind_win32(const char *f_name)
+JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name)
 {
     void * dummy;
     if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0))
diff --git a/src/dump.c b/src/dump.c
index 49fa6efa431cd..53b9bc3f0a719 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -16,11 +16,7 @@
 #include <dlfcn.h>
 #endif
 
-#ifndef _COMPILER_MICROSOFT_
 #include "valgrind.h"
-#else
-#define RUNNING_ON_VALGRIND 0
-#endif
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -46,6 +42,7 @@ static jl_value_t *deser_symbols[256];
 static htable_t backref_table;
 static int backref_table_numel;
 static arraylist_t backref_list;
+static htable_t new_code_instance_validate;
 
 // list of (jl_value_t **loc, size_t pos) entries
 // for anything that was flagged by the deserializer for later
@@ -349,13 +346,13 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
             jl_serialize_value(s, (jl_value_t*)table[i]);
             jl_binding_t *b = (jl_binding_t*)table[i+1];
             jl_serialize_value(s, b->name);
-            jl_value_t *e = b->value;
+            jl_value_t *e = jl_atomic_load_relaxed(&b->value);
             if (!b->constp && e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL)
                 // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
                 jl_serialize_cnull(s, jl_typeof(e));
             else
                 jl_serialize_value(s, e);
-            jl_serialize_value(s, b->globalref);
+            jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref));
             jl_serialize_value(s, b->owner);
             write_int8(s->s, (b->deprecated<<3) | (b->constp<<2) | (b->exportp<<1) | (b->imported));
         }
@@ -659,7 +656,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         if (!(serialization_mode & METHOD_INTERNAL))
             return;
         jl_serialize_value(s, m->specializations);
-        jl_serialize_value(s, m->speckeyset);
+        jl_serialize_value(s, jl_atomic_load_relaxed(&m->speckeyset));
         jl_serialize_value(s, (jl_value_t*)m->name);
         jl_serialize_value(s, (jl_value_t*)m->file);
         write_int32(s->s, m->line);
@@ -670,7 +667,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         write_int8(s->s, m->isva);
         write_int8(s->s, m->pure);
         write_int8(s->s, m->is_for_opaque_closure);
-        write_int8(s->s, m->aggressive_constprop);
+        write_int8(s->s, m->constprop);
         jl_serialize_value(s, (jl_value_t*)m->slot_syms);
         jl_serialize_value(s, (jl_value_t*)m->roots);
         jl_serialize_value(s, (jl_value_t*)m->ccallable);
@@ -1054,7 +1051,7 @@ static void jl_collect_backedges(jl_array_t *s, jl_array_t *t)
                         size_t min_valid = 0;
                         size_t max_valid = ~(size_t)0;
                         int ambig = 0;
-                        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
+                        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig);
                         if (matches == jl_false) {
                             valid = 0;
                             break;
@@ -1170,7 +1167,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
     jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps};
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
-    ct->world_age = jl_world_counter;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
     ct->world_age = last_age;
 
@@ -1221,7 +1218,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
             if (toplevel && prefs_hash_func && get_compiletime_prefs_func) {
                 // Temporary invoke in newest world age
                 size_t last_age = ct->world_age;
-                ct->world_age = jl_world_counter;
+                ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
 
                 // call get_compiletime_prefs(__toplevel__)
                 jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL};
@@ -1509,13 +1506,14 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
     }
     m->specializations = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->specializations);
     jl_gc_wb(m, m->specializations);
-    m->speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset);
-    jl_gc_wb(m, m->speckeyset);
+    jl_array_t *speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset);
+    jl_atomic_store_relaxed(&m->speckeyset, speckeyset);
+    jl_gc_wb(m, speckeyset);
     m->name = (jl_sym_t*)jl_deserialize_value(s, NULL);
     jl_gc_wb(m, m->name);
     m->file = (jl_sym_t*)jl_deserialize_value(s, NULL);
     m->line = read_int32(s->s);
-    m->primary_world = jl_world_counter;
+    m->primary_world = jl_atomic_load_acquire(&jl_world_counter);
     m->deleted_world = ~(size_t)0;
     m->called = read_int32(s->s);
     m->nargs = read_int32(s->s);
@@ -1524,7 +1522,7 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
     m->isva = read_int8(s->s);
     m->pure = read_int8(s->s);
     m->is_for_opaque_closure = read_int8(s->s);
-    m->aggressive_constprop = read_int8(s->s);
+    m->constprop = read_int8(s->s);
     m->slot_syms = jl_deserialize_value(s, (jl_value_t**)&m->slot_syms);
     jl_gc_wb(m, m->slot_syms);
     m->roots = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->roots);
@@ -1616,8 +1614,10 @@ static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl
         codeinst->precompile = 1;
     codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next);
     jl_gc_wb(codeinst, codeinst->next);
-    if (validate)
-        codeinst->min_world = jl_world_counter;
+    if (validate) {
+        codeinst->min_world = jl_atomic_load_acquire(&jl_world_counter);
+        ptrhash_put(&new_code_instance_validate, codeinst, (void*)(~(uintptr_t)HT_NOTFOUND));   // "HT_FOUND"
+    }
     return (jl_value_t*)codeinst;
 }
 
@@ -1647,10 +1647,12 @@ static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DIS
             break;
         jl_binding_t *b = jl_get_binding_wr(m, asname, 1);
         b->name = (jl_sym_t*)jl_deserialize_value(s, (jl_value_t**)&b->name);
-        b->value = jl_deserialize_value(s, &b->value);
-        if (b->value != NULL) jl_gc_wb(m, b->value);
-        b->globalref = jl_deserialize_value(s, &b->globalref);
-        if (b->globalref != NULL) jl_gc_wb(m, b->globalref);
+        jl_value_t *bvalue = jl_deserialize_value(s, (jl_value_t**)&b->value);
+        *(jl_value_t**)&b->value = bvalue;
+        if (bvalue != NULL) jl_gc_wb(m, bvalue);
+        jl_value_t *bglobalref = jl_deserialize_value(s, (jl_value_t**)&b->globalref);
+        *(jl_value_t**)&b->globalref = bglobalref;
+        if (bglobalref != NULL) jl_gc_wb(m, bglobalref);
         b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner);
         if (b->owner != NULL) jl_gc_wb(m, b->owner);
         int8_t flags = read_int8(s->s);
@@ -1676,7 +1678,7 @@ static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DIS
     m->optlevel = read_int8(s->s);
     m->compile = read_int8(s->s);
     m->infer = read_int8(s->s);
-    m->primary_world = jl_world_counter;
+    m->primary_world = jl_atomic_load_acquire(&jl_world_counter);
     return (jl_value_t*)m;
 }
 
@@ -1726,7 +1728,7 @@ static void jl_deserialize_struct(jl_serializer_state *s, jl_value_t *v) JL_GC_D
         if (entry->max_world == ~(size_t)0) {
             if (entry->min_world > 1) {
                 // update world validity to reflect current state of the counter
-                entry->min_world = jl_world_counter;
+                entry->min_world = jl_atomic_load_acquire(&jl_world_counter);
             }
         }
         else {
@@ -1963,6 +1965,8 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
     size_t i, l = jl_array_len(targets) / 2;
     jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l);
     memset(jl_array_data(valids), 1, l);
+    jl_value_t *loctag = NULL;
+    JL_GC_PUSH1(&loctag);
     *pvalids = valids;
     for (i = 0; i < l; i++) {
         jl_value_t *callee = jl_array_ptr_ref(targets, i * 2);
@@ -1981,7 +1985,7 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
         size_t max_valid = ~(size_t)0;
         int ambig = 0;
         // TODO: possibly need to included ambiguities too (for the optimizer correctness)?
-        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
+        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig);
         if (matches == jl_false || jl_array_len(matches) != jl_array_len(expected)) {
             valid = 0;
         }
@@ -2004,7 +2008,13 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
             }
         }
         jl_array_uint8_set(valids, i, valid);
+        if (!valid && _jl_debug_method_invalidation) {
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee);
+            loctag = jl_cstr_to_string("insert_backedges_callee");
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+        }
     }
+    JL_GC_POP();
 }
 
 static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
@@ -2018,7 +2028,7 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
     for (i = 0; i < l; i += 2) {
         jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(list, i);
         assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
-        assert(caller->def.method->primary_world == jl_world_counter); // caller should be new
+        assert(caller->def.method->primary_world == jl_atomic_load_acquire(&jl_world_counter)); // caller should be new
         jl_array_t *idxs_array = (jl_array_t*)jl_array_ptr_ref(list, i + 1);
         assert(jl_isa((jl_value_t*)idxs_array, jl_array_int32_type));
         int32_t *idxs = (int32_t*)jl_array_data(idxs_array);
@@ -2047,10 +2057,16 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
             while (codeinst) {
                 if (codeinst->min_world > 0)
                     codeinst->max_world = ~(size_t)0;
+                ptrhash_remove(&new_code_instance_validate, codeinst);  // mark it as handled
                 codeinst = jl_atomic_load_relaxed(&codeinst->next);
             }
         }
         else {
+            jl_code_instance_t *codeinst = caller->cache;
+            while (codeinst) {
+                ptrhash_remove(&new_code_instance_validate, codeinst);  // should be left invalid
+                codeinst = jl_atomic_load_relaxed(&codeinst->next);
+            }
             if (_jl_debug_method_invalidation) {
                 jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller);
                 loctag = jl_cstr_to_string("insert_backedges");
@@ -2061,6 +2077,15 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
     JL_GC_POP();
 }
 
+// Set max_world to ~(size_t)0 for each CodeInstance still left in new_code_instance_validate.
+static void validate_new_code_instances(void)
+{
+    void **tbl = new_code_instance_validate.table;
+    size_t k, n = new_code_instance_validate.size;
+    for (k = 0; k < n; k += 2)
+        if (tbl[k+1] != HT_NOTFOUND) // slot pair (k, k+1) holds a live entry
+            ((jl_code_instance_t*)tbl[k])->max_world = ~(size_t)0;
+}
 
 static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *mod_list)
 {
@@ -2203,8 +2228,6 @@ static jl_array_t *jl_finalize_deserializer(jl_serializer_state *s, arraylist_t
 
 JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)
 {
-    if (!init_order)
-        return;
     int i, l = jl_array_len(init_order);
     for (i = 0; i < l; i++) {
         jl_value_t *mod = jl_array_ptr_ref(init_order, i);
@@ -2623,11 +2646,12 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
     // prepare to deserialize
     int en = jl_gc_enable(0);
     jl_gc_enable_finalizers(ct, 0);
-    ++jl_world_counter; // reserve a world age for the deserialization
+    jl_atomic_fetch_add(&jl_world_counter, 1); // reserve a world age for the deserialization
 
     arraylist_new(&backref_list, 4000);
     arraylist_push(&backref_list, jl_main_module);
     arraylist_new(&flagref_list, 0);
+    htable_new(&new_code_instance_validate, 0);
     arraylist_new(&ccallable_list, 0);
     htable_new(&uniquing_table, 0);
 
@@ -2657,13 +2681,20 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
     jl_recache_other(); // make all of the other objects identities correct (needs to be after insert methods)
     htable_free(&uniquing_table);
     jl_array_t *init_order = jl_finalize_deserializer(&s, tracee_list); // done with f and s (needs to be after recache)
+    if (init_order == NULL)
+        init_order = (jl_array_t*)jl_an_empty_vec_any;
+    assert(jl_isa((jl_value_t*)init_order, jl_array_any_type));
 
     JL_GC_PUSH4(&init_order, &restored, &external_backedges, &external_edges);
     jl_gc_enable(en); // subtyping can allocate a lot, not valid before recache-other
 
     jl_insert_backedges((jl_array_t*)external_backedges, (jl_array_t*)external_edges); // restore external backedges (needs to be last)
 
+    // check new CodeInstances and validate any that lack external backedges
+    validate_new_code_instances();
+
     serializer_worklist = NULL;
+    htable_free(&new_code_instance_validate);
     arraylist_free(&flagref_list);
     arraylist_free(&backref_list);
     ios_close(f);
@@ -2719,7 +2750,7 @@ void jl_init_serializer(void)
     htable_new(&backref_table, 0);
 
     void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type,
-                     call_sym, invoke_sym, goto_ifnot_sym, return_sym, jl_symbol("tuple"),
+                     jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"),
                      jl_an_empty_string, jl_an_empty_vec_any,
 
                      // empirical list of very common symbols
diff --git a/src/features_x86.h b/src/features_x86.h
index ad6a5eb1e515a..3ef71fb217db6 100644
--- a/src/features_x86.h
+++ b/src/features_x86.h
@@ -33,7 +33,7 @@ JL_FEATURE_DEF(bmi, 32 * 2 + 3, 0)
 // JL_FEATURE_DEF(hle, 32 * 2 + 4, 0) // Not used and gone in LLVM 5.0
 JL_FEATURE_DEF(avx2, 32 * 2 + 5, 0)
 JL_FEATURE_DEF(bmi2, 32 * 2 + 8, 0)
-// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Priviledged instruction
+// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Privileged instruction
 JL_FEATURE_DEF(rtm, 32 * 2 + 11, 0)
 // JL_FEATURE_DEF(mpx, 32 * 2 + 14, 0) // Deprecated in LLVM 10.0
 JL_FEATURE_DEF(avx512f, 32 * 2 + 16, 0)
diff --git a/src/flisp/LICENSE b/src/flisp/LICENSE
new file mode 100644
index 0000000000000..34860f4ba63d4
--- /dev/null
+++ b/src/flisp/LICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 2009 Jeff Bezanson
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to
+      endorse or promote products derived from this software without specific
+      prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/src/flisp/Makefile b/src/flisp/Makefile
index 50566a0258386..7a363b0ec13d7 100644
--- a/src/flisp/Makefile
+++ b/src/flisp/Makefile
@@ -50,13 +50,11 @@ endif
 FLAGS := -I$(LLTSRCDIR) $(JCFLAGS) $(HFILEDIRS:%=-I%) \
         -I$(LIBUV_INC) -I$(UTF8PROC_INC) -I$(build_includedir) $(LIBDIRS:%=-L%) \
         -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS
-ifneq ($(USEMSVC), 1)
 ifneq ($(OS), emscripten)
 FLAGS += -DUSE_COMPUTED_GOTO
 endif
 FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
 FLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
-endif
 
 DEBUGFLAGS += $(FLAGS)
 SHIPFLAGS += $(FLAGS)
@@ -75,8 +73,8 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(DEBUGFLAGS) -c $< -o $@)
 
-FLISP_SRCS := $(flisp.c cvalues.c types.c flisp.h print.c read.c equal.c:%=$(SRCDIR)/%)
-FLMAIN_SRCS := $(flmain.c flisp.h:%=$(SRCDIR)/%)
+FLISP_SRCS := $(addprefix $(SRCDIR)/,flisp.c cvalues.c types.c flisp.h print.c read.c equal.c)
+FLMAIN_SRCS := $(addprefix $(SRCDIR)/,flmain.c flisp.h)
 $(BUILDDIR)/flisp.o: $(FLISP_SRCS)
 $(BUILDDIR)/flisp.dbg.obj: $(FLISP_SRCS)
 $(BUILDDIR)/flmain.o: $(FLMAIN_SRCS)
@@ -95,11 +93,7 @@ $(BUILDDIR)/$(LIBTARGET).a: $(OBJS) | $(BUILDDIR)
 	rm -rf $@
 	@$(call PRINT_LINK, $(AR) -rcs $@ $(OBJS))
 
-ifneq ($(USEMSVC), 1)
 CCLD := $(CC)
-else
-CCLD := $(LD)
-endif
 
 $(BUILDDIR)/$(EXENAME)-debug$(EXE): $(DOBJS) $(LIBFILES_debug) $(BUILDDIR)/$(LIBTARGET)-debug.a $(BUILDDIR)/flmain.dbg.obj | $(BUILDDIR)/flisp.boot
 	@$(call PRINT_LINK, $(CCLD) $(DEBUGFLAGS) $(JLDFLAGS) $(DOBJS) $(BUILDDIR)/flmain.dbg.obj -o $@ $(BUILDDIR)/$(LIBTARGET)-debug.a $(LIBFILES_debug) $(LIBS) $(OSLIBS))
@@ -130,9 +124,7 @@ endif
 endif
 
 test:
-ifneq ($(USEMSVC), 1)
 	$(call spawn,./$(EXENAME)$(EXE)) unittest.lsp
-endif
 
 clean:
 	rm -f $(BUILDDIR)/*.o
diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h
index 233c3340d0e48..209a4f2d4fcdb 100644
--- a/src/flisp/flisp.h
+++ b/src/flisp/flisp.h
@@ -6,6 +6,14 @@
 
 #include "platform.h"
 #include "libsupport.h"
+#include "utils.h"
+#include "bitvector.h"
+#include "timefuncs.h"
+#include "strtod.h"
+#include "dirpath.h"
+#include "hashing.h"
+#include "ptrhash.h"
+#include "htable.h"
 #include "uv.h"
 
 //#define MEMDEBUG
@@ -204,13 +212,13 @@ typedef struct _ectx_t {
         for(l__ca=1; l__ca; l__ca=0, fl_restorestate(fl_ctx, &_ctx))
 
 #if defined(_OS_WINDOWS_)
-__declspec(noreturn) void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...);
+__declspec(noreturn) void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...) JL_NOTSAFEPOINT;
 __declspec(noreturn) void lerror(fl_context_t *fl_ctx, value_t e, const char *msg) JL_NOTSAFEPOINT;
 __declspec(noreturn) void fl_raise(fl_context_t *fl_ctx, value_t e);
 __declspec(noreturn) void type_error(fl_context_t *fl_ctx, const char *fname, const char *expected, value_t got);
 __declspec(noreturn) void bounds_error(fl_context_t *fl_ctx, const char *fname, value_t arr, value_t ind);
 #else
-void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...) __attribute__ ((__noreturn__));
+void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...) __attribute__ ((__noreturn__)) JL_NOTSAFEPOINT;
 void lerror(fl_context_t *fl_ctx, value_t e, const char *msg) __attribute__((__noreturn__)) JL_NOTSAFEPOINT;
 void fl_raise(fl_context_t *fl_ctx, value_t e) __attribute__ ((__noreturn__));
 void type_error(fl_context_t *fl_ctx, const char *fname, const char *expected, value_t got) __attribute__ ((__noreturn__));
@@ -336,10 +344,10 @@ value_t cvalue_static_cstrn(fl_context_t *fl_ctx, const char *str, size_t n);
 value_t cvalue_static_cstring(fl_context_t *fl_ctx, const char *str);
 value_t string_from_cstr(fl_context_t *fl_ctx, char *str);
 value_t string_from_cstrn(fl_context_t *fl_ctx, char *str, size_t n);
-int fl_isstring(fl_context_t *fl_ctx, value_t v);
-int fl_isnumber(fl_context_t *fl_ctx, value_t v);
-int fl_isgensym(fl_context_t *fl_ctx, value_t v);
-int fl_isiostream(fl_context_t *fl_ctx, value_t v);
+int fl_isstring(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
+int fl_isnumber(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
+int fl_isgensym(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
+int fl_isiostream(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
 ios_t *fl_toiostream(fl_context_t *fl_ctx, value_t v, const char *fname);
 value_t cvalue_compare(value_t a, value_t b);
 int numeric_compare(fl_context_t *fl_ctx, value_t a, value_t b, int eq, int eqnans, char *fname);
@@ -500,7 +508,7 @@ struct _fl_context_t {
     void *jlbuf;
 };
 
-static inline void argcount(fl_context_t *fl_ctx, const char *fname, uint32_t nargs, uint32_t c)
+static inline void argcount(fl_context_t *fl_ctx, const char *fname, uint32_t nargs, uint32_t c) JL_NOTSAFEPOINT
 {
     if (__unlikely(nargs != c))
         lerrorf(fl_ctx, fl_ctx->ArgError,"%s: too %s arguments", fname, nargs<c ? "few":"many");
diff --git a/src/flisp/iostream.c b/src/flisp/iostream.c
index eee28910853c5..e1d4a0543ad73 100644
--- a/src/flisp/iostream.c
+++ b/src/flisp/iostream.c
@@ -164,20 +164,6 @@ value_t fl_ioputc(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     return fixnum(ios_pututf8(s, wc));
 }
 
-value_t fl_ioungetc(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
-{
-    argcount(fl_ctx, "io.ungetc", nargs, 2);
-    ios_t *s = toiostream(fl_ctx, args[0], "io.ungetc");
-    if (!iscprim(args[1]) || ((cprim_t*)ptr(args[1]))->type != fl_ctx->wchartype)
-        type_error(fl_ctx, "io.ungetc", "wchar", args[1]);
-    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[1]));
-    if (wc >= 0x80) {
-        lerror(fl_ctx, fl_ctx->ArgError, "io_ungetc: unicode not yet supported");
-    }
-    s->u_colno -= utf8proc_charwidth(wc);
-    return fixnum(ios_ungetc((int)wc,s));
-}
-
 value_t fl_ioflush(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
     argcount(fl_ctx, "io.flush", nargs, 1);
@@ -235,6 +221,17 @@ value_t fl_ioseek(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     return fl_ctx->T;
 }
 
+value_t fl_ioskip(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+{ // builtin "io.skip": forwards args[1] to ios_skip on the stream in args[0]; yields T, or F on a negative result
+    argcount(fl_ctx, "io.skip", nargs, 2); // raises ArgError unless exactly two arguments were passed
+    ios_t *s = toiostream(fl_ctx, args[0], "io.skip"); // args[0] must convert to an ios_t stream
+    int64_t pos = (ssize_t)tosize(fl_ctx, args[1], "io.skip"); // amount handed to ios_skip; presumably a relative offset despite the name -- confirm against ios_skip
+    int64_t res = ios_skip(s, pos);
+    if (res < 0) // a negative ios_skip result is reported to the caller as false
+        return fl_ctx->F;
+    return fl_ctx->T;
+}
+
 value_t fl_iopos(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
     argcount(fl_ctx, "io.pos", nargs, 1);
@@ -428,9 +425,9 @@ static const builtinspec_t iostreamfunc_info[] = {
     { "io.close", fl_ioclose },
     { "io.eof?" , fl_ioeof },
     { "io.seek" , fl_ioseek },
+    { "io.skip" , fl_ioskip },
     { "io.pos",   fl_iopos },
     { "io.getc" , fl_iogetc },
-    { "io.ungetc", fl_ioungetc },
     { "io.putc" , fl_ioputc },
     { "io.peekc" , fl_iopeekc },
     { "io.discardbuffer", fl_iopurge },
diff --git a/src/flisp/julia_charmap.h b/src/flisp/julia_charmap.h
index 59f408ce012c9..3c54eaf98f484 100644
--- a/src/flisp/julia_charmap.h
+++ b/src/flisp/julia_charmap.h
@@ -1,6 +1,9 @@
 /* Array of {original codepoint, replacement codepoint} normalizations
    to perform on Julia identifiers, to canonicalize characters that
-   are both easily confused and easily inputted by accident. */
+   are both easily confused and easily inputted by accident.
+
+   Important: when this table is updated, also update the corresponding table
+              in base/strings/unicode.jl */
 static const uint32_t charmap[][2] = {
     { 0x025B, 0x03B5 }, // latin small letter open e -> greek small letter epsilon
     { 0x00B5, 0x03BC }, // micro sign -> greek small letter mu
diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c
index dbe94e1388069..9fcd3e9789af4 100644
--- a/src/flisp/julia_extensions.c
+++ b/src/flisp/julia_extensions.c
@@ -82,9 +82,10 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
               wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
               wc == 0x22a4 || wc == 0x22a5 ||   // ⊤ ⊥
 
-              (wc >= 0x2202 && wc <= 0x2233 &&
+              (wc >= 0x2200 && wc <= 0x2233 &&
                (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆
                 wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏
+                wc == 0x2200 || wc == 0x2203 || wc == 0x2204 || // ∀, ∃, ∄
                 wc == 0x2210 || wc == 0x2211 || // ∐, ∑
                 wc == 0x221e || wc == 0x221f || // ∞, ∟
                 wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳
@@ -327,22 +328,22 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg
     ios_t *s = fl_toiostream(fl_ctx, args[1], "accum-julia-symbol");
     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
         type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]);
-    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
+    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0])); // peek the first character we'll read
     ios_t str;
     int allascii = 1;
     ios_mem(&str, 0);
     do {
-        allascii &= (wc <= 0x7f);
         ios_getutf8(s, &wc);
         if (wc == '!') {
             uint32_t nwc = 0;
             ios_peekutf8(s, &nwc);
             // make sure != is always an operator
             if (nwc == '=') {
-                ios_ungetc('!', s);
+                ios_skip(s, -1);
                 break;
             }
         }
+        allascii &= (wc <= 0x7f);
         ios_pututf8(&str, wc);
         if (safe_peekutf8(fl_ctx, s, &wc) == IOS_EOF)
             break;
diff --git a/src/flisp/print.c b/src/flisp/print.c
index 789a42133c6b7..2b20d0d98b225 100644
--- a/src/flisp/print.c
+++ b/src/flisp/print.c
@@ -643,10 +643,10 @@ static void cvalue_printdata(fl_context_t *fl_ctx, ios_t *f, void *data,
             if (init == 0) {
 #if defined(RTLD_SELF)
                 jl_static_print = (size_t (*)(ios_t*, void*))
-                    (uintptr_t)dlsym(RTLD_SELF, "jl_static_show");
+                    (uintptr_t)dlsym(RTLD_SELF, "ijl_static_show");
 #elif defined(RTLD_DEFAULT)
                 jl_static_print = (size_t (*)(ios_t*, void*))
-                    (uintptr_t)dlsym(RTLD_DEFAULT, "jl_static_show");
+                    (uintptr_t)dlsym(RTLD_DEFAULT, "ijl_static_show");
 #elif defined(_OS_WINDOWS_)
                 HMODULE handle;
                 if (GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
@@ -654,7 +654,7 @@ static void cvalue_printdata(fl_context_t *fl_ctx, ios_t *f, void *data,
                                        (LPCWSTR)(&cvalue_printdata),
                                        &handle)) {
                     jl_static_print = (size_t (*)(ios_t*, void*))
-                        (uintptr_t)GetProcAddress(handle, "jl_static_show");
+                        (uintptr_t)GetProcAddress(handle, "ijl_static_show");
                 }
 #endif
                 init = 1;
diff --git a/src/flisp/read.c b/src/flisp/read.c
index 494303ef9add7..9a480e0536c7a 100644
--- a/src/flisp/read.c
+++ b/src/flisp/read.c
@@ -164,23 +164,25 @@ static void accumchar(fl_context_t *fl_ctx, char c, int *pi)
 // return: 1 if escaped (forced to be symbol)
 static int read_token(fl_context_t *fl_ctx, char c, int digits)
 {
-    int i=0, ch, escaped=0, issym=0, first=1;
+    int i=0, ch, escaped=0, issym=0, nc=0;
 
     while (1) {
-        if (!first) {
-            ch = ios_getc(readF(fl_ctx));
+        if (nc != 0) {
+            if (nc != 1)
+                (void)ios_getc(readF(fl_ctx)); // consume ch
+            ch = ios_peekc(readF(fl_ctx));
             if (ch == IOS_EOF)
                 goto terminate;
             c = (char)ch;
         }
-        first = 0;
         if (c == '|') {
             issym = 1;
             escaped = !escaped;
         }
         else if (c == '\\') {
             issym = 1;
-            ch = ios_getc(readF(fl_ctx));
+            (void)ios_getc(readF(fl_ctx)); // consume '\'
+            ch = ios_peekc(readF(fl_ctx));
             if (ch == IOS_EOF)
                 goto terminate;
             accumchar(fl_ctx, (char)ch, &i);
@@ -191,8 +193,10 @@ static int read_token(fl_context_t *fl_ctx, char c, int digits)
         else {
             accumchar(fl_ctx, c, &i);
         }
+        nc++;
     }
-    ios_ungetc(c, readF(fl_ctx));
+    if (nc == 0)
+        ios_skip(readF(fl_ctx), -1); // rewind stream for the caller, to prepare for throwing an error
  terminate:
     fl_ctx->readbuf[i++] = '\0';
     return issym;
@@ -376,7 +380,7 @@ static uint32_t peek(fl_context_t *fl_ctx)
     }
     else if (c == ',') {
         fl_ctx->readtoktype = TOK_COMMA;
-        ch = ios_getc(readF(fl_ctx));
+        ch = ios_peekc(readF(fl_ctx));
         if (ch == IOS_EOF)
             return fl_ctx->readtoktype;
         if ((char)ch == '@')
@@ -384,7 +388,8 @@ static uint32_t peek(fl_context_t *fl_ctx)
         else if ((char)ch == '.')
             fl_ctx->readtoktype = TOK_COMMADOT;
         else
-            ios_ungetc((char)ch, readF(fl_ctx));
+            return fl_ctx->readtoktype;
+        (void)ios_getc(readF(fl_ctx)); // consume ch
     }
     else {
         if (!read_token(fl_ctx, c, 0)) {
@@ -486,13 +491,15 @@ static value_t read_string(fl_context_t *fl_ctx)
                 free(buf);
                 lerror(fl_ctx, fl_ctx->ParseError, "read: end of input in escape sequence");
             }
-            j=0;
+            j = 0;
             if (octal_digit(c)) {
-                do {
+                while (1) {
                     eseq[j++] = c;
-                    c = ios_getc(readF(fl_ctx));
-                } while (octal_digit(c) && j<3 && (c!=IOS_EOF));
-                if (c!=IOS_EOF) ios_ungetc(c, readF(fl_ctx));
+                    c = ios_peekc(readF(fl_ctx));
+                    if (c == IOS_EOF || !octal_digit(c) || j >= 3)
+                        break;
+                    (void)ios_getc(readF(fl_ctx)); // consume c
+                }
                 eseq[j] = '\0';
                 wc = strtol(eseq, NULL, 8);
                 // \DDD and \xXX read bytes, not characters
@@ -501,12 +508,13 @@ static value_t read_string(fl_context_t *fl_ctx)
             else if ((c=='x' && (ndig=2)) ||
                      (c=='u' && (ndig=4)) ||
                      (c=='U' && (ndig=8))) {
-                c = ios_getc(readF(fl_ctx));
-                while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
+                while (1) {
+                    c = ios_peekc(readF(fl_ctx));
+                    if (c == IOS_EOF || !hex_digit(c) || j >= ndig)
+                        break;
                     eseq[j++] = c;
-                    c = ios_getc(readF(fl_ctx));
+                    (void)ios_getc(readF(fl_ctx)); // consume c
                 }
-                if (c!=IOS_EOF) ios_ungetc(c, readF(fl_ctx));
                 eseq[j] = '\0';
                 if (j) wc = strtol(eseq, NULL, 16);
                 if (!j || wc > 0x10ffff) {
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 3a0e4bf78598b..8d2fcf67a75af 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -282,8 +282,8 @@ void gc_verify(jl_ptls_t ptls)
     }
     restore();
     gc_verify_track(ptls);
-    gc_debug_print_status();
-    gc_debug_critical_error();
+    jl_gc_debug_print_status();
+    jl_gc_debug_critical_error();
     abort();
 }
 #endif
@@ -315,7 +315,6 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
         char *cur_page = gc_page_data((char*)halfpages - 1);
         if (cur_page == data) {
             lim = (char*)halfpages - 1;
-            break;
         }
     }
     // compute the freelist_map
@@ -497,12 +496,12 @@ int gc_debug_check_pool(void)
     return gc_debug_alloc_check(&jl_gc_debug_env.pool);
 }
 
-int gc_debug_check_other(void)
+int jl_gc_debug_check_other(void)
 {
     return gc_debug_alloc_check(&jl_gc_debug_env.other);
 }
 
-void gc_debug_print_status(void)
+void jl_gc_debug_print_status(void)
 {
     uint64_t pool_count = jl_gc_debug_env.pool.num;
     uint64_t other_count = jl_gc_debug_env.other.num;
@@ -511,9 +510,9 @@ void gc_debug_print_status(void)
                    pool_count + other_count, pool_count, other_count, gc_num.pause);
 }
 
-void gc_debug_critical_error(void)
+void jl_gc_debug_critical_error(void)
 {
-    gc_debug_print_status();
+    jl_gc_debug_print_status();
     if (!jl_gc_debug_env.wait_for_debugger)
         return;
     jl_safe_printf("Waiting for debugger to attach\n");
@@ -522,11 +521,11 @@ void gc_debug_critical_error(void)
     }
 }
 
-void gc_debug_print(void)
+void jl_gc_debug_print(void)
 {
     if (!gc_debug_alloc_check(&jl_gc_debug_env.print))
         return;
-    gc_debug_print_status();
+    jl_gc_debug_print_status();
 }
 
 // a list of tasks for conservative stack scan during gc_scrub
@@ -583,7 +582,7 @@ static void gc_scrub_task(jl_task_t *ta)
 
     char *low;
     char *high;
-    if (ta->copy_stack && ptls2 && ta == ptls2->current_task) {
+    if (ta->copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         low  = (char*)ptls2->stackbase - ptls2->stacksize;
         high = (char*)ptls2->stackbase;
     }
@@ -594,7 +593,7 @@ static void gc_scrub_task(jl_task_t *ta)
     else
         return;
 
-    if (ptls == ptls2 && ptls2 && ta == ptls2->current_task) {
+    if (ptls == ptls2 && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         // scan up to current `sp` for current thread and task
         low = (char*)jl_get_frame_addr();
     }
@@ -608,11 +607,11 @@ void gc_scrub(void)
     jl_gc_debug_tasks.len = 0;
 }
 #else
-void gc_debug_critical_error(void)
+void jl_gc_debug_critical_error(void)
 {
 }
 
-void gc_debug_print_status(void)
+void jl_gc_debug_print_status(void)
 {
     // May not be accurate but should be helpful enough
     uint64_t pool_count = gc_num.poolalloc;
@@ -980,7 +979,7 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
 }
 #endif
 
-void gc_debug_init(void)
+void jl_gc_debug_init(void)
 {
 #ifdef GC_DEBUG_ENV
     char *env = getenv("JULIA_GC_NO_GENERATIONAL");
diff --git a/src/gc-pages.c b/src/gc-pages.c
index 29b3534a5ba9d..a4ebe0315d71e 100644
--- a/src/gc-pages.c
+++ b/src/gc-pages.c
@@ -82,7 +82,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
             block_pg_cnt = pg_cnt = min_block_pg_alloc;
         }
         else {
-            JL_UNLOCK_NOGC(&gc_perm_lock);
+            uv_mutex_unlock(&gc_perm_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -159,7 +159,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
                GC_PAGE_SZ * pg_cnt - LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size));
 #endif
         if (pg == 0) {
-            JL_UNLOCK_NOGC(&gc_perm_lock);
+            uv_mutex_unlock(&gc_perm_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -171,7 +171,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
 NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
 {
     struct jl_gc_metadata_ext info;
-    JL_LOCK_NOGC(&gc_perm_lock);
+    uv_mutex_lock(&gc_perm_lock);
 
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
@@ -255,7 +255,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
     errno = last_errno;
     current_pg_count++;
     gc_final_count_page(current_pg_count);
-    JL_UNLOCK_NOGC(&gc_perm_lock);
+    uv_mutex_unlock(&gc_perm_lock);
     return info.meta;
 }
 
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index 3708531e9b405..b7adf254026ca 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -23,7 +23,7 @@
 #define MIN_STACK_MAPPINGS_PER_POOL 5
 
 const size_t jl_guard_size = (4096 * 8);
-static uint32_t num_stack_mappings = 0;
+static _Atomic(uint32_t) num_stack_mappings = 0;
 
 #ifdef _OS_WINDOWS_
 #define MAP_FAILED NULL
@@ -233,7 +233,7 @@ void sweep_stack_pools(void)
                     t->stkbuf = NULL;
                     _jl_free_stack(ptls2, stkbuf, bufsz);
                 }
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
                 if (t->ctx.tsan_state) {
                     __tsan_destroy_fiber(t->ctx.tsan_state);
                     t->ctx.tsan_state = NULL;
diff --git a/src/gc.c b/src/gc.c
index 3734ec40d0795..577ac5839eb87 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -128,11 +128,11 @@ STATIC_INLINE void import_gc_state(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) {
 // is going to realloc the buffer (of its own list) or accessing the
 // list of another thread
 static jl_mutex_t finalizers_lock;
-static jl_mutex_t gc_cache_lock;
+static uv_mutex_t gc_cache_lock;
 
 // Flag that tells us whether we need to support conservative marking
 // of objects.
-static int support_conservative_marking = 0;
+static _Atomic(int) support_conservative_marking = 0;
 
 /**
  * Note about GC synchronization:
@@ -166,7 +166,7 @@ static int support_conservative_marking = 0;
  * finalizers in unmanaged (GC safe) mode.
  */
 
-jl_gc_num_t gc_num = {0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+jl_gc_num_t gc_num = {0};
 static size_t last_long_collect_interval;
 
 pagetable_t memory_map;
@@ -181,7 +181,7 @@ bigval_t *big_objects_marked = NULL;
 // `to_finalize` should not have tagged pointers.
 arraylist_t finalizer_list_marked;
 arraylist_t to_finalize;
-int jl_gc_have_pending_finalizers = 0;
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
 
 NOINLINE uintptr_t gc_get_stack_ptr(void)
 {
@@ -262,7 +262,9 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
 {
     arraylist_push(&to_finalize, o);
     arraylist_push(&to_finalize, f);
-    jl_gc_have_pending_finalizers = 1;
+    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
+    // release our lock, and that will have a release barrier by then
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
 }
 
 static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff)
@@ -274,7 +276,7 @@ static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff)
     jl_value_t *args[2] = {ff,o};
     JL_TRY {
         size_t last_age = ct->world_age;
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_apply(args, 2);
         ct->world_age = last_age;
     }
@@ -298,7 +300,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
     // This way, the mutation should not conflict with the owning thread,
     // which only writes to locations later than `len`
     // and will not resize the buffer without acquiring the lock.
-    size_t len = need_sync ? jl_atomic_load_acquire(&list->len) : list->len;
+    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
     size_t oldlen = len;
     void **items = list->items;
     size_t j = 0;
@@ -331,7 +333,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
         // The `memset` (like any other content mutation) has to be done
         // **before** the `cmpxchg` which publishes the length.
         memset(&items[len], 0, (oldlen - len) * sizeof(void*));
-        jl_atomic_cmpswap(&list->len, &oldlen, len);
+        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
     }
     else {
         list->len = len;
@@ -388,7 +390,7 @@ static void run_finalizers(jl_task_t *ct)
     if (to_finalize.items == to_finalize._space) {
         copied_list.items = copied_list._space;
     }
-    jl_gc_have_pending_finalizers = 0;
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
     arraylist_new(&to_finalize, 0);
     // This releases the finalizers lock.
     jl_gc_run_finalizers_in_list(ct, &copied_list);
@@ -453,7 +455,7 @@ JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
         return;
     }
     ptls->finalizers_inhibited = new_val;
-    if (jl_gc_have_pending_finalizers) {
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
         jl_gc_run_pending_finalizers(ct);
     }
 }
@@ -484,7 +486,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
 
 static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
-    assert(ptls->gc_state == 0);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
     arraylist_t *a = &ptls->finalizers;
     // This acquire load and the release store at the end are used to
     // synchronize with `finalize_object` on another thread. Apart from the GC,
@@ -493,7 +495,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
     // (only one thread since it needs to acquire the finalizer lock).
     // Similar to `finalize_object`, all content mutation has to be done
     // between the acquire and the release of the length.
-    size_t oldlen = jl_atomic_load_acquire(&a->len);
+    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
     if (__unlikely(oldlen + 2 > a->max)) {
         JL_LOCK_NOGC(&finalizers_lock);
         // `a->len` might have been modified.
@@ -507,7 +509,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
     void **items = a->items;
     items[oldlen] = v;
     items[oldlen + 1] = f;
-    jl_atomic_store_release(&a->len, oldlen + 2);
+    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
 }
 
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
@@ -537,7 +539,7 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
     // still holding a reference to the object
     for (int i = 0; i < jl_n_threads; i++) {
         jl_ptls_t ptls2 = jl_all_tls_states[i];
-        finalize_object(&ptls2->finalizers, o, &copied_list, ct->tid != i);
+        finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
     }
     finalize_object(&finalizer_list_marked, o, &copied_list, 0);
     if (copied_list.len > 0) {
@@ -683,9 +685,9 @@ static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) J
 
 static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    JL_LOCK_NOGC(&gc_cache_lock);
+    uv_mutex_lock(&gc_cache_lock);
     gc_sync_cache_nolock(ptls, &ptls->gc_cache);
-    JL_UNLOCK_NOGC(&gc_cache_lock);
+    uv_mutex_unlock(&gc_cache_lock);
 }
 
 // No other threads can be running marking at the same time
@@ -738,7 +740,7 @@ STATIC_INLINE int gc_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode,
         assert((tag & 0x3) == mark_mode);
     }
     *bits = mark_mode;
-    tag = jl_atomic_exchange_relaxed(&o->header, tag);
+    tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag);
     verify_val(jl_valueof(o));
     return !gc_marked(tag);
 }
@@ -781,7 +783,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     jl_assume(page);
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += page->osize;
-        jl_atomic_fetch_add_relaxed(&page->nold, 1);
+        static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
+        jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
         ptls->gc_cache.scanned_bytes += page->osize;
@@ -790,7 +793,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
             char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET;
             int obj_id = (((char*)o) - page_begin) / page->osize;
             uint8_t *ages = page->ages + obj_id / 8;
-            jl_atomic_fetch_and_relaxed(ages, ~(1 << (obj_id % 8)));
+            jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8)));
         }
     }
     objprofile_count(jl_typeof(jl_valueof(o)),
@@ -877,7 +880,7 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT
 
 static inline void maybe_collect(jl_ptls_t ptls)
 {
-    if (ptls->gc_num.allocd >= 0 || gc_debug_check_other()) {
+    if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) {
         jl_gc_collect(JL_GC_AUTO);
     }
     else {
@@ -956,8 +959,10 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
         jl_throw(jl_memory_exception);
     gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t,
         gc_cblist_notify_external_alloc, (v, allocsz));
-    ptls->gc_num.allocd += allocsz;
-    ptls->gc_num.bigalloc++;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
 #ifdef MEMDEBUG
     memset(v, 0xee, allocsz);
 #endif
@@ -1050,7 +1055,8 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
 {
     jl_ptls_t ptls = jl_current_task->ptls;
-    ptls->gc_num.allocd += sz;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); // adds sz to the current task's ptls counter; separate relaxed load and store, not one atomic read-modify-write
 }
 
 static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
@@ -1074,8 +1080,8 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
     for (int i = 0; i < jl_n_threads; i++) {
         jl_ptls_t ptls = jl_all_tls_states[i];
         if (ptls) {
-            memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t));
-            ptls->gc_num.allocd = -(int64_t)gc_num.interval;
+            memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
+            jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
         }
     }
 }
@@ -1198,13 +1204,15 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
     // to workaround a llvm bug.
     // Ref https://llvm.org/bugs/show_bug.cgi?id=27190
     jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset);
-    assert(ptls->gc_state == 0);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
 #ifdef MEMDEBUG
     return jl_gc_big_alloc(ptls, osize);
 #endif
     maybe_collect(ptls);
-    ptls->gc_num.allocd += osize;
-    ptls->gc_num.poolalloc++;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_num.poolalloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1);
     // first try to use the freelist
     jl_taggedvalue_t *v = p->freelist;
     if (v) {
@@ -1644,9 +1652,9 @@ JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t
                                                   jl_gc_mark_sp_t sp)
 {
     jl_safe_printf("GC error (probable corruption) :\n");
-    gc_debug_print_status();
+    jl_gc_debug_print_status();
     jl_(vt);
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     gc_mark_loop_unwind(ptls, sp, 0);
     abort();
 }
@@ -1656,20 +1664,18 @@ JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t
 // See the call to `gc_mark_loop` in init with a `NULL` `ptls`.
 void *gc_mark_label_addrs[_GC_MARK_L_MAX];
 
-// Double the mark stack (both pc and data) with the lock held.
+// Double the local mark stack (both pc and data) and rebase the pointers in `sp` onto the reallocated buffers
 static void NOINLINE gc_mark_stack_resize(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) JL_NOTSAFEPOINT
 {
     jl_gc_mark_data_t *old_data = gc_cache->data_stack;
     void **pc_stack = sp->pc_start;
     size_t stack_size = (char*)sp->pc_end - (char*)pc_stack;
-    JL_LOCK_NOGC(&gc_cache->stack_lock);
     gc_cache->data_stack = (jl_gc_mark_data_t *)realloc_s(old_data, stack_size * 2 * sizeof(jl_gc_mark_data_t));
     sp->data = (jl_gc_mark_data_t *)(((char*)sp->data) + (((char*)gc_cache->data_stack) - ((char*)old_data)));
 
     sp->pc_start = gc_cache->pc_stack = (void**)realloc_s(pc_stack, stack_size * 2 * sizeof(void*));
     gc_cache->pc_stack_end = sp->pc_end = sp->pc_start + stack_size * 2;
-    sp->pc += sp->pc_start - pc_stack;
-    JL_UNLOCK_NOGC(&gc_cache->stack_lock);
+    sp->pc = sp->pc_start + (sp->pc - pc_stack); // keep pc at the same offset from the new base (pc_stack is the old base)
 }
 
 // Push a work item to the stack. The type of the work item is marked with `pc`.
@@ -2398,8 +2404,8 @@ module_binding: {
             void *vb = jl_astaggedvalue(b);
             verify_parent1("module", binding->parent, &vb, "binding_buff");
             (void)vb;
-            jl_value_t *value = b->value;
-            jl_value_t *globalref = b->globalref;
+            jl_value_t *value = jl_atomic_load_relaxed(&b->value);
+            jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref);
             if (value) {
                 verify_parent2("module", binding->parent,
                                &b->value, "binding(%s)", jl_symbol_name(b->name));
@@ -2564,6 +2570,8 @@ mark: {
             if (a->data == NULL || jl_array_len(a) == 0)
                 goto pop;
             if (flags.ptrarray) {
+                if ((jl_datatype_t*)jl_tparam0(vt) == jl_symbol_type)
+                    goto pop;
                 size_t l = jl_array_len(a);
                 uintptr_t nptr = (l << 2) | (bits & GC_OLD);
                 objary_begin = (jl_value_t**)a->data;
@@ -2637,15 +2645,16 @@ mark: {
                 objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
             jl_task_t *ta = (jl_task_t*)new_obj;
             gc_scrub_record_task(ta);
-            void *stkbuf = ta->stkbuf;
             if (gc_cblist_task_scanner) {
                 export_gc_state(ptls, &sp);
+                int16_t tid = jl_atomic_load_relaxed(&ta->tid);
                 gc_invoke_callbacks(jl_gc_cb_task_scanner_t,
                     gc_cblist_task_scanner,
-                    (ta, ta->tid != -1 && ta == jl_all_tls_states[ta->tid]->root_task));
+                    (ta, tid != -1 && ta == jl_all_tls_states[tid]->root_task));
                 import_gc_state(ptls, &sp);
             }
 #ifdef COPY_STACKS
+            void *stkbuf = ta->stkbuf;
             if (stkbuf && ta->copy_stack)
                 gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
 #endif
@@ -2656,8 +2665,9 @@ mark: {
             uintptr_t ub = (uintptr_t)-1;
 #ifdef COPY_STACKS
             if (stkbuf && ta->copy_stack && ta->ptls == NULL) {
-                assert(ta->tid >= 0);
-                jl_ptls_t ptls2 = jl_all_tls_states[ta->tid];
+                int16_t tid = jl_atomic_load_relaxed(&ta->tid);
+                assert(tid >= 0);
+                jl_ptls_t ptls2 = jl_all_tls_states[tid];
                 ub = (uintptr_t)ptls2->stackbase;
                 lb = ub - ta->copy_stack;
                 offset = (uintptr_t)stkbuf - lb;
@@ -2767,7 +2777,7 @@ mark: {
 static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp,
                                      jl_ptls_t ptls2)
 {
-    gc_mark_queue_obj(gc_cache, sp, ptls2->current_task);
+    gc_mark_queue_obj(gc_cache, sp, jl_atomic_load_relaxed(&ptls2->current_task));
     gc_mark_queue_obj(gc_cache, sp, ptls2->root_task);
     if (ptls2->next_task)
         gc_mark_queue_obj(gc_cache, sp, ptls2->next_task);
@@ -2778,6 +2788,7 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp
 }
 
 void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp);
+extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
 
 // mark the initial root set
 static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
@@ -2799,9 +2810,11 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
         }
     }
     gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type);
-    for (size_t i = 0; i < N_CALL_CACHE; i++)
-        if (call_cache[i])
-            gc_mark_queue_obj(gc_cache, sp, call_cache[i]);
+    for (size_t i = 0; i < N_CALL_CACHE; i++) {
+        jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
+        if (v != NULL)
+            gc_mark_queue_obj(gc_cache, sp, v);
+    }
     if (jl_all_methods != NULL)
         gc_mark_queue_obj(gc_cache, sp, jl_all_methods);
     if (_jl_debug_method_invalidation != NULL)
@@ -2809,6 +2822,8 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
 
     // constants
     gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
+    if (cmpswap_names != NULL)
+        gc_mark_queue_obj(gc_cache, sp, cmpswap_names);
 }
 
 // find unmarked objects that need to be finalized from the finalizer list "list".
@@ -2855,7 +2870,7 @@ static void sweep_finalizer_list(arraylist_t *list)
 }
 
 // collector entry point and control
-static volatile uint32_t jl_gc_disable_counter = 1;
+static _Atomic(uint32_t) jl_gc_disable_counter = 1;
 
 JL_DLLEXPORT int jl_gc_enable(int on)
 {
@@ -2966,8 +2981,8 @@ static void jl_gc_queue_remset(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp
         jl_binding_t *ptr = (jl_binding_t*)items[i];
         // A null pointer can happen here when the binding is cleaned up
         // as an exception is thrown after it was already queued (#10221)
-        if (!ptr->value) continue;
-        if (gc_mark_queue_obj(gc_cache, sp, ptr->value)) {
+        jl_value_t *v = jl_atomic_load_relaxed(&ptr->value);
+        if (v != NULL && gc_mark_queue_obj(gc_cache, sp, v)) {
             items[n_bnd_refyoung] = ptr;
             n_bnd_refyoung++;
         }
@@ -3002,6 +3017,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
     uint64_t t0 = jl_hrtime();
     int64_t last_perm_scanned_bytes = perm_scanned_bytes;
+    JL_PROBE_GC_MARK_BEGIN();
 
     // 1. fix GC bits of objects in the remset.
     for (int t_i = 0; t_i < jl_n_threads; t_i++)
@@ -3028,6 +3044,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_mark_loop(ptls, sp);
     gc_mark_sp_init(gc_cache, &sp);
     gc_num.since_sweep += gc_num.allocd;
+    JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes);
     gc_settime_premark_end();
     gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes);
     int64_t actual_allocd = gc_num.since_sweep;
@@ -3058,7 +3075,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_mark_sp_init(gc_cache, &sp);
     // Conservative marking relies on age to tell allocated objects
     // and freelist entries apart.
-    mark_reset_age = !support_conservative_marking;
+    mark_reset_age = !jl_gc_conservative_gc_support_enabled();
     // Reset the age and old bit for any unmarked objects referenced by the
     // `to_finalize` list. These objects are only reachable from this list
     // and should not be referenced by any old objects so this won't break
@@ -3085,13 +3102,11 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     if (!prev_sweep_full)
         promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
     // 5. next collection decision
-    int not_freed_enough = estimate_freed < (7*(actual_allocd/10));
+    int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10));
     int nptr = 0;
     for (int i = 0;i < jl_n_threads;i++)
         nptr += jl_all_tls_states[i]->heap.remset_nptr;
     int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick
-    int sweep_full;
-    int recollect = 0;
     // trigger a full collection if the number of live bytes doubles since the last full
     // collection and then remains at least that high for a while.
     if (grown_heap_age == 0) {
@@ -3101,38 +3116,51 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     else if (live_bytes >= last_live_bytes) {
         grown_heap_age++;
     }
-    if (collection == JL_GC_INCREMENTAL) {
-        sweep_full = 0;
-    } else if ((collection == JL_GC_FULL || large_frontier ||
+    int sweep_full = 0;
+    int recollect = 0;
+    if ((large_frontier ||
          ((not_freed_enough || promoted_bytes >= gc_num.interval) &&
           (promoted_bytes >= default_collect_interval || prev_sweep_full)) ||
-         grown_heap_age > 1) &&
-        gc_num.pause > 1) {
-        recollect = (collection == JL_GC_FULL);
-        if (large_frontier)
-            gc_num.interval = last_long_collect_interval;
-        if (not_freed_enough || large_frontier) {
-            if (gc_num.interval <= 2*(max_collect_interval/5)) {
-                gc_num.interval = 5 * (gc_num.interval / 2);
+         grown_heap_age > 1) && gc_num.pause > 1) {
+        sweep_full = 1;
+    }
+    // update heuristics only if this GC was automatically triggered
+    if (collection == JL_GC_AUTO) {
+        if (sweep_full) {
+            if (large_frontier)
+                gc_num.interval = last_long_collect_interval;
+            if (not_freed_enough || large_frontier) {
+                if (gc_num.interval <= 2*(max_collect_interval/5)) {
+                    gc_num.interval = 5 * (gc_num.interval / 2);
+                }
             }
+            last_long_collect_interval = gc_num.interval;
+        }
+        else {
+            // reset interval to default, or at least half of live_bytes
+            int64_t half = live_bytes/2;
+            if (default_collect_interval < half && half <= max_collect_interval)
+                gc_num.interval = half;
+            else
+                gc_num.interval = default_collect_interval;
         }
-        last_long_collect_interval = gc_num.interval;
+    }
+    if (gc_sweep_always_full) {
         sweep_full = 1;
-        promoted_bytes = 0;
     }
-    else {
-        // reset interval to default, or at least half of live_bytes
-        int64_t half = live_bytes/2;
-        if (default_collect_interval < half && half <= max_collect_interval)
-            gc_num.interval = half;
-        else
-            gc_num.interval = default_collect_interval;
-        sweep_full = gc_sweep_always_full;
+    if (collection == JL_GC_FULL) {
+        sweep_full = 1;
+        recollect = 1;
     }
-    if (sweep_full)
+    if (sweep_full) {
+        // these are the difference between the number of gc-perm bytes scanned
+        // on the first collection after sweep_full, and the current scan
         perm_scanned_bytes = 0;
+        promoted_bytes = 0;
+    }
     scanned_bytes = 0;
     // 5. start sweeping
+    JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
     sweep_weak_refs();
     sweep_stack_pools();
     gc_sweep_foreign_objs();
@@ -3142,6 +3170,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_sweep_pool(sweep_full);
     if (sweep_full)
         gc_sweep_perm_alloc();
+    JL_PROBE_GC_SWEEP_END();
     // sweeping is over
     // 6. if it is a quick sweep, put back the remembered objects in queued state
     // so that we don't trigger the barrier again on them.
@@ -3199,17 +3228,20 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 {
+    JL_PROBE_GC_BEGIN(collection);
+
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    if (jl_gc_disable_counter) {
-        size_t localbytes = ptls->gc_num.allocd + gc_num.interval;
-        ptls->gc_num.allocd = -(int64_t)gc_num.interval;
-        jl_atomic_add_fetch(&gc_num.deferred_alloc, localbytes);
+    if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) {
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
+        jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
     }
-    gc_debug_print();
+    jl_gc_debug_print();
 
-    int8_t old_state = ptls->gc_state;
+    int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
     jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
     // `jl_safepoint_start_gc()` makes sure only one thread can
     // run the GC.
@@ -3228,10 +3260,12 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // TODO (concurrently queue objects)
     // no-op for non-threading
     jl_gc_wait_for_the_world();
+    JL_PROBE_GC_STOP_THE_WORLD();
+
     gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
         gc_cblist_pre_gc, (collection));
 
-    if (!jl_gc_disable_counter) {
+    if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) {
         JL_LOCK_NOGC(&finalizers_lock);
         if (_jl_gc_collect(ptls, collection)) {
             // recollect
@@ -3245,6 +3279,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // no-op for non-threading
     jl_safepoint_end_gc();
     jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+    JL_PROBE_GC_END();
 
     // Only disable finalizers on current thread
     // Doing this on all threads is racy (it's impossible to check
@@ -3255,6 +3290,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
         run_finalizers(ct);
         ptls->in_finalizer = was_in_finalizer;
     }
+    JL_PROBE_GC_FINALIZER();
+
     gc_invoke_callbacks(jl_gc_cb_post_gc_t,
         gc_cblist_post_gc, (collection));
 #ifdef _OS_WINDOWS_
@@ -3307,22 +3344,25 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
     gc_cache->nbig_obj = 0;
-    JL_MUTEX_INIT(&gc_cache->stack_lock);
     size_t init_size = 1024;
     gc_cache->pc_stack = (void**)malloc_s(init_size * sizeof(void*));
     gc_cache->pc_stack_end = gc_cache->pc_stack + init_size;
     gc_cache->data_stack = (jl_gc_mark_data_t *)malloc_s(init_size * sizeof(jl_gc_mark_data_t));
 
-    memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t));
+    memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
     assert(gc_num.interval == default_collect_interval);
-    ptls->gc_num.allocd = -(int64_t)gc_num.interval;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
 }
 
 // System-wide initializations
 void jl_gc_init(void)
 {
+    JL_MUTEX_INIT(&finalizers_lock);
+    uv_mutex_init(&gc_cache_lock);
+    uv_mutex_init(&gc_perm_lock);
+
     jl_gc_init_page();
-    gc_debug_init();
+    jl_gc_debug_init();
 
     arraylist_new(&finalizer_list_marked, 0);
     arraylist_new(&to_finalize, 0);
@@ -3360,8 +3400,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
     if (pgcstack && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        ptls->gc_num.allocd += sz;
-        ptls->gc_num.malloc++;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
     }
     return malloc(sz);
 }
@@ -3373,8 +3415,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
     if (pgcstack && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        ptls->gc_num.allocd += nm*sz;
-        ptls->gc_num.malloc++;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
     }
     return calloc(nm, sz);
 }
@@ -3386,8 +3430,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
     free(p);
     if (pgcstack && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
-        ptls->gc_num.freed += sz;
-        ptls->gc_num.freecall++;
+        jl_atomic_store_relaxed(&ptls->gc_num.freed,
+            jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.freecall,
+            jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1);
     }
 }
 
@@ -3399,10 +3445,13 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (sz < old)
-            ptls->gc_num.freed += (old - sz);
+            jl_atomic_store_relaxed(&ptls->gc_num.freed,
+                jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz));
         else
-            ptls->gc_num.allocd += (sz - old);
-        ptls->gc_num.realloc++;
+            jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+                jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old));
+        jl_atomic_store_relaxed(&ptls->gc_num.realloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
     }
     return realloc(p, sz);
 }
@@ -3419,8 +3468,8 @@ JL_DLLEXPORT void *jl_malloc(size_t sz)
     return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
 }
 
-JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
-{
+// _unchecked_calloc does not check for potential overflow of nm*sz + JL_SMALL_BYTE_ALIGNMENT
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
     size_t nmsz = nm*sz;
     int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
     if (p == NULL)
@@ -3429,6 +3478,13 @@ JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
     return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
 }
 
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+{
+    if (sz != 0 && nm > ((size_t)SSIZE_MAX - JL_SMALL_BYTE_ALIGNMENT) / sz) // sz == 0 cannot overflow and must not be used as a divisor
+        return NULL; // nm*sz + JL_SMALL_BYTE_ALIGNMENT would exceed SSIZE_MAX, so refuse instead of letting the product wrap
+    return _unchecked_calloc(nm, sz);
+}
+
 JL_DLLEXPORT void jl_free(void *p)
 {
     if (p != NULL) {
@@ -3466,8 +3522,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz)  // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
-    ptls->gc_num.allocd += allocsz;
-    ptls->gc_num.malloc++;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
@@ -3497,10 +3555,13 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
         live_bytes += allocsz - oldsz;
     }
     else if (allocsz < oldsz)
-        ptls->gc_num.freed += (oldsz - allocsz);
+        jl_atomic_store_relaxed(&ptls->gc_num.freed,
+            jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz));
     else
-        ptls->gc_num.allocd += (allocsz - oldsz);
-    ptls->gc_num.realloc++;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
+    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
 
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
@@ -3570,7 +3631,7 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
 #define GC_PERM_POOL_SIZE (2 * 1024 * 1024)
 // 20k limit for pool allocation. At most 1% fragmentation
 #define GC_PERM_POOL_LIMIT (20 * 1024)
-jl_mutex_t gc_perm_lock = {0, 0};
+uv_mutex_t gc_perm_lock;
 static uintptr_t gc_perm_pool = 0;
 static uintptr_t gc_perm_end = 0;
 
@@ -3647,9 +3708,9 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
     if (__unlikely(sz > GC_PERM_POOL_LIMIT))
 #endif
         return gc_perm_alloc_large(sz, zero, align, offset);
-    JL_LOCK_NOGC(&gc_perm_lock);
+    uv_mutex_lock(&gc_perm_lock);
     void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset);
-    JL_UNLOCK_NOGC(&gc_perm_lock);
+    uv_mutex_unlock(&gc_perm_lock);
     return p;
 }
 
@@ -3715,8 +3776,8 @@ JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
         }
         return result;
     } else {
-        int result = support_conservative_marking;
-        support_conservative_marking = 1;
+        int result = jl_atomic_load(&support_conservative_marking);
+        jl_atomic_store(&support_conservative_marking, 1);
         return result;
     }
 }
diff --git a/src/gc.h b/src/gc.h
index 06faa64a8b07f..8b420d28cffbc 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -11,9 +11,7 @@
 
 #include <stdlib.h>
 #include <string.h>
-#ifndef _MSC_VER
 #include <strings.h>
-#endif
 #include <inttypes.h>
 #include "julia.h"
 #include "julia_threads.h"
@@ -371,18 +369,12 @@ typedef struct {
     int ub;
 } pagetable_t;
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT;
 #else
 STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
 {
-#if defined(_COMPILER_MICROSOFT_)
-    unsigned long j;
-    _BitScanForward(&j, bitvec);
-    return j;
-#else
     return __builtin_ffs(bitvec) - 1;
-#endif
 }
 #endif
 
@@ -517,7 +509,7 @@ void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp,
                            arraylist_t *list, size_t start);
 void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp);
 void sweep_stack_pools(void);
-void gc_debug_init(void);
+void jl_gc_debug_init(void);
 
 extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];
 
@@ -646,14 +638,14 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_off
 #ifdef GC_DEBUG_ENV
 JL_DLLEXPORT extern jl_gc_debug_env_t jl_gc_debug_env;
 #define gc_sweep_always_full jl_gc_debug_env.always_full
-int gc_debug_check_other(void);
+int jl_gc_debug_check_other(void);
 int gc_debug_check_pool(void);
-void gc_debug_print(void);
+void jl_gc_debug_print(void);
 void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT;
 void gc_scrub(void);
 #else
 #define gc_sweep_always_full 0
-static inline int gc_debug_check_other(void)
+static inline int jl_gc_debug_check_other(void)
 {
     return 0;
 }
@@ -661,7 +653,7 @@ static inline int gc_debug_check_pool(void)
 {
     return 0;
 }
-static inline void gc_debug_print(void)
+static inline void jl_gc_debug_print(void)
 {
 }
 static inline void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT
diff --git a/src/gen_sysimg_symtab.jl b/src/gen_sysimg_symtab.jl
index 2d389a7209b33..8f03cc1560767 100644
--- a/src/gen_sysimg_symtab.jl
+++ b/src/gen_sysimg_symtab.jl
@@ -69,5 +69,5 @@ function outputline(io, name)
     println(io, "jl_symbol(\"", name, "\"),")
 end
 
-open(f->foreach(l->outputline(f,l), take(syms, 106)), "common_symbols1.inc", "w")
-open(f->foreach(l->outputline(f,l), take(drop(syms, 106), 254)), "common_symbols2.inc", "w")
+open(f->foreach(l->outputline(f,l), take(syms, 100)), "common_symbols1.inc", "w")
+open(f->foreach(l->outputline(f,l), take(drop(syms, 100), 254)), "common_symbols2.inc", "w")
diff --git a/src/getopt.c b/src/getopt.c
deleted file mode 100644
index 1170ecb5c9a0a..0000000000000
--- a/src/getopt.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/* This file is adapted from musl-libc
-----------------------------------------------------------------------
-Copyright © 2005-2014 Rich Felker, et al.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----------------------------------------------------------------------
-*/
-
-#include <wchar.h>
-#include <string.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stddef.h>
-#include "getopt.h"
-
-char *optarg;
-int optind=1, opterr=1, optopt, __optpos, __optreset=0;
-
-#define optpos __optpos
-
-int getopt(int argc, char * const argv[], const char *optstring)
-{
-  int i;
-  wchar_t c, d;
-  int k, l;
-  char *optchar;
-
-  if (!optind || __optreset) {
-    __optreset = 0;
-    __optpos = 0;
-    optind = 1;
-  }
-
-  if (optind >= argc || !argv[optind] || argv[optind][0] != '-' || !argv[optind][1])
-    return -1;
-  if (argv[optind][1] == '-' && !argv[optind][2])
-    return optind++, -1;
-
-  if (!optpos) optpos++;
-  if ((k = mbtowc(&c, argv[optind]+optpos, MB_LEN_MAX)) < 0) {
-    k = 1;
-    c = 0xfffd; /* replacement char */
-  }
-  optchar = argv[optind]+optpos;
-  optopt = c;
-  optpos += k;
-
-  if (!argv[optind][optpos]) {
-    optind++;
-    optpos = 0;
-  }
-
-  for (i=0; (l = mbtowc(&d, optstring+i, MB_LEN_MAX)) && d!=c; i+=l>0?l:1);
-
-  if (d != c) {
-    if (optstring[0] != ':' && opterr) {
-      fprintf(stderr, "%s: illegal option: %c\n", argv[0], optchar);
-    }
-    return '?';
-  }
-  if (optstring[i+1] == ':') {
-    if (optind >= argc) {
-      if (optstring[0] == ':') return ':';
-      if (opterr) {
-        fprintf(stderr, "%s: option requires an argument: %c\n", argv[0], optchar);
-      }
-      return '?';
-    }
-    if (optstring[i+2] == ':') optarg = 0;
-    if (optstring[i+2] != ':' || optpos) {
-      optarg = argv[optind++] + optpos;
-      optpos = 0;
-    }
-  }
-  return c;
-}
-
-static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
-{
-  if (!optind || __optreset) {
-    __optreset = 0;
-    __optpos = 0;
-    optind = 1;
-  }
-  if (optind >= argc || !argv[optind] || argv[optind][0] != '-') return -1;
-  if ((longonly && argv[optind][1]) ||
-    (argv[optind][1] == '-' && argv[optind][2]))
-  {
-    int i;
-    for (i=0; longopts[i].name; i++) {
-      const char *name = longopts[i].name;
-      char *opt = argv[optind]+1;
-      if (*opt == '-') opt++;
-      for (; *name && *name == *opt; name++, opt++);
-      if (*name || (*opt && *opt != '=')) continue;
-      if (*opt == '=') {
-        if (!longopts[i].has_arg) continue;
-        optarg = opt+1;
-      } else {
-        if (longopts[i].has_arg == required_argument) {
-          if (!(optarg = argv[++optind]))
-            return ':';
-        } else optarg = NULL;
-      }
-      optind++;
-      if (idx) *idx = i;
-      if (longopts[i].flag) {
-        *longopts[i].flag = longopts[i].val;
-        return 0;
-      }
-      return longopts[i].val;
-    }
-    if (argv[optind][1] == '-') {
-      optind++;
-      return '?';
-    }
-  }
-  return getopt(argc, argv, optstring);
-}
-
-int getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx)
-{
-  return __getopt_long(argc, argv, optstring, longopts, idx, 0);
-}
-
-int getopt_long_only(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx)
-{
-  return __getopt_long(argc, argv, optstring, longopts, idx, 1);
-}
diff --git a/src/getopt.h b/src/getopt.h
deleted file mode 100644
index 7bcf5fb94d40e..0000000000000
--- a/src/getopt.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* This file is adapted from musl-libc
-----------------------------------------------------------------------
-Copyright © 2005-2014 Rich Felker, et al.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----------------------------------------------------------------------
-*/
-
-#ifndef _GETOPT_H
-#define _GETOPT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int getopt(int, char * const [], const char *);
-extern char *optarg;
-extern int optind, opterr, optopt;
-
-struct option
-{
-  const char *name;
-  int has_arg;
-  int *flag;
-  int val;
-};
-
-int getopt_long(int, char *const *, const char *, const struct option *, int *);
-int getopt_long_only(int, char *const *, const char *, const struct option *, int *);
-
-#define no_argument        0
-#define required_argument  1
-#define optional_argument  2
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/gf.c b/src/gf.c
index 41381ccc5178e..3d26ac65fbf94 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -24,10 +24,10 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT size_t jl_world_counter = 1; // TODO: should this be atomic release/consume?
+JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT
 {
-    return jl_world_counter;
+    return jl_atomic_load_acquire(&jl_world_counter); // acquire load of the _Atomic(size_t) global counter
 }
 
 JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT
@@ -118,7 +118,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
             }
         }
         else {
-            jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
+            _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
             JL_GC_PUSH1(&specializations); // clang-sa doesn't realize this loop uses specializations
             for (i = cl; i > 0; i--) {
                 jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i - 1]);
@@ -140,7 +140,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
         }
         else {
             if (hv) {
-                jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
+                _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
                 for (i = 0; i < cl; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
                     if ((jl_value_t*)mi == jl_nothing)
@@ -412,11 +412,12 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
 {
     jl_svec_t *specializations = def->func.method->specializations;
     size_t i, l = jl_svec_len(specializations);
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
         if ((jl_value_t*)mi != jl_nothing) {
             assert(jl_is_method_instance(mi));
-            if (jl_rettype_inferred(mi, jl_world_counter, jl_world_counter) == jl_nothing)
+            if (jl_rettype_inferred(mi, world, world) == jl_nothing)
                 jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
         }
     }
@@ -441,7 +442,7 @@ static void foreach_mtable_in_module(
                     jl_typename_t *tn = ((jl_datatype_t*)v)->name;
                     if (tn->module == m && tn->name == b->name) {
                         jl_methtable_t *mt = tn->mt;
-                        if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt) {
+                        if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
                             visit(mt, env);
                         }
                     }
@@ -466,6 +467,7 @@ void jl_foreach_reachable_mtable(void (*visit)(jl_methtable_t *mt, void *env), v
     JL_GC_PUSH2(&visited, &mod_array);
     mod_array = jl_get_loaded_modules();
     visit(jl_type_type_mt, env);
+    visit(jl_nonfunction_mt, env);
     if (mod_array) {
         int i;
         for (i = 0; i < jl_array_len(mod_array); i++) {
@@ -477,6 +479,7 @@ void jl_foreach_reachable_mtable(void (*visit)(jl_methtable_t *mt, void *env), v
     }
     else {
         foreach_mtable_in_module(jl_main_module, visit, env, &visited);
+        foreach_mtable_in_module(jl_core_module, visit, env, &visited);
     }
     JL_GC_POP();
 }
@@ -493,14 +496,15 @@ static void reset_mt_caches(jl_methtable_t *mt, void *env)
 
 
 jl_function_t *jl_typeinf_func = NULL;
-size_t jl_typeinf_world = 0;
+JL_DLLEXPORT size_t jl_typeinf_world = 1;
 
 JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
 {
+    size_t newfunc = jl_typeinf_world == 1 && jl_typeinf_func == NULL; // first registration only: no func installed yet and jl_typeinf_world still at its initial value of 1
     jl_typeinf_func = (jl_function_t*)f;
     jl_typeinf_world = jl_get_tls_world_age();
-    ++jl_world_counter; // make type-inference the only thing in this world
-    if (jl_typeinf_world == 0) {
+    size_t world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1; // make type-inference the only thing in this world (size_t: world ages must not be narrowed to int)
+    if (newfunc) {
         // give type inference a chance to see all of these
         // TODO: also reinfer if max_world != ~(size_t)0
         jl_array_t *unspec = jl_alloc_vec_any(0);
@@ -509,8 +513,8 @@ JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
         size_t i, l;
         for (i = 0, l = jl_array_len(unspec); i < l; i++) {
             jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(unspec, i);
-            if (jl_rettype_inferred(mi, jl_world_counter, jl_world_counter) == jl_nothing)
-                jl_type_infer(mi, jl_world_counter, 1);
+            if (jl_rettype_inferred(mi, world, world) == jl_nothing)
+                jl_type_infer(mi, world, 1);
         }
         JL_GC_POP();
     }
@@ -990,7 +994,7 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROP
 }
 
 static jl_method_instance_t *cache_method(
-        jl_methtable_t *mt, jl_typemap_t **cache, jl_value_t *parent JL_PROPAGATES_ROOT,
+        jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT,
         jl_tupletype_t *tt, // the original tupletype of the signature
         jl_method_t *definition,
         size_t world, size_t min_valid, size_t max_valid,
@@ -1007,7 +1011,7 @@ static jl_method_instance_t *cache_method(
                 return entry->func.linfo;
         }
         struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0};
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(*cache, &search, offs, /*subtype*/1);
+        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
         if (entry && entry->func.value)
             return entry->func.linfo;
     }
@@ -1133,7 +1137,7 @@ static jl_method_instance_t *cache_method(
     // that satisfies our requirements
     if (cachett != tt) {
         struct jl_typemap_assoc search = {(jl_value_t*)cachett, world, NULL, 0, ~(size_t)0};
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(*cache, &search, offs, /*subtype*/1);
+        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
         if (entry && jl_egal((jl_value_t*)entry->simplesig, simplett ? (jl_value_t*)simplett : jl_nothing) &&
                 jl_egal((jl_value_t*)guardsigs, (jl_value_t*)entry->guardsigs)) {
             JL_GC_POP();
@@ -1177,7 +1181,7 @@ static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATE
     // caller must hold the mt->writelock
     assert(tt->isdispatchtuple || tt->hasfreetypevars);
     if (tt->isdispatchtuple) {
-        jl_array_t *leafcache = mt->leafcache;
+        jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
         jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
         if (entry)
             return entry->func.linfo;
@@ -1351,7 +1355,7 @@ static void invalidate_external(jl_method_instance_t *mi, size_t max_world) {
             jl_printf((JL_STREAM*)STDERR_FILENO, "error in invalidation callback: ");
             jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-            jlbacktrace(); // writen to STDERR_FILENO
+            jlbacktrace(); // written to STDERR_FILENO
         }
     }
 }
@@ -1525,7 +1529,7 @@ static int typemap_search(jl_typemap_entry_t *entry, void *closure)
 
 static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT;
 
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
 static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT {
     jl_value_t *closure = (jl_value_t*)(method);
     if (jl_typemap_visitor(mt->defs, typemap_search, &closure))
@@ -1545,7 +1549,7 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m
     mt_cache_env.shadowed = NULL;
     mt_cache_env.invalidated = 0;
     jl_typemap_visitor(mt->cache, disable_mt_cache, (void*)&mt_cache_env);
-    jl_array_t *leafcache = mt->leafcache;
+    jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
     size_t i, l = jl_array_len(leafcache);
     for (i = 1; i < l; i += 2) {
         jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_array_ptr_ref(leafcache, i);
@@ -1586,7 +1590,8 @@ JL_DLLEXPORT void jl_method_table_disable(jl_methtable_t *mt, jl_method_t *metho
     jl_typemap_entry_t *methodentry = do_typemap_search(mt, method);
     JL_LOCK(&mt->writelock);
     // Narrow the world age on the method to make it uncallable
-    jl_method_table_invalidate(mt, methodentry, method, jl_world_counter++);
+    size_t world = jl_atomic_fetch_add(&jl_world_counter, 1);
+    jl_method_table_invalidate(mt, methodentry, method, world);
     JL_UNLOCK(&mt->writelock);
 }
 
@@ -1625,7 +1630,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
     jl_value_t *oldvalue = NULL;
     jl_array_t *oldmi = NULL;
     if (method->primary_world == 1)
-        method->primary_world = ++jl_world_counter;
+        method->primary_world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1;
     size_t max_world = method->primary_world - 1;
     jl_value_t *loctag = NULL;  // debug info for invalidation
     jl_value_t *isect = NULL;
@@ -1725,7 +1730,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                 if (morespec[j] == (char)morespec_is)
                     continue;
                 jl_svec_t *specializations = jl_atomic_load_acquire(&m->specializations);
-                jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
+                _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
                 size_t i, l = jl_svec_len(specializations);
                 enum morespec_options ambig = morespec_unknown;
                 for (i = 0; i < l; i++) {
@@ -1781,7 +1786,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                 mt_cache_env.invalidated = 0;
 
                 jl_typemap_visitor(mt->cache, invalidate_mt_cache, (void*)&mt_cache_env);
-                jl_array_t *leafcache = mt->leafcache;
+                jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
                 size_t i, l = jl_array_len(leafcache);
                 for (i = 1; i < l; i += 2) {
                     jl_value_t *entry = jl_array_ptr_ref(leafcache, i);
@@ -1824,7 +1829,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
         jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
         jl_ptls_t ptls = jl_current_task->ptls;
         ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
-        jl_critical_error(0, NULL);
+        jl_critical_error(0, NULL, jl_current_task);
         abort();
     }
     // not reached
@@ -1941,10 +1946,11 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             compile_option = ((jl_method_t*)def)->module->compile;
     }
 
+    // if compilation is disabled or source is unavailable, try calling unspecialized version
     if (compile_option == JL_OPTIONS_COMPILE_OFF ||
-        compile_option == JL_OPTIONS_COMPILE_MIN) {
+        compile_option == JL_OPTIONS_COMPILE_MIN ||
+        def->source == jl_nothing) {
         // copy fptr from the template method definition
-        jl_method_t *def = mi->def.method;
         if (jl_is_method(def) && def->unspecialized) {
             jl_code_instance_t *unspec = jl_atomic_load_relaxed(&def->unspecialized->cache);
             if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
@@ -1959,6 +1965,10 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 return codeinst;
             }
         }
+    }
+    // if that didn't work and compilation is off, try running in the interpreter
+    if (compile_option == JL_OPTIONS_COMPILE_OFF ||
+        compile_option == JL_OPTIONS_COMPILE_MIN) {
         jl_code_info_t *src = jl_code_for_interpreter(mi);
         if (!jl_code_requires_compiler(src)) {
             jl_code_instance_t *codeinst = jl_new_codeinst(mi,
@@ -1980,8 +1990,16 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_method_instance_t *unspec = jl_get_unspecialized(mi);
         jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
         // ask codegen to make the fptr for unspec
-        if (jl_atomic_load_relaxed(&ucache->invoke) == NULL)
+        if (jl_atomic_load_relaxed(&ucache->invoke) == NULL) {
+            if (def->source == jl_nothing && (ucache->def->uninferred == jl_nothing ||
+                                              ucache->def->uninferred == NULL)) {
+                jl_printf(JL_STDERR, "source not available for ");
+                jl_static_show(JL_STDERR, (jl_value_t*)mi);
+                jl_printf(JL_STDERR, "\n");
+                jl_error("source missing for method that needs to be compiled");
+            }
             jl_generate_fptr_for_unspecialized(ucache);
+        }
         assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
         if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
             jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
@@ -2001,12 +2019,12 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
 }
 
 
-JL_DLLEXPORT jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m) // returns m->rettype_const; f, args and nargs are ignored
 {
     return m->rettype_const;
 }
 
-JL_DLLEXPORT jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     while (1) {
         jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
@@ -2015,7 +2033,7 @@ JL_DLLEXPORT jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t
     }
 }
 
-JL_DLLEXPORT jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     jl_svec_t *sparams = m->def->sparam_vals;
     assert(sparams != jl_emptysvec);
@@ -2026,6 +2044,12 @@ JL_DLLEXPORT jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32
     }
 }
 
+JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
+
+JL_DLLEXPORT jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return;
+
+JL_DLLEXPORT jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam;
+
 // Return the index of the invoke api, if known
 JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst)
 {
@@ -2124,7 +2148,7 @@ static void _generate_from_hint(jl_method_instance_t *mi, size_t world)
 
 static void jl_compile_now(jl_method_instance_t *mi)
 {
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t tworld = jl_typeinf_world;
     _generate_from_hint(mi, world);
     if (jl_typeinf_func && mi->def.method->primary_world <= tworld) {
@@ -2135,7 +2159,7 @@ static void jl_compile_now(jl_method_instance_t *mi)
 
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
 {
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t tworld = jl_typeinf_world;
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
@@ -2272,8 +2296,8 @@ STATIC_INLINE int sig_match_fast(jl_value_t *arg1t, jl_value_t **args, jl_value_
     return 1;
 }
 
-jl_typemap_entry_t *call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
-static uint8_t pick_which[N_CALL_CACHE];
+_Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
+static _Atomic(uint8_t) pick_which[N_CALL_CACHE];
 #ifdef JL_GF_PROFILE
 size_t ncalls;
 void call_cache_stats()
@@ -2575,6 +2599,7 @@ JL_DLLEXPORT jl_function_t *jl_get_kwsorter(jl_value_t *ty)
             strcpy(&suffixed[0], name);
             strcpy(&suffixed[l], "##kw");
             jl_sym_t *fname = jl_symbol(suffixed);
+            free(suffixed);
             mt->kwsorter = jl_new_generic_function_with_supertype(fname, mt->module, jl_function_type);
             jl_gc_wb(mt, mt->kwsorter);
         }
@@ -2938,6 +2963,14 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
                     int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
                     int rsubt2 = jl_egal((jl_value_t*)matc2->spec_types, m2->sig);
                     jl_value_t *ti;
+                    if (!subt && !subt2 && rsubt && rsubt2 && lim == -1 && ambig == NULL)
+                        // these would only be filtered out of the list as
+                        // ambiguous if they are also type-equal, as we
+                        // aren't skipping matches and the user doesn't
+                        // care if we report any ambiguities
+                        continue;
+                    if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
+                        continue;
                     if (subt) {
                         ti = (jl_value_t*)matc2->spec_types;
                         isect2 = NULL;
@@ -2946,18 +2979,11 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
                         ti = (jl_value_t*)matc->spec_types;
                         isect2 = NULL;
                     }
-                    else if (rsubt && rsubt2 && lim == -1 && ambig == NULL) {
-                        // these would only be filtered out of the list as
-                        // ambiguous if they are also type-equal, as we
-                        // aren't skipping matches and the user doesn't
-                        // care if we report any ambiguities
-                        ti = jl_bottom_type;
-                    }
                     else {
                         jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &env.match.ti, &isect2);
                         ti = env.match.ti;
                     }
-                    if (ti != jl_bottom_type && !jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) {
+                    if (ti != jl_bottom_type) {
                         disjoint = 0;
                         // m and m2 are ambiguous, but let's see if we can find another method (m3)
                         // that dominates their intersection, and means we can ignore this
@@ -3125,7 +3151,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
             jl_method_t *meth = env.matc->method;
             jl_svec_t *tpenv = env.matc->sparams;
             JL_LOCK(&mt->writelock);
-            cache_method(mt, &mt->cache, (jl_value_t*)mt, type, meth, world, env.min_valid, env.max_valid, tpenv);
+            cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.min_valid, env.max_valid, tpenv);
             JL_UNLOCK(&mt->writelock);
         }
     }
@@ -3157,22 +3183,26 @@ int jl_has_concrete_subtype(jl_value_t *typ)
 //   the best way to avoid acquisition priority
 //   ordering violations
 //static jl_mutex_t typeinf_lock;
-#define typeinf_lock codegen_lock
+#define typeinf_lock jl_codegen_lock
 
 static uint64_t inference_start_time = 0;
+static uint8_t inference_is_measuring_compile_time = 0;
 
 JL_DLLEXPORT void jl_typeinf_begin(void)
 {
     JL_LOCK(&typeinf_lock);
-    if (jl_measure_compile_time[jl_threadid()])
+    if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { // global enable flag, sampled after the lock is taken
         inference_start_time = jl_hrtime();
+        inference_is_measuring_compile_time = 1; // tells jl_typeinf_end that inference_start_time was recorded for this section
+    }
 }
 
 JL_DLLEXPORT void jl_typeinf_end(void)
 {
-    int tid = jl_threadid();
-    if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
+    if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) { // count == 1 presumably means this is the last outstanding hold of the lock; TODO confirm
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time)); // add time elapsed since jl_typeinf_begin recorded the start
+        inference_is_measuring_compile_time = 0; // reset so an unmeasured section is not charged with a stale start time
+    }
     JL_UNLOCK(&typeinf_lock);
 }
 
diff --git a/src/iddict.c b/src/iddict.c
index 0d67a2b4c82c1..e6c9eee44b980 100644
--- a/src/iddict.c
+++ b/src/iddict.c
@@ -43,7 +43,7 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_
         *pa = a;
     }
     size_t maxprobe = max_probe(sz);
-    void **tab = (void **)a->data;
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data;
 
     hv = keyhash(key);
     while (1) {
@@ -54,14 +54,14 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_
         empty_slot = -1;
 
         do {
-            jl_value_t *k2 = (jl_value_t*)tab[index];
+            jl_value_t *k2 = jl_atomic_load_relaxed(&tab[index]);
             if (k2 == NULL) {
                 if (empty_slot == -1)
                     empty_slot = index;
                 break;
             }
             if (jl_egal(key, k2)) {
-                if (tab[index + 1] != NULL) {
+                if (jl_atomic_load_relaxed(&tab[index + 1]) != NULL) {
                     jl_atomic_store_release(&tab[index + 1], val);
                     jl_gc_wb(a, val);
                     return 0;
@@ -71,8 +71,8 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_
                 if (empty_slot == -1)
                     empty_slot = index;
             }
-            if (empty_slot == -1 && tab[index + 1] == NULL) {
-                assert(tab[index] == jl_nothing);
+            if (empty_slot == -1 && jl_atomic_load_relaxed(&tab[index + 1]) == NULL) {
+                assert(jl_atomic_load_relaxed(&tab[index]) == jl_nothing);
                 empty_slot = index;
             }
 
@@ -102,20 +102,20 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_
         *pa = jl_idtable_rehash(*pa, newsz);
 
         a = *pa;
-        tab = (void **)a->data;
+        tab = (_Atomic(jl_value_t*)*)a->data;
         sz = hash_size(a);
         maxprobe = max_probe(sz);
     }
 }
 
 /* returns bp if key is in hash, otherwise NULL */
-inline jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
+inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
 {
     size_t sz = hash_size(a);
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    void **tab = (void **)a->data;
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data;
     uint_t hv = keyhash(key);
     size_t index = h2index(hv, sz);
     sz *= 2;
@@ -123,12 +123,12 @@ inline jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEP
     size_t iter = 0;
 
     do {
-        jl_value_t *k2 = (jl_value_t*)jl_atomic_load_relaxed(&tab[index]); // just to ensure the load doesn't get duplicated
+        jl_value_t *k2 = jl_atomic_load_relaxed(&tab[index]); // just to ensure the load doesn't get duplicated
         if (k2 == NULL)
             return NULL;
         if (jl_egal(key, k2)) {
             if (jl_atomic_load_relaxed(&tab[index + 1]) != NULL)
-                return (jl_value_t**)&tab[index + 1];
+                return &tab[index + 1];
             // `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key
             if (key != jl_nothing)
                 return NULL; // concurrent insertion hasn't completed yet
@@ -155,21 +155,21 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int
 JL_DLLEXPORT
 jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT
 {
-    jl_value_t **bp = jl_table_peek_bp(h, key);
+    _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); // pointer to key's value slot, or NULL when key is not in the table
     return (bp == NULL) ? deflt : jl_atomic_load_relaxed(bp);
 }
 
 JL_DLLEXPORT
 jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found)
 {
-    jl_value_t **bp = jl_table_peek_bp(h, key);
+    _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); // value slot for key (the key itself is stored at bp - 1), or NULL if absent
     if (found)
         *found = (bp != NULL);
     if (bp == NULL)
         return deflt;
-    jl_value_t *val = *bp;
-    *(bp - 1) = jl_nothing; // clear the key
-    *bp = NULL;
+    jl_value_t *val = jl_atomic_load_relaxed(bp); // read the value out before its slot is cleared below
+    jl_atomic_store_relaxed(bp - 1, jl_nothing); // clear the key (jl_nothing is the deletion sentinel)
+    jl_atomic_store_relaxed(bp, NULL); // and the value (briefly corrupting the table)
     return val;
 }
 
diff --git a/src/init.c b/src/init.c
index 602583a9221fd..1eb2cccd73d68 100644
--- a/src/init.c
+++ b/src/init.c
@@ -34,11 +34,7 @@
 extern "C" {
 #endif
 
-#ifdef _MSC_VER
-JL_DLLEXPORT char *dirname(char *);
-#else
 #include <libgen.h>
-#endif
 
 #ifdef _OS_WINDOWS_
 extern int needsSymRefreshModuleList;
@@ -51,7 +47,7 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
 // list of modules being deserialized with __init__ methods
 jl_array_t *jl_module_init_order;
 
-size_t jl_page_size;
+JL_DLLEXPORT size_t jl_page_size;
 
 void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
 {
@@ -115,7 +111,7 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
 static void jl_prep_sanitizers(void)
 {
 #if !defined(_OS_WINDOWS_)
-#if defined(JL_ASAN_ENABLED) || defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
     struct rlimit rl;
 
     // When using the sanitizers, increase stack size because they bloat
@@ -295,14 +291,14 @@ static void post_boot_hooks(void);
 
 JL_DLLEXPORT void *jl_libjulia_internal_handle;
 JL_DLLEXPORT void *jl_libjulia_handle;
-void *jl_RTLD_DEFAULT_handle;
+JL_DLLEXPORT void *jl_RTLD_DEFAULT_handle;
 JL_DLLEXPORT void *jl_exe_handle;
 #ifdef _OS_WINDOWS_
 void *jl_ntdll_handle;
 void *jl_kernel32_handle;
 void *jl_crtdll_handle;
 void *jl_winsock_handle;
-extern const char jl_crtdll_name[];
+extern const char *jl_crtdll_name;
 #endif
 
 uv_loop_t *jl_io_loop;
@@ -431,21 +427,7 @@ static void init_stdio(void)
     jl_flush_cstdio();
 }
 
-#ifdef JL_USE_INTEL_JITEVENTS
-char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier
-#endif
-
-#ifdef JL_USE_OPROFILE_JITEVENTS
-char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile
-#endif
-
-#ifdef JL_USE_PERF_JITEVENTS
-char jl_using_perf_jitevents = 0;
-#endif
-
-char jl_using_gdb_jitevents = 0;
-
-int isabspath(const char *in) JL_NOTSAFEPOINT
+int jl_isabspath(const char *in) JL_NOTSAFEPOINT
 {
 #ifdef _OS_WINDOWS_
     char c0 = in[0];
@@ -517,7 +499,7 @@ static char *abspath(const char *in, int nprefix)
 // unless `in` starts with `%`
 static const char *absformat(const char *in)
 {
-    if (in[0] == '%' || isabspath(in))
+    if (in[0] == '%' || jl_isabspath(in))
         return in;
     // get an escaped copy of cwd
     size_t path_size = PATH_MAX;
@@ -572,7 +554,7 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
     free(free_path);
     free_path = NULL;
     if (jl_options.image_file) {
-        if (rel == JL_IMAGE_JULIA_HOME && !isabspath(jl_options.image_file)) {
+        if (rel == JL_IMAGE_JULIA_HOME && !jl_isabspath(jl_options.image_file)) {
             // build time path, relative to JULIA_BINDIR
             free_path = (char*)malloc_s(PATH_MAX);
             int n = snprintf(free_path, PATH_MAX, "%s" PATHSEPSTRING "%s",
@@ -628,8 +610,14 @@ static void restore_fp_env(void)
     }
 }
 
+static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct);
+
+JL_DLLEXPORT int jl_default_debug_info_kind;
+
 JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 {
+    jl_default_debug_info_kind = 0;
+
     jl_init_timing();
     // Make sure we finalize the tls callback before starting any threads.
     (void)jl_get_pgcstack();
@@ -651,16 +639,20 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(1, &stack_lo, &stack_hi);
 
-    // Load libjulia-internal (which contains this function), and libjulia, explicitly.
     jl_libjulia_internal_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1);
-    jl_libjulia_handle = jl_load_dynamic_library(JL_LIBJULIA_SONAME, JL_RTLD_DEFAULT, 1);
 #ifdef _OS_WINDOWS_
+    jl_exe_handle = GetModuleHandleA(NULL);
+    jl_RTLD_DEFAULT_handle = jl_libjulia_internal_handle;
+    if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                            (LPCWSTR)&jl_any_type,
+                            (HMODULE*)&jl_libjulia_handle)) {
+        jl_error("could not load base module");
+    }
     jl_ntdll_handle = jl_dlopen("ntdll.dll", 0); // bypass julia's pathchecking for system dlls
     jl_kernel32_handle = jl_dlopen("kernel32.dll", 0);
     jl_crtdll_handle = jl_dlopen(jl_crtdll_name, 0);
     jl_winsock_handle = jl_dlopen("ws2_32.dll", 0);
-    jl_exe_handle = GetModuleHandleA(NULL);
-    JL_MUTEX_INIT(&jl_in_stackwalk);
+    uv_mutex_init(&jl_in_stackwalk);
     SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC);
     if (!SymInitialize(GetCurrentProcess(), "", 1)) {
         jl_printf(JL_STDERR, "WARNING: failed to initialize stack walk info\n");
@@ -678,53 +670,25 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 #endif
 #endif
 
-#if \
-    defined(JL_USE_INTEL_JITEVENTS) || \
-    defined(JL_USE_OPROFILE_JITEVENTS) || \
-    defined(JL_USE_PERF_JITEVENTS)
-    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
-#endif
-
-#if defined(JL_USE_INTEL_JITEVENTS)
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_intel_jitevents = 1;
-    }
-#endif
-
-#if defined(JL_USE_OPROFILE_JITEVENTS)
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_oprofile_jitevents = 1;
-    }
-#endif
-
-#if defined(JL_USE_PERF_JITEVENTS)
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_perf_jitevents= 1;
-    }
-#endif
-
-#if defined(JL_DEBUG_BUILD)
-    jl_using_gdb_jitevents = 1;
-# else
-    const char *jit_gdb = getenv("ENABLE_GDBLISTENER");
-    if (jit_gdb && atoi(jit_gdb)) {
-        jl_using_gdb_jitevents = 1;
-    }
-#endif
-
     if ((jl_options.outputo || jl_options.outputbc || jl_options.outputasm) &&
         (jl_options.code_coverage || jl_options.malloc_log)) {
         jl_error("cannot generate code-coverage or track allocation information while generating a .o, .bc, or .s output file");
     }
 
+    jl_init_runtime_ccall();
     jl_gc_init();
     jl_init_tasks();
     jl_init_threading();
 
     jl_ptls_t ptls = jl_init_threadtls(0);
-    jl_init_root_task(ptls, stack_lo, stack_hi);
-    jl_task_t *ct = jl_current_task;
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    _finish_julia_init(rel, ptls, ct);
+}
 
+static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
+{
     jl_init_threadinginfra();
 
     jl_resolve_sysimg_location(rel);
@@ -734,12 +698,13 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     if (jl_options.cpu_target == NULL)
         jl_options.cpu_target = "native";
 
-    if (jl_options.image_file)
+    if (jl_options.image_file) {
         jl_restore_system_image(jl_options.image_file);
-    else
+    } else {
         jl_init_types();
+        jl_init_codegen();
+    }
 
-    jl_init_codegen();
     jl_init_common_symbols();
     jl_init_flisp();
     jl_init_serializer();
@@ -840,6 +805,7 @@ static void post_boot_hooks(void)
     jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
     jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
     jl_initerror_type      = (jl_datatype_t*)core("InitError");
+    jl_pair_type           = core("Pair");
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
@@ -852,7 +818,7 @@ static void post_boot_hooks(void)
     for (i = 1; i < jl_core_module->bindings.size; i += 2) {
         if (table[i] != HT_NOTFOUND) {
             jl_binding_t *b = (jl_binding_t*)table[i];
-            jl_value_t *v = b->value;
+            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
             if (v) {
                 if (jl_is_unionall(v))
                     v = jl_unwrap_unionall(v);
diff --git a/src/interpreter.c b/src/interpreter.c
index 7858bd6ddc4ea..6b07a8f7e5971 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -37,7 +37,7 @@ typedef struct {
   JL_GCC_IGNORE_STOP
 #endif
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT;
 
@@ -79,6 +79,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
 {
     jl_value_t **args = jl_array_ptr_data(ex->args);
 
+    // generic function definition
     if (jl_expr_nargs(ex) == 1) {
         jl_value_t **args = jl_array_ptr_data(ex->args);
         jl_sym_t *fname = (jl_sym_t*)args[0];
@@ -92,7 +93,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
         }
         jl_value_t *bp_owner = (jl_value_t*)modu;
         jl_binding_t *b = jl_get_binding_for_method_def(modu, fname);
-        jl_value_t **bp = &b->value;
+        _Atomic(jl_value_t*) *bp = &b->value;
         jl_value_t *gf = jl_generic_function_def(b->name, b->owner, bp, bp_owner, b);
         return gf;
     }
@@ -210,13 +211,16 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     jl_value_t **args = jl_array_ptr_data(ex->args);
     size_t nargs = jl_array_len(ex->args);
     jl_sym_t *head = ex->head;
-    if (head == call_sym) {
+    if (head == jl_call_sym) {
         return do_call(args, nargs, s);
     }
-    else if (head == invoke_sym) {
+    else if (head == jl_invoke_sym) {
         return do_invoke(args, nargs, s);
     }
-    else if (head == isdefined_sym) {
+    else if (head == jl_invoke_modify_sym) {
+        return do_call(args + 1, nargs - 1, s);
+    }
+    else if (head == jl_isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
         if (jl_is_slot(sym) || jl_is_argument(sym)) {
@@ -231,7 +235,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         else if (jl_is_symbol(sym)) {
             defined = jl_boundp(s->module, (jl_sym_t*)sym);
         }
-        else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == static_parameter_sym) {
+        else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == jl_static_parameter_sym) {
             ssize_t n = jl_unbox_long(jl_exprarg(sym, 0));
             assert(n > 0);
             if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) {
@@ -248,19 +252,19 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         }
         return defined ? jl_true : jl_false;
     }
-    else if (head == throw_undef_if_not_sym) {
+    else if (head == jl_throw_undef_if_not_sym) {
         jl_value_t *cond = eval_value(args[1], s);
         assert(jl_is_bool(cond));
         if (cond == jl_false) {
             jl_sym_t *var = (jl_sym_t*)args[0];
-            if (var == getfield_undefref_sym)
+            if (var == jl_getfield_undefref_sym)
                 jl_throw(jl_undefref_exception);
             else
                 jl_undefined_var_error(var);
         }
         return jl_nothing;
     }
-    else if (head == new_sym) {
+    else if (head == jl_new_sym) {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, nargs);
         for (size_t i = 0; i < nargs; i++)
@@ -269,7 +273,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         JL_GC_POP();
         return v;
     }
-    else if (head == splatnew_sym) {
+    else if (head == jl_splatnew_sym) {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 2);
         argv[0] = eval_value(args[0], s);
@@ -278,7 +282,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         JL_GC_POP();
         return v;
     }
-    else if (head == new_opaque_closure_sym) {
+    else if (head == jl_new_opaque_closure_sym) {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, nargs);
         for (size_t i = 0; i < nargs; i++)
@@ -289,7 +293,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         JL_GC_POP();
         return ret;
     }
-    else if (head == static_parameter_sym) {
+    else if (head == jl_static_parameter_sym) {
         ssize_t n = jl_unbox_long(args[0]);
         assert(n > 0);
         if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) {
@@ -301,28 +305,34 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         // static parameter val unknown needs to be an error for ccall
         jl_error("could not determine static parameter value");
     }
-    else if (head == copyast_sym) {
+    else if (head == jl_copyast_sym) {
         return jl_copy_ast(eval_value(args[0], s));
     }
-    else if (head == exc_sym) {
+    else if (head == jl_exc_sym) {
         return jl_current_exception();
     }
-    else if (head == boundscheck_sym) {
+    else if (head == jl_boundscheck_sym) {
         return jl_true;
     }
-    else if (head == meta_sym || head == coverageeffect_sym || head == inbounds_sym || head == loopinfo_sym ||
-             head == aliasscope_sym || head == popaliasscope_sym) {
+    else if (head == jl_meta_sym || head == jl_coverageeffect_sym || head == jl_inbounds_sym || head == jl_loopinfo_sym ||
+             head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         return jl_nothing;
     }
-    else if (head == gc_preserve_begin_sym || head == gc_preserve_end_sym) {
+    else if (head == jl_gc_preserve_begin_sym || head == jl_gc_preserve_end_sym) {
         // The interpreter generally keeps values that were assigned in this scope
         // rooted. If the interpreter learns to be more aggressive here, we may
         // want to explicitly root these values.
         return jl_nothing;
     }
-    else if (head == method_sym && nargs == 1) {
+    else if (head == jl_method_sym && nargs == 1) {
         return eval_methoddef(ex, s);
     }
+    else if (head == jl_foreigncall_sym) {
+        jl_error("`ccall` requires the compiler");
+    }
+    else if (head == jl_cfunction_sym) {
+        jl_error("`cfunction` requires the compiler");
+    }
     jl_errorf("unsupported or misplaced expression %s", jl_symbol_name(head));
     abort();
 }
@@ -420,7 +430,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
         if (ip >= ns)
             jl_error("`body` expression must terminate in `return`. Use `block` instead.");
         if (toplevel)
-            ct->world_age = jl_world_counter;
+            ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_value_t *stmt = jl_array_ptr_ref(stmts, ip);
         assert(!jl_is_phinode(stmt));
         size_t next_ip = ip + 1;
@@ -452,7 +462,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
         else if (jl_is_expr(stmt)) {
             // Most exprs are allowed to end a BB by fall through
             jl_sym_t *head = ((jl_expr_t*)stmt)->head;
-            if (head == assign_sym) {
+            if (head == jl_assign_sym) {
                 jl_value_t *lhs = jl_exprarg(stmt, 0);
                 jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s);
                 if (jl_is_slot(lhs)) {
@@ -478,7 +488,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                     JL_GC_POP();
                 }
             }
-            else if (head == enter_sym) {
+            else if (head == jl_enter_sym) {
                 jl_enter_handler(&__eh);
                 // This is a bit tricky, but supports the implementation of PhiC nodes.
                 // They are conceptually slots, but the slot to store to doesn't get explicitly
@@ -516,12 +526,12 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                     s->continue_at = 0;
                     continue;
                 }
-                else { // a real exeception
+                else { // a real exception
                     ip = catch_ip;
                     continue;
                 }
             }
-            else if (head == leave_sym) {
+            else if (head == jl_leave_sym) {
                 int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0));
                 assert(hand_n_leave > 0);
                 // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp:
@@ -534,41 +544,41 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                 s->continue_at = next_ip;
                 jl_longjmp(eh->eh_ctx, 1);
             }
-            else if (head == pop_exception_sym) {
+            else if (head == jl_pop_exception_sym) {
                 size_t prev_state = jl_unbox_ulong(eval_value(jl_exprarg(stmt, 0), s));
                 jl_restore_excstack(prev_state);
             }
             else if (toplevel) {
-                if (head == method_sym && jl_expr_nargs(stmt) > 1) {
+                if (head == jl_method_sym && jl_expr_nargs(stmt) > 1) {
                     eval_methoddef((jl_expr_t*)stmt, s);
                 }
-                else if (head == toplevel_sym) {
+                else if (head == jl_toplevel_sym) {
                     jl_value_t *res = jl_toplevel_eval(s->module, stmt);
                     s->locals[jl_source_nslots(s->src) + s->ip] = res;
                 }
                 else if (jl_is_toplevel_only_expr(stmt)) {
                     jl_toplevel_eval(s->module, stmt);
                 }
-                else if (head == meta_sym) {
-                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)nospecialize_sym) {
+                else if (head == jl_meta_sym) {
+                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)jl_nospecialize_sym) {
                         jl_set_module_nospecialize(s->module, 1);
                     }
-                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)specialize_sym) {
+                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)jl_specialize_sym) {
                         jl_set_module_nospecialize(s->module, 0);
                     }
                     if (jl_expr_nargs(stmt) == 2) {
-                        if (jl_exprarg(stmt, 0) == (jl_value_t*)optlevel_sym) {
+                        if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_optlevel_sym) {
                             if (jl_is_long(jl_exprarg(stmt, 1))) {
                                 int n = jl_unbox_long(jl_exprarg(stmt, 1));
                                 jl_set_module_optlevel(s->module, n);
                             }
                         }
-                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)compile_sym) {
+                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_compile_sym) {
                             if (jl_is_long(jl_exprarg(stmt, 1))) {
                                 jl_set_module_compile(s->module, jl_unbox_long(jl_exprarg(stmt, 1)));
                             }
                         }
-                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)infer_sym) {
+                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_infer_sym) {
                             if (jl_is_long(jl_exprarg(stmt, 1))) {
                                 jl_set_module_infer(s->module, jl_unbox_long(jl_exprarg(stmt, 1)));
                             }
@@ -673,6 +683,8 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     return r;
 }
 
+JL_DLLEXPORT jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret_call;
+
 jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
 {
     jl_method_t *source = oc->source;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 4096bba08d0c7..6fe51cc8b6b22 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -15,7 +15,7 @@ FunctionType *get_intr_args4(LLVMContext &C) { return FunctionType::get(T_prjlva
 FunctionType *get_intr_args5(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_prjlvalue, T_prjlvalue}, false); }
 
 static JuliaFunction *runtime_func[num_intrinsics] = {
-#define ADD_I(name, nargs) new JuliaFunction{"jl_"#name, get_intr_args##nargs, nullptr},
+#define ADD_I(name, nargs) new JuliaFunction{XSTR(jl_##name), get_intr_args##nargs, nullptr},
 #define ADD_HIDDEN ADD_I
 #define ALIAS(alias, base) nullptr,
     INTRINSICS
@@ -70,7 +70,7 @@ static void jl_init_intrinsic_functions_codegen(void)
 }
 
 extern "C"
-JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void)
+JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_impl(void)
 {
     return 10000 * LLVM_VERSION_MAJOR + 100 * LLVM_VERSION_MINOR
 #ifdef LLVM_VERSION_PATCH
@@ -304,9 +304,10 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
         unboxed = emit_bitcast(ctx, unboxed, to);
     }
     else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
+#ifndef JL_NDEBUG
         const DataLayout &DL = jl_data_layout;
-        unsigned nb = DL.getTypeSizeInBits(ty);
-        assert(nb == DL.getTypeSizeInBits(to));
+#endif
+        assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
         AllocaInst *cast = ctx.builder.CreateAlloca(ty);
         ctx.builder.CreateStore(unboxed, cast);
         unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
@@ -652,8 +653,10 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         return emit_runtime_pointerset(ctx, argv);
     if (align.constant == NULL || !jl_is_long(align.constant))
         return emit_runtime_pointerset(ctx, argv);
-    if (!is_valid_intrinsic_elptr(ety))
+    if (!is_valid_intrinsic_elptr(ety)) {
         emit_error(ctx, "pointerset: invalid pointer type");
+        return jl_cgval_t();
+    }
     emit_typecheck(ctx, x, ety, "pointerset");
 
     Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
@@ -682,7 +685,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         if (!type_is_ghost(ptrty)) {
             thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
             typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, tbaa_data, nullptr, nullptr, isboxed,
-                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false);
+                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, nullptr, "");
         }
     }
     return e;
@@ -692,7 +695,7 @@ static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv)
 {
     const jl_cgval_t &ord = argv[0];
     if (ord.constant && jl_is_symbol(ord.constant)) {
-        enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, false, false);
+        enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, true);
         if (order == jl_memory_order_invalid) {
             emit_atomic_error(ctx, "invalid atomic ordering");
             return jl_cgval_t(); // unreachable
@@ -776,15 +779,18 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 // e[i] = x (set)
 // e[i] <= x (swap)
 // e[i] y => x (replace)
-static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs)
+// x(e[i], y) (modify)
+static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs, const jl_cgval_t *modifyop)
 {
     bool issetfield = f == atomic_pointerset;
     bool isreplacefield = f == atomic_pointerreplace;
+    bool isswapfield = f == atomic_pointerswap;
+    bool ismodifyfield = f == atomic_pointermodify;
     const jl_cgval_t undefval;
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = isreplacefield ? argv[2] : argv[1];
-    const jl_cgval_t &y = isreplacefield ? argv[1] : undefval;
-    const jl_cgval_t &ord = isreplacefield ? argv[3] : argv[2];
+    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
+    const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
+    const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
     const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval;
 
     jl_value_t *aty = e.typ;
@@ -812,7 +818,7 @@ static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const j
         Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
         bool isboxed = true;
         jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, false);
+                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
         if (issetfield)
             ret = e;
         return ret;
@@ -824,7 +830,8 @@ static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const j
         emit_error(ctx, msg);
         return jl_cgval_t();
     }
-    emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+    if (!ismodifyfield)
+        emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
 
     size_t nb = jl_datatype_size(ety);
     if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
@@ -845,7 +852,7 @@ static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const j
         assert(!isboxed);
         Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
         jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, false);
+                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
         if (issetfield)
             ret = e;
         return ret;
@@ -1069,6 +1076,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
 
     switch (f) {
     case arraylen: {
+        assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         jl_value_t *typ = jl_unwrap_unionall(x.typ);
         if (!jl_is_datatype(typ) || ((jl_datatype_t*)typ)->name != jl_array_typename)
@@ -1076,41 +1084,55 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         return mark_julia_type(ctx, emit_arraylen(ctx, x), false, jl_long_type);
     }
     case pointerref:
+        assert(nargs == 3);
         return emit_pointerref(ctx, argv);
     case pointerset:
+        assert(nargs == 4);
         return emit_pointerset(ctx, argv);
     case atomic_fence:
+        assert(nargs == 1);
         return emit_atomicfence(ctx, argv);
     case atomic_pointerref:
+        assert(nargs == 2);
         return emit_atomic_pointerref(ctx, argv);
     case atomic_pointerset:
     case atomic_pointerswap:
-    case atomic_pointerreplace:
-        return emit_atomic_pointerset(ctx, f, argv, nargs);
     case atomic_pointermodify:
-        return emit_runtime_call(ctx, f, argv, nargs);
+    case atomic_pointerreplace:
+        return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr);
     case bitcast:
+        assert(nargs == 2);
         return generic_bitcast(ctx, argv);
     case trunc_int:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::Trunc, argv, true, true);
     case sext_int:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::SExt, argv, true, true);
     case zext_int:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::ZExt, argv, true, true);
     case uitofp:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::UIToFP, argv, false, true);
     case sitofp:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::SIToFP, argv, false, true);
     case fptoui:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPToUI, argv, true, false);
     case fptosi:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPToSI, argv, true, false);
     case fptrunc:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPTrunc, argv, false, false);
     case fpext:
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPExt, argv, false, false);
 
     case not_int: {
+        assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         if (!jl_is_primitivetype(x.typ))
             return emit_runtime_call(ctx, f, argv, nargs);
@@ -1124,6 +1146,27 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         return mark_julia_type(ctx, ans, false, x.typ);
     }
 
+    case have_fma: {
+        assert(nargs == 1);
+        const jl_cgval_t &x = argv[0];
+        if (!x.constant || !jl_is_datatype(x.constant))
+            return emit_runtime_call(ctx, f, argv, nargs);
+        jl_datatype_t *dt = (jl_datatype_t*) x.constant;
+
+        // select the appropriate overloaded intrinsic
+        std::string intr_name = "julia.cpu.have_fma.";
+        if (dt == jl_float32_type)
+            intr_name += "f32";
+        else if (dt == jl_float64_type)
+            intr_name += "f64";
+        else
+            return emit_runtime_call(ctx, f, argv, nargs);
+
+        FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, T_int1);
+        auto ret = ctx.builder.CreateCall(intr);
+        return mark_julia_type(ctx, ret, false, jl_bool_type);
+    }
+
     default: {
         assert(nargs >= 1 && "invalid nargs for intrinsic call");
         const jl_cgval_t &xinfo = argv[0];
@@ -1455,200 +1498,3 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     }
     assert(0 && "unreachable");
 }
-
-
-// float16 intrinsics
-// TODO: use LLVM's compiler-rt
-
-static inline float half_to_float(uint16_t ival)
-{
-    uint32_t sign = (ival & 0x8000) >> 15;
-    uint32_t exp = (ival & 0x7c00) >> 10;
-    uint32_t sig = (ival & 0x3ff) >> 0;
-    uint32_t ret;
-
-    if (exp == 0) {
-        if (sig == 0) {
-            sign = sign << 31;
-            ret = sign | exp | sig;
-        }
-        else {
-            int n_bit = 1;
-            uint16_t bit = 0x0200;
-            while ((bit & sig) == 0) {
-                n_bit = n_bit + 1;
-                bit = bit >> 1;
-            }
-            sign = sign << 31;
-            exp = ((-14 - n_bit + 127) << 23);
-            sig = ((sig & (~bit)) << n_bit) << (23 - 10);
-            ret = sign | exp | sig;
-        }
-    }
-    else if (exp == 0x1f) {
-        if (sig == 0) { // Inf
-            if (sign == 0)
-                ret = 0x7f800000;
-            else
-                ret = 0xff800000;
-        }
-        else // NaN
-            ret = 0x7fc00000 | (sign << 31) | (sig << (23 - 10));
-    }
-    else {
-        sign = sign << 31;
-        exp = ((exp - 15 + 127) << 23);
-        sig = sig << (23 - 10);
-        ret = sign | exp | sig;
-    }
-
-    float fret;
-    memcpy(&fret, &ret, sizeof(float));
-    return fret;
-}
-
-// float to half algorithm from:
-//   "Fast Half Float Conversion" by Jeroen van der Zijp
-//   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
-//
-// With adjustments for round-to-nearest, ties to even.
-
-static uint16_t basetable[512] = {
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0000, 0x0000, 0x0000, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
-    0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
-    0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
-    0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
-    0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
-    0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
-    0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
-    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
-
-static uint8_t shifttable[512] = {
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
-    0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
-    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
-    0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x0d, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
-    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
-    0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
-    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
-    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
-
-static inline uint16_t float_to_half(float param)
-{
-    uint32_t f;
-    memcpy(&f, &param, sizeof(float));
-    if (isnan(param)) {
-        uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10)));
-        return t ^ ((uint16_t)(f >> 0xd));
-    }
-    int i = ((f & ~0x007fffff) >> 23);
-    uint8_t sh = shifttable[i];
-    f &= 0x007fffff;
-    // If `val` is subnormal, the tables are set up to force the
-    // result to 0, so the significand has an implicit `1` in the
-    // cases we care about.
-    f |= 0x007fffff + 0x1;
-    uint16_t h = (uint16_t)(basetable[i] + ((f >> sh) & 0x03ff));
-    // round
-    // NOTE: we maybe should ignore NaNs here, but the payload is
-    // getting truncated anyway so "rounding" it might not matter
-    int nextbit = (f >> (sh - 1)) & 1;
-    if (nextbit != 0 && (h & 0x7C00) != 0x7C00) {
-        // Round halfway to even or check lower bits
-        if ((h & 1) == 1 || (f & ((1 << (sh - 1)) - 1)) != 0)
-            h += UINT16_C(1);
-    }
-    return h;
-}
-
-#if !defined(_OS_DARWIN_)   // xcode already links compiler-rt
-
-extern "C" JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
-{
-    return half_to_float(param);
-}
-
-extern "C" JL_DLLEXPORT float __extendhfsf2(uint16_t param)
-{
-    return half_to_float(param);
-}
-
-extern "C" JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
-{
-    return float_to_half(param);
-}
-
-extern "C" JL_DLLEXPORT uint16_t __truncdfhf2(double param)
-{
-    return float_to_half((float)param);
-}
-
-#endif
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 52988a313c990..bb67460bbb31f 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -103,6 +103,8 @@
     ALIAS(llvmcall, llvmcall) \
     /*  object access */ \
     ADD_I(arraylen, 1) \
+    /*  cpu feature tests */ \
+    ADD_I(have_fma, 1) \
     /*  hidden intrinsics */ \
     ADD_HIDDEN(cglobal_auto, 1)
 
diff --git a/src/ircode.c b/src/ircode.c
index 212febe121a75..279e458728e24 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -14,11 +14,7 @@
 #include <dlfcn.h>
 #endif
 
-#ifndef _COMPILER_MICROSOFT_
 #include "valgrind.h"
-#else
-#define RUNNING_ON_VALGRIND 0
-#endif
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -151,7 +147,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     else if (jl_is_expr(v)) {
         jl_expr_t *e = (jl_expr_t*)v;
         size_t l = jl_array_len(e->args);
-        if (e->head == call_sym) {
+        if (e->head == jl_call_sym) {
             if (l == 2) {
                 write_uint8(s->s, TAG_CALL1);
                 jl_encode_value(s, jl_exprarg(e, 0));
@@ -381,6 +377,17 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     }
 }
 
+static jl_code_info_flags_t code_info_flags(uint8_t pure, uint8_t propagate_inbounds, uint8_t inlineable, uint8_t inferred, uint8_t constprop)
+{
+    jl_code_info_flags_t flags;
+    flags.bits.pure = pure;
+    flags.bits.propagate_inbounds = propagate_inbounds;
+    flags.bits.inlineable = inlineable;
+    flags.bits.inferred = inferred;
+    flags.bits.constprop = constprop;
+    return flags;
+}
+
 // --- decoding ---
 
 static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED;
@@ -475,11 +482,11 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D
     }
     else if (tag == TAG_CALL1) {
         len = 2;
-        head = call_sym;
+        head = jl_call_sym;
     }
     else if (tag == TAG_CALL2) {
         len = 3;
-        head = call_sym;
+        head = jl_call_sym;
     }
     else {
         len = read_int32(s->s);
@@ -702,12 +709,8 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
         jl_current_task->ptls
     };
 
-    uint8_t flags = (code->aggressive_constprop << 4)
-                  | (code->inferred << 3)
-                  | (code->inlineable << 2)
-                  | (code->propagate_inbounds << 1)
-                  | (code->pure << 0);
-    write_uint8(s.s, flags);
+    jl_code_info_flags_t flags = code_info_flags(code->pure, code->propagate_inbounds, code->inlineable, code->inferred, code->constprop);
+    write_uint8(s.s, flags.packed);
 
     size_t nslots = jl_array_len(code->slotflags);
     assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions
@@ -787,12 +790,13 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     };
 
     jl_code_info_t *code = jl_new_code_info_uninit();
-    uint8_t flags = read_uint8(s.s);
-    code->aggressive_constprop = !!(flags & (1 << 4));
-    code->inferred = !!(flags & (1 << 3));
-    code->inlineable = !!(flags & (1 << 2));
-    code->propagate_inbounds = !!(flags & (1 << 1));
-    code->pure = !!(flags & (1 << 0));
+    jl_code_info_flags_t flags;
+    flags.packed = read_uint8(s.s);
+    code->constprop = flags.bits.constprop;
+    code->inferred = flags.bits.inferred;
+    code->inlineable = flags.bits.inlineable;
+    code->propagate_inbounds = flags.bits.propagate_inbounds;
+    code->pure = flags.bits.pure;
 
     size_t nslots = read_int32(&src);
     code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots);
@@ -847,8 +851,9 @@ JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_array_t *data)
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inferred;
     assert(jl_typeis(data, jl_array_uint8_type));
-    uint8_t flags = ((uint8_t*)data->data)[0];
-    return !!(flags & (1 << 3));
+    jl_code_info_flags_t flags;
+    flags.packed = ((uint8_t*)data->data)[0];
+    return flags.bits.inferred;
 }
 
 JL_DLLEXPORT uint8_t jl_ir_flag_inlineable(jl_array_t *data)
@@ -856,8 +861,9 @@ JL_DLLEXPORT uint8_t jl_ir_flag_inlineable(jl_array_t *data)
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlineable;
     assert(jl_typeis(data, jl_array_uint8_type));
-    uint8_t flags = ((uint8_t*)data->data)[0];
-    return !!(flags & (1 << 2));
+    jl_code_info_flags_t flags;
+    flags.packed = ((uint8_t*)data->data)[0];
+    return flags.bits.inlineable;
 }
 
 JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data)
@@ -865,8 +871,9 @@ JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data)
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->pure;
     assert(jl_typeis(data, jl_array_uint8_type));
-    uint8_t flags = ((uint8_t*)data->data)[0];
-    return !!(flags & (1 << 0));
+    jl_code_info_flags_t flags;
+    flags.packed = ((uint8_t*)data->data)[0];
+    return flags.bits.pure;
 }
 
 JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms)
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index e86d6109ff427..886887f1d329d 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -1,7 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// Except for parts of this file which were copied from LLVM, under the UIUC license (marked below).
-
 #include "llvm-version.h"
 #include "platform.h"
 
@@ -12,6 +10,9 @@
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/ExecutionEngine/Orc/CompileUtils.h>
 #include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
+#if JL_LLVM_VERSION >= 130000
+#include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
+#endif
 #include <llvm/Support/DynamicLibrary.h>
 #include <llvm/Support/FormattedStream.h>
 #include <llvm/Support/SmallVectorMemoryBuffer.h>
@@ -36,7 +37,7 @@ void jl_init_jit(void) { }
 // Snooping on which functions are being compiled, and how long it takes
 JL_STREAM *dump_compiles_stream = NULL;
 extern "C" JL_DLLEXPORT
-void jl_dump_compiles(void *s)
+void jl_dump_compiles_impl(void *s)
 {
     dump_compiles_stream = (JL_STREAM*)s;
 }
@@ -75,21 +76,6 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
     }
 }
 
-extern "C" JL_DLLEXPORT
-uint64_t jl_cumulative_compile_time_ns_before()
-{
-    int tid = jl_threadid();
-    jl_measure_compile_time[tid] = 1;
-    return jl_cumulative_compile_time[tid];
-}
-extern "C" JL_DLLEXPORT
-uint64_t jl_cumulative_compile_time_ns_after()
-{
-    int tid = jl_threadid();
-    jl_measure_compile_time[tid] = 0;
-    return jl_cumulative_compile_time[tid];
-}
-
 // this generates llvm code for the lambda info
 // and adds the result to the jitlayers
 // (and the shadow module),
@@ -162,10 +148,10 @@ static jl_callptr_t _jl_compile_codeinst(
         jl_callptr_t addr;
         bool isspecsig = false;
         if (decls.functionObject == "jl_fptr_args") {
-            addr = &jl_fptr_args;
+            addr = jl_fptr_args_addr;
         }
         else if (decls.functionObject == "jl_fptr_sparam") {
-            addr = &jl_fptr_sparam;
+            addr = jl_fptr_sparam_addr;
         }
         else {
             addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
@@ -175,14 +161,14 @@ static jl_callptr_t _jl_compile_codeinst(
             // once set, don't change invoke-ptr, as that leads to race conditions
             // with the (not) simultaneous updates to invoke and specptr
             if (!decls.specFunctionObject.empty()) {
-                this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject);
+                jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
                 this_code->isspecsig = isspecsig;
             }
             jl_atomic_store_release(&this_code->invoke, addr);
         }
-        else if (this_code->invoke == jl_fptr_const_return && !decls.specFunctionObject.empty()) {
-            // hack to export this pointer value to jl_dump_method_asm
-            this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject);
+        else if (this_code->invoke == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) {
+            // hack to export this pointer value to jl_dump_method_disasm
+            jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
         }
         if (this_code== codeinst)
             fptr = addr;
@@ -228,13 +214,13 @@ static jl_callptr_t _jl_compile_codeinst(
 const char *jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
 
 // compile a C-callable alias
-extern "C"
-int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
+extern "C" JL_DLLEXPORT
+int jl_compile_extern_c_impl(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
 {
-    JL_LOCK(&codegen_lock);
+    JL_LOCK(&jl_codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     jl_codegen_params_t params;
     jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
@@ -258,15 +244,15 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
         if (success && llvmmod == NULL)
             jl_add_to_ee(std::unique_ptr<Module>(into));
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
-    JL_UNLOCK(&codegen_lock);
+    if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+    JL_UNLOCK(&jl_codegen_lock);
     return success;
 }
 
 // declare a C-callable entry point; called during code loading from the toplevel
 extern "C" JL_DLLEXPORT
-void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
+void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
 {
     // validate arguments. try to do as many checks as possible here to avoid
     // throwing errors later during codegen.
@@ -281,10 +267,10 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
     // compute / validate return type
     if (!jl_is_concrete_type(declrt) || jl_is_kind(declrt))
         jl_error("@ccallable: return type must be concrete and correspond to a C type");
-    JL_LOCK(&codegen_lock);
+    JL_LOCK(&jl_codegen_lock);
     if (!jl_type_mappable_to_c(declrt))
         jl_error("@ccallable: return type doesn't correspond to a C type");
-    JL_UNLOCK(&codegen_lock);
+    JL_UNLOCK(&jl_codegen_lock);
 
     // validate method signature
     size_t i, nargs = jl_nparams(sigt);
@@ -295,7 +281,7 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
     }
 
     // save a record of this so that the alias is generated when we write an object file
-    jl_method_t *meth = (jl_method_t*)jl_methtable_lookup(ft->name->mt, (jl_value_t*)sigt, jl_world_counter);
+    jl_method_t *meth = (jl_method_t*)jl_methtable_lookup(ft->name->mt, (jl_value_t*)sigt, jl_atomic_load_acquire(&jl_world_counter));
     if (!jl_is_method(meth))
         jl_error("@ccallable: could not find requested method");
     JL_GC_PUSH1(&meth);
@@ -310,13 +296,13 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
 }
 
 // this compiles li and emits fptr
-extern "C"
-jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+extern "C" JL_DLLEXPORT
+jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
 {
-    JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
+    JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     // if we don't have any decls already, try to generate it now
     jl_code_info_t *src = NULL;
@@ -332,10 +318,12 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
     }
     if (src == NULL && jl_is_method(mi->def.method) &&
              jl_symbol_name(mi->def.method->name)[0] != '@') {
-        // If the caller didn't provide the source,
-        // see if it is inferred, or try to infer it for ourself.
-        // (but don't bother with typeinf on macros or toplevel thunks)
-        src = jl_type_infer(mi, world, 0);
+        if (mi->def.method->source != jl_nothing) {
+            // If the caller didn't provide the source and IR is available,
+            // see if it is inferred, or try to infer it for ourself.
+            // (but don't bother with typeinf on macros or toplevel thunks)
+            src = jl_type_infer(mi, world, 0);
+        }
     }
     jl_code_instance_t *compiled = jl_method_compiled(mi, world);
     if (compiled) {
@@ -354,23 +342,23 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
     else {
         codeinst = NULL;
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
-    JL_UNLOCK(&codegen_lock);
+    if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+    JL_UNLOCK(&jl_codegen_lock);
     JL_GC_POP();
     return codeinst;
 }
 
-extern "C"
-void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
+extern "C" JL_DLLEXPORT
+void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
 {
     if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) {
         return;
     }
-    JL_LOCK(&codegen_lock);
+    JL_LOCK(&jl_codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     if (unspec->invoke == NULL) {
         jl_code_info_t *src = NULL;
@@ -394,38 +382,38 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
         _jl_compile_codeinst(unspec, src, unspec->min_world);
         if (unspec->invoke == NULL) {
             // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
-            jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
+            jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr);
         }
         JL_GC_POP();
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
-    JL_UNLOCK(&codegen_lock); // Might GC
+    if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+    JL_UNLOCK(&jl_codegen_lock); // Might GC
 }
 
 
 // get a native disassembly for a compiled method
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
-        int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
+jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
 {
     // printing via disassembly
     jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
     if (codeinst) {
-        uintptr_t fptr = (uintptr_t)codeinst->invoke;
+        uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
         if (getwrapper)
             return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
-        uintptr_t specfptr = (uintptr_t)codeinst->specptr.fptr;
-        if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) {
+        uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+        if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
             // normally we prevent native code from being generated for these functions,
             // (using sentinel value `1` instead)
             // so create an exception here so we can print pretty our lies
-            JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
+            JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
             uint64_t compiler_start_time = 0;
-            int tid = jl_threadid();
-            if (jl_measure_compile_time[tid])
+            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+            if (measure_compile_time_enabled)
                 compiler_start_time = jl_hrtime();
-            specfptr = (uintptr_t)codeinst->specptr.fptr;
+            specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
             if (specfptr == 0) {
                 jl_code_info_t *src = jl_type_infer(mi, world, 0);
                 JL_GC_PUSH1(&src);
@@ -438,28 +426,29 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
                     if (src && (jl_value_t*)src != jl_nothing)
                         src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
                 }
-                fptr = (uintptr_t)codeinst->invoke;
-                specfptr = (uintptr_t)codeinst->specptr.fptr;
+                fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
+                specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                 if (src && jl_is_code_info(src)) {
-                    if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) {
+                    if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
                         fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world);
-                        specfptr = (uintptr_t)codeinst->specptr.fptr;
+                        specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                     }
                 }
                 JL_GC_POP();
             }
-            if (jl_measure_compile_time[tid])
-                jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
-            JL_UNLOCK(&codegen_lock);
+            if (measure_compile_time_enabled)
+                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+            JL_UNLOCK(&jl_codegen_lock);
         }
         if (specfptr != 0)
             return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary);
     }
 
     // whatever, that didn't work - use the assembler output instead
-    if (raw_mc) // eh, give up, this flag doesn't really work anyways normally
-        return (jl_value_t*)jl_pchar_to_array("", 0);
-    return jl_dump_llvm_asm(jl_get_llvmf_defn(mi, world, getwrapper, true, jl_default_cgparams), asm_variant, debuginfo);
+    void *F = jl_get_llvmf_defn(mi, world, getwrapper, true, jl_default_cgparams);
+    if (!F)
+        return jl_an_empty_string;
+    return jl_dump_function_asm(F, raw_mc, asm_variant, debuginfo, binary);
 }
 
 // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr
@@ -548,7 +537,7 @@ static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, raw
 {
     addTargetPasses(&PM, &TM);
     addOptimizationPasses(&PM, optlevel);
-    addMachinePasses(&PM, &TM);
+    addMachinePasses(&PM, &TM, optlevel);
     if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
         llvm_unreachable("Target does not support MC emission.");
 }
@@ -655,7 +644,11 @@ JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
     MemMgr(createRTDyldMemoryManager()),
     JuliaListener(CreateJuliaJITEventListener()),
     TSCtx(std::unique_ptr<LLVMContext>(LLVMCtx)),
+#if JL_LLVM_VERSION >= 130000
+    ES(cantFail(orc::SelfExecutorProcessControl::Create())),
+#else
     ES(),
+#endif
     GlobalJD(ES.createBareJITDylib("JuliaGlobals")),
     JD(ES.createBareJITDylib("JuliaOJIT")),
     ObjectLayer(
@@ -825,18 +818,19 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
 static int globalUniqueGeneratedNames;
 StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst)
 {
-    auto &fname = ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
-    if (fname.empty()) {
+    std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
+    if (fname->empty()) {
         std::string string_fname;
         raw_string_ostream stream_fname(string_fname);
         // try to pick an appropriate name that describes it
-        if (Addr == (uintptr_t)codeinst->invoke) {
+        jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        if (Addr == (uintptr_t)invoke) {
             stream_fname << "jsysw_";
         }
-        else if (codeinst->invoke == &jl_fptr_args) {
+        else if (invoke == jl_fptr_args_addr) {
             stream_fname << "jsys1_";
         }
-        else if (codeinst->invoke == &jl_fptr_sparam) {
+        else if (invoke == jl_fptr_sparam_addr) {
             stream_fname << "jsys3_";
         }
         else {
@@ -844,10 +838,10 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod
         }
         const char* unadorned_name = jl_symbol_name(codeinst->def->def.method->name);
         stream_fname << unadorned_name << "_" << globalUniqueGeneratedNames++;
-        fname = strdup(stream_fname.str().c_str());
-        addGlobalMapping(fname, Addr);
+        *fname = std::move(stream_fname.str()); // store to ReverseLocalSymbolTable
+        addGlobalMapping(*fname, Addr);
     }
-    return fname;
+    return *fname;
 }
 
 
@@ -1140,7 +1134,7 @@ void add_named_global(StringRef name, void *addr)
 }
 
 extern "C" JL_DLLEXPORT
-size_t jl_jit_total_bytes(void)
+size_t jl_jit_total_bytes_impl(void)
 {
     return jl_ExecutionEngine->getTotalBytes();
 }
diff --git a/src/jitlayers.h b/src/jitlayers.h
index e3cc9245932ac..70a017b4f720b 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -16,12 +16,14 @@
 
 using namespace llvm;
 
+extern "C" jl_cgparams_t jl_default_cgparams;
+
 extern TargetMachine *jl_TargetMachine;
 extern bool imaging_mode;
 
 void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM);
 void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false);
-void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM);
+void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM, int optlevel);
 void jl_finalize_module(std::unique_ptr<Module>  m);
 void jl_merge_module(Module *dest, std::unique_ptr<Module> src);
 Module *jl_create_llvm_module(StringRef name);
@@ -50,7 +52,7 @@ struct jl_returninfo_t {
 typedef std::vector<std::tuple<jl_code_instance_t*, jl_returninfo_t::CallingConv, unsigned, llvm::Function*, bool>> jl_codegen_call_targets_t;
 typedef std::tuple<std::unique_ptr<Module>, jl_llvm_functions_t> jl_compile_result_t;
 
-typedef struct {
+typedef struct _jl_codegen_params_t {
     typedef StringMap<GlobalVariable*> SymMapGV;
     // outputs
     jl_codegen_call_targets_t workqueue;
@@ -63,7 +65,8 @@ typedef struct {
     StringMap<std::pair<GlobalVariable*,SymMapGV>> libMapGV;
 #ifdef _OS_WINDOWS_
     SymMapGV symMapExe;
-    SymMapGV symMapDl;
+    SymMapGV symMapDll;
+    SymMapGV symMapDlli;
 #endif
     SymMapGV symMapDefault;
     // Map from distinct callee's to its GOT entry.
@@ -223,7 +226,7 @@ class JuliaOJIT {
     ObjLayerT ObjectLayer;
     CompileLayerT CompileLayer;
 
-    DenseMap<void*, StringRef> ReverseLocalSymbolTable;
+    DenseMap<void*, std::string> ReverseLocalSymbolTable;
 };
 extern JuliaOJIT *jl_ExecutionEngine;
 
@@ -240,6 +243,7 @@ Pass *createJuliaLICMPass();
 Pass *createMultiVersioningPass();
 Pass *createAllocOptPass();
 Pass *createDemoteFloat16Pass();
+Pass *createCPUFeaturesPass();
 // Whether the Function is an llvm or julia intrinsic.
 static inline bool isIntrinsicFunction(Function *F)
 {
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index 3cebe459bf643..588d2a831e225 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 // Pointers that are exposed through the public libjulia
 #define JL_EXPORTED_DATA_POINTERS(XX) \
     XX(jl_abstractarray_type) \
@@ -74,6 +76,7 @@
     XX(jl_number_type) \
     XX(jl_opaque_closure_type) \
     XX(jl_opaque_closure_typename) \
+    XX(jl_pair_type) \
     XX(jl_partial_opaque_type) \
     XX(jl_partial_struct_type) \
     XX(jl_phicnode_type) \
@@ -120,8 +123,9 @@
     XX(jl_vecelement_typename) \
     XX(jl_voidpointer_type) \
     XX(jl_void_type) \
-    XX(jl_weakref_type) \
+    XX(jl_weakref_type)
 
 // Data symbols that are defined inside the public libjulia
 #define JL_EXPORTED_DATA_SYMBOLS(XX) \
-    XX(jl_n_threads, int)
+    XX(jl_n_threads, int) \
+    XX(jl_options, jl_options_t)
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 0e82304d82e5c..6d09309cfe310 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -1,6 +1,7 @@
-#define JL_EXPORTED_FUNCS(XX) \
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#define JL_RUNTIME_EXPORTED_FUNCS(XX) \
     XX(jl_active_task_stack) \
-    XX(jl_add_optimization_passes) \
     XX(jl_add_standard_imports) \
     XX(jl_alignment) \
     XX(jl_alloc_array_1d) \
@@ -11,6 +12,7 @@
     XX(jl_alloc_svec_uninit) \
     XX(jl_alloc_vec_any) \
     XX(jl_apply_array_type) \
+    XX(jl_apply_cmpswap_type) \
     XX(jl_apply_generic) \
     XX(jl_apply_tuple_type) \
     XX(jl_apply_tuple_type_v) \
@@ -21,7 +23,6 @@
     XX(jl_argument_method_table) \
     XX(jl_array_cconvert_cstring) \
     XX(jl_array_copy) \
-    XX(jl_array_data_owner) \
     XX(jl_array_del_at) \
     XX(jl_array_del_beg) \
     XX(jl_array_del_end) \
@@ -31,6 +32,7 @@
     XX(jl_array_grow_end) \
     XX(jl_array_isassigned) \
     XX(jl_arraylen) \
+    XX(jl_have_fma) \
     XX(jl_array_ptr) \
     XX(jl_array_ptr_1d_append) \
     XX(jl_array_ptr_1d_push) \
@@ -43,8 +45,13 @@
     XX(jl_array_to_string) \
     XX(jl_array_typetagdata) \
     XX(jl_arrayunset) \
-    XX(jl_astaggedvalue) \
     XX(jl_atexit_hook) \
+    XX(jl_atomic_bool_cmpswap_bits) \
+    XX(jl_atomic_cmpswap_bits) \
+    XX(jl_atomic_error) \
+    XX(jl_atomic_new_bits) \
+    XX(jl_atomic_store_bits) \
+    XX(jl_atomic_swap_bits) \
     XX(jl_backtrace_from_here) \
     XX(jl_base_relative_to) \
     XX(jl_binding_owner) \
@@ -87,8 +94,6 @@
     XX(jl_cglobal_auto) \
     XX(jl_checked_assignment) \
     XX(jl_clear_implicit_imports) \
-    XX(jl_clear_malloc_data) \
-    XX(jl_clock_now) \
     XX(jl_close_uv) \
     XX(jl_code_for_staged) \
     XX(jl_compile_hint) \
@@ -97,11 +102,8 @@
     XX(jl_compute_fieldtypes) \
     XX(jl_copy_ast) \
     XX(jl_copy_code_info) \
-    XX(jl_cpu_pause) \
     XX(jl_cpu_threads) \
-    XX(jl_cpu_wake) \
     XX(jl_crc32c_sw) \
-    XX(jl_create_native) \
     XX(jl_create_system_image) \
     XX(jl_cstr_to_string) \
     XX(jl_current_exception) \
@@ -112,13 +114,7 @@
     XX(jl_dlclose) \
     XX(jl_dlopen) \
     XX(jl_dlsym) \
-    XX(jl_dump_compiles) \
-    XX(jl_dump_fptr_asm) \
-    XX(jl_dump_function_ir) \
     XX(jl_dump_host_cpu) \
-    XX(jl_dump_llvm_asm) \
-    XX(jl_dump_method_asm) \
-    XX(jl_egal) \
     XX(jl_egal__bits) \
     XX(jl_egal__special) \
     XX(jl_eh_restore_state) \
@@ -143,17 +139,17 @@
     XX(jl_exit_on_sigint) \
     XX(jl_exit_threaded_region) \
     XX(jl_expand) \
+    XX(jl_resolve_globals_in_ir) \
     XX(jl_expand_and_resolve) \
     XX(jl_expand_stmt) \
     XX(jl_expand_stmt_with_loc) \
     XX(jl_expand_with_loc) \
     XX(jl_expand_with_loc_warn) \
-    XX(jl_extern_c) \
     XX(jl_field_index) \
+    XX(jl_field_isdefined) \
     XX(jl_gc_add_finalizer) \
     XX(jl_gc_add_finalizer_th) \
     XX(jl_gc_add_ptr_finalizer) \
-    XX(jl_gc_alloc) \
     XX(jl_gc_alloc_0w) \
     XX(jl_gc_alloc_1w) \
     XX(jl_gc_alloc_2w) \
@@ -188,9 +184,6 @@
     XX(jl_gc_pool_alloc) \
     XX(jl_gc_queue_multiroot) \
     XX(jl_gc_queue_root) \
-    XX(jl_gc_safe_enter) \
-    XX(jl_gc_safe_leave) \
-    XX(jl_gc_safepoint) \
     XX(jl_gc_schedule_foreign_sweepfunc) \
     XX(jl_gc_set_cb_notify_external_alloc) \
     XX(jl_gc_set_cb_notify_external_free) \
@@ -200,8 +193,6 @@
     XX(jl_gc_set_cb_task_scanner) \
     XX(jl_gc_sync_total_bytes) \
     XX(jl_gc_total_hrtime) \
-    XX(jl_gc_unsafe_enter) \
-    XX(jl_gc_unsafe_leave) \
     XX(jl_gdblookup) \
     XX(jl_generating_output) \
     XX(jl_generic_function_def) \
@@ -213,7 +204,6 @@
     XX(jl_get_binding_for_method_def) \
     XX(jl_get_binding_or_error) \
     XX(jl_get_binding_wr) \
-    XX(jl_get_cfunction_trampoline) \
     XX(jl_get_cpu_name) \
     XX(jl_get_current_task) \
     XX(jl_get_default_sysimg_path) \
@@ -221,8 +211,6 @@
     XX(jl_get_fenv_consts) \
     XX(jl_get_field) \
     XX(jl_get_field_offset) \
-    XX(jl_get_fieldtypes) \
-    XX(jl_get_function_id) \
     XX(jl_get_global) \
     XX(jl_get_image_file) \
     XX(jl_get_JIT) \
@@ -230,11 +218,6 @@
     XX(jl_get_julia_bindir) \
     XX(jl_get_keyword_sorter) \
     XX(jl_get_kwsorter) \
-    XX(jl_get_llvm_context) \
-    XX(jl_get_llvmf_defn) \
-    XX(jl_get_llvm_function) \
-    XX(jl_get_llvm_module) \
-    XX(jl_get_LLVM_VERSION) \
     XX(jl_get_method_inferred) \
     XX(jl_get_module_binding) \
     XX(jl_get_module_compile) \
@@ -254,7 +237,6 @@
     XX(jl_get_safe_restore) \
     XX(jl_get_size) \
     XX(jl_get_task_tid) \
-    XX(jl_gettimeofday) \
     XX(jl_get_tls_world_age) \
     XX(jl_get_UNAME) \
     XX(jl_get_world_counter) \
@@ -270,8 +252,6 @@
     XX(jl_has_typevar) \
     XX(jl_has_typevar_from_unionall) \
     XX(jl_hrtime) \
-    XX(jl_id_char) \
-    XX(jl_id_start_char) \
     XX(jl_idtable_rehash) \
     XX(jl_infer_thunk) \
     XX(jl_init) \
@@ -279,6 +259,7 @@
     XX(jl_init_restored_modules) \
     XX(jl_init_with_image) \
     XX(jl_init_with_image__threading) \
+    XX(jl_init_options) \
     XX(jl_install_sigint_handler) \
     XX(jl_instantiate_type_in_env) \
     XX(jl_instantiate_unionall) \
@@ -378,7 +359,6 @@
     XX(jl_object_id_) \
     XX(jl_obvious_subtype) \
     XX(jl_operator_precedence) \
-    XX(jl_op_suffix_char) \
     XX(jl_parse) \
     XX(jl_parse_all) \
     XX(jl_parse_input_line) \
@@ -460,10 +440,6 @@
     XX(jl_stored_inline) \
     XX(jl_string_ptr) \
     XX(jl_string_to_array) \
-    XX(jl_strtod_c) \
-    XX(jl_strtof_c) \
-    XX(jl_substrtod) \
-    XX(jl_substrtof) \
     XX(jl_subtype) \
     XX(jl_subtype_env) \
     XX(jl_subtype_env_size) \
@@ -473,21 +449,18 @@
     XX(jl_svec_copy) \
     XX(jl_svec_fill) \
     XX(jl_svec_isassigned) \
-    XX(jl_svec_len) \
     XX(jl_svec_ref) \
     XX(jl_switch) \
     XX(jl_switchto) \
     XX(jl_symbol) \
     XX(jl_symbol_lookup) \
     XX(jl_symbol_n) \
-    XX(jl_symbol_name) \
     XX(jl_tagged_gensym) \
     XX(jl_take_buffer) \
     XX(jl_task_get_next) \
     XX(jl_task_stack_buffer) \
     XX(jl_test_cpu_feature) \
     XX(jl_threadid) \
-    XX(jl_threading_enabled) \
     XX(jl_throw) \
     XX(jl_throw_out_of_memory_error) \
     XX(jl_too_few_args) \
@@ -509,10 +482,8 @@
     XX(jl_type_morespecific) \
     XX(jl_type_morespecific_no_subtype) \
     XX(jl_typename_str) \
-    XX(jl_typeof) \
     XX(jl_typeof_str) \
     XX(jl_types_equal) \
-    XX(jl_type_to_llvm) \
     XX(jl_type_union) \
     XX(jl_type_unionall) \
     XX(jl_unbox_bool) \
@@ -532,7 +503,6 @@
     XX(jl_uncompress_argnames) \
     XX(jl_uncompress_ir) \
     XX(jl_undefined_var_error) \
-    XX(jl_valueof) \
     XX(jl_value_ptr) \
     XX(jl_ver_is_release) \
     XX(jl_ver_major) \
@@ -543,3 +513,54 @@
     XX(jl_vprintf) \
     XX(jl_wakeup_thread) \
     XX(jl_yield) \
+
+#define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \
+    XX(jl_setjmp)
+
+// use YY instead of XX to avoid jl -> ijl renaming in libjulia-codegen
+#define JL_CODEGEN_EXPORTED_FUNCS(YY) \
+    YY(jl_dump_function_ir) \
+    YY(jl_dump_method_asm) \
+    YY(jl_extern_c) \
+    YY(jl_get_llvm_context) \
+    YY(jl_get_llvmf_defn) \
+    YY(jl_get_llvm_function) \
+    YY(jl_get_llvm_module) \
+    YY(jl_get_LLVM_VERSION) \
+    YY(jl_dump_native) \
+    YY(jl_get_llvm_gv) \
+    YY(jl_dump_function_asm) \
+    YY(jl_LLVMCreateDisasm) \
+    YY(jl_LLVMDisasmInstruction) \
+    YY(jl_init_codegen) \
+    YY(jl_getFunctionInfo) \
+    YY(jl_register_fptrs) \
+    YY(jl_generate_fptr) \
+    YY(jl_generate_fptr_for_unspecialized) \
+    YY(jl_compile_extern_c) \
+    YY(jl_teardown_codegen) \
+    YY(jl_jit_total_bytes) \
+    YY(jl_lock_profile) \
+    YY(jl_unlock_profile) \
+    YY(jl_create_native) \
+    YY(jl_dump_compiles) \
+    YY(jl_dump_fptr_asm) \
+    YY(jl_get_function_id) \
+    YY(jl_type_to_llvm) \
+    YY(jl_getUnwindInfo) \
+    YY(jl_get_libllvm) \
+    YY(LLVMExtraAddLowerSimdLoopPass) \
+    YY(LLVMExtraAddFinalLowerGCPass) \
+    YY(LLVMExtraAddPropagateJuliaAddrspaces) \
+    YY(LLVMExtraAddRemoveJuliaAddrspacesPass) \
+    YY(LLVMExtraAddCombineMulAddPass) \
+    YY(LLVMExtraAddMultiVersioningPass) \
+    YY(LLVMExtraAddLowerExcHandlersPass) \
+    YY(LLVMExtraAddLateLowerGCFramePass) \
+    YY(LLVMExtraJuliaLICMPass) \
+    YY(LLVMExtraAddAllocOptPass) \
+    YY(LLVMExtraAddLowerPTLSPass) \
+    YY(LLVMExtraAddRemoveNIPass) \
+    YY(LLVMExtraAddGCInvariantVerifierPass) \
+    YY(LLVMExtraAddDemoteFloat16Pass) \
+
diff --git a/src/jl_uv.c b/src/jl_uv.c
index 18733926e8c54..2c9c129d7bbf6 100644
--- a/src/jl_uv.c
+++ b/src/jl_uv.c
@@ -22,11 +22,6 @@
 #include "support/ios.h"
 #include "uv.h"
 
-#if defined(_COMPILER_MICROSOFT_) && !defined(write)
-#include <io.h>
-#define write _write
-#endif
-
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -57,7 +52,7 @@ void jl_init_uv(void)
     JL_MUTEX_INIT(&jl_uv_mutex); // a file-scope initializer can be used instead
 }
 
-int jl_uv_n_waiters = 0;
+_Atomic(int) jl_uv_n_waiters = 0;
 
 void JL_UV_LOCK(void)
 {
@@ -107,7 +102,7 @@ static void jl_uv_closeHandle(uv_handle_t *handle)
     if (handle->type != UV_FILE && handle->data) {
         jl_task_t *ct = jl_current_task;
         size_t last_age = ct->world_age;
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_uv_call_close_callback((jl_value_t*)handle->data);
         ct->world_age = last_age;
     }
@@ -201,14 +196,14 @@ JL_DLLEXPORT void jl_uv_req_set_data(uv_req_t *req, void *data) { req->data = da
 JL_DLLEXPORT void *jl_uv_handle_data(uv_handle_t *handle) { return handle->data; }
 JL_DLLEXPORT void *jl_uv_write_handle(uv_write_t *req) { return req->handle; }
 
-extern volatile unsigned _threadedregion;
+extern _Atomic(unsigned) _threadedregion;
 
 JL_DLLEXPORT int jl_process_events(void)
 {
     jl_task_t *ct = jl_current_task;
     uv_loop_t *loop = jl_io_loop;
     jl_gc_safepoint_(ct->ptls);
-    if (loop && (_threadedregion || ct->tid == 0)) {
+    if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == 0)) {
         if (jl_atomic_load(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) {
             loop->stop_flag = 0;
             int r = uv_run(loop, UV_RUN_NOWAIT);
@@ -369,6 +364,14 @@ JL_DLLEXPORT int jl_fs_sendfile(uv_os_fd_t src_fd, uv_os_fd_t dst_fd,
     return ret;
 }
 
+JL_DLLEXPORT int jl_fs_hardlink(char *path, char *new_path)
+{
+    uv_fs_t req;
+    int ret = uv_fs_link(unused_uv_loop_arg, &req, path, new_path, NULL);
+    uv_fs_req_cleanup(&req);
+    return ret;
+}
+
 JL_DLLEXPORT int jl_fs_symlink(char *path, char *new_path, int flags)
 {
     uv_fs_t req;
@@ -406,7 +409,7 @@ JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
 {
     jl_task_t *ct = jl_get_current_task();
     // TODO: fix this cheating
-    if (jl_get_safe_restore() || ct == NULL || ct->tid != 0)
+    if (jl_get_safe_restore() || ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0)
 #ifdef _OS_WINDOWS_
         return WriteFile(handle, data, len, NULL, NULL);
 #else
@@ -506,7 +509,7 @@ JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n)
 
     // TODO: Hack to make CoreIO thread-safer
     jl_task_t *ct = jl_get_current_task();
-    if (ct == NULL || ct->tid != 0) {
+    if (ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0) {
         if (stream == JL_STDOUT) {
             fd = UV_STDOUT_FD;
         }
diff --git a/src/jlapi.c b/src/jlapi.c
index 2192d4cee665f..a47af5360520a 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -433,7 +433,7 @@ JL_DLLEXPORT jl_value_t *(jl_get_fieldtypes)(jl_value_t *v)
 }
 
 
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
 JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void)
 {
     jl_task_t *ct = jl_current_task;
@@ -475,6 +475,20 @@ JL_DLLEXPORT void (jl_cpu_wake)(void)
     jl_cpu_wake();
 }
 
+JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_before(void)
+{
+    // Increment the flag to allow reentrant callers to `@time`.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
+}
+
+JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after(void)
+{
+    // Decrement the flag when done measuring, allowing other callers to continue measuring.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
+}
+
 JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
 {
     ret[0] = FE_INEXACT;
@@ -501,17 +515,6 @@ JL_DLLEXPORT int jl_set_fenv_rounding(int i)
     return fesetround(i);
 }
 
-
-#ifdef JL_ASAN_ENABLED
-JL_DLLEXPORT const char* __asan_default_options()
-{
-    return "allow_user_segv_handler=1:detect_leaks=0";
-    // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(),
-    //        or defining __lsan_default_options = exitcode=0 once publicly available
-    //        (here and in flisp/flmain.c)
-}
-#endif
-
 static int exec_program(char *program)
 {
     JL_TRY {
@@ -615,8 +618,7 @@ static NOINLINE int true_main(int argc, char *argv[])
 static void lock_low32(void)
 {
 #if defined(_OS_WINDOWS_) && defined(_P64) && defined(JL_DEBUG_BUILD)
-    // Wine currently has a that causes it to answer VirtualQuery incorrectly.
-    // block usage of the 32-bit address space on win64, to catch pointer cast errors
+    // Prevent usage of the 32-bit address space on Win64, to catch pointer cast errors.
     char *const max32addr = (char*)0xffffffffL;
     SYSTEM_INFO info;
     MEMORY_BASIC_INFORMATION meminfo;
@@ -640,11 +642,12 @@ static void lock_low32(void)
                 if ((char*)p != first)
                     // Wine and Windows10 seem to have issues with reporting memory access information correctly
                     // so we sometimes end up with unexpected results - this is just ignore those and continue
-                    // this is just a debugging aid to help find accidental pointer truncation anyways, so it's not critical
+                    // this is only a debugging aid to help find accidental pointer truncation anyway,
+                    // so it is not critical
                     VirtualFree(p, 0, MEM_RELEASE);
             }
         }
-        meminfo.BaseAddress += meminfo.RegionSize;
+        meminfo.BaseAddress = (void*)((char*)meminfo.BaseAddress + meminfo.RegionSize);
     }
 #endif
     return;
@@ -653,16 +656,16 @@ static void lock_low32(void)
 // Actual definition in `ast.c`
 void jl_lisp_prompt(void);
 
-static void rr_detach_teleport(void) {
 #ifdef _OS_LINUX_
+static void rr_detach_teleport(void) {
 #define RR_CALL_BASE 1000
 #define SYS_rrcall_detach_teleport (RR_CALL_BASE + 9)
     int err = syscall(SYS_rrcall_detach_teleport, 0, 0, 0, 0, 0, 0);
     if (err < 0 || jl_running_under_rr(1)) {
         jl_error("Failed to detach from rr session");
     }
-#endif
 }
+#endif
 
 JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
 {
@@ -679,16 +682,18 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
         memmove(&argv[1], &argv[2], (argc-2)*sizeof(void*));
         argc--;
     }
-    char **orig_argv = argv;
-    jl_parse_opts(&argc, (char***)&argv);
+    char **new_argv = argv;
+    jl_parse_opts(&argc, (char***)&new_argv);
 
     // The parent process requested that we detach from the rr session.
     // N.B.: In a perfect world, we would only do this for the portion of
     // the execution where we actually need to exclude rr (e.g. because we're
     // testing for the absence of a memory-model-dependent bug).
     if (jl_options.rr_detach && jl_running_under_rr(0)) {
+#ifdef _OS_LINUX_
         rr_detach_teleport();
-        execv("/proc/self/exe", orig_argv);
+        execv("/proc/self/exe", argv);
+#endif
         jl_error("Failed to self-execute");
     }
 
@@ -698,7 +703,7 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
         jl_lisp_prompt();
         return 0;
     }
-    int ret = true_main(argc, (char**)argv);
+    int ret = true_main(argc, (char**)new_argv);
     jl_atexit_hook(ret);
     return ret;
 }
diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm
index fb3e732d41ca0..0737a343f1e14 100644
--- a/src/jlfrontend.scm
+++ b/src/jlfrontend.scm
@@ -153,13 +153,14 @@
 (define (jl-expand-to-thunk-warn expr file line stmt)
   (let ((warnings '()))
     (with-bindings
-     ((lowering-warning (lambda lst (set! warnings (cons lst warnings)))))
-     (begin0
-      (if stmt
-          (expand-to-thunk-stmt- expr file line)
-          (expand-to-thunk- expr file line))
-      (for-each (lambda (args) (apply julia-logmsg args))
-                (reverse warnings))))))
+     ;; Abuse scm_to_julia here to convert the arguments of each warning. It is
+     ;; meant for `Expr`s but should be good enough provided we're only passing
+     ;; simple numbers, symbols and strings.
+     ((lowering-warning (lambda lst (set! warnings (cons (cons 'warn lst) warnings)))))
+     (let ((thunk (if stmt
+                      (expand-to-thunk-stmt- expr file line)
+                      (expand-to-thunk- expr file line))))
+       (if (pair? warnings) `(warn ,@(reverse warnings) ,thunk) thunk)))))
 
 (define (jl-expand-to-thunk expr file line)
   (expand-to-thunk- expr file line))
@@ -214,16 +215,6 @@
 ; Utilities for logging messages from the frontend, in a way which can be
 ; controlled from julia code.
 
-; Log a general deprecation message at line node location `lno`
-(define (deprecation-message msg lno)
-  (let* ((lf (extract-line-file lno)) (line (car lf)) (file (cadr lf)))
-    (frontend-depwarn msg file line)))
-
-; Log a syntax deprecation from line node location `lno`
-(define (syntax-deprecation what instead lno)
-  (let* ((lf (extract-line-file lno)) (line (car lf)) (file (cadr lf)))
-    (deprecation-message (format-syntax-deprecation what instead file line #f) lno)))
-
 ; Extract line and file from a line number node, defaulting to (0, none)
 ; respectively if lno is absent (`#f`) or doesn't contain a file
 (define (extract-line-file lno)
@@ -241,21 +232,4 @@
       ""
       (string (if exactloc " at " " around ") file ":" line)))
 
-(define (format-syntax-deprecation what instead file line exactloc)
-  (string "Deprecated syntax `" what "`"
-          (format-file-line file line exactloc)
-          "."
-          (if (equal? instead "") ""
-              (string #\newline "Use `" instead "` instead."))))
-
 (define *scopewarn-opt* 1)
-
-; Corresponds to --depwarn 0="no", 1="yes", 2="error"
-(define *depwarn-opt* 1)
-
-; Emit deprecation warning via julia logging layer.
-(define (frontend-depwarn msg file line)
-  ; (display (string msg "; file = " file "; line = " line #\newline)))
-  (case *depwarn-opt*
-    (1 (julia-logmsg 1000 'depwarn (symbol (string file line)) file line msg))
-    (2 (error msg))))
diff --git a/src/jloptions.c b/src/jloptions.c
index ab1af72a04c79..1ff4da7c5c10b 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -5,12 +5,8 @@
 
 #include "julia.h"
 
-#ifndef _MSC_VER
 #include <unistd.h>
 #include <getopt.h>
-#else
-#include "getopt.h"
-#endif
 #include "julia_assert.h"
 
 #ifdef _OS_WINDOWS_
@@ -27,57 +23,67 @@ JL_DLLEXPORT const char *jl_get_default_sysimg_path(void)
     return &system_image_path[1];
 }
 
+static int jl_options_initialized = 0;
 
-jl_options_t jl_options = { 0,    // quiet
-                            -1,   // banner
-                            NULL, // julia_bindir
-                            NULL, // julia_bin
-                            NULL, // cmds
-                            NULL, // image_file (will be filled in below)
-                            NULL, // cpu_target ("native", "core2", etc...)
-                            0,    // nthreads
-                            0,    // nprocs
-                            NULL, // machine_file
-                            NULL, // project
-                            0,    // isinteractive
-                            0,    // color
-                            JL_OPTIONS_HISTORYFILE_ON, // history file
-                            0,    // startup file
-                            JL_OPTIONS_COMPILE_DEFAULT, // compile_enabled
-                            0,    // code_coverage
-                            0,    // malloc_log
-                            2,    // opt_level
-                            0,    // opt_level_min
+JL_DLLEXPORT void jl_init_options(void)
+{
+    if (jl_options_initialized)
+        return;
+    jl_options =
+        (jl_options_t){ 0,    // quiet
+                        -1,   // banner
+                        NULL, // julia_bindir
+                        NULL, // julia_bin
+                        NULL, // cmds
+                        NULL, // image_file (will be filled in below)
+                        NULL, // cpu_target ("native", "core2", etc...)
+                        0,    // nthreads
+                        0,    // nprocs
+                        NULL, // machine_file
+                        NULL, // project
+                        0,    // isinteractive
+                        0,    // color
+                        JL_OPTIONS_HISTORYFILE_ON, // history file
+                        0,    // startup file
+                        JL_OPTIONS_COMPILE_DEFAULT, // compile_enabled
+                        0,    // code_coverage
+                        0,    // malloc_log
+                        2,    // opt_level
+                        0,    // opt_level_min
 #ifdef JL_DEBUG_BUILD
-                            2,    // debug_level [debug build]
+                        2,    // debug_level [debug build]
 #else
-                            1,    // debug_level [release build]
+                        1,    // debug_level [release build]
 #endif
-                            JL_OPTIONS_CHECK_BOUNDS_DEFAULT, // check_bounds
-                            JL_OPTIONS_DEPWARN_OFF,    // deprecation warning
-                            0,    // method overwrite warning
-                            1,    // can_inline
-                            JL_OPTIONS_POLLY_ON, // polly
-                            NULL, // trace_compile
-                            JL_OPTIONS_FAST_MATH_DEFAULT,
-                            0,    // worker
-                            NULL, // cookie
-                            JL_OPTIONS_HANDLE_SIGNALS_ON,
-                            JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES,
-                            JL_OPTIONS_USE_COMPILED_MODULES_YES,
-                            NULL, // bind-to
-                            NULL, // output-bc
-                            NULL, // output-unopt-bc
-                            NULL, // output-o
-                            NULL, // output-asm
-                            NULL, // output-ji
-                            NULL,    // output-code_coverage
-                            0, // incremental
-                            0, // image_file_specified
-                            JL_OPTIONS_WARN_SCOPE_ON,  // ambiguous scope warning
-                            0, // image-codegen
-                            0, // rr-detach
-};
+                        JL_OPTIONS_CHECK_BOUNDS_DEFAULT, // check_bounds
+                        JL_OPTIONS_DEPWARN_OFF,    // deprecation warning
+                        0,    // method overwrite warning
+                        1,    // can_inline
+                        JL_OPTIONS_POLLY_ON, // polly
+                        NULL, // trace_compile
+                        JL_OPTIONS_FAST_MATH_DEFAULT,
+                        0,    // worker
+                        NULL, // cookie
+                        JL_OPTIONS_HANDLE_SIGNALS_ON,
+                        JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES,
+                        JL_OPTIONS_USE_COMPILED_MODULES_YES,
+                        NULL, // bind-to
+                        NULL, // output-bc
+                        NULL, // output-unopt-bc
+                        NULL, // output-o
+                        NULL, // output-asm
+                        NULL, // output-ji
+                        NULL,    // output-code_coverage
+                        0, // incremental
+                        0, // image_file_specified
+                        JL_OPTIONS_WARN_SCOPE_ON,  // ambiguous scope warning
+                        0, // image-codegen
+                        0, // rr-detach
+                        0, // strip-metadata
+                        0, // strip-ir
+    };
+    jl_options_initialized = 1;
+}
 
 static const char usage[] = "julia [switches] -- [programfile] [args...]\n";
 static const char opts[]  =
@@ -124,7 +130,7 @@ static const char opts[]  =
     " -C, --cpu-target <target> Limit usage of CPU features up to <target>; set to \"help\" to see the available options\n"
     " -O, --optimize={0,1,2,3}  Set the optimization level (default level is 2 if unspecified or 3 if used without a level)\n"
     " --min-optlevel={0,1,2,3}  Set a lower bound on the optimization level (default is 0)\n"
-    " -g, -g <level>            Enable / Set the level of debug info generation"
+    " -g, -g <level>            Enable or set the level of debug info generation"
 #ifdef JL_DEBUG_BUILD
         " (default level for julia-debug is 2 if unspecified or if used without a level)\n"
 #else
@@ -159,6 +165,8 @@ static const char opts_hidden[]  =
     // compiler output options
     " --output-o name           Generate an object file (including system image data)\n"
     " --output-ji name          Generate a system image data file (.ji)\n"
+    " --strip-metadata          Remove docstrings and source location info from system image\n"
+    " --strip-ir                Remove IR (intermediate representation) of compiled functions\n"
 
     // compiler debugging (see the devdocs for tips on using these options)
     " --output-unopt-bc name    Generate unoptimized LLVM bitcode (.bc)\n"
@@ -208,6 +216,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_bug_report,
            opt_image_codegen,
            opt_rr_detach,
+           opt_strip_metadata,
+           opt_strip_ir,
     };
     static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
     static const struct option longopts[] = {
@@ -261,6 +271,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "lisp",            no_argument,       0, 1 },
         { "image-codegen",   no_argument,       0, opt_image_codegen },
         { "rr-detach",       no_argument,       0, opt_rr_detach },
+        { "strip-metadata",  no_argument,       0, opt_strip_metadata },
+        { "strip-ir",        no_argument,       0, opt_strip_ir },
         { 0, 0, 0, 0 }
     };
 
@@ -685,6 +697,12 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         case opt_rr_detach:
             jl_options.rr_detach = 1;
             break;
+        case opt_strip_metadata:
+            jl_options.strip_metadata = 1;
+            break;
+        case opt_strip_ir:
+            jl_options.strip_ir = 1;
+            break;
         default:
             jl_errorf("julia: unhandled option -- %c\n"
                       "This is a bug, please report it.", c);
diff --git a/src/jloptions.h b/src/jloptions.h
new file mode 100644
index 0000000000000..0f53bc0f8a4de
--- /dev/null
+++ b/src/jloptions.h
@@ -0,0 +1,58 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_JLOPTIONS_H
+#define JL_JLOPTIONS_H
+
+// NOTE: This struct needs to be kept in sync with the JLOptions type in base/options.jl
+
+typedef struct {
+    int8_t quiet;
+    int8_t banner;
+    const char *julia_bindir;
+    const char *julia_bin;
+    const char **cmds;
+    const char *image_file;
+    const char *cpu_target;
+    int32_t nthreads;
+    int32_t nprocs;
+    const char *machine_file;
+    const char *project;
+    int8_t isinteractive;
+    int8_t color;
+    int8_t historyfile;
+    int8_t startupfile;
+    int8_t compile_enabled;
+    int8_t code_coverage;
+    int8_t malloc_log;
+    int8_t opt_level;
+    int8_t opt_level_min;
+    int8_t debug_level;
+    int8_t check_bounds;
+    int8_t depwarn;
+    int8_t warn_overwrite;
+    int8_t can_inline;
+    int8_t polly;
+    const char *trace_compile;
+    int8_t fast_math;
+    int8_t worker;
+    const char *cookie;
+    int8_t handle_signals;
+    int8_t use_sysimage_native_code;
+    int8_t use_compiled_modules;
+    const char *bindto;
+    const char *outputbc;
+    const char *outputunoptbc;
+    const char *outputo;
+    const char *outputasm;
+    const char *outputji;
+    const char *output_code_coverage;
+    int8_t incremental;
+    int8_t image_file_specified;
+    int8_t warn_scope;
+    int8_t image_codegen;
+    int8_t rr_detach;
+    int8_t strip_metadata;
+    int8_t strip_ir;
+} jl_options_t;
+
+#endif
diff --git a/src/jltypes.c b/src/jltypes.c
index 1ae49c0a32eab..f14b364bda9ff 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -19,6 +19,8 @@
 extern "C" {
 #endif
 
+_Atomic(jl_value_t*) cmpswap_names JL_GLOBALLY_ROOTED;
+
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
 #define h2index(hv, sz) (size_t)((hv) & ((sz)-1))
@@ -44,12 +46,10 @@ static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env)
                layout_uses_free_typevars(((jl_uniontype_t*)v)->b, env);
     if (jl_is_vararg(v)) {
         jl_vararg_t *vm = (jl_vararg_t*)v;
-        if (vm->T) {
-            if (layout_uses_free_typevars(vm->T, env))
-                return 1;
-            if (vm->N && layout_uses_free_typevars(vm->N, env))
-                return 1;
-        }
+        if (vm->T && layout_uses_free_typevars(vm->T, env))
+            return 1;
+        if (vm->N && layout_uses_free_typevars(vm->N, env))
+            return 1;
         return 0;
     }
     if (jl_is_unionall(v)) {
@@ -623,7 +623,7 @@ static jl_datatype_t *lookup_type_set(jl_svec_t *cache, jl_value_t **key, size_t
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    jl_datatype_t **tab = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *tab = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t index = h2index(hv, sz);
     size_t orig = index;
     size_t iter = 0;
@@ -646,7 +646,7 @@ static jl_datatype_t *lookup_type_setvalue(jl_svec_t *cache, jl_value_t *key1, j
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    jl_datatype_t **tab = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *tab = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t index = h2index(hv, sz);
     size_t orig = index;
     size_t iter = 0;
@@ -669,7 +669,7 @@ static ssize_t lookup_type_idx_linear(jl_svec_t *cache, jl_value_t **key, size_t
 {
     if (n == 0)
         return -1;
-    jl_datatype_t **data = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *data = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t cl = jl_svec_len(cache);
     ssize_t i;
     for (i = 0; i < cl; i++) {
@@ -686,7 +686,7 @@ static ssize_t lookup_type_idx_linearvalue(jl_svec_t *cache, jl_value_t *key1, j
 {
     if (n == 0)
         return -1;
-    jl_datatype_t **data = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *data = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t cl = jl_svec_len(cache);
     ssize_t i;
     for (i = 0; i < cl; i++) {
@@ -730,9 +730,9 @@ static jl_value_t *lookup_typevalue(jl_typename_t *tn, jl_value_t *key1, jl_valu
     }
 }
 
-static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv)
+static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv, int atomic)
 {
-    jl_datatype_t **tab = (jl_datatype_t**)jl_svec_data(a);
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)jl_svec_data(a);
     size_t sz = jl_svec_len(a);
     if (sz <= 1)
         return 0;
@@ -742,9 +742,12 @@ static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv)
     orig = index;
     size_t maxprobe = max_probe(sz);
     do {
-        jl_value_t *tab_i = (jl_value_t*)tab[index];
+        jl_value_t *tab_i = jl_atomic_load_relaxed(&tab[index]);
         if (tab_i == NULL || tab_i == jl_nothing) {
-            jl_atomic_store_release(&tab[index], val);
+            if (atomic)
+                jl_atomic_store_release(&tab[index], (jl_value_t*)val);
+            else
+                jl_atomic_store_relaxed(&tab[index], (jl_value_t*)val);
             jl_gc_wb(a, val);
             return 1;
         }
@@ -759,10 +762,10 @@ static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz);
 
 static void cache_insert_type_set(jl_datatype_t *val, uint_t hv)
 {
-    jl_svec_t *a = val->name->cache;
+    jl_svec_t *a = jl_atomic_load_relaxed(&val->name->cache);
     while (1) {
         JL_GC_PROMISE_ROOTED(a);
-        if (cache_insert_type_set_(a, val, hv))
+        if (cache_insert_type_set_(a, val, hv, 1))
             return;
 
         /* table full */
@@ -785,17 +788,17 @@ static void cache_insert_type_set(jl_datatype_t *val, uint_t hv)
 
 static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz)
 {
-    jl_datatype_t **ol = (jl_datatype_t**)jl_svec_data(a);
+    jl_value_t **ol = jl_svec_data(a);
     size_t sz = jl_svec_len(a);
     while (1) {
         size_t i;
         jl_svec_t *newa = jl_alloc_svec(newsz);
         JL_GC_PUSH1(&newa);
         for (i = 0; i < sz; i += 1) {
-            jl_datatype_t *val = ol[i];
-            if (val != NULL && (jl_value_t*)val != jl_nothing) {
-                uint_t hv = val->hash;
-                if (!cache_insert_type_set_(newa, val, hv)) {
+            jl_value_t *val = ol[i];
+            if (val != NULL && val != jl_nothing) {
+                uint_t hv = ((jl_datatype_t*)val)->hash;
+                if (!cache_insert_type_set_(newa, (jl_datatype_t*)val, hv, 0)) {
                     break;
                 }
             }
@@ -809,7 +812,7 @@ static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz)
 
 static void cache_insert_type_linear(jl_datatype_t *type, ssize_t insert_at)
 {
-    jl_svec_t *cache = type->name->linearcache;
+    jl_svec_t *cache = jl_atomic_load_relaxed(&type->name->linearcache);
     assert(jl_is_svec(cache));
     size_t n = jl_svec_len(cache);
     if (n == 0 || jl_svecref(cache, n - 1) != NULL) {
@@ -846,7 +849,7 @@ void jl_cache_type_(jl_datatype_t *type)
         cache_insert_type_set(type, hv);
     }
     else {
-        ssize_t idx = lookup_type_idx_linear(type->name->linearcache, key, n);
+        ssize_t idx = lookup_type_idx_linear(jl_atomic_load_relaxed(&type->name->linearcache), key, n);
         assert(idx < 0);
         cache_insert_type_linear(type, ~idx);
     }
@@ -895,19 +898,19 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env);
+                                       jl_typestack_t *stack, jl_typeenv_t *env);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
 static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                     int cacheable, jl_typestack_t *stack, jl_typeenv_t *env, int c)
+                                     jl_typestack_t *stack, jl_typeenv_t *env, int c) // c: index into iparams of the value bound to the next UnionAll variable
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, cacheable, stack, env);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env); // every UnionAll layer peeled: instantiate with the accumulated env
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
-    return inst_datatype_env(ua->body, p, iparams, ntp, cacheable, stack, &e, c + 1);
+    return inst_datatype_env(ua->body, p, iparams, ntp, stack, &e, c + 1); // e binds ua->var to iparams[c]; recurse on the body with the next parameter
 }
 
 jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
@@ -923,14 +926,7 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
         jl_value_t *u = jl_unwrap_unionall(tc);
         if (jl_is_datatype(u) && n == jl_nparams((jl_datatype_t*)u) &&
             ((jl_datatype_t*)u)->name->wrapper == tc) {
-            int cacheable = 1;
-            for (i = 0; i < n; i++) {
-                if (jl_has_free_typevars(params[i])) {
-                    cacheable = 0;
-                    break;
-                }
-            }
-            return inst_datatype_env(tc, NULL, params, n, cacheable, NULL, NULL, 0);
+            return inst_datatype_env(tc, NULL, params, n, NULL, NULL, 0);
         }
     }
     JL_GC_PUSH1(&tc);
@@ -966,7 +962,8 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
             }
             // if this is a wrapper, let check_datatype_parameters give the error
             if (!iswrapper)
-                jl_type_error_rt("Type", jl_symbol_name(ua->var->name), (jl_value_t*)ua->var, pi);
+                jl_type_error_rt(jl_is_datatype(inner) ? jl_symbol_name(inner->name->name) : "Type",
+                                 jl_symbol_name(ua->var->name), (jl_value_t*)ua->var, pi);
         }
 
         tc = jl_instantiate_unionall(ua, pi);
@@ -977,20 +974,42 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
 
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1)
 {
-    JL_GC_PUSH1(&p1);
-    jl_value_t *t = jl_apply_type(tc, &p1, 1);
-    JL_GC_POP();
-    return t;
+    return jl_apply_type(tc, &p1, 1); // one-parameter convenience form: &p1 is passed as a params array of length 1
 }
 
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2)
 {
-    jl_value_t **args;
-    JL_GC_PUSHARGS(args, 2);
-    args[0] = p1; args[1] = p2;
-    jl_value_t *t = jl_apply_type(tc, args, 2);
-    JL_GC_POP();
-    return t;
+    jl_value_t *args[2]; // plain stack array, no GC frame is pushed here; NOTE(review): presumably jl_apply_type or the caller keeps p1/p2 alive -- confirm
+    args[0] = p1;
+    args[1] = p2;
+    return jl_apply_type(tc, args, 2); // two-parameter convenience form of jl_apply_type
+}
+
+jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) // applies jl_pair_type to (dt, dt) and returns the result as a datatype
+{
+    jl_value_t *pairtyp = jl_apply_type2(jl_pair_type, dt, dt);
+    JL_GC_PROMISE_ROOTED(pairtyp); // (JL_ALWAYS_LEAFTYPE)
+    return (jl_datatype_t*)pairtyp;
+}
+
+jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) // applies jl_namedtuple_type to (names, tuple type of dt and jl_bool_type)
+{
+    jl_value_t *params[2]; // scratch array: first holds the two field-name symbols, then the two field types
+    jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names);
+    if (names == NULL) { // name tuple not published yet: build (old, success) and try to install it
+        params[0] = (jl_value_t*)jl_symbol("old");
+        params[1] = (jl_value_t*)jl_symbol("success");
+        jl_value_t *lnames = jl_f_tuple(NULL, params, 2);
+        if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames))
+            names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames
+    } // NOTE(review): if the swap fails, `names` presumably receives the value another thread installed -- confirm against jl_atomic_cmpswap
+    params[0] = dt;
+    params[1] = (jl_value_t*)jl_bool_type;
+    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
 }
 
 JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
@@ -1177,8 +1196,12 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
                 dt->has_concrete_subtype = 0;
         }
     }
-    if (dt->name == jl_type_typename)
+    if (dt->name == jl_type_typename) {
         cacheable = 0; // the cache for Type ignores parameter normalization, so it can't be used as a regular hash
+        jl_value_t *p = jl_tparam(dt, 0);
+        if (!jl_is_type(p) && !jl_is_typevar(p)) // Type{v} has no subtypes, if v is not a Type
+            dt->has_concrete_subtype = 0;
+    }
     dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable);
     dt->cached_by_hash = cacheable ? (typekey_hash(dt->name, jl_svec_data(dt->parameters), l, 0) != 0) : (dt->hash != 0);
 }
@@ -1197,16 +1220,19 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
     }
     assert(i == np*2);
     wrapper = tn->wrapper;
-    for(i=0; i < np; i++) {
+    for (i = 0; i < np; i++) {
         assert(jl_is_unionall(wrapper));
         jl_tvar_t *tv = ((jl_unionall_t*)wrapper)->var;
         if (!within_typevar(params[i], bounds[2*i], bounds[2*i+1])) {
-            // TODO: pass a new version of `tv` containing the instantiated bounds
+            if (tv->lb != bounds[2*i] || tv->ub != bounds[2*i+1])
+                // pass a new version of `tv` containing the instantiated bounds
+                tv = jl_new_typevar(tv->name, bounds[2*i], bounds[2*i+1]);
+            JL_GC_PUSH1(&tv);
             jl_type_error_rt(jl_symbol_name(tn->name), jl_symbol_name(tv->name), (jl_value_t*)tv, params[i]);
         }
         int j;
-        for(j=2*i+2; j < 2*np; j++) {
-            jl_value_t*bj = bounds[j];
+        for (j = 2*i + 2; j < 2*np; j++) {
+            jl_value_t *bj = bounds[j];
             if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type)
                 bounds[j] = jl_substitute_var(bj, tv, params[i]);
         }
@@ -1309,8 +1335,15 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
             u = (jl_unionall_t*)t;
         }
 
-        if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var))
-            t = jl_instantiate_unionall(u, u->var->ub);
+        if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var)) {
+            JL_TRY {
+                t = jl_instantiate_unionall(u, u->var->ub);
+            }
+            JL_CATCH {
+                // just skip normalization
+                // (may happen for bounds inconsistent with the wrapper's bounds)
+            }
+        }
     }
     JL_GC_POP();
     return t;
@@ -1319,18 +1352,32 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env)
+                                       jl_typestack_t *stack, jl_typeenv_t *env)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
     int istuple = (tn == jl_tuple_typename);
     int isnamedtuple = (tn == jl_namedtuple_typename);
     if (dt->name != jl_type_typename) {
-        for (size_t i = 0; i < ntp; i++)
+        size_t i;
+        for (i = 0; i < ntp; i++)
             iparams[i] = normalize_unionalls(iparams[i]);
     }
 
-    // check type cache
+    // check type cache, if applicable
+    int cacheable = 1;
+    if (istuple) {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (!jl_is_concrete_type(iparams[i]) && iparams[i] != jl_bottom_type)
+                cacheable = 0;
+    }
+    else {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (jl_has_free_typevars(iparams[i]))
+                cacheable = 0;
+    }
     if (cacheable) {
         size_t i;
         for (i = 0; i < ntp; i++) {
@@ -1516,9 +1563,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     // leading to incorrect layouts and data races (#40050: the A{T} should be
     // an isbitstype singleton of size 0)
     if (cacheable) {
-        if (dt->layout == NULL && !jl_is_primitivetype(dt) && ndt->types != NULL && ndt->isconcretetype) {
+        if (ndt->layout == NULL && ndt->types != NULL && ndt->isconcretetype)
             jl_compute_field_offsets(ndt);
-        }
         jl_cache_type_(ndt);
         JL_UNLOCK(&typecache_lock); // Might GC
     }
@@ -1529,13 +1575,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
 
 static jl_tupletype_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
 {
-    int cacheable = 1;
-    for (size_t i = 0; i < np; i++) {
-        assert(p[i]);
-        if (!jl_is_concrete_type(p[i]) && p[i] != jl_bottom_type)
-            cacheable = 0;
-    }
-    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, cacheable, NULL, NULL);
+    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL);
 }
 
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params)
@@ -1557,7 +1597,6 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
 {
     jl_tupletype_t *tt = (jl_datatype_t*)lookup_typevalue(jl_tuple_typename, arg1, args, nargs, leaf);
     if (tt == NULL) {
-        int cacheable = 1;
         size_t i;
         jl_svec_t *params = jl_alloc_svec(nargs);
         JL_GC_PUSH1(&params);
@@ -1569,14 +1608,13 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
                 // `jl_typeof(ai)`, but that will require some redesign of the caching
                 // logic.
                 ai = (jl_value_t*)jl_wrap_Type(ai);
-                cacheable = 0;
             }
             else {
                 ai = jl_typeof(ai);
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, cacheable, NULL, NULL);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL);
         JL_GC_POP();
     }
     return tt;
@@ -1644,9 +1682,6 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         iparams = jl_svec_data(ip_heap);
     }
     int bound = 0;
-    int cacheable = 1;
-    if (jl_is_va_tuple(tt))
-        cacheable = 0;
     int i;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
@@ -1655,11 +1690,9 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
-        if (cacheable && !jl_is_concrete_type(pi))
-            cacheable = 0;
     }
     if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -1746,18 +1779,16 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
     size_t ntp = jl_svec_len(tp);
     jl_value_t **iparams;
     JL_GC_PUSHARGS(iparams, ntp);
-    int cacheable = 1, bound = 0;
+    int bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
         jl_value_t *pi = inst_type_w_(elt, env, stack, check);
         iparams[i] = pi;
         bound |= (pi != elt);
-        if (cacheable && jl_has_free_typevars(pi))
-            cacheable = 0;
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
     if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -2007,7 +2038,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_methtable_type->types = jl_svec(12, jl_symbol_type, jl_any_type, jl_any_type,
                                        jl_any_type, jl_any_type/*jl_long*/,
                                        jl_any_type, jl_any_type/*module*/,
-                                       jl_any_type/*any vector*/, jl_any_type/*long*/, jl_any_type/*int32*/,
+                                       jl_any_type/*any vector*/, jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
                                        jl_any_type/*uint8*/, jl_any_type/*uint8*/);
     jl_precompute_memoized_dt(jl_methtable_type, 1);
 
@@ -2215,8 +2246,8 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_array_uint8_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint8_type, jl_box_long(1));
     jl_array_int32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_int32_type, jl_box_long(1));
     jl_an_empty_vec_any = (jl_value_t*)jl_alloc_vec_any(0); // used internally
-    jl_nonfunction_mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
-    jl_type_type_mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
+    jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
 
     jl_expr_type =
         jl_new_datatype(jl_symbol("Expr"), core,
@@ -2326,7 +2357,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "inlineable",
                             "propagate_inbounds",
                             "pure",
-                            "aggressive_constprop"),
+                            "constprop"),
                         jl_svec(19,
                             jl_array_any_type,
                             jl_array_int32_type,
@@ -2346,7 +2377,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_bool_type),
+                            jl_uint8_type),
                         jl_emptysvec,
                         0, 1, 19);
 
@@ -2379,7 +2410,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "isva",
                             "pure",
                             "is_for_opaque_closure",
-                            "aggressive_constprop"),
+                            "constprop"),
                         jl_svec(26,
                             jl_symbol_type,
                             jl_module_type,
@@ -2406,7 +2437,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_bool_type),
+                            jl_uint8_type),
                         jl_emptysvec,
                         0, 1, 10);
 
@@ -2601,13 +2632,8 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_svecset(jl_methtable_type->types, 4, jl_long_type);
     jl_svecset(jl_methtable_type->types, 6, jl_module_type);
     jl_svecset(jl_methtable_type->types, 7, jl_array_any_type);
-#ifdef __LP64__
-    jl_svecset(jl_methtable_type->types, 8, jl_int64_type); // unsigned long
-    jl_svecset(jl_methtable_type->types, 9, jl_int64_type); // uint32_t plus alignment
-#else
-    jl_svecset(jl_methtable_type->types, 8, jl_int32_type); // DWORD
-    jl_svecset(jl_methtable_type->types, 9, jl_int32_type); // uint32_t
-#endif
+    jl_svecset(jl_methtable_type->types, 8, jl_long_type); // voidpointer
+    jl_svecset(jl_methtable_type->types, 9, jl_long_type); // uint32_t plus alignment
     jl_svecset(jl_methtable_type->types, 10, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 11, jl_uint8_type);
     jl_svecset(jl_method_type->types, 12, jl_method_instance_type);
diff --git a/src/julia-parser.scm b/src/julia-parser.scm
index a22e714135822..9006fd4c2b380 100644
--- a/src/julia-parser.scm
+++ b/src/julia-parser.scm
@@ -221,13 +221,6 @@
 
 (define (newline? c) (eqv? c #\newline))
 
-(define (skip-to-eol port)
-  (let ((c (peek-char port)))
-    (cond ((eof-object? c)    c)
-          ((eqv? c #\newline) c)
-          (else               (read-char port)
-                              (skip-to-eol port)))))
-
 (define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c)))
 
 (define (read-operator port c0 (postfix? #f))
@@ -253,13 +246,13 @@
                                        (and (or (eq? opsym '--) (eq? opsym '.--))
                                             (read-char port)
                                             (or (begin0 (eqv? (peek-char port) #\>)
-                                                        (io.ungetc port #\-))
+                                                        (io.skip port -1)) ; unget -, leaving -
                                                 (error (string "invalid operator \"" newop "\""))))
                                        ;; <- is not an operator but <-- and <--> are
                                        (and (or (eq? opsym '<-) (eq? opsym '.<-))
                                             (read-char port)
                                             (begin0 (eqv? (peek-char port) #\-)
-                                                    (io.ungetc port #\-)))
+                                                    (io.skip port -1))) ; unget -, leaving <
                                        ;; consume suffixes after ', only if parsing a call chain
                                        ;; otherwise 'ᵀ' would parse as (|'| |'ᵀ|)
                                        (and postfix? (eqv? c0 #\') sufchar?))
@@ -278,7 +271,7 @@
                  (if (and (not (eof-object? c)) (pred c))
                      (loop str c)
                      (begin
-                       (io.ungetc port #\_)
+                       (io.skip port -1) ; unget _
                        (list->string (reverse str))))))
         (if (and (not (eof-object? c)) (pred c))
             (begin (read-char port)
@@ -328,7 +321,7 @@
       (if (eqv? (peek-char port) #\.)
           (begin (read-char port)
                  (if (dot-opchar? (peek-char port))
-                     (io.ungetc port #\.)
+                     (io.skip port -1) ; unget .
                      (error (string "invalid numeric constant \""
                                     (get-output-string str) #\. "\""))))))
     (define (read-digs lz _-digit-sep)
@@ -368,7 +361,7 @@
                                           (if (eqv? (peek-char port) #\')
                                               ""
                                               "; add space(s) to clarify")))))
-                     (io.ungetc port #\.))
+                     (io.skip port -1)) ; unget .
                    (begin (write-char #\. str)
                           (read-digs #f #t)
                           (if (eq? pred char-hex?)
@@ -393,7 +386,7 @@
                                   (write-char (read-char port) str))
                               (read-digs #t #f)
                               (disallow-dot))
-                       (io.ungetc port c)))))
+                       (io.skip port -1))))) ; unget c
       (if (and (char? c)
                (or (eq? pred char-bin?) (eq? pred char-oct?)
                    (and (eq? pred char-hex?) (not is-hex-float-literal)))
@@ -495,33 +488,56 @@
        (pair? (cadr t)) (eq? (car (cadr t)) 'core)
        (memq (cadadr t) '(@int128_str @uint128_str @big_str))))
 
+(define (make-bidi-state) '(0 . 0)) ; initial bidi state: pair of counters, car for LRE/RLE/LRO/RLO vs PDF, cdr for LRI/RLI/FSI vs PDI
+
+(define (update-bidi-state st c) ; returns the counter pair that follows st once character c has been seen
+  (let ((emb (car st)) (iso (cdr st)))
+    (cond ((memv c '(#\U202A #\U202B #\U202D #\U202E)) (cons (+ emb 1) iso)) ;; LRE RLE LRO RLO
+          ((memv c '(#\U2066 #\U2067 #\U2068))         (cons emb (+ iso 1))) ;; LRI RLI FSI
+          ((eqv? c #\U202C)                            (cons (- emb 1) iso)) ;; PDF
+          ((eqv? c #\U2069)                            (cons emb (- iso 1))) ;; PDI
+          ((eqv? c #\newline)                          '(0 . 0))              ;; both counters restart on a newline
+          (else st))))
+
+(define (bidi-state-terminated? st) (equal? st '(0 . 0))) ; true only when both counters are exactly zero (a negative count is also unterminated)
+
+(define (skip-line-comment port) ; consume characters up to, but not including, the next newline; returns the peeked newline or EOF object
+  (let loop ((c (peek-char port)))
+    (if (or (eof-object? c) (eqv? c #\newline))
+        c
+        (begin (read-char port)
+               (loop (peek-char port))))))
+
+(define (skip-multiline-comment port count bds) ; skip to the `=#` closing a #= comment; count = open nesting depth, bds = bidi state so far
+  (let ((c (read-char port)))
+    (if (eof-object? c)
+        (error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
+        (if (eqv? c #\=)
+            (let ((nxt (peek-char port))) ; only peeked: consumed below solely when it completes `=#`
+              (if (eqv? nxt #\#)
+                  (begin
+                    (read-char port)
+                    (if (> count 1)
+                        (skip-multiline-comment port (- count 1) bds) ; closed an inner comment, keep scanning the outer one
+                        (if (not (bidi-state-terminated? bds)) ; outermost comment closed: formatting must be balanced
+                            (error "unbalanced bidirectional formatting in comment"))))
+                  (skip-multiline-comment port count bds))) ; `=` leaves bds unchanged; nxt is still unread and is counted once, when the next call reads it
+            (if (eqv? c #\#)
+                (skip-multiline-comment port
+                                        (if (eqv? (peek-char port) #\=) ; `#=` opens a nested comment
+                                            (begin (read-char port)
+                                                   (+ count 1))
+                                            count)
+                                        bds)
+                (skip-multiline-comment port count (update-bidi-state bds c)))))))
+
 ;; skip to end of comment, starting at #:  either #...<eol> or #= .... =#.
 (define (skip-comment port)
-  (define (skip-multiline-comment port count)
-    (let ((c (read-char port)))
-      (if (eof-object? c)
-          (error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
-          (begin (if (eqv? c #\=)
-                     (let ((c (peek-char port)))
-                       (if (eqv? c #\#)
-                           (begin
-                             (read-char port)
-                             (if (> count 1)
-                                 (skip-multiline-comment port (- count 1))))
-                           (skip-multiline-comment port count)))
-                     (if (eqv? c #\#)
-                         (skip-multiline-comment port
-                                                 (if (eqv? (peek-char port) #\=)
-                                                     (begin (read-char port)
-                                                            (+ count 1))
-                                                     count))
-                         (skip-multiline-comment port count)))))))
-
   (read-char port) ; read # that was already peeked
   (if (eqv? (peek-char port) #\=)
       (begin (read-char port) ; read initial =
-             (skip-multiline-comment port 1))
-      (skip-to-eol port)))
+             (skip-multiline-comment port 1 (make-bidi-state))) ; #= ... =# form: start at nesting depth 1 with a fresh bidi state
+      (skip-line-comment port))) ; #...<eol> form: stop in front of the newline (or at EOF)
 
 (define (skip-ws-and-comments port)
   (skip-ws port #t)
@@ -620,15 +636,6 @@
 (define (space-before-next-token? s)
   (or (skip-ws (ts:port s) #f) (eqv? #\newline (peek-char (ts:port s)))))
 
-;; --- misc ---
-
-; Log a syntax deprecation, attributing it to current-filename and the line
-; number of the stream `s`
-(define (parser-depwarn s what instead)
-  (let ((line (if (number? s) s (input-port-line (if (port? s) s (ts:port s)))))
-        (file current-filename))
-    (frontend-depwarn (format-syntax-deprecation what instead file line #t) file line)))
-
 ;; --- parser ---
 
 ;; parse left-to-right binary operator
@@ -1252,10 +1259,16 @@
                        `(|.| ,ex (inert ($ ,dollarex)))))
                     (else
                      (let ((name (parse-atom s #f)))
-                       (if (and (pair? name) (eq? (car name) 'macrocall))
-                           `(macrocall (|.| ,ex (quote ,(cadr name))) ; move macrocall outside by rewriting A.@B as @A.B
-                                       ,@(cddr name))
-                           `(|.| ,ex (quote ,name))))))))
+                       (cond ((and (pair? name) (eq? (car name) 'macrocall))
+                              `(macrocall (|.| ,ex (quote ,(cadr name))) ; move macrocall outside by rewriting A.@B as @A.B
+                                          ,@(cddr name)))
+                             ((and (pair? name) (eq? (car name) 'do) (eq? (caadr name) 'macrocall))
+                              `(do ,(let ((name (cadr name)))
+                                      `(macrocall (|.| ,ex (quote ,(cadr name))) ; move macrocall outside by rewriting `A.@B() do; end` as `@A.B() do; end`
+                                                  ,@(cddr name)))
+                                   ,(caddr name)))
+                           (else
+                            `(|.| ,ex (quote ,name)))))))))
             ((|'|)
              (if (not (ts:space? s))
                  (begin
@@ -1376,14 +1389,14 @@
             (if (eq? word 'quote)
                 (list 'quote blk)
                 blk))))
-       ((while)  (begin0 (list 'while (parse-cond s) (parse-block s))
+       ((while)  (begin0 (list 'while (parse-cond s) (append (parse-block s) (list (line-number-node s))))
                          (expect-end s word)))
        ((for)
         (let* ((ranges (parse-comma-separated-iters s))
                (body   (parse-block s)))
           (expect-end s word)
           `(for ,(if (length= ranges 1) (car ranges) (cons 'block ranges))
-                ,body)))
+                ,(append body (list (line-number-node s))))))
 
        ((let)
         (let ((binds (if (memv (peek-token s) '(#\newline #\;))
@@ -1509,29 +1522,33 @@
           (let loop ((nxt    (peek-token s))
                      (catchb #f)
                      (catchv #f)
-                     (finalb #f))
+                     (finalb #f)
+                     (elseb #f))
             (take-token s)
             (cond
              ((eq? nxt 'end)
               (list* 'try try-block (or catchv '(false))
                      (or catchb (if finalb '(false) (error "try without catch or finally")))
-                     (if finalb (list finalb) '())))
+                     (cond (elseb  (list (or finalb '(false)) elseb))
+                           (finalb (list finalb))
+                           (else   '()))))
              ((and (eq? nxt 'catch)
                    (not catchb))
               (let ((nl (memv (peek-token s) '(#\newline #\;))))
                 (if (eqv? (peek-token s) #\;)
                     (take-token s))
-                (if (memq (require-token s) '(end finally))
+                (if (memq (require-token s) '(end finally else))
                     (loop (require-token s)
                           '(block)
                           #f
-                          finalb)
+                          finalb
+                          elseb)
                     (let* ((loc (line-number-node s))
                            (var (if nl #f (parse-eq* s)))
                            (var? (and (not nl) (or (symbol? var)
                                                    (and (length= var 2) (eq? (car var) '$))
                                                    (error (string "invalid syntax \"catch " (deparse var) "\"")))))
-                           (catch-block (if (eq? (require-token s) 'finally)
+                           (catch-block (if (memq (require-token s) '(finally else))
                                             `(block ,(line-number-node s))
                                             (parse-block s))))
                       (loop (require-token s)
@@ -1543,16 +1560,30 @@
                                               '()
                                               (cdr catch-block))))
                             (if var? var '(false))
-                            finalb)))))
+                            finalb
+                            elseb)))))
              ((and (eq? nxt 'finally)
                    (not finalb))
-              (let ((fb (if (eq? (require-token s) 'catch)
+              (let ((fb (if (memq (require-token s) '(catch else))
+                            '(block)
+                            (parse-block s))))
+                (loop (require-token s)
+                      catchb
+                      catchv
+                      fb
+                      elseb)))
+             ((and (eq? nxt 'else)
+                   (not elseb))
+              (if (or (not catchb) finalb)
+                  (error "else inside try block needs to be immediately after catch"))
+              (let ((eb (if (eq? (require-token s) 'finally)
                             '(block)
                             (parse-block s))))
                 (loop (require-token s)
                       catchb
                       catchv
-                      fb)))
+                      finalb
+                      eb)))
              (else (expect-end-error nxt 'try))))))
        ((return)          (let ((t (peek-token s)))
                             (if (or (eqv? t #\newline) (closing-token? t))
@@ -2336,24 +2367,28 @@
   (let loop ((c (read-char p))
              (b (open-output-string))
              (e ())
-             (quotes 0))
+             (quotes 0)
+             (bds (make-bidi-state)))
     (cond
       ((eqv? c delim)
        (if (< quotes n)
-           (loop (read-char p) b e (+ quotes 1))
-           (reverse (cons (io.tostring! b) e))))
+           (loop (read-char p) b e (+ quotes 1) bds)
+           (begin
+             (if (not (bidi-state-terminated? bds))
+                 (error "unbalanced bidirectional formatting in string literal"))
+             (reverse (cons (io.tostring! b) e)))))
 
       ((= quotes 1)
        (if (not raw) (write-char #\\ b))
        (write-char delim b)
-       (loop c b e 0))
+       (loop c b e 0 (update-bidi-state bds c)))
 
       ((= quotes 2)
        (if (not raw) (write-char #\\ b))
        (write-char delim b)
        (if (not raw) (write-char #\\ b))
        (write-char delim b)
-       (loop c b e 0))
+       (loop c b e 0 (update-bidi-state bds c)))
 
       ((eqv? c #\\)
        (if raw
@@ -2366,19 +2401,19 @@
                     (io.write b (string.rep "\\" (div count 2)))
                     (if (odd? count)
                         (begin (write-char delim b)
-                               (loop (read-char p) b e 0))
-                        (loop nxch b e 0)))
+                               (loop (read-char p) b e 0 bds))
+                        (loop nxch b e 0 bds)))
                    (else
                     (io.write b (string.rep "\\" count))
                     (write-char nxch b)
-                    (loop (read-char p) b e 0))))
+                    (loop (read-char p) b e 0 (update-bidi-state bds nxch)))))
            (let ((nxch (not-eof-for delim (read-char p))))
              (write-char #\\ b)
              (if (eqv? nxch #\return)
-                 (loop nxch b e 0)
+                 (loop nxch b e 0 bds)
                  (begin
                    (write-char nxch b)
-                   (loop (read-char p) b e 0))))))
+                   (loop (read-char p) b e 0 (update-bidi-state bds nxch)))))))
 
       ((and (eqv? c #\$) (not raw))
        (let* ((ex (parse-interpolate s))
@@ -2388,7 +2423,7 @@
          (loop (read-char p)
                (open-output-string)
                (list* ex (io.tostring! b) e)
-               0)))
+               0 bds)))
 
       ; convert literal \r and \r\n in strings to \n (issue #11988)
       ((eqv? c #\return) ; \r
@@ -2396,11 +2431,11 @@
          (if (eqv? (peek-char p) #\linefeed) ; \r\n
              (read-char p))
          (write-char #\newline b)
-         (loop (read-char p) b e 0)))
+         (loop (read-char p) b e 0 bds)))
 
       (else
        (write-char (not-eof-for delim c) b)
-       (loop (read-char p) b e 0)))))
+       (loop (read-char p) b e 0 (update-bidi-state bds c))))))
 
 (define (not-eof-1 c)
   (if (eof-object? c)
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index f00ea0c9ba6d9..e71cda3f9afee 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -904,7 +904,7 @@
                                x))
                          fields)))
           (attrs (reverse attrs))
-          (defs        (filter (lambda (x) (not (effect-free? x))) defs))
+          (defs        (filter (lambda (x) (not (or (effect-free? x) (eq? (car x) 'string)))) defs))
           (locs        (if (and (pair? fields0) (linenum? (car fields0)))
                            (list (car fields0))
                            '()))
@@ -1332,25 +1332,29 @@
   (let ((tryb   (cadr e))
         (var    (caddr e))
         (catchb (cadddr e)))
-    (cond ((length= e 5)
+    (cond ((and (length> e 4) (not (equal? (caddddr e) '(false))))
            (if (has-unmatched-symbolic-goto? tryb)
                (error "goto from a try/finally block is not permitted"))
-           (let ((finalb (cadddr (cdr e))))
+           (let ((finalb (caddddr e)))
              (expand-forms
               `(tryfinally
-                ,(if (not (equal? catchb '(false)))
-                     `(try ,tryb ,var ,catchb)
-                     `(scope-block ,tryb))
+                ,(if (and (equal? catchb '(false)) (length= e 5))
+                     `(scope-block ,tryb)
+                     `(try ,tryb ,var ,catchb (false) ,@(cdddddr e)))
                 (scope-block ,finalb)))))
-          ((length= e 4)
-           (expand-forms
-            (if (symbol-like? var)
-                `(trycatch (scope-block ,tryb)
-                           (scope-block
-                            (block (= ,var (the_exception))
-                                   ,catchb)))
-                `(trycatch (scope-block ,tryb)
-                           (scope-block ,catchb)))))
+          ((length> e 3)
+           (and (length> e 6) (error "invalid \"try\" form"))
+           (let ((elseb (if (length= e 6) (cdddddr e) '())))
+             (expand-forms
+               `(,(if (null? elseb) 'trycatch 'trycatchelse)
+                 (scope-block ,tryb)
+                 (scope-block
+                   ,(if (symbol-like? var)
+                        `(scope-block
+                          (block (= ,var (the_exception))
+                                 ,catchb))
+                        `(scope-block ,catchb)))
+                 ,@elseb))))
           (else
            (error "invalid \"try\" form")))))
 
@@ -1956,21 +1960,28 @@
                 (else
                  (error (string "invalid " syntax-str " \"" (deparse el) "\""))))))))
 
-(define (expand-if e)
-  (let* ((test (cadr e))
-         (blk? (and (pair? test) (eq? (car test) 'block)))
-         (stmts (if blk? (cdr (butlast test)) '()))
-         (test  (if blk? (last test) test)))
+(define (expand-condition cnd) ; expand a condition expression; a top-level && / || test has each operand (as listed by flatten-ex) expanded separately
+  (let* ((blk? (and (pair? cnd) (eq? (car cnd) 'block))) ; is the condition wrapped in a (block ...)?
+         (stmts (if blk? (cdr (butlast cnd)) '())) ; forms of the block that precede its last form
+         (test  (if blk? (last cnd) cnd))) ; the expression that is actually tested
     (if (and (pair? test) (memq (car test) '(&& |\|\||)))
         (let* ((clauses `(,(car test) ,@(map expand-forms (cdr (flatten-ex (car test) test)))))
                (clauses (if (null? (cdr clauses))
                             (if (eq? (car clauses) '&&) '(true) '(false))
                             clauses)))
-          `(if ,(if blk?
-                    `(block ,@(map expand-forms stmts) ,clauses)
-                    clauses)
-               ,@(map expand-forms (cddr e))))
-        (cons (car e) (map expand-forms (cdr e))))))
+          (if blk? ; put the expanded leading statements back in front of the clauses
+              `(block ,@(map expand-forms stmts) ,clauses)
+              clauses))
+        (expand-forms cnd)))) ; any other condition is handed to expand-forms unchanged
+
+(define (expand-if e) ; expand the condition with expand-condition and every remaining subform with expand-forms
+  (list* (car e) (expand-condition (cadr e)) (map expand-forms (cddr e)))) ; the head symbol (car e) is kept as-is
+
+(define (expand-while e) ; rewrite e (condition in cadr, body in caddr) as a _while form nested inside break-blocks
+  `(break-block loop-exit ; outer break-block, labelled loop-exit
+                (_while ,(expand-condition (cadr e)) ; the condition goes through expand-condition, as in expand-if
+                        (break-block loop-cont ; inner break-block around the body, labelled loop-cont
+                                     (scope-block ,(blockify (expand-forms (caddr e)))))))) ; body is expanded, passed to blockify and wrapped in a scope-block
 
 (define (expand-vcat e
                      (vcat '((top vcat)))
@@ -2565,13 +2576,7 @@
 
    'if expand-if
    'elseif expand-if
-
-   'while
-   (lambda (e)
-     `(break-block loop-exit
-                   (_while ,(expand-forms (cadr e))
-                           (break-block loop-cont
-                                        (scope-block ,(blockify (expand-forms (caddr e))))))))
+   'while expand-while
 
    'break
    (lambda (e)
@@ -2734,7 +2739,7 @@
   (check-no-return expr)
   (if (has-break-or-continue? expr)
       (error "break or continue outside loop"))
-  (let ((result    (gensy))
+  (let ((result    (make-ssavalue))
         (idx       (gensy))
         (oneresult (make-ssavalue))
         (prod      (make-ssavalue))
@@ -2758,16 +2763,14 @@
     (let ((overall-itr (if (length= itrs 1) (car iv) prod)))
       `(scope-block
         (block
-         (local ,result) (local ,idx)
+         (local ,idx)
          ,.(map (lambda (v r) `(= ,v ,(caddr r))) iv itrs)
          ,.(if (length= itrs 1)
                '()
                `((= ,prod (call (top product) ,@iv))))
          (= ,isz (call (top IteratorSize) ,overall-itr))
          (= ,szunk (call (core isa) ,isz (top SizeUnknown)))
-         (if ,szunk
-             (= ,result (call (curly (core Array) ,ty 1) (core undef) 0))
-             (= ,result (call (top _array_for) ,ty ,overall-itr ,isz)))
+         (= ,result (call (top _array_for) ,ty ,overall-itr ,isz))
          (= ,idx (call (top first) (call (top LinearIndices) ,result)))
          ,(construct-loops (reverse itrs) (reverse iv))
          ,result)))))
@@ -2848,9 +2851,27 @@
   (or (valid-name? e)
       (error (string "invalid identifier name \"" e "\""))))
 
+(define (push-var! tab var val) (put! tab var (cons val (get tab var #f)))) ; push val onto the front of var's entry in tab (the tail is #f when var had no entry)
+(define (pop-var! tab var) (put! tab var (cdr (get tab var)))) ; drop the front of var's entry in tab, i.e. undo one push-var!
+
 (define (make-scope (lam #f) (args '()) (locals '()) (globals '()) (sp '()) (renames '()) (prev #f)
                     (soft? #f) (hard? #f) (implicit-globals '()) (warn-vars #f))
-  (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars))
+  (let ((tab (if prev (scope:table prev) (table)))) ; reuse the enclosing scope's lookup table, or create one when there is no prev
+    (for-each (lambda (v) (push-var! tab v v)) sp) ; pushed first, so shadowed by every kind pushed below
+    (for-each (lambda (v) (push-var! tab v v)) locals) ; locals resolve to themselves
+    (for-each (lambda (pair) (push-var! tab (car pair) (cdr pair))) renames) ; each rename pair maps its car to its cdr
+    (for-each (lambda (v) (push-var! tab v `(outerref ,v))) globals) ; globals resolve to (outerref v)
+    (for-each (lambda (v) (push-var! tab v v)) args) ; pushed last, so args end up at the front for this scope
+    (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars tab))) ; tab is stored as the final slot (index 11)
+
+(define (pop-scope! scope) ; undo the push-var! calls that make-scope performed for this scope
+  (let ((tab (scope:table scope)))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:sp scope))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:locals scope))
+    (for-each (lambda (pair) (pop-var! tab (car pair))) (scope:renames scope)) ; renames are pairs; the table key is the car
+    (for-each (lambda (v) (pop-var! tab v)) (scope:globals scope))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:args scope)))) ; one pop per variable that make-scope pushed
+
 (define (scope:lam s)     (aref s 0))
 (define (scope:args s)    (aref s 1))
 (define (scope:locals s)  (aref s 2))
@@ -2862,6 +2883,7 @@
 (define (scope:hard? s)   (aref s 8))
 (define (scope:implicit-globals s) (aref s 9))
 (define (scope:warn-vars s) (aref s 10))
+(define (scope:table s)   (aref s 11)) ; slot 11 of the scope vector: the variable lookup table
 
 (define (var-kind var scope (exclude-top-level-globals #f))
   (if scope
@@ -2899,20 +2921,10 @@
 ;; returns lambdas in the form (lambda (args...) (locals...) body)
 (define (resolve-scopes- e scope (sp '()) (loc #f))
   (cond ((symbol? e)
-         (let lookup ((scope scope))
-           (if scope
-               (cond ((memq e (scope:args scope)) e)
-                     ((memq e (scope:globals scope)) `(outerref ,e))
-                     (else
-                      (let ((r (assq e (scope:renames scope))))
-                        (cond (r (cdr r))
-                              ((memq e (scope:locals scope)) e)
-                              ((memq e (scope:sp scope)) e)
-                              (else
-                               (lookup (scope:prev scope)))))))
-               (if (underscore-symbol? e)
-                   e
-                   `(outerref ,e)))))
+         (let ((val (and scope (get (scope:table scope) e #f))))
+           (cond (val (car val))
+                 ((underscore-symbol? e) e)
+                 (else `(outerref ,e)))))
         ((or (not (pair? e)) (quoted? e) (memq (car e) '(toplevel symbolicgoto symboliclabel toplevel-only)))
          e)
         ((eq? (car e) 'global)
@@ -2950,7 +2962,9 @@
              '(true)))
         ((eq? (car e) 'lambda)
          (let* ((args (lam:argnames e))
-                (body (resolve-scopes- (lam:body e) (make-scope e args '() '() sp '() scope))))
+                (new-scope (make-scope e args '() '() sp '() scope))
+                (body (resolve-scopes- (lam:body e) new-scope)))
+           (pop-scope! new-scope)
            `(lambda ,(cadr e) ,(caddr e) ,body)))
         ((eq? (car e) 'scope-block)
          (let* ((blok            (cadr e)) ;; body of scope-block expression
@@ -3031,8 +3045,7 @@
                          (append (caddr lam) newnames newnames-def)))
            (insert-after-meta ;; return the new, expanded scope-block
             (blockify
-             (resolve-scopes- blok
-                              (make-scope lam
+             (let ((new-scope (make-scope lam
                                           '()
                                           (append locals-nondef locals-def)
                                           globals
@@ -3045,9 +3058,10 @@
                                           (if toplevel?
                                               implicit-globals
                                               (scope:implicit-globals scope))
-                                          warn-vars)
-                              '()
-                              loc))
+                                          warn-vars)))
+               (begin0
+                (resolve-scopes- blok new-scope '() loc)
+                (pop-scope! new-scope))))
             (append! (map (lambda (v) `(local ,v)) newnames)
                      (map (lambda (v) `(local-def ,v)) newnames-def)))
            ))
@@ -3117,14 +3131,20 @@
 (define (free-vars e)
   (table.keys (free-vars- e (table))))
 
-(define (analyze-vars-lambda e env captvars sp new-sp (methsig #f))
+(define (vinfo-to-table vi) ; build a fresh table mapping (car v) to v for every record v in the list vi
+  (let ((tab (table)))
+    (for-each (lambda (v) (put! tab (car v) v)) ; NOTE(review): presumably a later record with the same key replaces an earlier one - confirm put! semantics
+              vi)
+    tab))
+
+(define (analyze-vars-lambda e env captvars sp new-sp methsig tab)
   (let* ((args (lam:args e))
          (locl (caddr e))
          (allv (nconc (map arg-name args) locl))
          (fv   (let* ((fv (diff (free-vars (lam:body e)) allv))
                       ;; add variables referenced in declared types for free vars
                       (dv (apply nconc (map (lambda (v)
-                                              (let ((vi (var-info-for v env)))
+                                              (let ((vi (get tab v #f)))
                                                 (if vi (free-vars (vinfo:type vi)) '())))
                                             fv))))
                  (append (diff dv fv) fv)))
@@ -3144,15 +3164,17 @@
                                                  (not (memq (vinfo:name v) new-sp))
                                                  (not (memq (vinfo:name v) glo))))
                                 env)
-                        (map make-var-info capt-sp))))
-    (analyze-vars (lam:body e)
-                  (append vi
+                        (map make-var-info capt-sp)))
+         (new-env (append vi
                           ;; new environment: add our vars
                           (filter (lambda (v)
                                     (and (not (memq (vinfo:name v) allv))
                                          (not (memq (vinfo:name v) glo))))
-                                  env))
-                  cv (delete-duplicates (append new-sp sp)))
+                                  env))))
+    (analyze-vars (lam:body e)
+                  new-env
+                  cv (delete-duplicates (append new-sp sp))
+                  (vinfo-to-table new-env))
     ;; mark all the vars we capture as captured
     (for-each (lambda (v) (vinfo:set-capt! v #t))
               cv)
@@ -3167,36 +3189,36 @@
 ;; in-place to
 ;;   (var-info-lst captured-var-infos ssavalues static_params)
 ;; where var-info-lst is a list of var-info records
-(define (analyze-vars e env captvars sp)
+(define (analyze-vars e env captvars sp tab)
   (if (or (atom? e) (quoted? e))
       (begin
         (if (symbol? e)
-            (let ((vi (var-info-for e env)))
+            (let ((vi (get tab e #f)))
               (if vi
                   (vinfo:set-read! vi #t))))
         e)
       (case (car e)
         ((local-def) ;; a local that we know has an assignment that dominates all usages
-         (let ((vi (var-info-for (cadr e) env)))
+         (let ((vi (get tab (cadr e) #f)))
               (vinfo:set-never-undef! vi #t)))
         ((=)
-         (let ((vi (and (symbol? (cadr e)) (var-info-for (cadr e) env))))
+         (let ((vi (and (symbol? (cadr e)) (get tab (cadr e) #f))))
            (if vi ; if local or captured
                (begin (if (vinfo:asgn vi)
                           (vinfo:set-sa! vi #f)
                           (vinfo:set-sa! vi #t))
                       (vinfo:set-asgn! vi #t))))
-         (analyze-vars (caddr e) env captvars sp))
+         (analyze-vars (caddr e) env captvars sp tab))
         ((call)
-         (let ((vi (var-info-for (cadr e) env)))
+         (let ((vi (get tab (cadr e) #f)))
            (if vi
                (vinfo:set-called! vi #t))
-           (for-each (lambda (x) (analyze-vars x env captvars sp))
+           (for-each (lambda (x) (analyze-vars x env captvars sp tab))
                      (cdr e))))
         ((decl)
          ;; handle var::T declaration by storing the type in the var-info
          ;; record. for non-symbols or globals, emit a type assertion.
-         (let ((vi (var-info-for (cadr e) env)))
+         (let ((vi (get tab (cadr e) #f)))
            (if vi
                (begin (if (not (equal? (vinfo:type vi) '(core Any)))
                           (error (string "multiple type declarations for \""
@@ -3206,31 +3228,31 @@
                                          "\" declared in inner scope")))
                       (vinfo:set-type! vi (caddr e))))))
         ((lambda)
-         (analyze-vars-lambda e env captvars sp '()))
+         (analyze-vars-lambda e env captvars sp '() #f tab))
         ((with-static-parameters)
          ;; (with-static-parameters func_expr sp_1 sp_2 ...)
          (assert (eq? (car (cadr e)) 'lambda))
          (analyze-vars-lambda (cadr e) env captvars sp
-                              (cddr e)))
+                              (cddr e) #f tab))
         ((method)
          (if (length= e 2)
-             (let ((vi (var-info-for (method-expr-name e) env)))
+             (let ((vi (get tab (method-expr-name e) #f)))
                (if vi
                    (begin (if (vinfo:asgn vi)
                               (vinfo:set-sa! vi #f)
                               (vinfo:set-sa! vi #t))
                           (vinfo:set-asgn! vi #t)))
                e)
-             (begin (analyze-vars (caddr e) env captvars sp)
+             (begin (analyze-vars (caddr e) env captvars sp tab)
                     (assert (eq? (car (cadddr e)) 'lambda))
                     (analyze-vars-lambda (cadddr e) env captvars sp
                                          (method-expr-static-parameters e)
-                                         (caddr e)))))
+                                         (caddr e) tab))))
         ((module toplevel) e)
-        (else (for-each (lambda (x) (analyze-vars x env captvars sp))
+        (else (for-each (lambda (x) (analyze-vars x env captvars sp tab))
                         (cdr e))))))
 
-(define (analyze-variables! e) (analyze-vars e '() '() '()) e)
+(define (analyze-variables! e) (analyze-vars e '() '() '() (table)) e) ; entry point: empty env, captvars and sp plus an empty lookup table; returns e
 
 ;; pass 4: closure conversion
 
@@ -3498,7 +3520,7 @@ f(x) = yt(x)
          thunk with-static-parameters toplevel-only
          global globalref outerref const-if-global thismodule
          const atomic null true false ssavalue isdefined toplevel module lambda
-         error gc_preserve_begin gc_preserve_end import using export)))
+         error gc_preserve_begin gc_preserve_end import using export inline noinline)))
 
 (define (local-in? s lam)
   (or (assq s (car  (lam:vinfo lam)))
@@ -3588,7 +3610,7 @@ f(x) = yt(x)
             ((eq? (car e) 'symboliclabel)
              (kill)
              #t)
-            ((memq (car e) '(if elseif trycatch tryfinally))
+            ((memq (car e) '(if elseif trycatch tryfinally trycatchelse))
              (let ((prev (table.clone live)))
                (if (eager-any (lambda (e) (begin0 (visit e)
                                                   (kill)))
@@ -3654,7 +3676,7 @@ f(x) = yt(x)
         (and cv (vinfo:asgn cv) (vinfo:capt cv)))))
 
 (define (toplevel-preserving? e)
-  (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally))))
+  (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse)))) ; true when e is a pair whose head is one of the listed forms
 
 (define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq)
   (if toplevel
@@ -4207,6 +4229,29 @@ f(x) = yt(x)
               (emit `(= ,tmp ,cnd))
               tmp)
             cnd)))
+    (define (emit-cond cnd break-labels endl) ; emit the gotoifnot tests for cnd; returns the emitted gotoifnot statements that still target endl
+      (let* ((cnd (if (and (pair? cnd) (eq? (car cnd) 'block)) ; block condition: compile everything but its last form, then test the last form
+                       (begin (if (length> cnd 2) (compile (butlast cnd) break-labels #f #f))
+                              (last cnd))
+                       cnd))
+             (or? (and (pair? cnd) (eq? (car cnd) '|\|\||)))
+             (tests (if or?
+                        (let ((short-circuit `(goto _))) ; one shared goto statement, emitted after every non-final clause
+                          (for-each
+                            (lambda (clause)
+                              (let ((jmp (emit `(gotoifnot ,(compile-cond clause break-labels) ,endl))))
+                                (emit short-circuit)
+                                (set-car! (cddr jmp) (make&mark-label)))) ; retarget this gotoifnot to a label marked right after the shared goto
+                            (butlast (cdr cnd)))
+                          (let ((last-jmp (emit `(gotoifnot ,(compile-cond (last (cdr cnd)) break-labels) ,endl)))) ; only the final clause keeps endl as its target
+                            (set-car! (cdr short-circuit) (make&mark-label)) ; the shared goto's target becomes a label marked after the final test
+                            (list last-jmp)))
+                        (map (lambda (clause) ; && chain or single test: one gotoifnot to endl per clause
+                               (emit `(gotoifnot ,(compile-cond clause break-labels) ,endl)))
+                             (if (and (pair? cnd) (eq? (car cnd) '&&))
+                                 (cdr cnd)
+                                 (list cnd))))))
+          tests))
     (define (emit-assignment lhs rhs)
       (if rhs
           (if (valid-ir-rvalue? lhs rhs)
@@ -4347,28 +4392,7 @@ f(x) = yt(x)
                  (compile (cadr e) break-labels value tail)
                  #f))
             ((if elseif)
-             (let* ((cnd (cadr e))
-                    (cnd (if (and (pair? cnd) (eq? (car cnd) 'block))
-                              (begin (if (length> cnd 2) (compile (butlast cnd) break-labels #f #f))
-                                     (last cnd))
-                              cnd))
-                    (or? (and (pair? cnd) (eq? (car cnd) '|\|\||)))
-                    (tests (if or?
-                               (let ((short-circuit `(goto _)))
-                                 (for-each
-                                   (lambda (clause)
-                                     (let ((jmp (emit `(gotoifnot ,(compile-cond clause break-labels) _))))
-                                       (emit short-circuit)
-                                       (set-car! (cddr jmp) (make&mark-label))))
-                                   (butlast (cdr cnd)))
-                                 (let ((last-jmp (emit `(gotoifnot ,(compile-cond (last (cdr cnd)) break-labels) _))))
-                                   (set-car! (cdr short-circuit) (make&mark-label))
-                                   (list last-jmp)))
-                               (map (lambda (clause)
-                                      (emit `(gotoifnot ,(compile-cond clause break-labels) _)))
-                                    (if (and (pair? cnd) (eq? (car cnd) '&&))
-                                        (cdr cnd)
-                                        (list cnd)))))
+             (let* ((tests (emit-cond (cadr e) break-labels '_))
                     (end-jump `(goto _))
                     (val (if (and value (not tail)) (new-mutable-var) #f)))
                (let ((v1 (compile (caddr e) break-labels value tail)))
@@ -4390,9 +4414,8 @@ f(x) = yt(x)
                    val))))
             ((_while)
              (let* ((endl (make-label))
-                    (topl (make&mark-label))
-                    (test (compile-cond (cadr e) break-labels)))
-               (emit `(gotoifnot ,test ,endl))
+                    (topl (make&mark-label)))
+               (emit-cond (cadr e) break-labels endl)
                (compile (caddr e) break-labels #f #f)
                (emit `(goto ,topl))
                (mark-label endl))
@@ -4446,9 +4469,10 @@ f(x) = yt(x)
             ;; (= tok (enter L)) - push handler with catch block at label L, yielding token
             ;; (leave n) - pop N exception handlers
             ;; (pop_exception tok) - pop exception stack back to state of associated enter
-            ((trycatch tryfinally)
+            ((trycatch tryfinally trycatchelse)
              (let ((handler-token (make-ssavalue))
                    (catch (make-label))
+                   (els   (and (eq? (car e) 'trycatchelse) (make-label)))
                    (endl  (make-label))
                    (last-finally-handler finally-handler)
                    (finally           (if (eq? (car e) 'tryfinally) (new-mutable-var) #f))
@@ -4465,11 +4489,20 @@ f(x) = yt(x)
                  ;; handler block postfix
                  (if (and val v1) (emit-assignment val v1))
                  (if tail
-                     (begin (if v1 (emit-return v1))
+                     (begin (if els
+                                (begin (if (and (not val) v1) (emit v1))
+                                       (emit '(leave 1)))
+                                (if v1 (emit-return v1)))
                             (if (not finally) (set! endl #f)))
                      (begin (emit '(leave 1))
-                            (emit `(goto ,endl))))
+                            (emit `(goto ,(or els endl)))))
                  (set! handler-level (- handler-level 1))
+                 ;; emit else block
+                 (if els
+                     (begin (mark-label els)
+                            (let ((v3 (compile (cadddr e) break-labels value tail))) ;; emit else block code
+                              (if val (emit-assignment val v3)))
+                            (emit `(goto ,endl))))
                  ;; emit either catch or finally block
                  (mark-label catch)
                  (emit `(leave 1))
@@ -4592,7 +4625,7 @@ f(x) = yt(x)
                (cons (car e) args)))
 
             ;; metadata expressions
-            ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope)
+            ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline)
              (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return))))
                (cond ((eq? (car e) 'line)
                       (set! current-loc e)
@@ -4737,7 +4770,7 @@ f(x) = yt(x)
           (begin (set! linetable (cons (make-lineinfo name file line) linetable))
                  (set! current-loc 1)))
       (if (or reachable
-              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope))))
+              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline))))
           (begin (set! code (cons e code))
                  (set! i (+ i 1))
                  (set! locs (cons current-loc locs)))))
diff --git a/src/julia.expmap b/src/julia.expmap
index 5f03eccbfcad6..558dfec6bd260 100644
--- a/src/julia.expmap
+++ b/src/julia.expmap
@@ -6,15 +6,10 @@
     __stack_chk_guard;
     asprintf;
     bitvector_*;
-    ev_break;
-    get_exename;
-    getlocalip;
-    int32hash;
-    int64hash;
-    int64to32hash;
     ios_*;
-    iswprint;
+    small_arraylist_grow;
     jl_*;
+    ijl_*;
     rec_backtrace;
     julia_*;
     libsupport_init;
@@ -33,9 +28,11 @@
     utf8proc_*;
     jlbacktrace;
     jlbacktracet;
-    julia_type_to_llvm;
     _IO_stdin_used;
     __ZN4llvm23createLowerSimdLoopPassEv;
+    _Z24jl_coverage_data_pointerN4llvm9StringRefEi;
+    _Z22jl_coverage_alloc_lineN4llvm9StringRefEi;
+    _Z22jl_malloc_data_pointerN4llvm9StringRefEi;
     LLVMExtra*;
 
     /* freebsd */
diff --git a/src/julia.h b/src/julia.h
index 3455817cf1a92..26a6117822bb3 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -3,6 +3,12 @@
 #ifndef JULIA_H
 #define JULIA_H
 
+#ifdef LIBRARY_EXPORTS
+#include "jl_internal_funcs.inc"
+#undef jl_setjmp
+#undef jl_longjmp
+#endif
+
 //** Configuration options that affect the Julia ABI **//
 // if this is not defined, only individual dimension sizes are
 // stored and not total length, to save space.
@@ -57,15 +63,6 @@
 #  define JL_USED_FUNC __attribute__((used))
 #  define JL_SECTION(name) __attribute__((section(name)))
 #  define JL_THREAD_LOCAL __thread
-#elif defined(_COMPILER_MICROSOFT_)
-#  define JL_NORETURN __declspec(noreturn)
-// This is the closest I can find for __attribute__((const))
-#  define JL_CONST_FUNC __declspec(noalias)
-// Does MSVC have this?
-#  define JL_USED_FUNC
-// TODO: Figure out what to do on MSVC
-#  define JL_SECTION(x)
-#  define JL_THREAD_LOCAL __declspec(threaD)
 #else
 #  define JL_NORETURN
 #  define JL_CONST_FUNC
@@ -73,31 +70,12 @@
 #  define JL_THREAD_LOCAL
 #endif
 
-#if defined(__has_feature) // Clang flavor
-#if __has_feature(address_sanitizer)
-#define JL_ASAN_ENABLED
-#endif
-#if __has_feature(memory_sanitizer)
-#define JL_MSAN_ENABLED
-#endif
-#if __has_feature(thread_sanitizer)
-#if __clang_major__ < 11
-#error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used
-#endif
-#define JL_TSAN_ENABLED
-#endif
-#else // GCC flavor
-#if defined(__SANITIZE_ADDRESS__)
-#define JL_ASAN_ENABLED
-#endif
-#endif // __has_feature
-
 #define container_of(ptr, type, member) \
     ((type *) ((char *)(ptr) - offsetof(type, member)))
 
 typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
 
-#include "atomics.h"
+#include "julia_atomics.h"
 #include "julia_threads.h"
 #include "julia_assert.h"
 
@@ -127,7 +105,7 @@ JL_EXTENSION struct _jl_taggedvalue_t {
     // jl_value_t value;
 };
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 JL_DLLEXPORT jl_taggedvalue_t *_jl_astaggedvalue(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 #define jl_astaggedvalue(v) _jl_astaggedvalue((jl_value_t*)(v))
 jl_value_t *_jl_valueof(jl_taggedvalue_t *tv JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
@@ -146,7 +124,7 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
 {
     // Do not call this on a value that is already initialized.
     jl_taggedvalue_t *tag = jl_astaggedvalue(v);
-    jl_atomic_store_relaxed(&tag->type, (jl_value_t*)t);
+    jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t);
 }
 #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t))
 
@@ -154,8 +132,8 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
 // The string data is nul-terminated and hangs off the end of the struct.
 typedef struct _jl_sym_t {
     JL_DATA_TYPE
-    struct _jl_sym_t *left;
-    struct _jl_sym_t *right;
+    _Atomic(struct _jl_sym_t*) left;
+    _Atomic(struct _jl_sym_t*) right;
     uintptr_t hash;    // precomputed hash value
     // JL_ATTRIBUTE_ALIGN_PTRSIZE(char name[]);
 } jl_sym_t;
@@ -238,23 +216,19 @@ typedef jl_call_t *jl_callptr_t;
 
 // "speccall" calling convention signatures.
 // This describes some of the special ABI used by compiled julia functions.
-JL_DLLEXPORT extern jl_call_t jl_fptr_args;
+extern jl_call_t jl_fptr_args;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_args_addr;
 typedef jl_value_t *(*jl_fptr_args_t)(jl_value_t*, jl_value_t**, uint32_t);
 
-JL_DLLEXPORT extern jl_call_t jl_fptr_const_return;
+extern jl_call_t jl_fptr_const_return;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_const_return_addr;
 
-JL_DLLEXPORT extern jl_call_t jl_fptr_sparam;
+extern jl_call_t jl_fptr_sparam;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_sparam_addr;
 typedef jl_value_t *(*jl_fptr_sparam_t)(jl_value_t*, jl_value_t**, uint32_t, jl_svec_t*);
 
-JL_DLLEXPORT extern jl_call_t jl_fptr_interpret_call;
-
-JL_EXTENSION typedef union {
-    void* fptr;
-    jl_fptr_args_t fptr1;
-    // 2 constant
-    jl_fptr_sparam_t fptr3;
-    // 4 interpreter
-} jl_generic_specptr_t;
+extern jl_call_t jl_fptr_interpret_call;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_interpret_call_addr;
 
 typedef struct _jl_method_instance_t jl_method_instance_t;
 
@@ -274,7 +248,8 @@ typedef struct _jl_code_info_t {
     jl_value_t *ssavaluetypes; // types of ssa values (or count of them)
     jl_array_t *ssaflags; // flags associated with each statement:
         // 0 = inbounds
-        // 1,2 = <reserved> inlinehint,always-inline,noinline
+        // 1 = inline
+        // 2 = noinline
         // 3 = <reserved> strict-ieee (strictfp)
         // 4 = effect-free (may be deleted if unused)
         // 5-6 = <unused>
@@ -296,7 +271,8 @@ typedef struct _jl_code_info_t {
     uint8_t inlineable;
     uint8_t propagate_inbounds;
     uint8_t pure;
-    uint8_t aggressive_constprop;
+    // uint8 settings
+    uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none
 } jl_code_info_t;
 
 // This type describes a single method definition, and stores data
@@ -314,13 +290,13 @@ typedef struct _jl_method_t {
     jl_value_t *sig;
 
     // table of all jl_method_instance_t specializations we have
-    jl_svec_t *specializations; // allocated as [hashable, ..., NULL, linear, ....]
-    jl_array_t *speckeyset; // index lookup by hash into specializations
+    _Atomic(jl_svec_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....]
+    _Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations
 
     jl_value_t *slot_syms; // compacted list of slot names (String)
     jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table
     jl_value_t *source;  // original code template (jl_code_info_t, but may be compressed), null for builtins
-    struct _jl_method_instance_t *unspecialized;  // unspecialized executable method instance, or null
+    _Atomic(struct _jl_method_instance_t*) unspecialized;  // unspecialized executable method instance, or null
     jl_value_t *generator;  // executable code-generating function if available
     jl_array_t *roots;  // pointers in generated code (shared to reduce memory), or null
     jl_svec_t *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this
@@ -328,7 +304,7 @@ typedef struct _jl_method_t {
     // cache of specializations of this method for invoke(), i.e.
     // cases where this method was called even though it was not necessarily
     // the most specific for the argument types.
-    jl_typemap_t *invokes;
+    _Atomic(jl_typemap_t*) invokes;
 
     // A function that compares two specializations of this method, returning
     // `true` if the first signature is to be considered "smaller" than the
@@ -344,7 +320,8 @@ typedef struct _jl_method_t {
     uint8_t isva;
     uint8_t pure;
     uint8_t is_for_opaque_closure;
-    uint8_t aggressive_constprop;
+    // uint8 settings
+    uint8_t constprop;     // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none
 
 // hidden fields:
     // lock for modifications to the method
@@ -366,7 +343,7 @@ struct _jl_method_instance_t {
     jl_value_t *uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter
     jl_array_t *backedges; // list of method-instances which contain a call into this method-instance
     jl_array_t *callbacks; // list of callback functions to inform external caches about invalidations
-    struct _jl_code_instance_t *cache;
+    _Atomic(struct _jl_code_instance_t*) cache;
     uint8_t inInference; // flags to tell if inference is running on this object
 };
 
@@ -385,7 +362,7 @@ typedef struct jl_opaque_closure_t {
 typedef struct _jl_code_instance_t {
     JL_DATA_TYPE
     jl_method_instance_t *def; // method this is specialized from
-    struct _jl_code_instance_t *next; // pointer to the next cache entry
+    _Atomic(struct _jl_code_instance_t*) next; // pointer to the next cache entry
 
     // world range for which this object is valid to use
     size_t min_world;
@@ -400,9 +377,15 @@ typedef struct _jl_code_instance_t {
 
     // compilation state cache
     uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype
-    uint8_t precompile;  // if set, this will be added to the output system image
-    jl_callptr_t invoke; // jlcall entry point
-    jl_generic_specptr_t specptr; // private data for `jlcall entry point`
+    _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
+    _Atomic(jl_callptr_t) invoke; // jlcall entry point
+    union _jl_generic_specptr_t {
+        _Atomic(void*) fptr;
+        _Atomic(jl_fptr_args_t) fptr1;
+        // 2 constant
+        _Atomic(jl_fptr_sparam_t) fptr3;
+        // 4 interpreter
+    } specptr; // private data for `jlcall entry point`
 } jl_code_instance_t;
 
 // all values are callable as Functions
@@ -436,8 +419,8 @@ typedef struct {
     // `wrapper` is either the only instantiation of the type (if no parameters)
     // or a UnionAll accepting parameters to make an instantiation.
     jl_value_t *wrapper;
-    jl_svec_t *cache;        // sorted array
-    jl_svec_t *linearcache;  // unsorted array
+    _Atomic(jl_svec_t*) cache;        // sorted array
+    _Atomic(jl_svec_t*) linearcache;  // unsorted array
     struct _jl_methtable_t *mt;
     jl_array_t *partial;     // incomplete instantiations of this type
     intptr_t hash;
@@ -526,10 +509,10 @@ typedef struct {
 typedef struct {
     // not first-class
     jl_sym_t *name;
-    jl_value_t *value;
-    jl_value_t *globalref;  // cached GlobalRef for this binding
-    struct _jl_module_t *owner;  // for individual imported bindings
-    uint8_t constp;
+    _Atomic(jl_value_t*) value;
+    _Atomic(jl_value_t*) globalref;  // cached GlobalRef for this binding
+    struct _jl_module_t* owner;  // for individual imported bindings -- TODO: make _Atomic
+    uint8_t constp:1;
     uint8_t exportp:1;
     uint8_t imported:1;
     uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
@@ -550,7 +533,7 @@ typedef struct _jl_module_t {
     uint64_t build_id;
     jl_uuid_t uuid;
     size_t primary_world;
-    uint32_t counter;
+    _Atomic(uint32_t) counter;
     int32_t nospecialize;  // global bit flags: initialization for new methods
     int8_t optlevel;
     int8_t compile;
@@ -562,7 +545,7 @@ typedef struct _jl_module_t {
 // one Type-to-Value entry
 typedef struct _jl_typemap_entry_t {
     JL_DATA_TYPE
-    struct _jl_typemap_entry_t *next; // invasive linked list
+    _Atomic(struct _jl_typemap_entry_t*) next; // invasive linked list
     jl_tupletype_t *sig; // the type signature for this entry
     jl_tupletype_t *simplesig; // a simple signature for fast rejection
     jl_svec_t *guardsigs;
@@ -587,23 +570,23 @@ typedef struct _jl_typemap_level_t {
     // next split may be on Type{T} as LeafTypes then TypeName's parents up to Any
     // next split may be on LeafType
     // next split may be on TypeName
-    jl_array_t *arg1; // contains LeafType
-    jl_array_t *targ; // contains Type{LeafType}
-    jl_array_t *name1; // contains non-abstract TypeName, for parents up to (excluding) Any
-    jl_array_t *tname; // contains a dict of Type{TypeName}, for parents up to Any
+    _Atomic(jl_array_t*) arg1; // contains LeafType
+    _Atomic(jl_array_t*) targ; // contains Type{LeafType}
+    _Atomic(jl_array_t*) name1; // contains non-abstract TypeName, for parents up to (excluding) Any
+    _Atomic(jl_array_t*) tname; // contains a dict of Type{TypeName}, for parents up to Any
     // next a linear list of things too complicated at this level for analysis (no more levels)
-    jl_typemap_entry_t *linear;
+    _Atomic(jl_typemap_entry_t*) linear;
     // finally, start a new level if the type at offs is Any
-    jl_typemap_t *any;
+    _Atomic(jl_typemap_t*) any;
 } jl_typemap_level_t;
 
 // contains the TypeMap for one Type
 typedef struct _jl_methtable_t {
     JL_DATA_TYPE
     jl_sym_t *name; // sometimes a hack used by serialization to handle kwsorter
-    jl_typemap_t *defs;
-    jl_array_t *leafcache;
-    jl_typemap_t *cache;
+    _Atomic(jl_typemap_t*) defs;
+    _Atomic(jl_array_t*) leafcache;
+    _Atomic(jl_typemap_t*) cache;
     intptr_t max_args;  // max # of non-vararg arguments in a signature
     jl_value_t *kwsorter;  // keyword argument sorter function
     jl_module_t *module; // used for incremental serialization to locate original binding
@@ -686,7 +669,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_memory_exception JL_GLOBALLY_ROOTED;
@@ -726,6 +709,7 @@ extern JL_DLLIMPORT jl_typename_t *jl_llvmpointer_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_typename_t *jl_namedtuple_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_unionall_t *jl_namedtuple_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_pair_type JL_GLOBALLY_ROOTED;
 
 extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED;
@@ -776,7 +760,7 @@ struct _jl_gcframe_t {
 #define JL_GC_ENCODE_PUSHARGS(n)   (((size_t)(n))<<2)
 #define JL_GC_ENCODE_PUSH(n)       ((((size_t)(n))<<2)|1)
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 // When running with the analyzer make these real function calls, that are
 // easier to detect in the analyzer
@@ -907,7 +891,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
 #define jl_svec_set_len_unsafe(t,n) (((jl_svec_t*)(t))->length=(n))
 #define jl_svec_data(t) ((jl_value_t**)((char*)(t) + sizeof(jl_svec_t)))
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 STATIC_INLINE jl_value_t *jl_svecset(
     void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT,
@@ -919,7 +903,7 @@ STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NO
     assert(i < jl_svec_len(t));
     // while svec is supposedly immutable, in practice we sometimes publish it first
     // and set the values lazily
-    return jl_atomic_load_relaxed(jl_svec_data(t) + i);
+    return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)jl_svec_data(t) + i);
 }
 STATIC_INLINE jl_value_t *jl_svecset(
     void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT,
@@ -951,7 +935,7 @@ JL_DLLEXPORT size_t jl_array_len_(jl_array_t *a);
 
 JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT;
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 jl_value_t **jl_array_ptr_data(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 STATIC_INLINE jl_value_t *jl_array_ptr_set(
@@ -963,7 +947,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i)
 {
     assert(((jl_array_t*)a)->flags.ptrarray);
     assert(i < jl_array_len(a));
-    return jl_atomic_load_relaxed(((jl_value_t**)(jl_array_data(a))) + i);
+    return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i);
 }
 STATIC_INLINE jl_value_t *jl_array_ptr_set(
     void *a JL_ROOTING_ARGUMENT, size_t i,
@@ -971,7 +955,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set(
 {
     assert(((jl_array_t*)a)->flags.ptrarray);
     assert(i < jl_array_len(a));
-    jl_atomic_store_relaxed(((jl_value_t**)(jl_array_data(a))) + i, (jl_value_t*)x);
+    jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i, (jl_value_t*)x);
     if (x) {
         if (((jl_array_t*)a)->flags.how == 3) {
             a = jl_array_data_owner(a);
@@ -1381,6 +1365,8 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p
 JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n);
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1);
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2);
+JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt);
+JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name,
@@ -1403,7 +1389,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *src);
 JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb);
 JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb);
 JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
-JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettype, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...);
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na);
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup);
@@ -1425,7 +1411,7 @@ JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len);
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void);
 JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
                                                  jl_module_t *module,
-                                                 jl_value_t **bp, jl_value_t *bp_owner,
+                                                 _Atomic(jl_value_t*) *bp, jl_value_t *bp_owner,
                                                  jl_binding_t *bnd);
 JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
 JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo);
@@ -1494,8 +1480,8 @@ JL_DLLEXPORT void        jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *r
 JL_DLLEXPORT int         jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld);
 JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a);
-int jl_uniontype_size(jl_value_t *ty, size_t *sz) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al) JL_NOTSAFEPOINT;
+int jl_uniontype_size(jl_value_t *ty, size_t *sz);
+JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al);
 
 // arrays
 JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *dims);
@@ -1725,11 +1711,11 @@ enum JL_RTLD_CONSTANT {
 };
 #define JL_RTLD_DEFAULT (JL_RTLD_LAZY | JL_RTLD_DEEPBIND)
 
-typedef void *jl_uv_libhandle; // compatible with dlopen (void*) / LoadLibrary (HMODULE)
-JL_DLLEXPORT jl_uv_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err);
-JL_DLLEXPORT jl_uv_libhandle jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_dlclose(jl_uv_libhandle handle) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_dlsym(jl_uv_libhandle handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT;
+typedef void *jl_libhandle; // compatible with dlopen (void*) / LoadLibrary (HMODULE)
+JL_DLLEXPORT jl_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err);
+JL_DLLEXPORT jl_libhandle jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_dlclose(jl_libhandle handle) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_dlsym(jl_libhandle handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT;
 
 // evaluation
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v);
@@ -1833,36 +1819,26 @@ typedef struct _jl_task_t {
     uint64_t rngState1;
     uint64_t rngState2;
     uint64_t rngState3;
-    uint8_t _state;
+    _Atomic(uint8_t) _state;
     uint8_t sticky; // record whether this Task can be migrated to a new thread
-    uint8_t _isexception; // set if `result` is an exception to throw or that we exited with
+    _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
 
 // hidden state:
     // id of owning thread - does not need to be defined until the task runs
-    int16_t tid;
+    _Atomic(int16_t) tid;
     // multiqueue priority
     int16_t prio;
     // saved gc stack top for context switches
     jl_gcframe_t *gcstack;
     size_t world_age;
     // quick lookup for current ptls
-    jl_tls_states_t *ptls; // == jl_all_tls_states[tid]
+    jl_ptls_t ptls; // == jl_all_tls_states[tid]
     // saved exception stack
     jl_excstack_t *excstack;
     // current exception handler
     jl_handler_t *eh;
-
-    union {
-        jl_ucontext_t ctx; // saved thread state
-#ifdef _OS_WINDOWS_
-        jl_ucontext_t copy_stack_ctx;
-#else
-        struct jl_stack_context_t copy_stack_ctx;
-#endif
-    };
-#if defined(JL_TSAN_ENABLED)
-    void *tsan_state;
-#endif
+    // saved thread state
+    jl_ucontext_t ctx;
     void *stkbuf; // malloc'd memory (either copybuf or stack)
     size_t bufsz; // actual sizeof stkbuf
     unsigned int copy_stack:31; // sizeof stack for copybuf
@@ -1884,7 +1860,7 @@ JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e);
 JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
 
-#include "locks.h"   // requires jl_task_t definition
+#include "julia_locks.h"   // requires jl_task_t definition
 
 JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh);
 JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh);
@@ -1894,16 +1870,27 @@ JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT;
 
 #if defined(_OS_WINDOWS_)
 #if defined(_COMPILER_GCC_)
-int __attribute__ ((__nothrow__,__returns_twice__)) (jl_setjmp)(jmp_buf _Buf);
+JL_DLLEXPORT int __attribute__ ((__nothrow__,__returns_twice__)) (jl_setjmp)(jmp_buf _Buf);
 __declspec(noreturn) __attribute__ ((__nothrow__)) void (jl_longjmp)(jmp_buf _Buf, int _Value);
+JL_DLLEXPORT int __attribute__ ((__nothrow__,__returns_twice__)) (ijl_setjmp)(jmp_buf _Buf);
+__declspec(noreturn) __attribute__ ((__nothrow__)) void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #else
-int (jl_setjmp)(jmp_buf _Buf);
+JL_DLLEXPORT int (jl_setjmp)(jmp_buf _Buf);
 void (jl_longjmp)(jmp_buf _Buf, int _Value);
+JL_DLLEXPORT int (ijl_setjmp)(jmp_buf _Buf);
+void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #endif
+#ifdef LIBRARY_EXPORTS
+#define jl_setjmp_f ijl_setjmp
+#define jl_setjmp_name "ijl_setjmp"
+#define jl_setjmp(a,b) ijl_setjmp(a)
+#define jl_longjmp(a,b) ijl_longjmp(a,b)
+#else
 #define jl_setjmp_f jl_setjmp
 #define jl_setjmp_name "jl_setjmp"
 #define jl_setjmp(a,b) jl_setjmp(a)
 #define jl_longjmp(a,b) jl_longjmp(a,b)
+#endif
 #elif defined(_OS_EMSCRIPTEN_)
 #define jl_setjmp(a,b) setjmp(a)
 #define jl_longjmp(a,b) longjmp(a,b)
@@ -1923,7 +1910,7 @@ void (jl_longjmp)(jmp_buf _Buf, int _Value);
 #endif
 
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 // This is hard. Ideally we'd teach the static analyzer about the extra control
 // flow edges. But for now, just hide this as best we can
@@ -1948,24 +1935,32 @@ extern int had_exception;
 
 // I/O system -----------------------------------------------------------------
 
-#define JL_STREAM uv_stream_t
+struct uv_loop_s;
+struct uv_handle_s;
+struct uv_stream_s;
+#ifdef _OS_WINDOWS_
+typedef HANDLE jl_uv_os_fd_t;
+#else
+typedef int jl_uv_os_fd_t;
+#endif
+#define JL_STREAM struct uv_stream_s
 #define JL_STDOUT jl_uv_stdout
 #define JL_STDERR jl_uv_stderr
 #define JL_STDIN  jl_uv_stdin
 
 JL_DLLEXPORT int jl_process_events(void);
 
-JL_DLLEXPORT uv_loop_t *jl_global_event_loop(void);
+JL_DLLEXPORT struct uv_loop_s *jl_global_event_loop(void);
 
-JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle);
+JL_DLLEXPORT void jl_close_uv(struct uv_handle_s *handle);
 
 JL_DLLEXPORT jl_array_t *jl_take_buffer(ios_t *s);
 
 typedef struct {
     void *data;
-    uv_loop_t *loop;
-    uv_handle_type type;
-    uv_os_fd_t file;
+    struct uv_loop_s *loop;
+    int type; // enum uv_handle_type
+    jl_uv_os_fd_t file;
 } jl_uv_file_t;
 
 #ifdef __GNUC__
@@ -1975,10 +1970,10 @@ typedef struct {
 #define _JL_FORMAT_ATTR(type, str, arg)
 #endif
 
-JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n);
-JL_DLLEXPORT int jl_printf(uv_stream_t *s, const char *format, ...)
+JL_DLLEXPORT void jl_uv_puts(struct uv_stream_s *stream, const char *str, size_t n);
+JL_DLLEXPORT int jl_printf(struct uv_stream_s *s, const char *format, ...)
     _JL_FORMAT_ATTR(printf, 2, 3);
-JL_DLLEXPORT int jl_vprintf(uv_stream_t *s, const char *format, va_list args)
+JL_DLLEXPORT int jl_vprintf(struct uv_stream_s *s, const char *format, va_list args)
     _JL_FORMAT_ATTR(printf, 2, 0);
 JL_DLLEXPORT void jl_safe_printf(const char *str, ...) JL_NOTSAFEPOINT
     _JL_FORMAT_ATTR(printf, 1, 2);
@@ -2003,56 +1998,11 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT; // deprecated
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT;
 
 // julia options -----------------------------------------------------------
-// NOTE: This struct needs to be kept in sync with JLOptions type in base/options.jl
-typedef struct {
-    int8_t quiet;
-    int8_t banner;
-    const char *julia_bindir;
-    const char *julia_bin;
-    const char **cmds;
-    const char *image_file;
-    const char *cpu_target;
-    int32_t nthreads;
-    int32_t nprocs;
-    const char *machine_file;
-    const char *project;
-    int8_t isinteractive;
-    int8_t color;
-    int8_t historyfile;
-    int8_t startupfile;
-    int8_t compile_enabled;
-    int8_t code_coverage;
-    int8_t malloc_log;
-    int8_t opt_level;
-    int8_t opt_level_min;
-    int8_t debug_level;
-    int8_t check_bounds;
-    int8_t depwarn;
-    int8_t warn_overwrite;
-    int8_t can_inline;
-    int8_t polly;
-    const char *trace_compile;
-    int8_t fast_math;
-    int8_t worker;
-    const char *cookie;
-    int8_t handle_signals;
-    int8_t use_sysimage_native_code;
-    int8_t use_compiled_modules;
-    const char *bindto;
-    const char *outputbc;
-    const char *outputunoptbc;
-    const char *outputo;
-    const char *outputasm;
-    const char *outputji;
-    const char *output_code_coverage;
-    int8_t incremental;
-    int8_t image_file_specified;
-    int8_t warn_scope;
-    int8_t image_codegen;
-    int8_t rr_detach;
-} jl_options_t;
-
-extern JL_DLLEXPORT jl_options_t jl_options;
+
+#include "jloptions.h"
+
+extern JL_DLLIMPORT jl_options_t jl_options;
+
 JL_DLLEXPORT ssize_t jl_sizeof_jl_options(void);
 
 // Parse an argc/argv pair to extract general julia options, passing back out
@@ -2177,7 +2127,6 @@ typedef struct {
     // generic_context(f, args...) instead of f(args...).
     jl_value_t *generic_context;
 } jl_cgparams_t;
-extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
 extern JL_DLLEXPORT int jl_default_debug_info_kind;
 
 #ifdef __cplusplus
diff --git a/src/julia_assert.h b/src/julia_assert.h
index 6cf89d0e470a5..4b120fd9e845b 100644
--- a/src/julia_assert.h
+++ b/src/julia_assert.h
@@ -21,7 +21,10 @@
 #  endif
 #else
 #  ifdef JL_NDEBUG
-#    undef JL_NDEBUG
+#    define NDEBUG
+#    include <assert.h>
+#    undef NDEBUG
+#  else
+#    include <assert.h>
 #  endif
-#  include <assert.h>
 #endif
diff --git a/src/julia_atomics.h b/src/julia_atomics.h
new file mode 100644
index 0000000000000..1f1a7a46cc9b6
--- /dev/null
+++ b/src/julia_atomics.h
@@ -0,0 +1,310 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_ATOMICS_H
+#define JL_ATOMICS_H
+
+#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
+#  error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
+#endif
+
+// Low-level atomic operations
+#ifdef __cplusplus
+#include <atomic>
+using std::memory_order_relaxed;
+using std::memory_order_consume;
+using std::memory_order_acquire;
+using std::memory_order_release;
+using std::memory_order_acq_rel;
+using std::memory_order_seq_cst;
+using std::atomic_thread_fence;
+using std::atomic_signal_fence;
+using std::atomic_load;
+using std::atomic_load_explicit;
+using std::atomic_store;
+using std::atomic_store_explicit;
+using std::atomic_fetch_add;
+using std::atomic_fetch_add_explicit;
+using std::atomic_fetch_and;
+using std::atomic_fetch_and_explicit;
+using std::atomic_fetch_or;
+using std::atomic_fetch_or_explicit;
+using std::atomic_compare_exchange_strong;
+using std::atomic_compare_exchange_strong_explicit;
+using std::atomic_exchange;
+using std::atomic_exchange_explicit;
+extern "C" {
+#define _Atomic(T) std::atomic<T>
+#else
+#include <stdatomic.h>
+#endif
+#include <signal.h> // for sig_atomic_t
+
+#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
+#  include <immintrin.h>
+#endif
+
+enum jl_memory_order {
+    jl_memory_order_unspecified = -2,
+    jl_memory_order_invalid = -1,
+    jl_memory_order_notatomic = 0,
+    jl_memory_order_unordered, // 1 (implicit values count up from notatomic)
+    jl_memory_order_monotonic, // 2
+    jl_memory_order_consume, // 3
+    jl_memory_order_acquire, // 4
+    jl_memory_order_release, // 5
+    jl_memory_order_acq_rel, // 6
+    jl_memory_order_seq_cst // 7
+};
+
+/**
+ * Thread synchronization primitives:
+ *
+ * These roughly follow the c11/c++11 memory model and they act as memory
+ * barriers at both the compiler level and the hardware level.
+ * The only exception is the GC safepoint and GC state transitions for which
+ * we use only a compiler (signal) barrier and use the signal handler to do the
+ * synchronization in order to lower the mutator overhead as much as possible.
+ *
+ * We use the compiler intrinsics to implement a similar API to the c11/c++11
+ * one instead of using it directly because we need interoperability between
+ * code written in different languages. The current c++ standard (c++14) does
+ * not allow using c11 atomic functions or types and there's currently no
+ * guarantee that the two types are compatible (although most of them probably
+ * are). We also need to access these atomic variables from the LLVM JIT code
+ * which is very hard unless the layout of the object is fully specified.
+ */
+#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
+#define jl_fence_release() atomic_thread_fence(memory_order_release)
+#define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)
+
+#ifdef __cplusplus
+}
+// implicit conversion wasn't correctly specified until 2017, so many compilers
+// get this wrong; thus we include the correct definitions here (with implicit
+// conversion), instead of using the macro version
+template<class T>
+T jl_atomic_load(std::atomic<T> *ptr)
+{
+     return std::atomic_load<T>(ptr);
+}
+template<class T>
+T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
+{
+     return std::atomic_load_explicit<T>(ptr, order);
+}
+#define jl_atomic_load_relaxed(ptr) jl_atomic_load_explicit(ptr, memory_order_relaxed)
+#define jl_atomic_load_acquire(ptr) jl_atomic_load_explicit(ptr, memory_order_acquire)
+template<class T, class S>
+void jl_atomic_store(std::atomic<T> *ptr, S desired)
+{
+     std::atomic_store<T>(ptr, desired);
+}
+template<class T, class S>
+void jl_atomic_store_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
+{
+     std::atomic_store_explicit<T>(ptr, desired, order);
+}
+#define jl_atomic_store_relaxed(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_relaxed)
+#define jl_atomic_store_release(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_release)
+template<class T, class S>
+T jl_atomic_fetch_add(std::atomic<T> *ptr, S val)
+{
+     return std::atomic_fetch_add<T>(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_add_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+     return std::atomic_fetch_add_explicit<T>(ptr, val, order);
+}
+#define jl_atomic_fetch_add_relaxed(ptr, val) jl_atomic_fetch_add_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_fetch_and(std::atomic<T> *ptr, S val)
+{
+     return std::atomic_fetch_and<T>(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_and_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+     return std::atomic_fetch_and_explicit<T>(ptr, val, order);
+}
+#define jl_atomic_fetch_and_relaxed(ptr, val) jl_atomic_fetch_and_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_fetch_or(std::atomic<T> *ptr, S val)
+{
+     return std::atomic_fetch_or<T>(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_or_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+     return std::atomic_fetch_or_explicit<T>(ptr, val, order);
+}
+#define jl_atomic_fetch_or_relaxed(ptr, val) jl_atomic_fetch_or_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+bool jl_atomic_cmpswap(std::atomic<T> *ptr, T *expected, S val)
+{
+     return std::atomic_compare_exchange_strong<T>(ptr, expected, val);
+}
+template<class T, class S> // strong compare-exchange; `order` is used for both the success and the failure case
+bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
+{
+     return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, order, order);
+}
+#define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed) // same (obj, expected, desired) arity as the C macro
+template<class T, class S>
+T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
+{
+     return std::atomic_exchange<T>(ptr, desired);
+}
+template<class T, class S>
+T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
+{
+     return std::atomic_exchange_explicit<T>(ptr, desired, order);
+}
+#define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
+extern "C" {
+#else
+
+#  define jl_atomic_fetch_add_relaxed(obj, arg)         \
+    atomic_fetch_add_explicit(obj, arg, memory_order_relaxed)
+#  define jl_atomic_fetch_add(obj, arg)                 \
+    atomic_fetch_add(obj, arg)
+#  define jl_atomic_fetch_and_relaxed(obj, arg)         \
+    atomic_fetch_and_explicit(obj, arg, memory_order_relaxed)
+#  define jl_atomic_fetch_and(obj, arg)                 \
+    atomic_fetch_and(obj, arg)
+#  define jl_atomic_fetch_or_relaxed(obj, arg)          \
+    atomic_fetch_or_explicit(obj, arg, memory_order_relaxed) // C11 enumerator (not the compiler builtin), as in the sibling *_relaxed macros
+#  define jl_atomic_fetch_or(obj, arg)                  \
+    atomic_fetch_or(obj, arg)
+#  define jl_atomic_cmpswap(obj, expected, desired)     \
+    atomic_compare_exchange_strong(obj, expected, desired)
+#  define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
+    atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
+// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
+#  define jl_atomic_exchange(obj, desired)       \
+    atomic_exchange(obj, desired)
+#  define jl_atomic_exchange_relaxed(obj, desired)      \
+    atomic_exchange_explicit(obj, desired, memory_order_relaxed)
+#  define jl_atomic_store(obj, val)                     \
+    atomic_store(obj, val)
+#  define jl_atomic_store_relaxed(obj, val)             \
+    atomic_store_explicit(obj, val, memory_order_relaxed)
+
+#  if defined(__clang__) || !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
+// Clang doesn't have this bug...
+#    define jl_atomic_store_release(obj, val)           \
+    atomic_store_explicit(obj, val, memory_order_release)
+#  else
+// Workaround a GCC bug when using store with release order by using the
+// stronger version instead.
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
+// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
+#    define jl_atomic_store_release(obj, val) do {      \
+        jl_signal_fence();                              \
+        atomic_store_explicit(obj, val, memory_order_release);   \
+    } while (0)
+#  endif
+#  define jl_atomic_load(obj)                   \
+    atomic_load(obj)
+#  define jl_atomic_load_acquire(obj)           \
+    atomic_load_explicit(obj, memory_order_acquire)
+#ifdef _COMPILER_TSAN_ENABLED_
+// For the sake of tsan, call these loads consume ordering since they will act
+// as such on the processors we support while normally, the compiler would
+// upgrade this to acquire ordering, which is stronger (and slower) than we want.
+#  define jl_atomic_load_relaxed(obj)           \
+    atomic_load_explicit(obj, memory_order_consume)
+#else
+#  define jl_atomic_load_relaxed(obj)           \
+    atomic_load_explicit(obj, memory_order_relaxed)
+#endif
+#endif
+
+#ifdef __clang_gcanalyzer__
+// for the purposes of the GC analyzer, we can turn these into non-atomic
+// expressions with similar properties (for the sake of the analyzer, we don't
+// care if it is an exact match for behavior)
+
+#undef _Atomic
+#define _Atomic(T) T
+
+#undef jl_atomic_exchange
+#undef jl_atomic_exchange_relaxed
+#define jl_atomic_exchange(obj, desired) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *p__analyzer__ = (desired); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_exchange_relaxed jl_atomic_exchange
+
+#undef jl_atomic_cmpswap
+#undef jl_atomic_cmpswap_relaxed
+#define jl_atomic_cmpswap(obj, expected, desired) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            __typeof__((expected)) x__analyzer__ = (expected); \
+            int eq__analyzer__ = memcmp(&temp__analyzer__, x__analyzer__, sizeof(temp__analyzer__)) == 0; \
+            if (eq__analyzer__) \
+                *p__analyzer__ = (desired); \
+            else \
+                *x__analyzer__ = temp__analyzer__; \
+            eq__analyzer__; \
+        }))
+#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
+
+#undef jl_atomic_store
+#undef jl_atomic_store_release
+#undef jl_atomic_store_relaxed
+#define jl_atomic_store(obj, val)         (*(obj) = (val))
+#define jl_atomic_store_release jl_atomic_store
+#define jl_atomic_store_relaxed jl_atomic_store
+
+#undef jl_atomic_load
+#undef jl_atomic_load_acquire
+#undef jl_atomic_load_relaxed
+#define jl_atomic_load(obj)         (*(obj))
+#define jl_atomic_load_acquire jl_atomic_load
+#define jl_atomic_load_relaxed jl_atomic_load
+
+#undef jl_atomic_fetch_add
+#undef jl_atomic_fetch_and
+#undef jl_atomic_fetch_or
+#undef jl_atomic_fetch_add_relaxed
+#undef jl_atomic_fetch_and_relaxed
+#undef jl_atomic_fetch_or_relaxed
+#define jl_atomic_fetch_add(obj, val) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *(p__analyzer__) = temp__analyzer__ + (val); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_fetch_and(obj, val) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *(p__analyzer__) = temp__analyzer__ & (val); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_fetch_or(obj, val) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *(p__analyzer__) = temp__analyzer__ | (val); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_fetch_add_relaxed jl_atomic_fetch_add
+#define jl_atomic_fetch_and_relaxed jl_atomic_fetch_and
+#define jl_atomic_fetch_or_relaxed jl_atomic_fetch_or
+
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_ATOMICS_H
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 44bab66b083f9..ee46eb88297af 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -4,7 +4,11 @@
 #define JL_INTERNAL_H
 
 #include "options.h"
-#include "locks.h"
+#include "julia_locks.h"
+#include "support/utils.h"
+#include "support/hashing.h"
+#include "support/ptrhash.h"
+#include "support/strtod.h"
 #include <uv.h>
 #if !defined(_WIN32)
 #include <unistd.h>
@@ -15,11 +19,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-#ifdef JL_ASAN_ENABLED
+#ifdef _COMPILER_ASAN_ENABLED_
 void __sanitizer_start_switch_fiber(void**, const void*, size_t);
 void __sanitizer_finish_switch_fiber(void*, const void**, size_t*);
 #endif
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 void *__tsan_create_fiber(unsigned flags);
 void *__tsan_get_current_fiber(void);
 void __tsan_destroy_fiber(void *fiber);
@@ -99,13 +103,15 @@ void __tsan_switch_to_fiber(void *fiber, unsigned flags);
 #endif
 #endif
 
+extern JL_DLLEXPORT uintptr_t __stack_chk_guard;
+
 // If this is detected in a backtrace of segfault, it means the functions
 // that use this value must be reworked into their async form with cb arg
 // provided and with JL_UV_LOCK used around the calls
 static uv_loop_t *const unused_uv_loop_arg = (uv_loop_t *)0xBAD10;
 
 extern jl_mutex_t jl_uv_mutex;
-extern int jl_uv_n_waiters;
+extern _Atomic(int) jl_uv_n_waiters;
 void JL_UV_LOCK(void);
 #define JL_UV_UNLOCK() JL_UNLOCK(&jl_uv_mutex)
 
@@ -154,14 +160,11 @@ static inline uint64_t cycleclock(void)
 
 #include "timing.h"
 
-extern uint8_t *jl_measure_compile_time;
-extern uint64_t *jl_cumulative_compile_time;
+// Global *atomic* integers controlling *process-wide* measurement of compilation time.
+extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
+extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
 
-#ifdef _COMPILER_MICROSOFT_
-#  define jl_return_address() ((uintptr_t)_ReturnAddress())
-#else
-#  define jl_return_address() ((uintptr_t)__builtin_return_address(0))
-#endif
+#define jl_return_address() ((uintptr_t)__builtin_return_address(0))
 
 STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 {
@@ -183,14 +186,16 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
 {
     size_t i;
+    _Atomic(void*) *srcpa = (_Atomic(void*)*)srcp;
+    _Atomic(void*) *dstpa = (_Atomic(void*)*)dstp;
     if (dstp < srcp || dstp > srcp + n) {
         for (i = 0; i < n; i++) {
-            jl_atomic_store_relaxed(dstp + i, jl_atomic_load_relaxed(srcp + i));
+            jl_atomic_store_relaxed(dstpa + i, jl_atomic_load_relaxed(srcpa + i));
         }
     }
     else {
         for (i = 0; i < n; i++) {
-            jl_atomic_store_relaxed(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1));
+            jl_atomic_store_relaxed(dstpa + n - i - 1, jl_atomic_load_relaxed(srcpa + n - i - 1));
         }
     }
 }
@@ -205,7 +210,7 @@ static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOT
 // useful constants
 extern jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED;
 extern jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED;
-extern size_t jl_world_counter;
+extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter;
 
 typedef void (*tracer_cb)(jl_value_t *tracee);
 extern tracer_cb jl_newmeth_tracer;
@@ -213,10 +218,10 @@ void jl_call_tracer(tracer_cb callback, jl_value_t *tracee);
 void print_func_loc(JL_STREAM *s, jl_method_t *m);
 extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
 
-extern size_t jl_page_size;
+extern JL_DLLEXPORT size_t jl_page_size;
 extern jl_function_t *jl_typeinf_func;
-extern size_t jl_typeinf_world;
-extern jl_typemap_entry_t *call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT size_t jl_typeinf_world;
+extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
 extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 
 JL_DLLEXPORT extern int jl_lineno;
@@ -225,8 +230,8 @@ JL_DLLEXPORT extern const char *jl_filename;
 JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
                                           int osize);
 JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t allocsz);
-int jl_gc_classify_pools(size_t sz, int *osize);
-extern jl_mutex_t gc_perm_lock;
+JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize);
+extern uv_mutex_t gc_perm_lock;
 void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
     unsigned align, unsigned offset) JL_NOTSAFEPOINT;
 void *jl_gc_perm_alloc(size_t sz, int zero,
@@ -406,7 +411,7 @@ jl_value_t *jl_permbox64(jl_datatype_t *t, int64_t x);
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
 
 // this sizeof(__VA_ARGS__) trick can't be computed until C11, but that only matters to Clang in some situations
-#if !defined(__clang_analyzer__) && !(defined(JL_ASAN_ENABLED) || defined(JL_TSAN_ENABLED))
+#if !defined(__clang_analyzer__) && !(defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
 #ifdef __GNUC__
 #define jl_perm_symsvec(n, ...) \
     (jl_perm_symsvec)(__extension__({                                         \
@@ -415,6 +420,16 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
                 "Number of passed arguments does not match expected number"); \
             n;                                                                \
         }), __VA_ARGS__)
+#ifdef jl_svec
+#undef jl_svec
+#define jl_svec(n, ...) \
+    (ijl_svec)(__extension__({                                                \
+            static_assert(                                                    \
+                n == sizeof((void *[]){ __VA_ARGS__ })/sizeof(void *),        \
+                "Number of passed arguments does not match expected number"); \
+            n;                                                                \
+        }), __VA_ARGS__)
+#else
 #define jl_svec(n, ...) \
     (jl_svec)(__extension__({                                                 \
             static_assert(                                                    \
@@ -424,6 +439,7 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
         }), __VA_ARGS__)
 #endif
 #endif
+#endif
 
 jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz);
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
@@ -457,8 +473,8 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT
     }
 }
 
-void gc_debug_print_status(void);
-void gc_debug_critical_error(void);
+void jl_gc_debug_print_status(void);
+JL_DLLEXPORT void jl_gc_debug_critical_error(void);
 void jl_print_gc_stats(JL_STREAM *s);
 void jl_gc_reset_alloc_count(void);
 uint32_t jl_get_gs_ctr(void);
@@ -474,11 +490,26 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO
     return v;
 }
 
+// -- helper types -- //
+
+typedef struct {
+    uint8_t pure:1;
+    uint8_t propagate_inbounds:1;
+    uint8_t inlineable:1;
+    uint8_t inferred:1;
+    uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+} jl_code_info_flags_bitfield_t;
+
+typedef union {
+    jl_code_info_flags_bitfield_t bits;
+    uint8_t packed;
+} jl_code_info_flags_t;
 
 // -- functions -- //
 
-jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
-jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
+// jl_code_info_flags_t code_info_flags(uint8_t pure, uint8_t propagate_inbounds, uint8_t inlineable, uint8_t inferred, uint8_t constprop);
+JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
+JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
 jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
 void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec);
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
@@ -499,7 +530,7 @@ int jl_valid_type_param(jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs);
 
 void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world);
-jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
+JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
 
 JL_DLLEXPORT jl_value_t *jl_get_keyword_sorter(jl_value_t *f);
 JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
@@ -514,7 +545,7 @@ void jl_install_default_signal_handlers(void);
 void restore_signals(void);
 void jl_install_thread_signal_handler(jl_ptls_t ptls);
 
-jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b);
+JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b);
 
 extern uv_loop_t *jl_io_loop;
 void jl_uv_flush(uv_stream_t *stream);
@@ -529,7 +560,7 @@ typedef struct jl_typeenv_t {
 int jl_tuple_isa(jl_value_t **child, size_t cl, jl_datatype_t *pdt);
 int jl_tuple1_isa(jl_value_t *child1, jl_value_t **child, size_t cl, jl_datatype_t *pdt);
 
-int jl_has_intersect_type_not_kind(jl_value_t *t);
+JL_DLLEXPORT int jl_has_intersect_type_not_kind(jl_value_t *t);
 int jl_subtype_invariant(jl_value_t *a, jl_value_t *b, int ta);
 int jl_has_concrete_subtype(jl_value_t *typ);
 jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf);
@@ -539,8 +570,8 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
 int jl_obviously_unequal(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v);
 int jl_has_fixed_layout(jl_datatype_t *t);
-int jl_struct_try_layout(jl_datatype_t *dt);
-int jl_type_mappable_to_c(jl_value_t *ty);
+JL_DLLEXPORT int jl_struct_try_layout(jl_datatype_t *dt);
+JL_DLLEXPORT int jl_type_mappable_to_c(jl_value_t *ty);
 jl_svec_t *jl_outer_unionall_vars(jl_value_t *u);
 jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty);
 jl_value_t *jl_type_intersection_env(jl_value_t *a, jl_value_t *b, jl_svec_t **penv);
@@ -551,16 +582,16 @@ JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b);
 jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n);
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals);
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val);
-jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
-jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
+JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
 int jl_count_union_components(jl_value_t *v);
-jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
 int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT;
 jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module,
                                    jl_datatype_t *super, jl_svec_t *parameters);
 jl_datatype_t *jl_new_uninitialized_datatype(void);
 void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable);
-jl_datatype_t *jl_wrap_Type(jl_value_t *t);  // x -> Type{x}
+JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t);  // x -> Type{x}
 jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n);
 void jl_reinstantiate_inner_types(jl_datatype_t *t);
 jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type);
@@ -574,9 +605,9 @@ jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module);
 jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st);
 void jl_foreach_reachable_mtable(void (*visit)(jl_methtable_t *mt, void *env), void *env);
 void jl_init_main_module(void);
-int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
 jl_array_t *jl_get_loaded_modules(void);
-int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
+JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
 
 jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded);
 
@@ -586,7 +617,7 @@ jl_value_t *jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t *src);
 jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e,
                                           jl_code_info_t *src,
                                           jl_svec_t *sparam_vals);
-int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT;
 jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule);
 
 jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world);
@@ -604,14 +635,14 @@ JL_DLLEXPORT jl_methtable_t *jl_method_get_table(
     jl_method_t *method) JL_NOTSAFEPOINT;
 jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT);
 
-int jl_pointer_egal(jl_value_t *t);
-jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_pointer_egal(jl_value_t *t);
+JL_DLLEXPORT jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 void jl_compute_field_offsets(jl_datatype_t *st);
 jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
                                              int isunboxed, int hasptr, int isunion, int elsz);
 void jl_module_run_initializer(jl_module_t *m);
 jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
-void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b);
+JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b);
 extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED;
 extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
 int jl_compile_extern_c(void *llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
@@ -688,17 +719,6 @@ STATIC_INLINE jl_vararg_kind_t jl_va_tuple_kind(jl_datatype_t *t) JL_NOTSAFEPOIN
     return jl_vararg_kind(jl_tparam(t,l-1));
 }
 
-#ifdef JL_USE_INTEL_JITEVENTS
-extern char jl_using_intel_jitevents;
-#endif
-#ifdef JL_USE_OPROFILE_JITEVENTS
-extern char jl_using_oprofile_jitevents;
-#endif
-#ifdef JL_USE_PERF_JITEVENTS
-extern char jl_using_perf_jitevents;
-#endif
-extern char jl_using_gdb_jitevents;
-
 // -- init.c -- //
 
 void jl_init_types(void) JL_GC_DISABLED;
@@ -708,24 +728,25 @@ void jl_init_common_symbols(void);
 void jl_init_primitives(void) JL_GC_DISABLED;
 void jl_init_llvm(void);
 void jl_init_codegen(void);
+void jl_init_runtime_ccall(void);
 void jl_init_intrinsic_functions(void);
 void jl_init_intrinsic_properties(void);
 void jl_init_tasks(void) JL_GC_DISABLED;
 void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo);
-void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
+jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
 void jl_init_serializer(void);
 void jl_gc_init(void);
 void jl_init_uv(void);
-void jl_init_debuginfo(void);
 void jl_init_thread_heap(jl_ptls_t ptls);
 void jl_init_int32_int64_cache(void);
+JL_DLLEXPORT void jl_init_options(void);
 
 void jl_teardown_codegen(void);
 
 void jl_set_base_ctx(char *__stk);
 
-extern ssize_t jl_tls_offset;
-extern const int jl_tls_elf_support;
+extern JL_DLLEXPORT ssize_t jl_tls_offset;
+extern JL_DLLEXPORT const int jl_tls_elf_support;
 void jl_init_threading(void);
 void jl_start_threads(void);
 
@@ -736,7 +757,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
     uintptr_t safepoint_addr = (uintptr_t)jl_safepoint_pages;
     return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3;
 }
-extern uint32_t jl_gc_running;
+extern _Atomic(uint32_t) jl_gc_running;
 // All the functions are safe to be called from within a signal handler
 // provided that the thread will not be interrupted by another asynchronous
 // signal.
@@ -781,15 +802,15 @@ typedef DWORD jl_pgcstack_key_t;
 #else
 typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT;
 #endif
-void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k);
+JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k);
 
-#if !defined(__clang_analyzer__)
+#if !defined(__clang_gcanalyzer__)
 static inline void jl_set_gc_and_wait(void)
 {
     jl_task_t *ct = jl_current_task;
     // reading own gc state doesn't need atomic ops since no one else
     // should store to it.
-    int8_t state = ct->ptls->gc_state;
+    int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
     jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
     jl_safepoint_wait_gc();
     jl_atomic_store_release(&ct->ptls->gc_state, state);
@@ -798,42 +819,42 @@ static inline void jl_set_gc_and_wait(void)
 void jl_gc_set_permalloc_region(void *start, void *end);
 
 JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
-        int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLEXPORT void *jl_get_llvmf_defn(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
-JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo, char binary);
-JL_DLLEXPORT jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo);
+JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLEXPORT jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo);
+JL_DLLEXPORT jl_value_t *jl_dump_function_asm(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
 
-void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int policy);
+void *jl_create_native(jl_array_t *methods, const jl_cgparams_t *cgparams, int policy);
 void jl_dump_native(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
         const char *sysimg_data, size_t sysimg_len);
 int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p) JL_NOTSAFEPOINT;
-void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
+JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
         int32_t *func_idx, int32_t *specfunc_idx);
 
 // the first argument to jl_idtable_rehash is used to return a value
 // make sure it is rooted if it is used after the function returns
 JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz);
-jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT;
+_Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*);
 
 JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module);
-jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache);
+JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache);
 jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp);
 JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world);
-jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
+JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
 JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world);
 JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(
     jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams);
 JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_method_instance_t *caller);
 JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller);
 
-uint32_t jl_module_next_counter(jl_module_t *m);
+uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
 jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs);
 
-int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
 
 jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
                      size_t offset, jl_value_t *options);
@@ -957,7 +978,7 @@ typedef struct {
 uint64_t jl_getUnwindInfo(uint64_t dwBase);
 #ifdef _OS_WINDOWS_
 #include <dbghelp.h>
-JL_DLLEXPORT EXCEPTION_DISPOSITION __julia_personality(
+JL_DLLEXPORT EXCEPTION_DISPOSITION NTAPI __julia_personality(
         PEXCEPTION_RECORD ExceptionRecord, void *EstablisherFrame, PCONTEXT ContextRecord, void *DispatcherContext);
 extern HANDLE hMainThread;
 typedef CONTEXT bt_context_t;
@@ -969,7 +990,7 @@ typedef struct {
     CONTEXT context;
 } bt_cursor_t;
 #endif
-extern jl_mutex_t jl_in_stackwalk;
+extern JL_DLLEXPORT uv_mutex_t jl_in_stackwalk;
 #elif !defined(JL_DISABLE_LIBUNWIND)
 // This gives unwind only local unwinding options ==> faster code
 #  define UNW_LOCAL_ONLY
@@ -996,14 +1017,14 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t
 size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
 #endif
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
-void jl_critical_error(int sig, bt_context_t *context);
+void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct);
 JL_DLLEXPORT void jl_raise_debugger(void);
 int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;
 void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT;
 void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_data) JL_NOTSAFEPOINT;
 #ifdef _OS_WINDOWS_
-void jl_refresh_dbg_module_list(void);
+JL_DLLEXPORT void jl_refresh_dbg_module_list(void);
 #endif
 // *to is NULL or malloc'd pointer, from is allowed to be NULL
 STATIC_INLINE char *jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT
@@ -1084,21 +1105,21 @@ STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed)
     return *seed % max;
 }
 
-// libuv stuff:
-JL_DLLEXPORT extern void *jl_libjulia_handle;
 JL_DLLEXPORT extern void *jl_libjulia_internal_handle;
 JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle;
 #if defined(_OS_WINDOWS_)
 JL_DLLEXPORT extern void *jl_exe_handle;
+JL_DLLEXPORT extern void *jl_libjulia_handle;
+JL_DLLEXPORT extern const char *jl_crtdll_basename;
 extern void *jl_ntdll_handle;
 extern void *jl_kernel32_handle;
 extern void *jl_crtdll_handle;
 extern void *jl_winsock_handle;
 #endif
 
-void *jl_get_library_(const char *f_lib, int throw_err);
+JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err);
 #define jl_get_library(f_lib) jl_get_library_(f_lib, 1)
-JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd);
+JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd);
 JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name);
 JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
     jl_value_t *fobj, jl_datatype_t *result, htable_t *cache, jl_svec_t *fill,
@@ -1108,13 +1129,9 @@ JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
 
 // Windows only
 #define JL_EXE_LIBNAME                  ((const char*)1)
-#define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)2)
-#if defined(JL_DEBUG_BUILD)
-#define JL_LIBJULIA_DL_LIBNAME          "libjulia-debug"
-#else
-#define JL_LIBJULIA_DL_LIBNAME          "libjulia"
-#endif
-const char *jl_dlfind_win32(const char *name);
+#define JL_LIBJULIA_DL_LIBNAME          ((const char*)2)
+#define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)3)
+JL_DLLEXPORT const char *jl_dlfind_win32(const char *name);
 
 // libuv wrappers:
 JL_DLLEXPORT int jl_fs_rename(const char *src_path, const char *dst_path);
@@ -1126,7 +1143,7 @@ extern JL_DLLEXPORT jl_value_t *jl_segv_exception;
 
 // -- Runtime intrinsics -- //
 JL_DLLEXPORT const char *jl_intrinsic_name(int f) JL_NOTSAFEPOINT;
-unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;
 
 STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety)
 {
@@ -1223,6 +1240,7 @@ JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
 
 JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a);
+JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a);
 JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type);
 JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a);
 JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i);
@@ -1234,8 +1252,8 @@ JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT;
 // -- synchronization utilities -- //
 
 extern jl_mutex_t typecache_lock;
-extern jl_mutex_t codegen_lock;
-extern jl_mutex_t safepoint_lock;
+extern JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
+extern uv_mutex_t safepoint_lock;
 
 #if defined(__APPLE__)
 void jl_mach_gc_end(void);
@@ -1246,11 +1264,11 @@ void jl_mach_gc_end(void);
 typedef uint_t (*smallintset_hash)(size_t val, jl_svec_t *data);
 typedef int (*smallintset_eq)(size_t val, const void *key, jl_svec_t *data, uint_t hv);
 ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv);
-void jl_smallintset_insert(jl_array_t **pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data);
+void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data);
 
 // -- typemap.c -- //
 
-void jl_typemap_insert(jl_typemap_t **cache, jl_value_t *parent,
+void jl_typemap_insert(_Atomic(jl_typemap_t*) *cache, jl_value_t *parent,
         jl_typemap_entry_t *newrec, int8_t offs);
 jl_typemap_entry_t *jl_typemap_alloc(
         jl_tupletype_t *type, jl_tupletype_t *simpletype, jl_svec_t *guardsigs,
@@ -1313,7 +1331,7 @@ JL_DLLEXPORT int8_t jl_svec_isassigned(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t
 JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i);
 
 
-unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
+JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
 void register_eh_frames(uint8_t *Addr, size_t Size);
 void deregister_eh_frames(uint8_t *Addr, size_t Size);
@@ -1340,60 +1358,101 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
             jl_value_t *file, jl_value_t *line, jl_value_t *kwargs,
             jl_value_t *msg);
 
-int isabspath(const char *in) JL_NOTSAFEPOINT;
-
-extern jl_sym_t *call_sym;    extern jl_sym_t *invoke_sym;
-extern jl_sym_t *empty_sym;   extern jl_sym_t *top_sym;
-extern jl_sym_t *module_sym;  extern jl_sym_t *slot_sym;
-extern jl_sym_t *export_sym;  extern jl_sym_t *import_sym;
-extern jl_sym_t *toplevel_sym; extern jl_sym_t *quote_sym;
-extern jl_sym_t *line_sym;     extern jl_sym_t *incomplete_sym;
-extern jl_sym_t *goto_sym;    extern jl_sym_t *goto_ifnot_sym;
-extern jl_sym_t *return_sym;
-extern jl_sym_t *lambda_sym;  extern jl_sym_t *assign_sym;
-extern jl_sym_t *globalref_sym; extern jl_sym_t *do_sym;
-extern jl_sym_t *method_sym;  extern jl_sym_t *core_sym;
-extern jl_sym_t *enter_sym;   extern jl_sym_t *leave_sym;
-extern jl_sym_t *exc_sym;     extern jl_sym_t *error_sym;
-extern jl_sym_t *new_sym;     extern jl_sym_t *using_sym;
-extern jl_sym_t *splatnew_sym;
-extern jl_sym_t *new_opaque_closure_sym;
-extern jl_sym_t *opaque_closure_method_sym;
-extern jl_sym_t *pop_exception_sym;
-extern jl_sym_t *const_sym;   extern jl_sym_t *thunk_sym;
-extern jl_sym_t *foreigncall_sym; extern jl_sym_t *as_sym;
-extern jl_sym_t *global_sym; extern jl_sym_t *list_sym;
-extern jl_sym_t *dot_sym;    extern jl_sym_t *newvar_sym;
-extern jl_sym_t *boundscheck_sym; extern jl_sym_t *inbounds_sym;
-extern jl_sym_t *aliasscope_sym; extern jl_sym_t *popaliasscope_sym;
-extern jl_sym_t *copyast_sym; extern jl_sym_t *cfunction_sym;
-extern jl_sym_t *pure_sym; extern jl_sym_t *loopinfo_sym;
-extern jl_sym_t *meta_sym; extern jl_sym_t *inert_sym;
-extern jl_sym_t *polly_sym; extern jl_sym_t *unused_sym;
-extern jl_sym_t *static_parameter_sym; extern jl_sym_t *inline_sym;
-extern jl_sym_t *noinline_sym; extern jl_sym_t *generated_sym;
-extern jl_sym_t *generated_only_sym; extern jl_sym_t *isdefined_sym;
-extern jl_sym_t *propagate_inbounds_sym; extern jl_sym_t *specialize_sym;
-extern jl_sym_t *aggressive_constprop_sym;
-extern jl_sym_t *nospecialize_sym; extern jl_sym_t *macrocall_sym;
-extern jl_sym_t *colon_sym; extern jl_sym_t *hygienicscope_sym;
-extern jl_sym_t *throw_undef_if_not_sym; extern jl_sym_t *getfield_undefref_sym;
-extern jl_sym_t *gc_preserve_begin_sym; extern jl_sym_t *gc_preserve_end_sym;
-extern jl_sym_t *coverageeffect_sym; extern jl_sym_t *escape_sym;
-extern jl_sym_t *optlevel_sym; extern jl_sym_t *compile_sym;
-extern jl_sym_t *infer_sym;
-extern jl_sym_t *atom_sym; extern jl_sym_t *statement_sym; extern jl_sym_t *all_sym;
-
-extern jl_sym_t *atomic_sym;
-extern jl_sym_t *not_atomic_sym;
-extern jl_sym_t *unordered_sym;
-extern jl_sym_t *monotonic_sym; // or relaxed_sym?
-extern jl_sym_t *acquire_sym;
-extern jl_sym_t *release_sym;
-extern jl_sym_t *acquire_release_sym;
-extern jl_sym_t *sequentially_consistent_sym; // or strong_sym?
-enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
-enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
+JL_DLLEXPORT int jl_isabspath(const char *in) JL_NOTSAFEPOINT;
+
+extern JL_DLLEXPORT jl_sym_t *jl_call_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_invoke_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_invoke_modify_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_empty_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_top_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_module_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_slot_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_export_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_import_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_toplevel_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_quote_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_line_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_goto_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_goto_ifnot_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_return_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_lambda_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_assign_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_globalref_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_do_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_method_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_core_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_enter_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_leave_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_pop_exception_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_exc_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_error_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_new_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_using_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_splatnew_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_block_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_new_opaque_closure_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_opaque_closure_method_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_const_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_as_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_global_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_list_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_dot_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_newvar_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_pure_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_meta_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_inert_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_polly_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_unused_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_static_parameter_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_inline_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_noinline_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_generated_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_generated_only_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_isdefined_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_propagate_inbounds_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_specialize_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_macrocall_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_colon_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_throw_undef_if_not_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_getfield_undefref_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_gc_preserve_begin_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_gc_preserve_end_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_coverageeffect_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_escape_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_optlevel_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_thismodule_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_atom_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_statement_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_all_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_compile_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_force_compile_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_infer_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_atomic_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_not_atomic_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_unordered_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_monotonic_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_release_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
+
+JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
+JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
 
 struct _jl_sysimg_fptrs_t;
 
@@ -1431,7 +1490,7 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 #define JL_GCC_IGNORE_STOP
 #endif // _COMPILER_GCC_
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
   // Not a safepoint (so it dosn't free other values), but an artificial use.
   // Usually this is unnecessary because the analyzer can see all real uses,
   // but sometimes real uses are harder for the analyzer to see, or it may
@@ -1448,4 +1507,39 @@ uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
 }
 #endif
 
+#ifdef USE_DTRACE
+#include "uprobes.h.gen"
+
+// On systems with DTrace, uprobes.h.gen is auto-generated to include
+// `JL_PROBE_{PROBE}` and `JL_PROBE_{PROBE}_ENABLED()` macros for every probe
+// defined in uprobes.d
+//
+// If the arguments to `JL_PROBE_{PROBE}` are expensive to compute, the call to
+// these functions must be guarded by a JL_PROBE_{PROBE}_ENABLED() check, to
+// minimize performance impact when probing is off. As an example:
+//
+//    if (JL_PROBE_GC_STOP_THE_WORLD_ENABLED())
+//        JL_PROBE_GC_STOP_THE_WORLD();
+
+#else
+// define a dummy version of the probe functions
+#define JL_PROBE_GC_BEGIN(collection) do ; while (0)
+#define JL_PROBE_GC_STOP_THE_WORLD() do ; while (0)
+#define JL_PROBE_GC_MARK_BEGIN() do ; while (0)
+#define JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes) do ; while (0)
+#define JL_PROBE_GC_SWEEP_BEGIN(full) do ; while (0)
+#define JL_PROBE_GC_SWEEP_END() do ; while (0)
+#define JL_PROBE_GC_END() do ; while (0)
+#define JL_PROBE_GC_FINALIZER() do ; while (0)
+
+#define JL_PROBE_GC_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_STOP_THE_WORLD_ENABLED() (0)
+#define JL_PROBE_GC_MARK_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_MARK_END_ENABLED() (0)
+#define JL_PROBE_GC_SWEEP_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_SWEEP_END_ENABLED()  (0)
+#define JL_PROBE_GC_END_ENABLED() (0)
+#define JL_PROBE_GC_FINALIZER_ENABLED() (0)
+#endif
+
 #endif
diff --git a/src/locks.h b/src/julia_locks.h
similarity index 79%
rename from src/locks.h
rename to src/julia_locks.h
index 0605cefbd1218..8da0fc8ac9537 100644
--- a/src/locks.h
+++ b/src/julia_locks.h
@@ -11,29 +11,27 @@ extern "C" {
 
 // Lock acquire and release primitives
 
-// JL_LOCK and jl_mutex_lock are GC safe points while JL_LOCK_NOGC
-// and jl_mutex_lock_nogc are not.
+// JL_LOCK and jl_mutex_lock are GC safe points, use uv_mutex_t if that is not desired.
 // Always use JL_LOCK unless no one holding the lock can trigger a GC or GC
-// safepoint. JL_LOCK_NOGC should only be needed for GC internal locks.
+// safepoint. uv_mutex_t should only be needed for GC internal locks.
 // The JL_LOCK* and JL_UNLOCK* macros are no-op for non-threading build
 // while the jl_mutex_* functions are always locking and unlocking the locks.
 
 static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint)
 {
-    jl_thread_t self = jl_thread_self();
-    jl_thread_t owner = jl_atomic_load_relaxed(&lock->owner);
-    jl_task_t *ct = jl_current_task;
+    jl_task_t *self = jl_current_task;
+    jl_task_t *owner = jl_atomic_load_relaxed(&lock->owner);
     if (owner == self) {
         lock->count++;
         return;
     }
     while (1) {
-        if (owner == 0 && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
+        if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
             lock->count = 1;
             return;
         }
         if (safepoint) {
-            jl_gc_safepoint_(ct->ptls);
+            jl_gc_safepoint_(self->ptls);
         }
         jl_cpu_pause();
         owner = jl_atomic_load_relaxed(&lock->owner);
@@ -42,7 +40,7 @@ static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint)
 
 static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
 {
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
     // Hide this body from the analyzer, otherwise it complains that we're calling
     // a non-safepoint from this function. The 0 arguments guarantees that we do
     // not reach the safepoint, but the analyzer can't figure that out
@@ -90,13 +88,13 @@ static inline void jl_mutex_lock(jl_mutex_t *lock)
 
 static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
 {
-    jl_thread_t self = jl_thread_self();
-    jl_thread_t owner = jl_atomic_load_acquire(&lock->owner);
+    jl_task_t *self = jl_current_task;
+    jl_task_t *owner = jl_atomic_load_acquire(&lock->owner);
     if (owner == self) {
         lock->count++;
         return 1;
     }
-    if (owner == 0 && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
+    if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
         lock->count = 1;
         return 1;
     }
@@ -114,11 +112,11 @@ static inline int jl_mutex_trylock(jl_mutex_t *lock)
 }
 static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
 {
-#ifndef __clang_analyzer__
-    assert(lock->owner == jl_thread_self() &&
+#ifndef __clang_gcanalyzer__
+    assert(jl_atomic_load_relaxed(&lock->owner) == jl_current_task &&
            "Unlocking a lock in a different thread.");
     if (--lock->count == 0) {
-        jl_atomic_store_release(&lock->owner, 0);
+        jl_atomic_store_release(&lock->owner, (jl_task_t*)NULL);
         jl_cpu_wake();
     }
 #endif
@@ -129,14 +127,14 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock)
     jl_mutex_unlock_nogc(lock);
     jl_lock_frame_pop();
     JL_SIGATOMIC_END();
-    if (jl_gc_have_pending_finalizers) {
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
         jl_gc_run_pending_finalizers(jl_current_task); // may GC
     }
 }
 
 static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT
 {
-    lock->owner = 0;
+    jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL);
     lock->count = 0;
 }
 
diff --git a/src/julia_threads.h b/src/julia_threads.h
index f10c9f538915d..371eb51250115 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,7 +4,10 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
-#include <atomics.h>
+#include "julia_atomics.h"
+#ifndef _OS_WINDOWS_
+#include "pthread.h"
+#endif
 // threading ------------------------------------------------------------------
 
 #ifdef __cplusplus
@@ -32,8 +35,12 @@ JL_DLLEXPORT void jl_threading_profile(void);
 
 #ifdef _OS_WINDOWS_
 #define JL_HAVE_UCONTEXT
-typedef win32_ucontext_t jl_ucontext_t;
+typedef win32_ucontext_t jl_stack_context_t;
+typedef jl_stack_context_t _jl_ucontext_t;
 #else
+typedef struct {
+    jl_jmp_buf uc_mcontext;
+} jl_stack_context_t;
 #if !defined(JL_HAVE_UCONTEXT) && \
     !defined(JL_HAVE_ASM) && \
     !defined(JL_HAVE_UNW_CONTEXT) && \
@@ -56,16 +63,11 @@ typedef win32_ucontext_t jl_ucontext_t;
 #endif
 #endif
 
-
-struct jl_stack_context_t {
-    jl_jmp_buf uc_mcontext;
-};
-
 #if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
-typedef struct jl_stack_context_t jl_ucontext_t;
+typedef jl_stack_context_t _jl_ucontext_t;
 #endif
 #if defined(JL_HAVE_ASYNCIFY)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN not currently supported with asyncify
 #endif
 typedef struct {
@@ -75,19 +77,30 @@ typedef struct {
     // __asyncify_data struct.
     void *stackbottom;
     void *stacktop;
-} jl_ucontext_t;
+} _jl_ucontext_t;
 #endif
 #if defined(JL_HAVE_UNW_CONTEXT)
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
-typedef unw_context_t jl_ucontext_t;
+typedef unw_context_t _jl_ucontext_t;
 #endif
 #if defined(JL_HAVE_UCONTEXT)
 #include <ucontext.h>
-typedef ucontext_t jl_ucontext_t;
+typedef ucontext_t _jl_ucontext_t;
 #endif
 #endif
 
+typedef struct {
+    union {
+        _jl_ucontext_t ctx;
+        jl_stack_context_t copy_ctx;
+    };
+#if defined(_COMPILER_TSAN_ENABLED_)
+    void *tsan_state;
+#endif
+} jl_ucontext_t;
+
+
 // handle to reference an OS thread
 #ifdef _OS_WINDOWS_
 typedef DWORD jl_thread_t;
@@ -95,9 +108,11 @@ typedef DWORD jl_thread_t;
 typedef pthread_t jl_thread_t;
 #endif
 
+struct _jl_task_t;
+
 // Recursive spin lock
 typedef struct {
-    volatile jl_thread_t owner;
+    _Atomic(struct _jl_task_t*) owner;
     uint32_t count;
 } jl_mutex_t;
 
@@ -108,13 +123,13 @@ typedef struct {
 } jl_gc_pool_t;
 
 typedef struct {
-    int64_t     allocd;
-    int64_t     freed;
-    uint64_t    malloc;
-    uint64_t    realloc;
-    uint64_t    poolalloc;
-    uint64_t    bigalloc;
-    uint64_t    freecall;
+    _Atomic(int64_t) allocd;
+    _Atomic(int64_t) freed;
+    _Atomic(uint64_t) malloc;
+    _Atomic(uint64_t) realloc;
+    _Atomic(uint64_t) poolalloc;
+    _Atomic(uint64_t) bigalloc;
+    _Atomic(uint64_t) freecall;
 } jl_thread_gc_num_t;
 
 typedef struct {
@@ -180,7 +195,6 @@ typedef struct {
     // this makes sure that a single objects can only appear once in
     // the lists (the mark bit cannot be flipped to `0` without sweeping)
     void *big_obj[1024];
-    jl_mutex_t stack_lock;
     void **pc_stack;
     void **pc_stack_end;
     jl_gc_mark_data_t *data_stack;
@@ -194,7 +208,7 @@ typedef struct _jl_tls_states_t {
     int16_t tid;
     uint64_t rngseed;
     volatile size_t *safepoint;
-    int8_t sleep_check_state; // read/write from foreign threads
+    _Atomic(int8_t) sleep_check_state; // read/write from foreign threads
     // Whether it is safe to execute GC at the same time.
 #define JL_GC_STATE_WAITING 1
     // gc_state = 1 means the thread is doing GC or is waiting for the GC to
@@ -202,7 +216,7 @@ typedef struct _jl_tls_states_t {
 #define JL_GC_STATE_SAFE 2
     // gc_state = 2 means the thread is running unmanaged code that can be
     //              execute at the same time with the GC.
-    int8_t gc_state; // read from foreign threads
+    _Atomic(int8_t) gc_state; // read from foreign threads
     // execution of certain certain impure
     // statements is prohibited from certain
     // callbacks (such as generated functions)
@@ -214,10 +228,8 @@ typedef struct _jl_tls_states_t {
     int finalizers_inhibited;
     jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
     jl_thread_gc_num_t gc_num;
-    uv_mutex_t sleep_lock;
-    uv_cond_t wake_signal;
     volatile sig_atomic_t defer_signal;
-    struct _jl_task_t *current_task;
+    _Atomic(struct _jl_task_t*) current_task;
     struct _jl_task_t *next_task;
     struct _jl_task_t *previous_task;
     struct _jl_task_t *root_task;
@@ -225,13 +237,9 @@ typedef struct _jl_tls_states_t {
     void *stackbase;
     size_t stacksize;
     union {
-        jl_ucontext_t base_ctx; // base context of stack
+        _jl_ucontext_t base_ctx; // base context of stack
         // This hack is needed to support always_copy_stacks:
-#ifdef _OS_WINDOWS_
-        jl_ucontext_t copy_stack_ctx;
-#else
-        struct jl_stack_context_t copy_stack_ctx;
-#endif
+        jl_stack_context_t copy_stack_ctx;
     };
     // Temp storage for exception thrown in signal handler. Not rooted.
     struct _jl_value_t *sig_exception;
@@ -239,7 +247,7 @@ typedef struct _jl_tls_states_t {
     struct _jl_bt_element_t *bt_data; // JL_MAX_BT_SIZE + 1 elements long
     size_t bt_size;    // Size for backtrace in transit in bt_data
     // Atomically set by the sender, reset by the handler.
-    volatile sig_atomic_t signal_request;
+    volatile _Atomic(sig_atomic_t) signal_request; // TODO: no actual reason for this to be _Atomic
     // Allow the sigint to be raised asynchronously
     // this is limited to the few places we do synchronous IO
     // we can make this more general (similar to defer_signal) if necessary
@@ -271,6 +279,11 @@ typedef struct _jl_tls_states_t {
 
 typedef jl_tls_states_t *jl_ptls_t;
 
+#ifndef LIBRARY_EXPORTS
+// deprecated (only for external consumers)
+JL_DLLEXPORT void *jl_get_ptls_states(void);
+#endif
+
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
 #  define jl_cpu_pause() _mm_delay_64(100)
@@ -293,7 +306,7 @@ typedef jl_tls_states_t *jl_ptls_t;
 JL_DLLEXPORT void (jl_cpu_pause)(void);
 JL_DLLEXPORT void (jl_cpu_wake)(void);
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 // Note that the sigint safepoint can also trigger GC, albeit less likely
 void jl_gc_safepoint_(jl_ptls_t tls);
 void jl_sigint_safepoint(jl_ptls_t tls);
@@ -328,9 +341,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
 STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
                                               int8_t state)
 {
-    return jl_gc_state_set(ptls, state, ptls->gc_state);
+    return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state));
 }
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 int8_t jl_gc_unsafe_enter(jl_ptls_t ptls); // Can be a safepoint
 int8_t jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT;
 int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT;
@@ -347,21 +360,10 @@ JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on);
 JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void);
 JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void);
 JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct);
-extern JL_DLLEXPORT int jl_gc_have_pending_finalizers;
+extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers;
 
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
 
-// Copied from libuv. Add `JL_CONST_FUNC` so that the compiler
-// can optimize this better.
-static inline jl_thread_t JL_CONST_FUNC jl_thread_self(void)
-{
-#ifdef _OS_WINDOWS_
-    return GetCurrentThreadId();
-#else
-    return pthread_self();
-#endif
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index ec7060bd10a5e..d67c6ba0ee28c 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -628,6 +628,21 @@ void Optimizer::checkInst(Instruction *I)
                 use_info.hasunknownmem = true;
             return true;
         }
+        if (isa<AtomicCmpXchgInst>(inst) || isa<AtomicRMWInst>(inst)) {
+            // Only a use as the pointer operand is a memory op; in any other operand slot the object escapes
+            if (use->getOperandNo() != (isa<AtomicCmpXchgInst>(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex())) {
+                use_info.escaped = true;
+                return false;
+            }
+            use_info.hasload = true;
+            auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            if (cur.offset == UINT32_MAX || !use_info.addMemOp(inst, use->getOperandNo(),
+                                                               cur.offset, storev->getType(),
+                                                               true, *pass.DL))
+                use_info.hasunknownmem = true;
+            use_info.refload = true;
+            return true;
+        }
         if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
             push_inst(inst);
             return true;
@@ -1149,6 +1164,7 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
 {
     auto tag = orig_inst->getArgOperand(2);
     // `julia.typeof` is only legal on the original pointer, no need to scan recursively
+    size_t last_deleted = removed.size();
     for (auto user: orig_inst->users()) {
         if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
@@ -1161,6 +1177,8 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
             }
         }
     }
+    while (last_deleted < removed.size())
+        removed[last_deleted++]->replaceUsesOfWith(orig_inst, UndefValue::get(orig_inst->getType()));
 }
 
 void Optimizer::splitOnStack(CallInst *orig_inst)
@@ -1328,6 +1346,22 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             store->eraseFromParent();
             return;
         }
+        else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
+            auto slot_idx = find_slot(offset);
+            auto &slot = slots[slot_idx];
+            assert(slot.offset <= offset && slot.offset + slot.size >= offset);
+            IRBuilder<> builder(user);
+            Value *newptr;
+            if (slot.isref) {
+                assert(slot.offset == offset);
+                newptr = slot.slot;
+            }
+            else {
+                Value *Val = isa<AtomicCmpXchgInst>(user) ? cast<AtomicCmpXchgInst>(user)->getNewValOperand() : cast<AtomicRMWInst>(user)->getValOperand();
+                newptr = slot_gep(slot, offset, Val->getType(), builder);
+            }
+            *use = newptr;
+        }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
             assert(callee); // makes it clear for clang analyser that `callee` is not NULL
@@ -1511,7 +1545,7 @@ Pass *createAllocOptPass()
     return new AllocOpt();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddAllocOptPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createAllocOptPass());
 }
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
new file mode 100644
index 0000000000000..47d87a0e557f8
--- /dev/null
+++ b/src/llvm-cpufeatures.cpp
@@ -0,0 +1,151 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+//
+// Lower intrinsics that expose subtarget information to the language. This makes it
+// possible to write code that changes behavior based on, e.g., the availability of
+// specific CPU features.
+//
+// The following intrinsics are supported:
+// - julia.cpu.have_fma.$typ: returns 1 if the platform supports hardware-accelerated FMA.
+//
+// Some of these intrinsics are overloaded, i.e., they are suffixed with a type name.
+// To extend support, make sure codegen (in intrinsics.cpp) knows how to emit them.
+//
+// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig
+//      instead of using the global target machine?
+
+#include "llvm-version.h"
+
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/PassManager.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Support/Debug.h>
+
+#include "julia.h"
+
+#define DEBUG_TYPE "cpufeatures"
+
+using namespace llvm;
+
+extern TargetMachine *jl_TargetMachine;
+
+// whether this platform unconditionally (i.e. without needing multiversioning) supports FMA
+Optional<bool> always_have_fma(Function &intr) {
+    auto intr_name = intr.getName();
+    auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
+
+#if defined(_OS_WINDOWS_)
+    // FMA on Windows is weirdly broken (#43088)
+    return false;
+#elif defined(_CPU_AARCH64_)
+    return typ == "f32" || typ == "f64";
+#else
+    (void)typ;
+    return {};
+#endif
+}
+
+bool have_fma(Function &intr, Function &caller) {
+    auto unconditional = always_have_fma(intr);
+    if (unconditional.hasValue())
+        return unconditional.getValue();
+
+    auto intr_name = intr.getName();
+    auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
+
+    Attribute FSAttr = caller.getFnAttribute("target-features");
+    StringRef FS =
+        FSAttr.isValid() ? FSAttr.getValueAsString() : jl_TargetMachine->getTargetFeatureString();
+
+    SmallVector<StringRef, 6> Features;
+    FS.split(Features, ',');
+    for (StringRef Feature : Features)
+#if defined _CPU_ARM_
+      if (Feature == "+vfp4")
+        return typ == "f32" || typ == "f64";
+      else if (Feature == "+vfp4sp")
+        return typ == "f32";
+#else
+      if (Feature == "+fma" || Feature == "+fma4")
+        return typ == "f32" || typ == "f64";
+#endif
+
+    return false;
+}
+
+// Replace all uses of the intrinsic call `I` with the constant 1 when
+// have_fma() holds for `caller`, and with 0 otherwise. The call itself is
+// not erased here; that is left to the caller of this function.
+void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) {
+    const uint64_t Answer = have_fma(intr, caller) ? 1 : 0;
+    Constant *Replacement = ConstantInt::get(I->getType(), Answer);
+    I->replaceAllUsesWith(Replacement);
+}
+
+// Lower every call to a `julia.cpu.have_fma.*` function in `M` to a constant
+// and erase the call. Returns true if at least one call was lowered.
+bool lowerCPUFeatures(Module &M)
+{
+    SmallVector<Instruction*,6> Lowered;
+
+    for (Function &Intr : M.functions()) {
+        if (!Intr.getName().startswith("julia.cpu.have_fma."))
+            continue;
+
+        for (Use &U : Intr.uses()) {
+            // every user of the intrinsic is expected to be a call (cast<> asserts otherwise)
+            CallInst *Call = cast<CallInst>(U.getUser());
+            Function *Caller = Call->getParent()->getParent();
+            lowerHaveFMA(Intr, *Caller, Call);
+            // erasure is deferred so the use list is not modified while it is being walked
+            Lowered.push_back(Call);
+        }
+    }
+
+    const bool Changed = !Lowered.empty();
+    for (Instruction *Dead : Lowered)
+        Dead->eraseFromParent();
+
+    return Changed;
+}
+
+struct CPUFeatures : PassInfoMixin<CPUFeatures> { // new pass manager wrapper around lowerCPUFeatures
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM)
+{
+    // Claim that all analyses are preserved only when no call was rewritten.
+    return lowerCPUFeatures(M) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+struct CPUFeaturesLegacy : public ModulePass { // legacy pass manager wrapper around lowerCPUFeatures
+    static char ID;
+    CPUFeaturesLegacy() : ModulePass(ID) {}
+
+    bool runOnModule(Module &M) override // returns true when the module was modified
+    {
+        return lowerCPUFeatures(M);
+    }
+};
+
+char CPUFeaturesLegacy::ID = 0;
+static RegisterPass<CPUFeaturesLegacy>
+        Y("CPUFeatures",
+          "Lower calls to CPU feature testing intrinsics.",
+          false,
+          false);
+} // end anonymous namespace
+
+Pass *createCPUFeaturesPass()
+{
+    return new CPUFeaturesLegacy();
+}
+
+extern "C" JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM)
+{
+    unwrap(PM)->add(createCPUFeaturesPass());
+}
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
index 43d80e208bf0b..3e328424e26d2 100644
--- a/src/llvm-demote-float16.cpp
+++ b/src/llvm-demote-float16.cpp
@@ -20,6 +20,7 @@
 
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/IR/Module.h>
 #include <llvm/Support/Debug.h>
 
@@ -27,15 +28,7 @@ using namespace llvm;
 
 namespace {
 
-struct DemoteFloat16Pass : public FunctionPass {
-    static char ID;
-    DemoteFloat16Pass() : FunctionPass(ID){};
-
-private:
-    bool runOnFunction(Function &F) override;
-};
-
-bool DemoteFloat16Pass::runOnFunction(Function &F)
+static bool demoteFloat16(Function &F)
 {
     auto &ctx = F.getContext();
     auto T_float16 = Type::getHalfTy(ctx);
@@ -132,20 +125,44 @@ bool DemoteFloat16Pass::runOnFunction(Function &F)
         return false;
 }
 
-char DemoteFloat16Pass::ID = 0;
-static RegisterPass<DemoteFloat16Pass>
+} // end anonymous namespace
+
+struct DemoteFloat16 : PassInfoMixin<DemoteFloat16> { // new pass manager wrapper around demoteFloat16
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+PreservedAnalyses DemoteFloat16::run(Function &F, FunctionAnalysisManager &AM)
+{
+    // Claim that all analyses are preserved only when the function was left untouched.
+    return demoteFloat16(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+
+struct DemoteFloat16Legacy : public FunctionPass {
+    static char ID;
+    DemoteFloat16Legacy() : FunctionPass(ID){};
+
+private:
+    bool runOnFunction(Function &F) override {
+        return demoteFloat16(F);
+    }
+};
+
+char DemoteFloat16Legacy::ID = 0;
+static RegisterPass<DemoteFloat16Legacy>
         Y("DemoteFloat16",
           "Demote Float16 operations to Float32 equivalents.",
           false,
           false);
-}
+} // end anonymous namespace
 
 Pass *createDemoteFloat16Pass()
 {
-    return new DemoteFloat16Pass();
+    return new DemoteFloat16Legacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createDemoteFloat16Pass());
 }
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index bc68edda2cad7..c55095daab300 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -218,16 +218,17 @@ bool FinalLowerGC::doInitialization(Module &M) {
 
 bool FinalLowerGC::doFinalization(Module &M)
 {
+    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
+    queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr;
     auto used = M.getGlobalVariable("llvm.compiler.used");
     if (!used)
         return false;
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
     SmallPtrSet<Constant*, 16> InitAsSet(
         functionList,
         functionList + sizeof(functionList) / sizeof(void*));
     bool changed = false;
     SmallVector<Constant*, 16> init;
-    ConstantArray *CA = dyn_cast<ConstantArray>(used->getInitializer());
+    ConstantArray *CA = cast<ConstantArray>(used->getInitializer());
     for (auto &Op : CA->operands()) {
         Constant *C = cast_or_null<Constant>(Op);
         if (InitAsSet.count(C->stripPointerCasts())) {
@@ -332,7 +333,7 @@ Pass *createFinalLowerGCPass()
     return new FinalLowerGC();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createFinalLowerGCPass());
 }
diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp
index 29b8c9ac4e60c..bfdca51603890 100644
--- a/src/llvm-gc-invariant-verifier.cpp
+++ b/src/llvm-gc-invariant-verifier.cpp
@@ -203,7 +203,7 @@ Pass *createGCInvariantVerifierPass(bool Strong) {
     return new GCInvariantVerifier(Strong);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass(LLVMPassManagerRef PM, LLVMBool Strong)
+extern "C" JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong)
 {
     unwrap(PM)->add(createGCInvariantVerifierPass(Strong));
 }
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp
index bf80af612c423..91bdea4fea557 100644
--- a/src/llvm-julia-licm.cpp
+++ b/src/llvm-julia-licm.cpp
@@ -134,7 +134,7 @@ Pass *createJuliaLICMPass()
     return new JuliaLICMPass();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraJuliaLICMPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createJuliaLICMPass());
 }
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index d8ad3d62d4cc1..ed5fe7c43a598 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -709,7 +709,7 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
         else
             Numbers[i] = Number;
     }
-    if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
+    if (auto VTy = dyn_cast<FixedVectorType>(SI->getType())) {
         if (NumRoots != Numbers.size()) {
             // broadcast the scalar root number to fill the vector
             assert(NumRoots == 1);
@@ -736,11 +736,12 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
     SmallVector<PHINode *, 2> lifted;
     std::vector<int> Numbers;
     unsigned NumRoots = 1;
-    if (auto VTy = dyn_cast<VectorType>(Phi->getType())) {
+    if (auto VTy = dyn_cast<FixedVectorType>(Phi->getType())) {
         NumRoots = VTy->getNumElements();
         Numbers.resize(NumRoots);
     }
     else {
+        // TODO: SVE
         assert(isa<PointerType>(Phi->getType()) && "unimplemented");
     }
     for (unsigned i = 0; i < NumRoots; ++i) {
@@ -1152,12 +1153,14 @@ static bool isConstGV(GlobalVariable *gv)
     return gv->isConstant() || gv->getMetadata("julia.constgv");
 }
 
-static bool isLoadFromConstGV(LoadInst *LI, bool &task_local);
-static bool isLoadFromConstGV(Value *v, bool &task_local)
+typedef llvm::SmallPtrSet<PHINode*, 1> PhiSet;
+
+static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen = nullptr);
+static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr)
 {
     v = v->stripInBoundsOffsets();
     if (auto LI = dyn_cast<LoadInst>(v))
-        return isLoadFromConstGV(LI, task_local);
+        return isLoadFromConstGV(LI, task_local, seen);
     if (auto gv = dyn_cast<GlobalVariable>(v))
         return isConstGV(gv);
     // null pointer
@@ -1168,12 +1171,19 @@ static bool isLoadFromConstGV(Value *v, bool &task_local)
         return (CE->getOpcode() == Instruction::IntToPtr &&
                 isa<ConstantData>(CE->getOperand(0)));
     if (auto SL = dyn_cast<SelectInst>(v))
-        return (isLoadFromConstGV(SL->getTrueValue(), task_local) &&
-                isLoadFromConstGV(SL->getFalseValue(), task_local));
+        return (isLoadFromConstGV(SL->getTrueValue(), task_local, seen) &&
+                isLoadFromConstGV(SL->getFalseValue(), task_local, seen));
     if (auto Phi = dyn_cast<PHINode>(v)) {
+        PhiSet ThisSet(&Phi, &Phi);
+        if (!seen)
+            seen = &ThisSet;
+        else if (seen->count(Phi))
+            return true;
+        else
+            seen->insert(Phi);
         auto n = Phi->getNumIncomingValues();
         for (unsigned i = 0; i < n; ++i) {
-            if (!isLoadFromConstGV(Phi->getIncomingValue(i), task_local)) {
+            if (!isLoadFromConstGV(Phi->getIncomingValue(i), task_local, seen)) {
                 return false;
             }
         }
@@ -1205,7 +1215,7 @@ static bool isLoadFromConstGV(Value *v, bool &task_local)
 //
 // The white list implemented here and above in `isLoadFromConstGV(Value*)` should
 // cover all the cases we and LLVM generates.
-static bool isLoadFromConstGV(LoadInst *LI, bool &task_local)
+static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen)
 {
     // We only emit single slot GV in codegen
     // but LLVM global merging can change the pointer operands to GEPs/bitcasts
@@ -1215,7 +1225,7 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local)
                {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) {
         if (gv)
             return true;
-        return isLoadFromConstGV(load_base, task_local);
+        return isLoadFromConstGV(load_base, task_local, seen);
     }
     if (gv)
         return isConstGV(gv);
@@ -1550,8 +1560,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     // Known functions emitted in codegen that are not safepoints
                     if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
                         callee == gc_preserve_end_func || callee == typeof_func ||
-                        callee == pgcstack_getter || callee->getName() == "jl_egal__unboxed" ||
-                        callee->getName() == "jl_lock_value" || callee->getName() == "jl_unlock_value" ||
+                        callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
+                        callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
                         callee == write_barrier_func || callee->getName() == "memcmp") {
                         continue;
                     }
@@ -1573,7 +1583,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 for (Use &U : CI->arg_operands()) {
                     // Find all callee rooted arguments.
                     // Record them instead of simply remove them from live values here
-                    // since they can be useful during refinment
+                    // since they can be useful during refinement
                     // (e.g. to remove roots of objects that are refined to these)
                     Value *V = U;
                     if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
@@ -2286,10 +2296,12 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
                 // `julia.gc_alloc_obj` except it doesn't set the tag.
                 auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
+                auto ptlsLoad = get_current_ptls_from_task(builder, CI->getArgOperand(0));
+                auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
                 auto newI = builder.CreateCall(
                     allocBytesIntrinsic,
                     {
-                        CI->getArgOperand(0),
+                        ptls,
                         builder.CreateIntCast(
                             CI->getArgOperand(1),
                             allocBytesIntrinsic->getFunctionType()->getParamType(1),
@@ -2690,7 +2702,7 @@ Pass *createLateLowerGCFramePass() {
     return new LateLowerGCFrame();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLateLowerGCFramePass());
 }
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
index 7382f3b74c080..324c591f77be8 100644
--- a/src/llvm-lower-handlers.cpp
+++ b/src/llvm-lower-handlers.cpp
@@ -21,6 +21,7 @@
 
 #include "julia.h"
 #include "julia_assert.h"
+#include "codegen_shared.h"
 
 #define DEBUG_TYPE "lower_handlers"
 #undef DEBUG
@@ -95,11 +96,11 @@ static void ensure_enter_function(Module &M)
     auto T_pint8 = PointerType::get(T_int8, 0);
     auto T_void = Type::getVoidTy(M.getContext());
     auto T_int32 = Type::getInt32Ty(M.getContext());
-    if (!M.getNamedValue("jl_enter_handler")) {
+    if (!M.getNamedValue(XSTR(jl_enter_handler))) {
         std::vector<Type*> ehargs(0);
         ehargs.push_back(T_pint8);
         Function::Create(FunctionType::get(T_void, ehargs, false),
-                         Function::ExternalLinkage, "jl_enter_handler", &M);
+                         Function::ExternalLinkage, XSTR(jl_enter_handler), &M);
     }
     if (!M.getNamedValue(jl_setjmp_name)) {
         std::vector<Type*> args2(0);
@@ -118,8 +119,8 @@ bool LowerExcHandlers::doInitialization(Module &M) {
     if (!except_enter_func)
         return false;
     ensure_enter_function(M);
-    leave_func = M.getFunction("jl_pop_handler");
-    jlenter_func = M.getFunction("jl_enter_handler");
+    leave_func = M.getFunction(XSTR(jl_pop_handler));
+    jlenter_func = M.getFunction(XSTR(jl_enter_handler));
     setjmp_func = M.getFunction(jl_setjmp_name);
 
     auto T_pint8 = Type::getInt8PtrTy(M.getContext(), 0);
@@ -176,7 +177,7 @@ bool LowerExcHandlers::runOnFunction(Function &F) {
 
     /* Step 2: EH Frame lowering */
     // Allocate stack space for each handler. We allocate these as separate
-    // allocas so the optimizer can later merge and reaarange them if it wants
+    // allocas so the optimizer can later merge and rearrange them if it wants
     // to.
     Value *handler_sz = ConstantInt::get(Type::getInt32Ty(F.getContext()),
                                          sizeof(jl_handler_t));
@@ -245,7 +246,7 @@ Pass *createLowerExcHandlersPass()
     return new LowerExcHandlers();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLowerExcHandlersPass());
 }
diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp
index e5d63667df476..7166698db356f 100644
--- a/src/llvm-muladd.cpp
+++ b/src/llvm-muladd.cpp
@@ -9,6 +9,7 @@
 
 #include <llvm/IR/Value.h>
 #include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
@@ -34,15 +35,6 @@ using namespace llvm;
  * when `%v0` has no other use
  */
 
-struct CombineMulAdd : public FunctionPass {
-    static char ID;
-    CombineMulAdd() : FunctionPass(ID)
-    {}
-
-private:
-    bool runOnFunction(Function &F) override;
-};
-
 // Return true if this function shouldn't be called again on the other operand
 // This will always return false on LLVM 5.0+
 static bool checkCombine(Module *m, Instruction *addOp, Value *maybeMul, Value *addend,
@@ -60,7 +52,7 @@ static bool checkCombine(Module *m, Instruction *addOp, Value *maybeMul, Value *
     return false;
 }
 
-bool CombineMulAdd::runOnFunction(Function &F)
+static bool combineMulAdd(Function &F)
 {
     Module *m = F.getParent();
     for (auto &BB: F) {
@@ -90,17 +82,39 @@ bool CombineMulAdd::runOnFunction(Function &F)
     return true;
 }
 
-char CombineMulAdd::ID = 0;
-static RegisterPass<CombineMulAdd> X("CombineMulAdd", "Combine mul and add to muladd",
+struct CombineMulAdd : PassInfoMixin<CombineMulAdd> { // new-pass-manager wrapper around combineMulAdd()
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+PreservedAnalyses CombineMulAdd::run(Function &F, FunctionAnalysisManager &AM)
+{
+    // Honour the change flag from combineMulAdd() instead of always reporting "nothing changed".
+    return combineMulAdd(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+struct CombineMulAddLegacy : public FunctionPass {
+    static char ID;
+    CombineMulAddLegacy() : FunctionPass(ID)
+    {}
+
+private:
+    bool runOnFunction(Function &F) override {
+        return combineMulAdd(F);
+    }
+};
+
+char CombineMulAddLegacy::ID = 0;
+static RegisterPass<CombineMulAddLegacy> X("CombineMulAdd", "Combine mul and add to muladd",
                                      false /* Only looks at CFG */,
                                      false /* Analysis Pass */);
 
 Pass *createCombineMulAddPass()
 {
-    return new CombineMulAdd();
+    return new CombineMulAddLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createCombineMulAddPass());
 }
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index 68081eb53d3a5..7cd50ac144c1a 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -43,11 +43,13 @@ using namespace llvm;
 extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr,
                                                   bool isConstant=false);
 
+extern Optional<bool> always_have_fma(Function&);
+
 namespace {
 
 // These are valid detail cloning conditions in the target flags.
 constexpr uint32_t clone_mask =
-    JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH;
+    JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU;
 
 struct MultiVersioning;
 
@@ -272,13 +274,13 @@ struct CloneCtx {
     Constant *get_ptrdiff32(Constant *ptr, Constant *base) const;
     template<typename T>
     Constant *emit_offset_table(const std::vector<T*> &vars, StringRef name) const;
+    void rewrite_alias(GlobalAlias *alias, Function* F);
 
     LLVMContext &ctx;
     Type *T_size;
     Type *T_int32;
     Type *T_void;
     PointerType *T_psize;
-    PointerType *T_pvoidfunc;
     MDNode *tbaa_const;
     MultiVersioning *pass;
     std::vector<jl_target_spec_t> specs;
@@ -295,6 +297,8 @@ struct CloneCtx {
     std::vector<std::pair<Constant*,uint32_t>> gv_relocs{};
     // Mapping from function id (i.e. 0-based index in `fvars`) to GVs to be initialized.
     std::map<uint32_t,GlobalVariable*> const_relocs;
+    // Functions that were referred to by a global alias, and might not have other uses.
+    std::set<uint32_t> alias_relocs;
     bool has_veccall{false};
     bool has_cloneall{false};
 };
@@ -342,7 +346,6 @@ CloneCtx::CloneCtx(MultiVersioning *pass, Module &M)
       T_int32(Type::getInt32Ty(ctx)),
       T_void(Type::getVoidTy(ctx)),
       T_psize(PointerType::get(T_size, 0)),
-      T_pvoidfunc(FunctionType::get(T_void, false)->getPointerTo()),
       tbaa_const(tbaa_make_child("jtbaa_const", nullptr, true).first),
       pass(pass),
       specs(jl_get_llvm_clone_targets()),
@@ -403,7 +406,12 @@ void CloneCtx::clone_function(Function *F, Function *new_f, ValueToValueMapTy &v
         vmap[&*J] = &*DestI++;
     }
     SmallVector<ReturnInst*,8> Returns;
+#if JL_LLVM_VERSION >= 130000
+    // We are cloning into the same module
+    CloneFunctionInto(new_f, F, vmap, CloneFunctionChangeType::GlobalChanges, Returns);
+#else
     CloneFunctionInto(new_f, F, vmap, true, Returns);
+#endif
 }
 
 // Clone all clone_all targets. Makes sure that the base targets are all available.
@@ -464,6 +472,16 @@ uint32_t CloneCtx::collect_func_info(Function &F)
                     if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
                         flag |= JL_TARGET_CLONE_MATH;
                     }
+                    else if (name.startswith("julia.cpu.")) {
+                        if (name.startswith("julia.cpu.have_fma.")) {
+                            // for some platforms we know they always do (or don't) support
+                            // FMA. in those cases we don't need to clone the function.
+                            if (!always_have_fma(*callee).hasValue())
+                                flag |= JL_TARGET_CLONE_CPU;
+                        } else {
+                            flag |= JL_TARGET_CLONE_CPU;
+                        }
+                    }
                 }
             }
             else if (auto store = dyn_cast<StoreInst>(&I)) {
@@ -697,6 +715,54 @@ Constant *CloneCtx::rewrite_gv_init(const Stack& stack)
     return res;
 }
 
+// Replace `alias` (an alias of `F`) with a same-named trampoline that calls through F's reloc slot (a null-initialized global).
+void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
+{
+    assert(!is_vector(F->getFunctionType())); // NOTE(review): is_vector() is defined outside this hunk -- confirm what it rejects
+
+    Function *trampoline =
+        Function::Create(F->getFunctionType(), alias->getLinkage(), "", &M); // created unnamed; named just below
+    trampoline->copyAttributesFrom(F);
+    trampoline->takeName(alias); // the trampoline takes over the alias's symbol name
+    alias->eraseFromParent(); // `alias` must not be used past this point
+
+    uint32_t id;
+    GlobalVariable *slot;
+    std::tie(id, slot) = get_reloc_slot(F); // id of F plus the global holding its function pointer
+    for (auto &grp: groups) { // record the slot id in the reloc set of every group...
+        grp.relocs.insert(id);
+        for (auto &tgt: grp.clones) { // ...and of every clone target inside that group
+            tgt.relocs.insert(id);
+        }
+    }
+    alias_relocs.insert(id); // consulted by emit_metadata(), which adds these ids to shared_relocs
+
+    auto BB = BasicBlock::Create(ctx, "top", trampoline); // the trampoline body is this single block
+    IRBuilder<> irbuilder(BB);
+
+    auto ptr = irbuilder.CreateLoad(F->getType(), slot); // load the function pointer stored in the slot
+    ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+    ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ctx, None));
+
+    std::vector<Value *> Args; // forward the trampoline's own arguments unchanged
+    for (auto &arg : trampoline->args())
+        Args.push_back(&arg);
+    auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, makeArrayRef(Args)); // indirect call via the loaded pointer
+    if (F->isVarArg()) // vararg callee: musttail is requested, or abort() on the CPUs listed below
+#if (defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_PPC64_))
+        abort();    // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9)
+#else
+        call->setTailCallKind(CallInst::TCK_MustTail);
+#endif
+    else
+        call->setTailCallKind(CallInst::TCK_Tail); // non-vararg callee: plain tail-call marker
+
+    if (F->getReturnType() == T_void) // return nothing, or hand back the call's result
+        irbuilder.CreateRetVoid();
+    else
+        irbuilder.CreateRet(call);
+}
+
 void CloneCtx::fix_gv_uses()
 {
     auto single_pass = [&] (Function *orig_f) {
@@ -707,8 +773,14 @@ void CloneCtx::fix_gv_uses()
             auto info = uses.get_info();
             // We only support absolute pointer relocation.
             assert(info.samebits);
-            // And only for non-constant global variable initializers
-            auto val = cast<GlobalVariable>(info.val);
+            GlobalVariable *val;
+            if (auto alias = dyn_cast<GlobalAlias>(info.val)) {
+                rewrite_alias(alias, orig_f);
+                continue;
+            }
+            else {
+                val = cast<GlobalVariable>(info.val);
+            }
             assert(info.use->getOperandNo() == 0);
             assert(!val->isConstant());
             auto fid = get_func_id(orig_f);
@@ -734,8 +806,8 @@ std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F)
     auto id = get_func_id(F);
     auto &slot = const_relocs[id];
     if (!slot)
-        slot = new GlobalVariable(M, T_pvoidfunc, false, GlobalVariable::InternalLinkage,
-                                  ConstantPointerNull::get(T_pvoidfunc),
+        slot = new GlobalVariable(M, F->getType(), false, GlobalVariable::InternalLinkage,
+                                  ConstantPointerNull::get(F->getType()),
                                   F->getName() + ".reloc_slot");
     return std::make_pair(id, slot);
 }
@@ -815,10 +887,9 @@ void CloneCtx::fix_inst_uses()
                     uint32_t id;
                     GlobalVariable *slot;
                     std::tie(id, slot) = get_reloc_slot(orig_f);
-                    Instruction *ptr = new LoadInst(T_pvoidfunc, slot, "", false, insert_before);
+                    Instruction *ptr = new LoadInst(orig_f->getType(), slot, "", false, insert_before);
                     ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
                     ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ctx, None));
-                    ptr = new BitCastInst(ptr, F->getType(), "", insert_before);
                     use_i->setOperand(info.use->getOperandNo(),
                                       rewrite_inst_use(uses.get_stack(), ptr,
                                                        insert_before));
@@ -843,15 +914,6 @@ template<typename T>
 inline T *CloneCtx::add_comdat(T *G) const
 {
 #if defined(_OS_WINDOWS_)
-    // Add comdat information to make MSVC link.exe happy
-    // it's valid to emit this for ld.exe too,
-    // but makes it very slow to link for no benefit
-#if defined(_COMPILER_MICROSOFT_)
-    Comdat *jl_Comdat = G->getParent()->getOrInsertComdat(G->getName());
-    // ELF only supports Comdat::Any
-    jl_Comdat->setSelectionKind(Comdat::NoDuplicates);
-    G->setComdat(jl_Comdat);
-#endif
     // add __declspec(dllexport) to everything marked for export
     if (G->getLinkage() == GlobalValue::ExternalLinkage)
         G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
@@ -955,6 +1017,9 @@ void CloneCtx::emit_metadata()
                 values.push_back(id_v);
                 values.push_back(get_ptrdiff32(it->second, gbase));
             }
+            if (alias_relocs.find(id) != alias_relocs.end()) {
+                shared_relocs.insert(id);
+            }
         }
         values[0] = ConstantInt::get(T_int32, values.size() / 2);
         ArrayType *vars_type = ArrayType::get(T_int32, values.size());
@@ -1082,7 +1147,7 @@ Pass *createMultiVersioningPass()
     return new MultiVersioning();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createMultiVersioningPass());
 }
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index 0eed7aec98f0b..89263033ce565 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -15,6 +15,7 @@
 #include "codegen_shared.h"
 #include "julia_assert.h"
 #include "llvm-pass-helpers.h"
+#include "jl_internal_funcs.inc"
 
 using namespace llvm;
 
@@ -209,9 +210,9 @@ namespace jl_intrinsics {
 }
 
 namespace jl_well_known {
-    static const char *GC_BIG_ALLOC_NAME = "jl_gc_big_alloc";
-    static const char *GC_POOL_ALLOC_NAME = "jl_gc_pool_alloc";
-    static const char *GC_QUEUE_ROOT_NAME = "jl_gc_queue_root";
+    static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
+    static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
+    static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
 
     using jl_intrinsics::addGCAllocAttributes;
 
diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp
index a6afcda870911..e41c85afbf31e 100644
--- a/src/llvm-propagate-addrspaces.cpp
+++ b/src/llvm-propagate-addrspaces.cpp
@@ -306,7 +306,7 @@ Pass *createPropagateJuliaAddrspaces() {
     return new PropagateJuliaAddrspaces();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createPropagateJuliaAddrspaces());
 }
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index 9cecceac9a187..c971774f23e6c 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -160,15 +160,6 @@ template<typename T>
 inline T *LowerPTLS::add_comdat(T *G) const
 {
 #if defined(_OS_WINDOWS_)
-    // Add comdat information to make MSVC link.exe happy
-    // it's valid to emit this for ld.exe too,
-    // but makes it very slow to link for no benefit
-#if defined(_COMPILER_MICROSOFT_)
-    Comdat *jl_Comdat = G->getParent()->getOrInsertComdat(G->getName());
-    // ELF only supports Comdat::Any
-    jl_Comdat->setSelectionKind(Comdat::NoDuplicates);
-    G->setComdat(jl_Comdat);
-#endif
     // add __declspec(dllexport) to everything marked for export
     if (G->getLinkage() == GlobalValue::ExternalLinkage)
         G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
@@ -316,7 +307,7 @@ Pass *createLowerPTLSPass(bool imaging_mode)
     return new LowerPTLS(imaging_mode);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass(LLVMPassManagerRef PM, LLVMBool imaging_mode)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode)
 {
     unwrap(PM)->add(createLowerPTLSPass(imaging_mode));
 }
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index ada10c8d5f1f9..6908783288940 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -325,7 +325,7 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
 
         Function *NF = Function::Create(
                 NFTy, F->getLinkage(), F->getAddressSpace(), Name, &M);
-        NF->copyAttributesFrom(F);
+        // no need to copy attributes here, that's done by CloneFunctionInto
         VMap[F] = NF;
     }
 
@@ -345,7 +345,11 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
         for (auto MD : MDs)
             NGV->addMetadata(
                     MD.first,
+#if JL_LLVM_VERSION >= 130000
+                    *MapMetadata(MD.second, VMap));
+#else
                     *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
+#endif
 
         copyComdat(NGV, GV);
 
@@ -372,13 +376,33 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
                 NF,
                 F,
                 VMap,
+#if JL_LLVM_VERSION >= 130000
+                CloneFunctionChangeType::GlobalChanges,
+#else
                 /*ModuleLevelChanges=*/true,
+#endif
                 Returns,
                 "",
                 nullptr,
                 &TypeRemapper,
                 &Materializer);
 
+        // CloneFunctionInto unconditionally copies the attributes from F to NF,
+        // without considering e.g. the byval attribute type.
+        AttributeList Attrs = F->getAttributes();
+        LLVMContext &C = F->getContext();
+        for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+            for (Attribute::AttrKind TypedAttr :
+                 {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+                if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
+                    Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
+                                                       TypeRemapper.remapType(Ty));
+                    break;
+                }
+            }
+        }
+        NF->setAttributes(Attrs);
+
         if (F->hasPersonalityFn())
             NF->setPersonalityFn(MapValue(F->getPersonalityFn(), VMap));
 
@@ -469,7 +493,7 @@ Pass *createRemoveJuliaAddrspacesPass()
     return new RemoveJuliaAddrspacesPass();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createRemoveJuliaAddrspacesPass());
 }
diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp
index e9e1dd23149cc..40b0ecd735b13 100644
--- a/src/llvm-remove-ni.cpp
+++ b/src/llvm-remove-ni.cpp
@@ -3,6 +3,7 @@
 #include "llvm-version.h"
 
 #include <llvm/IR/Module.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/Support/Debug.h>
 
@@ -14,31 +15,48 @@ using namespace llvm;
 
 namespace {
 
-struct RemoveNIPass : public ModulePass {
+static bool removeNI(Module &M) // strips the first "-ni:..." component from M's data layout; returns true iff M was changed
+{
+    auto dlstr = M.getDataLayoutStr(); // the string that gets edited and then written back via setDataLayout()
+    auto nistart = dlstr.find("-ni:"); // index of the '-' that introduces the component
+    if (nistart == std::string::npos)
+        return false; // no "-ni:" component present: module left untouched
+    auto len = dlstr.size();
+    auto niend = nistart + 1; // start scanning just past the leading '-'
+    for (; niend < len; niend++) { // stop at the next '-' separator, or at the end of the string
+        if (dlstr[niend] == '-') {
+            break;
+        }
+    }
+    dlstr.erase(nistart, niend - nistart); // removes [nistart, niend): the leading '-' goes, the following one stays
+    M.setDataLayout(dlstr);
+    return true;
+}
+}
+
+struct RemoveNI : PassInfoMixin<RemoveNI> { // new-pass-manager wrapper around removeNI()
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+PreservedAnalyses RemoveNI::run(Module &M, ModuleAnalysisManager &AM)
+{
+    // Rewriting the data layout is a module change; report it so cached analyses are invalidated.
+    return removeNI(M) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+struct RemoveNILegacy : public ModulePass {
     static char ID;
-    RemoveNIPass() : ModulePass(ID) {};
+    RemoveNILegacy() : ModulePass(ID) {};
 
     bool runOnModule(Module &M)
     {
-        auto dlstr = M.getDataLayoutStr();
-        auto nistart = dlstr.find("-ni:");
-        if (nistart == std::string::npos)
-            return false;
-        auto len = dlstr.size();
-        auto niend = nistart + 1;
-        for (; niend < len; niend++) {
-            if (dlstr[niend] == '-') {
-                break;
-            }
-        }
-        dlstr.erase(nistart, niend - nistart);
-        M.setDataLayout(dlstr);
-        return true;
+        return removeNI(M);
     }
 };
 
-char RemoveNIPass::ID = 0;
-static RegisterPass<RemoveNIPass>
+char RemoveNILegacy::ID = 0;
+static RegisterPass<RemoveNILegacy>
         Y("RemoveNI",
           "Remove non-integral address space.",
           false,
@@ -47,10 +65,10 @@ static RegisterPass<RemoveNIPass>
 
 Pass *createRemoveNIPass()
 {
-    return new RemoveNIPass();
+    return new RemoveNILegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveNIPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createRemoveNIPass());
 }
diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp
index afcfa60082ad8..8d80a535b2319 100644
--- a/src/llvm-simdloop.cpp
+++ b/src/llvm-simdloop.cpp
@@ -30,35 +30,7 @@
 
 namespace llvm {
 
-
-/// This pass should run after reduction variables have been converted to phi nodes,
-/// otherwise floating-point reductions might not be recognized as such and
-/// prevent SIMDization.
-struct LowerSIMDLoop : public ModulePass {
-    static char ID;
-    LowerSIMDLoop() : ModulePass(ID)
-    {
-    }
-
-    protected:
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        ModulePass::getAnalysisUsage(AU);
-        AU.addRequired<LoopInfoWrapperPass>();
-        AU.addPreserved<LoopInfoWrapperPass>();
-        AU.setPreservesCFG();
-    }
-
-    private:
-    bool runOnModule(Module &M) override;
-
-    bool markLoopInfo(Module &M, Function *marker);
-
-    /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv,
-    /// mark the ops as permitting reassociation/commuting.
-    /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer
-    void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const;
-};
+namespace {
 
 static unsigned getReduceOpcode(Instruction *J, Instruction *operand)
 {
@@ -80,7 +52,10 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand)
     }
 }
 
-void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
+/// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv,
+/// mark the ops as permitting reassociation/commuting.
+/// As of LLVM 4.0, FDiv is not handled by the loop vectorizer
+static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L)
 {
     typedef SmallVector<Instruction*, 8> chainVector;
     chainVector chain;
@@ -130,18 +105,7 @@ void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
     }
 }
 
-bool LowerSIMDLoop::runOnModule(Module &M)
-{
-    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
-
-    bool Changed = false;
-    if (loopinfo_marker)
-        Changed |= markLoopInfo(M, loopinfo_marker);
-
-    return Changed;
-}
-
-bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
+static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Function &)> GetLI)
 {
     bool Changed = false;
     std::vector<Instruction*> ToDelete;
@@ -149,7 +113,7 @@ bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
         Instruction *I = cast<Instruction>(U);
         ToDelete.push_back(I);
 
-        LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(*I->getParent()->getParent()).getLoopInfo();
+        LoopInfo &LI = GetLI(*I->getParent()->getParent());
         Loop *L = LI.getLoopFor(I->getParent());
         I->removeFromParent();
         if (!L)
@@ -243,18 +207,84 @@ bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
     return Changed;
 }
 
-char LowerSIMDLoop::ID = 0;
+} // end anonymous namespace
+
+/// New pass manager (PassInfoMixin) form of the SIMD loop lowering pass.
+/// This pass should run after reduction variables have been converted to phi nodes,
+/// otherwise floating-point reductions might not be recognized as such and
+/// prevent SIMDization.
+struct LowerSIMDLoop : PassInfoMixin<LowerSIMDLoop> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+
+PreservedAnalyses LowerSIMDLoop::run(Module &M, ModuleAnalysisManager &AM)
+{
+    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
+    // No marker function in the module means there is nothing to lower.
+    if (!loopinfo_marker)
+        return PreservedAnalyses::all();
+
+    FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    // LoopInfo is requested on demand for each function holding a marker call.
+    auto GetLI = [&FAM](Function &F) -> LoopInfo & {
+        return FAM.getResult<LoopAnalysis>(F);
+    };
+    // The legacy pass declares this transform CFG-preserving (setPreservesCFG),
+    // so keep CFG analyses but invalidate everything else once the IR changed.
+    if (markLoopInfo(M, loopinfo_marker, GetLI))
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    return PreservedAnalyses::all();
+}
+
+namespace {
+/// Legacy pass manager wrapper that lowers `julia.loopinfo_marker` calls.
+class LowerSIMDLoopLegacy : public ModulePass {
+public:
+    static char ID;
+
+    LowerSIMDLoopLegacy() : ModulePass(ID) {}
+
+    /// Returns whatever markLoopInfo reports, or false when the module has
+    /// no `julia.loopinfo_marker` function.
+    bool runOnModule(Module &M) override
+    {
+        Function *marker = M.getFunction("julia.loopinfo_marker");
+        if (!marker)
+            return false;
+
+        // Fetch LoopInfo through the legacy analysis machinery on demand.
+        auto loopInfoFor = [this](Function &Fn) -> LoopInfo & {
+            LoopInfoWrapperPass &Wrapper = getAnalysis<LoopInfoWrapperPass>(Fn);
+            return Wrapper.getLoopInfo();
+        };
+        return markLoopInfo(M, marker, loopInfoFor);
+    }
+
+    /// Requires LoopInfo per function and declares it (and the CFG) preserved.
+    void getAnalysisUsage(AnalysisUsage &AU) const override
+    {
+        ModulePass::getAnalysisUsage(AU);
+        AU.addRequired<LoopInfoWrapperPass>();
+        AU.addPreserved<LoopInfoWrapperPass>();
+        AU.setPreservesCFG();
+    }
+};
+
+} // end anonymous namespace
+
+char LowerSIMDLoopLegacy::ID = 0;
 
-static RegisterPass<LowerSIMDLoop> X("LowerSIMDLoop", "LowerSIMDLoop Pass",
+static RegisterPass<LowerSIMDLoopLegacy> X("LowerSIMDLoop", "LowerSIMDLoop Pass",
                                      false /* Only looks at CFG */,
                                      false /* Analysis Pass */);
 
 JL_DLLEXPORT Pass *createLowerSimdLoopPass()
 {
-    return new LowerSIMDLoop();
+    return new LowerSIMDLoopLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLowerSimdLoopPass());
 }
diff --git a/src/llvm-version.h b/src/llvm-version.h
index f59f7826c334d..94576cd42895e 100644
--- a/src/llvm-version.h
+++ b/src/llvm-version.h
@@ -13,15 +13,9 @@
     #error Only LLVM versions >= 11.0.0 are supported by Julia
 #endif
 
-#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
-#define LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING 0
-#endif
-
-#ifndef LLVM_ENABLE_STATS
-#define LLVM_ENABLE_STATS 0
-#endif
-
+#ifdef __cplusplus
 #if defined(__GNUC__) && (__GNUC__ >= 9)
 // Added in GCC 9, this warning is annoying
 #pragma GCC diagnostic ignored "-Winit-list-lifetime"
 #endif
+#endif
diff --git a/src/llvmcalltest.cpp b/src/llvmcalltest.cpp
index fee0fed72c0d9..1ce8e9fe55bef 100644
--- a/src/llvmcalltest.cpp
+++ b/src/llvmcalltest.cpp
@@ -13,9 +13,20 @@
 
 using namespace llvm;
 
+// Borrow definition from `support/dtypes.h`
+#ifdef _OS_WINDOWS_
+#  define DLLEXPORT __declspec(dllexport)
+#else
+# if defined(_OS_LINUX_)
+#  define DLLEXPORT __attribute__ ((visibility("protected")))
+# else
+#  define DLLEXPORT __attribute__ ((visibility("default")))
+# endif
+#endif
+
 extern "C" {
 
-JL_DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
+DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
     LLVMContext Ctx;
     PointerType *AnyTy = PointerType::get(StructType::get(Ctx), 0);
     // FIXME: get AnyTy via jl_type_to_llvm(Ctx, jl_AnyTy)
@@ -40,7 +51,7 @@ JL_DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
     return strdup(buf.c_str());
 }
 
-JL_DLLEXPORT const char *MakeLoadGlobalFunction() {
+DLLEXPORT const char *MakeLoadGlobalFunction() {
     LLVMContext Ctx;
 
     auto M = new Module("shadow", Ctx);
diff --git a/src/macroexpand.scm b/src/macroexpand.scm
index 5e55c7bbb29c1..34414d24bde75 100644
--- a/src/macroexpand.scm
+++ b/src/macroexpand.scm
@@ -271,7 +271,9 @@
   (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
   (case (car e)
     ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e))))
-    ((|::|)  `(|::| ,(recur (cadr e)) ,(other (caddr e))))
+    ((|::|)  (if (length= e 2)
+                 `(|::| ,(other (cadr e)))
+                 `(|::| ,(recur (cadr e)) ,(other (caddr e)))))
     ((call)  `(call ,(other (cadr e))
                     ,@(map (lambda (x)
                              (resolve-expansion-vars-with-new-env x env m parent-scope #t))
@@ -352,7 +354,7 @@
                                    ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg))))
                              (else
                               `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg))))))
-           ((using import export meta line inbounds boundscheck loopinfo) (map unescape e))
+           ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e))
            ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted.
            ((symboliclabel) e)
            ((symbolicgoto) e)
@@ -397,13 +399,18 @@
              ((not (length> e 2)) e)
              ((and (pair? (cadr e))
                    (eq? (caadr e) '|::|))
-              `(kw (|::|
-                    ,(if inarg
-                         (resolve-expansion-vars- (cadr (cadr e)) env m parent-scope inarg)
-                         ;; in keyword arg A=B, don't transform "A"
-                         (unescape (cadr (cadr e))))
-                    ,(resolve-expansion-vars- (caddr (cadr e)) env m parent-scope inarg))
-                   ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
+              (let* ((type-decl (cadr e)) ;; [argname]::type
+                     (argname   (and (length> type-decl 2) (cadr type-decl)))
+                     (type      (if argname (caddr type-decl) (cadr type-decl))))
+                `(kw (|::|
+                      ,@(if argname
+                            (list (if inarg
+                                      (resolve-expansion-vars- argname env m parent-scope inarg)
+                                      ;; in keyword arg A=B, don't transform "A"
+                                      (unescape argname)))
+                            '())
+                      ,(resolve-expansion-vars- type env m parent-scope inarg))
+                     ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))))
              (else
               `(kw ,(if inarg
                         (resolve-expansion-vars- (cadr e) env m parent-scope inarg)
diff --git a/src/method.c b/src/method.c
index 48b074e800904..d8cd1c30a94e1 100644
--- a/src/method.c
+++ b/src/method.c
@@ -73,23 +73,24 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
     }
     else if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
-        if (e->head == global_sym && binding_effects) {
+        if (e->head == jl_global_sym && binding_effects) {
             // execute the side-effects of "global x" decl immediately:
             // creates uninitialized mutable binding in module for each global
             jl_toplevel_eval_flex(module, expr, 0, 1);
             expr = jl_nothing;
         }
-        if (jl_is_toplevel_only_expr(expr) || e->head == const_sym ||
-            e->head == coverageeffect_sym || e->head == copyast_sym ||
-            e->head == quote_sym || e->head == inert_sym ||
-            e->head == meta_sym || e->head == inbounds_sym ||
-            e->head == boundscheck_sym || e->head == loopinfo_sym ||
-            e->head == aliasscope_sym || e->head == popaliasscope_sym) {
+        if (jl_is_toplevel_only_expr(expr) || e->head == jl_const_sym ||
+            e->head == jl_coverageeffect_sym || e->head == jl_copyast_sym ||
+            e->head == jl_quote_sym || e->head == jl_inert_sym ||
+            e->head == jl_meta_sym || e->head == jl_inbounds_sym ||
+            e->head == jl_boundscheck_sym || e->head == jl_loopinfo_sym ||
+            e->head == jl_aliasscope_sym || e->head == jl_popaliasscope_sym ||
+            e->head == jl_inline_sym || e->head == jl_noinline_sym) {
             // ignore these
         }
         else {
             size_t i = 0, nargs = jl_array_len(e->args);
-            if (e->head == opaque_closure_method_sym) {
+            if (e->head == jl_opaque_closure_method_sym) {
                 if (nargs != 4) {
                     jl_error("opaque_closure_method: invalid syntax");
                 }
@@ -102,7 +103,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 }
                 return (jl_value_t*)jl_make_opaque_closure_method(module, name, nargs, functionloc, (jl_code_info_t*)ci);
             }
-            if (e->head == cfunction_sym) {
+            if (e->head == jl_cfunction_sym) {
                 JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc)
                 jl_value_t *typ = jl_exprarg(e, 0);
                 if (!jl_is_type(typ))
@@ -144,7 +145,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
                 return expr;
             }
-            if (e->head == foreigncall_sym) {
+            if (e->head == jl_foreigncall_sym) {
                 JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, cc, narg)
                 jl_value_t *rt = jl_exprarg(e, 1);
                 jl_value_t *at = jl_exprarg(e, 2);
@@ -179,14 +180,14 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 jl_exprargset(e, 0, resolve_globals(jl_exprarg(e, 0), module, sparam_vals, binding_effects, 1));
                 i++;
             }
-            if (e->head == method_sym || e->head == module_sym) {
+            if (e->head == jl_method_sym || e->head == jl_module_sym) {
                 i++;
             }
             for (; i < nargs; i++) {
                 // TODO: this should be making a copy, not mutating the source
                 jl_exprargset(e, i, resolve_globals(jl_exprarg(e, i), module, sparam_vals, binding_effects, eager_resolve));
             }
-            if (e->head == call_sym && jl_expr_nargs(e) == 3 &&
+            if (e->head == jl_call_sym && jl_expr_nargs(e) == 3 &&
                     jl_is_globalref(jl_exprarg(e, 0)) &&
                     jl_is_globalref(jl_exprarg(e, 1)) &&
                     jl_is_quotenode(jl_exprarg(e, 2))) {
@@ -204,13 +205,15 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (fe_mod->istopmod && !strcmp(jl_symbol_name(fe_sym), "getproperty") && jl_is_symbol(s)) {
                     if (eager_resolve || jl_binding_resolved_p(me_mod, me_sym)) {
                         jl_binding_t *b = jl_get_binding(me_mod, me_sym);
-                        if (b && b->constp && b->value && jl_is_module(b->value)) {
-                            return jl_module_globalref((jl_module_t*)b->value, (jl_sym_t*)s);
+                        if (b && b->constp) {
+                            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+                            if (v && jl_is_module(v))
+                                return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
                         }
                     }
                 }
             }
-            if (e->head == call_sym && nargs > 0 &&
+            if (e->head == jl_call_sym && nargs > 0 &&
                     jl_is_globalref(jl_exprarg(e, 0))) {
                 // TODO: this hack should be deleted once llvmcall is fixed
                 jl_value_t *fe = jl_exprarg(e, 0);
@@ -219,7 +222,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (jl_binding_resolved_p(fe_mod, fe_sym)) {
                     // look at some known called functions
                     jl_binding_t *b = jl_get_binding(fe_mod, fe_sym);
-                    if (b && b->constp && b->value == jl_builtin_tuple) {
+                    if (b && b->constp && jl_atomic_load_relaxed(&b->value) == jl_builtin_tuple) {
                         size_t j;
                         for (j = 1; j < nargs; j++) {
                             if (!jl_is_quotenode(jl_exprarg(e, j)))
@@ -243,7 +246,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
     return expr;
 }
 
-void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
+JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
                               int binding_effects)
 {
     size_t i, l = jl_array_len(stmts);
@@ -253,6 +256,11 @@ void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *spar
     }
 }
 
+// Return the first element of the argument array of the Expr `expr`.
+jl_value_t *expr_arg1(jl_value_t *expr) {
+    return jl_array_ptr_ref(((jl_expr_t*)expr)->args, 0);
+}
+
 // copy a :lambda Expr into its CodeInfo representation,
 // including popping of known meta nodes
 static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
@@ -274,21 +282,32 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     jl_gc_wb(li, li->code);
     size_t n = jl_array_len(body);
     jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code);
+    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n);
+    jl_gc_wb(li, li->ssaflags);
+    int inbounds_depth = 0; // number of stacked inbounds
+    // isempty(inline_flags): no user annotation
+    // last(inline_flags) == 1: inline region
+    // last(inline_flags) == 0: noinline region
+    arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
     for (j = 0; j < n; j++) {
         jl_value_t *st = bd[j];
-        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) {
+        int is_flag_stmt = 0;
+        // check :meta expression
+        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_meta_sym) {
             size_t k, ins = 0, na = jl_expr_nargs(st);
             jl_array_t *meta = ((jl_expr_t*)st)->args;
             for (k = 0; k < na; k++) {
                 jl_value_t *ma = jl_array_ptr_ref(meta, k);
-                if (ma == (jl_value_t*)pure_sym)
+                if (ma == (jl_value_t*)jl_pure_sym)
                     li->pure = 1;
-                else if (ma == (jl_value_t*)inline_sym)
+                else if (ma == (jl_value_t*)jl_inline_sym)
                     li->inlineable = 1;
-                else if (ma == (jl_value_t*)propagate_inbounds_sym)
+                else if (ma == (jl_value_t*)jl_propagate_inbounds_sym)
                     li->propagate_inbounds = 1;
-                else if (ma == (jl_value_t*)aggressive_constprop_sym)
-                    li->aggressive_constprop = 1;
+                else if (ma == (jl_value_t*)jl_aggressive_constprop_sym)
+                    li->constprop = 1;
+                else if (ma == (jl_value_t*)jl_no_constprop_sym)
+                    li->constprop = 2;
                 else
                     jl_array_ptr_set(meta, ins++, ma);
             }
@@ -297,10 +316,60 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
             else
                 jl_array_del_end(meta, na - ins);
         }
-        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == return_sym) {
+        // check other flag expressions
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_inbounds_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true)       // push
+                inbounds_depth += 1;
+            else if (arg1 == (jl_value_t*)jl_false) // clear
+                inbounds_depth = 0;
+            else if (inbounds_depth > 0)            // pop
+                inbounds_depth -= 1;
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_inline_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true) // enter inline region
+                arraylist_push(inline_flags, (void*)1);
+            else {                            // exit inline region
+                assert(arg1 == (jl_value_t*)jl_false);
+                arraylist_pop(inline_flags);
+            }
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_noinline_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true) // enter noinline region
+                arraylist_push(inline_flags, (void*)0);
+            else {                             // exit noinline region
+                assert(arg1 == (jl_value_t*)jl_false);
+                arraylist_pop(inline_flags);
+            }
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) {
             jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0)));
         }
+
+        if (is_flag_stmt)
+            jl_array_uint8_set(li->ssaflags, j, 0);
+        else {
+            uint8_t flag = 0;
+            if (inbounds_depth > 0)
+                flag |= 1 << 0;
+            if (inline_flags->len > 0) {
+                void* inline_flag = inline_flags->items[inline_flags->len - 1];
+                flag |= 1 << (inline_flag ? 1 : 2);
+            }
+            jl_array_uint8_set(li->ssaflags, j, flag);
+        }
     }
+    assert(inline_flags->len == 0); // malformed otherwise
+    arraylist_free(inline_flags);
+    free(inline_flags);
     jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1);
     jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0);
     size_t nslots = jl_array_len(vis);
@@ -313,7 +382,6 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     jl_gc_wb(li, li->slotflags);
     li->ssavaluetypes = jl_box_long(nssavalue);
     jl_gc_wb(li, li->ssavaluetypes);
-    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, 0);
 
     // Flags that need to be copied to slotflags
     const uint8_t vinfo_mask = 8 | 16 | 32 | 64;
@@ -323,14 +391,14 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
         jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(vi, 0);
         assert(jl_is_symbol(name));
         char *str = jl_symbol_name(name);
-        if (i > 0 && name != unused_sym) {
+        if (i > 0 && name != jl_unused_sym) {
             if (str[0] == '#') {
                 // convention for renamed variables: #...#original_name
                 char *nxt = strchr(str + 1, '#');
                 if (nxt)
                     name = jl_symbol(nxt+1);
                 else if (str[1] == 's')  // compiler-generated temporaries, #sXXX
-                    name = empty_sym;
+                    name = jl_empty_sym;
             }
         }
         jl_array_ptr_set(li->slotnames, i, name);
@@ -350,7 +418,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
     li->uninferred = NULL;
     li->backedges = NULL;
     li->callbacks = NULL;
-    li->cache = NULL;
+    jl_atomic_store_relaxed(&li->cache, NULL);
     li->inInference = 0;
     return li;
 }
@@ -379,7 +447,7 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
     src->propagate_inbounds = 0;
     src->pure = 0;
     src->edges = jl_nothing;
-    src->aggressive_constprop = 0;
+    src->constprop = 0;
     return src;
 }
 
@@ -491,7 +559,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
             func = jl_expand_and_resolve(ex, def->module, linfo->sparam_vals);
 
             if (!jl_is_code_info(func)) {
-                if (jl_is_expr(func) && ((jl_expr_t*)func)->head == error_sym) {
+                if (jl_is_expr(func) && ((jl_expr_t*)func)->head == jl_error_sym) {
                     ct->ptls->in_pure_callback = 0;
                     jl_toplevel_eval(def->module, (jl_value_t*)func);
                 }
@@ -503,7 +571,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
         // correctness of method identity
         for (int i = 0; i < jl_array_len(func->code); ++i) {
             jl_value_t *stmt = jl_array_ptr_ref(func->code, i);
-            if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == new_opaque_closure_sym) {
+            if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) {
                 linfo->uninferred = jl_copy_ast((jl_value_t*)func);
                 jl_gc_wb(linfo, linfo->uninferred);
                 break;
@@ -552,7 +620,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     int gen_only = 0;
     for (j = 1; j < m->nargs && j <= sizeof(m->nospecialize) * 8; j++) {
         jl_value_t *ai = jl_array_ptr_ref(src->slotnames, j);
-        if (ai == (jl_value_t*)unused_sym) {
+        if (ai == (jl_value_t*)jl_unused_sym) {
             // TODO: enable this. currently it triggers a bug on arguments like
             // ::Type{>:Missing}
             //int sn = j-1;
@@ -566,7 +634,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     }
     m->called = called;
     m->pure = src->pure;
-    m->aggressive_constprop = src->aggressive_constprop;
+    m->constprop = src->constprop;
     jl_add_function_name_to_lineinfo(src, (jl_value_t*)m->name);
 
     jl_array_t *copy = NULL;
@@ -578,9 +646,9 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     copy = jl_alloc_vec_any(n);
     for (i = 0; i < n; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts, i);
-        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) {
+        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_meta_sym) {
             size_t nargs = jl_expr_nargs(st);
-            if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)nospecialize_sym) {
+            if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_nospecialize_sym) {
                 if (nargs == 1) // bare `@nospecialize` is special: it prevents specialization on all args
                     m->nospecialize = -1;
                 size_t j;
@@ -604,12 +672,12 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                 }
                 st = jl_nothing;
             }
-            else if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)specialize_sym) {
+            else if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_specialize_sym) {
                 if (nargs == 1) // bare `@specialize` is special: it causes specialization on all args
                     m->nospecialize = 0;
                 st = jl_nothing;
             }
-            else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)generated_sym) {
+            else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_sym) {
                 m->generator = NULL;
                 jl_value_t *gexpr = jl_exprarg(st, 1);
                 if (jl_expr_nargs(gexpr) == 7) {
@@ -626,7 +694,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                 }
                 st = jl_nothing;
             }
-            else if (nargs == 1 && jl_exprarg(st, 0) == (jl_value_t*)generated_only_sym) {
+            else if (nargs == 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_only_sym) {
                 gen_only = 1;
                 st = jl_nothing;
             }
@@ -658,8 +726,8 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     jl_task_t *ct = jl_current_task;
     jl_method_t *m =
         (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type);
-    m->specializations = jl_emptysvec;
-    m->speckeyset = (jl_array_t*)jl_an_empty_vec_any;
+    jl_atomic_store_relaxed(&m->specializations, jl_emptysvec);
+    jl_atomic_store_relaxed(&m->speckeyset, (jl_array_t*)jl_an_empty_vec_any);
     m->sig = NULL;
     m->slot_syms = NULL;
     m->roots = NULL;
@@ -667,22 +735,22 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     m->module = module;
     m->external_mt = NULL;
     m->source = NULL;
-    m->unspecialized = NULL;
+    jl_atomic_store_relaxed(&m->unspecialized, NULL);
     m->generator = NULL;
     m->name = NULL;
-    m->file = empty_sym;
+    m->file = jl_empty_sym;
     m->line = 0;
     m->called = 0xff;
     m->nospecialize = module->nospecialize;
     m->nkw = 0;
-    m->invokes = NULL;
+    jl_atomic_store_relaxed(&m->invokes, NULL);
     m->recursion_relation = NULL;
     m->isva = 0;
     m->nargs = 0;
     m->primary_world = 1;
     m->deleted_world = ~(size_t)0;
     m->is_for_opaque_closure = 0;
-    m->aggressive_constprop = 0;
+    m->constprop = 0;
     JL_MUTEX_INIT(&m->writelock);
     return m;
 }
@@ -708,7 +776,7 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name
     m->nargs = jl_unbox_long(nargs) + 1;
     assert(jl_is_linenode(functionloc));
     jl_value_t *file = jl_linenode_file(functionloc);
-    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : empty_sym;
+    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym;
     m->line = jl_linenode_line(functionloc);
     jl_method_set_source(m, ci);
     JL_GC_POP();
@@ -718,24 +786,25 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name
 // empty generic function def
 JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
                                                  jl_module_t *module,
-                                                 jl_value_t **bp, jl_value_t *bp_owner,
+                                                 _Atomic(jl_value_t*) *bp,
+                                                 jl_value_t *bp_owner,
                                                  jl_binding_t *bnd)
 {
     jl_value_t *gf = NULL;
 
     assert(name && bp);
-    if (bnd && bnd->value != NULL && !bnd->constp)
+    if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp)
         jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(bnd->name));
-    if (*bp != NULL) {
-        gf = *bp;
+    gf = jl_atomic_load_relaxed(bp);
+    if (gf != NULL) {
         if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
             jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
     }
     if (bnd)
         bnd->constp = 1;
-    if (*bp == NULL) {
+    if (gf == NULL) {
         gf = (jl_value_t*)jl_new_generic_function(name, module);
-        *bp = gf;
+        jl_atomic_store(bp, gf); // TODO: fix constp assignment data race
         if (bp_owner) jl_gc_wb(bp_owner, gf);
     }
     return gf;
@@ -831,7 +900,7 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
 
     // TODO: derive our debug name from the syntax instead of the type
     name = mt->name;
-    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt) {
+    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || external_mt) {
         // our value for `name` is bad, try to guess what the syntax might have had,
         // like `jl_static_show_func_sig` might have come up with
         jl_datatype_t *dt = jl_first_argument_datatype(argtype);
@@ -861,7 +930,7 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
     m->nargs = nargs;
     assert(jl_is_linenode(functionloc));
     jl_value_t *file = jl_linenode_file(functionloc);
-    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : empty_sym;
+    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym;
     m->line = jl_linenode_line(functionloc);
     jl_method_set_source(m, f);
 
@@ -877,7 +946,7 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
         jl_value_t *elt = jl_svecref(atypes, i);
         if (!jl_is_type(elt) && !jl_is_typevar(elt) && !jl_is_vararg(elt)) {
             jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(f->slotnames, i);
-            if (argname == unused_sym)
+            if (argname == jl_unused_sym)
                 jl_exceptionf(jl_argumenterror_type,
                               "invalid type for argument number %d in method definition for %s at %s:%d",
                               i,
diff --git a/src/module.c b/src/module.c
index 4120b6cb9225d..bb7d08e8bd5b4 100644
--- a/src/module.c
+++ b/src/module.c
@@ -11,7 +11,7 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t using_core)
+JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t default_names)
 {
     jl_task_t *ct = jl_current_task;
     const jl_uuid_t uuid_zero = {0, 0};
@@ -36,11 +36,13 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t using_core)
     htable_new(&m->bindings, 0);
     arraylist_new(&m->usings, 0);
     JL_GC_PUSH1(&m);
-    if (jl_core_module && using_core) {
+    if (jl_core_module && default_names) {
         jl_module_using(m, jl_core_module);
     }
     // export own name, so "using Foo" makes "Foo" itself visible
-    jl_set_const(m, name, (jl_value_t*)m);
+    if (default_names) {
+        jl_set_const(m, name, (jl_value_t*)m);
+    }
     jl_module_export(m, name);
     JL_GC_POP();
     return m;
@@ -56,10 +58,10 @@ uint32_t jl_module_next_counter(jl_module_t *m)
     return jl_atomic_fetch_add(&m->counter, 1);
 }
 
-JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports, uint8_t using_core)
+JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports, uint8_t default_names)
 {
     // TODO: should we prohibit this during incremental compilation?
-    jl_module_t *m = jl_new_module_(name, using_core);
+    jl_module_t *m = jl_new_module_(name, default_names);
     JL_GC_PUSH1(&m);
     m->parent = jl_main_module; // TODO: this is a lie
     jl_gc_wb(m, m->parent);
@@ -186,7 +188,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT,
 // Hash tables don't generically root their contents, but they do for bindings.
 // Express this to the analyzer.
 // NOTE: Must hold m->lock while calling these.
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 jl_binding_t *_jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT;
 jl_binding_t **_jl_get_module_binding_bp(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT;
 #else
@@ -262,7 +264,7 @@ static jl_binding_t *jl_get_binding_(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t
 
 static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
 // The analyzer doesn't like looking through the arraylist, so just model the
 // access for it using this function
 static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT {
@@ -380,12 +382,17 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var)
         JL_UNLOCK(&m->lock);
         return jl_new_struct(jl_globalref_type, m, var);
     }
-    if (b->globalref == NULL) {
-        b->globalref = jl_new_struct(jl_globalref_type, m, var);
-        jl_gc_wb(m, b->globalref);
+    jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref);
+    if (globalref == NULL) {
+        jl_value_t *newref = jl_new_struct(jl_globalref_type, m, var);
+        if (jl_atomic_cmpswap_relaxed(&b->globalref, &globalref, newref)) {
+            JL_GC_PROMISE_ROOTED(newref);
+            globalref = newref;
+            jl_gc_wb(m, globalref);
+        }
     }
-    JL_UNLOCK(&m->lock);
-    return b->globalref;
+    JL_UNLOCK(&m->lock); // may GC
+    return globalref;
 }
 
 static int eq_bindings(jl_binding_t *a, jl_binding_t *b)
@@ -640,7 +647,8 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var
     jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
     if (bp->value == NULL) {
         uint8_t constp = 0;
-        if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
+        // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
+        if (constp = bp->constp, bp->constp = 1, constp == 0) {
             jl_value_t *old = NULL;
             if (jl_atomic_cmpswap(&bp->value, &old, val)) {
                 jl_gc_wb_binding(bp, val);
@@ -774,7 +782,7 @@ JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs) JL_NOT
         if (jl_egal(rhs, old))
             return;
         if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs)) {
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
             jl_errorf("invalid redefinition of constant %s",
                       jl_symbol_name(b->name));
 #endif
@@ -825,9 +833,10 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported)
                  (imported && b->imported) ||
                  (b->owner == m && !b->imported && (all || m == jl_main_module))) &&
                 (all || (!b->deprecated && !hidden))) {
+                jl_sym_t *in_module_name = (jl_sym_t*)table[i-1]; // the name in the module may not be b->name, use the htable key instead
                 jl_array_grow_end(a, 1);
                 //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
-                jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)b->name);
+                jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)in_module_name);
             }
         }
     }
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index 4a23c604b079d..8dcedd5b1529d 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -1,3 +1,5 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
 #include "julia.h"
 #include "julia_internal.h"
 
@@ -39,7 +41,7 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *isv
     oc->invoke = (jl_fptr_args_t)jl_invoke_opaque_closure;
     oc->specptr = NULL;
     oc->captures = captures;
-    oc->world = jl_world_counter;
+    oc->world = jl_atomic_load_acquire(&jl_world_counter);
     return oc;
 }
 
diff --git a/src/options.h b/src/options.h
index bb56e0c41c7c0..36f34654b2bd0 100644
--- a/src/options.h
+++ b/src/options.h
@@ -1,5 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include "platform.h"
+
 #ifndef JL_OPTIONS_H
 #define JL_OPTIONS_H
 
@@ -158,23 +160,19 @@
 
 // sanitizer defaults ---------------------------------------------------------
 
-#ifndef JULIA_H
-#error "Must be included after julia.h"
-#endif
-
 // Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers
-#if defined(JL_ASAN_ENABLED) || defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
 #define MEMDEBUG
 #define KEEP_BODIES
 #endif
 
 // TSAN doesn't like COPY_STACKS
-#if defined(JL_TSAN_ENABLED) && defined(COPY_STACKS)
+#if defined(_COMPILER_TSAN_ENABLED_) && defined(COPY_STACKS)
 #undef COPY_STACKS
 #endif
 
 // Memory sanitizer needs TLS, which llvm only supports for the small memory model
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
 // todo: fix the llvm MemoryManager to work with small memory model
 #endif
 
diff --git a/src/partr.c b/src/partr.c
index c3de56b80cc92..048a841158153 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -17,6 +17,9 @@ extern "C" {
 
 // thread sleep state
 
+// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD
+uint64_t sleep_threshold;
+
 // thread should not be sleeping--it might need to do work.
 static const int16_t not_sleeping = 0;
 
@@ -36,6 +39,8 @@ uint64_t io_wakeup_enter;
 uint64_t io_wakeup_leave;
 );
 
+uv_mutex_t *sleep_locks;
+uv_cond_t *wake_signals;
 
 JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT
 {
@@ -57,10 +62,10 @@ extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
 
 /* a task heap */
 typedef struct taskheap_tag {
-    jl_mutex_t lock;
+    uv_mutex_t lock; // held while the heap's tasks array is modified
     jl_task_t **tasks;
-    int32_t ntasks;
-    int16_t prio;
+    _Atomic(int32_t) ntasks; // number of queued tasks; atomic because it is also read without taking `lock`
+    _Atomic(int16_t) prio; // smallest prio value currently queued (INT16_MAX when empty); read without `lock` when choosing a heap
 } taskheap_t;
 
 /* multiqueue parameters */
@@ -83,10 +88,10 @@ static inline void multiq_init(void)
     heap_p = heap_c * jl_n_threads;
     heaps = (taskheap_t *)calloc(heap_p, sizeof(taskheap_t));
     for (int32_t i = 0; i < heap_p; ++i) {
-        jl_mutex_init(&heaps[i].lock);
+        uv_mutex_init(&heaps[i].lock);
         heaps[i].tasks = (jl_task_t **)calloc(tasks_per_heap, sizeof(jl_task_t*));
-        heaps[i].ntasks = 0;
-        heaps[i].prio = INT16_MAX;
+        jl_atomic_store_relaxed(&heaps[i].ntasks, 0);
+        jl_atomic_store_relaxed(&heaps[i].prio, INT16_MAX);
     }
     unbias_cong(heap_p, &cong_unbias);
 }
@@ -108,12 +113,12 @@ static inline void sift_up(taskheap_t *heap, int32_t idx)
 
 static inline void sift_down(taskheap_t *heap, int32_t idx)
 {
-    if (idx < heap->ntasks) {
+    if (idx < jl_atomic_load_relaxed(&heap->ntasks)) {
         for (int32_t child = heap_d*idx + 1;
                 child < tasks_per_heap && child <= heap_d*idx + heap_d;
                 ++child) {
             if (heap->tasks[child]
-                    &&  heap->tasks[child]->prio < heap->tasks[idx]->prio) {
+                    && heap->tasks[child]->prio < heap->tasks[idx]->prio) {
                 jl_task_t *t = heap->tasks[idx];
                 heap->tasks[idx] = heap->tasks[child];
                 heap->tasks[child] = t;
@@ -132,20 +137,22 @@ static inline int multiq_insert(jl_task_t *task, int16_t priority)
     task->prio = priority;
     do {
         rn = cong(heap_p, cong_unbias, &ptls->rngseed);
-    } while (!jl_mutex_trylock_nogc(&heaps[rn].lock));
+    } while (uv_mutex_trylock(&heaps[rn].lock) != 0);
 
-    if (heaps[rn].ntasks >= tasks_per_heap) {
-        jl_mutex_unlock_nogc(&heaps[rn].lock);
+    if (jl_atomic_load_relaxed(&heaps[rn].ntasks) >= tasks_per_heap) {
+        uv_mutex_unlock(&heaps[rn].lock);
         // multiq insertion failed, increase #tasks per heap
         return -1;
     }
 
-    heaps[rn].tasks[heaps[rn].ntasks++] = task;
-    sift_up(&heaps[rn], heaps[rn].ntasks-1);
-    int16_t prio = jl_atomic_load(&heaps[rn].prio);
+    int32_t ntasks = jl_atomic_load_relaxed(&heaps[rn].ntasks);
+    jl_atomic_store_relaxed(&heaps[rn].ntasks, ntasks + 1);
+    heaps[rn].tasks[ntasks] = task;
+    sift_up(&heaps[rn], ntasks);
+    int16_t prio = jl_atomic_load_relaxed(&heaps[rn].prio);
     if (task->prio < prio)
-        jl_atomic_store(&heaps[rn].prio, task->prio);
-    jl_mutex_unlock_nogc(&heaps[rn].lock);
+        jl_atomic_store_relaxed(&heaps[rn].prio, task->prio);
+    uv_mutex_unlock(&heaps[rn].lock);
 
     return 0;
 }
@@ -163,18 +170,18 @@ static inline jl_task_t *multiq_deletemin(void)
     for (i = 0; i < heap_p; ++i) {
         rn1 = cong(heap_p, cong_unbias, &ptls->rngseed);
         rn2 = cong(heap_p, cong_unbias, &ptls->rngseed);
-        prio1 = jl_atomic_load(&heaps[rn1].prio);
-        prio2 = jl_atomic_load(&heaps[rn2].prio);
+        prio1 = jl_atomic_load_relaxed(&heaps[rn1].prio);
+        prio2 = jl_atomic_load_relaxed(&heaps[rn2].prio);
         if (prio1 > prio2) {
             prio1 = prio2;
             rn1 = rn2;
         }
         else if (prio1 == prio2 && prio1 == INT16_MAX)
             continue;
-        if (jl_mutex_trylock_nogc(&heaps[rn1].lock)) {
-            if (prio1 == heaps[rn1].prio)
+        if (uv_mutex_trylock(&heaps[rn1].lock) == 0) {
+            if (prio1 == jl_atomic_load_relaxed(&heaps[rn1].prio))
                 break;
-            jl_mutex_unlock_nogc(&heaps[rn1].lock);
+            uv_mutex_unlock(&heaps[rn1].lock);
         }
     }
     if (i == heap_p)
@@ -182,18 +189,20 @@ static inline jl_task_t *multiq_deletemin(void)
 
     task = heaps[rn1].tasks[0];
     if (!jl_set_task_tid(task, ptls->tid)) {
-        jl_mutex_unlock_nogc(&heaps[rn1].lock);
+        uv_mutex_unlock(&heaps[rn1].lock);
         goto retry;
     }
-    heaps[rn1].tasks[0] = heaps[rn1].tasks[--heaps[rn1].ntasks];
-    heaps[rn1].tasks[heaps[rn1].ntasks] = NULL;
+    int32_t ntasks = jl_atomic_load_relaxed(&heaps[rn1].ntasks) - 1;
+    jl_atomic_store_relaxed(&heaps[rn1].ntasks, ntasks);
+    heaps[rn1].tasks[0] = heaps[rn1].tasks[ntasks];
+    heaps[rn1].tasks[ntasks] = NULL;
     prio1 = INT16_MAX;
-    if (heaps[rn1].ntasks > 0) {
+    if (ntasks > 0) {
         sift_down(&heaps[rn1], 0);
         prio1 = heaps[rn1].tasks[0]->prio;
     }
-    jl_atomic_store(&heaps[rn1].prio, prio1);
-    jl_mutex_unlock_nogc(&heaps[rn1].lock);
+    jl_atomic_store_relaxed(&heaps[rn1].prio, prio1);
+    uv_mutex_unlock(&heaps[rn1].lock);
 
     return task;
 }
@@ -203,7 +212,7 @@ void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp
 {
     int32_t i, j;
     for (i = 0; i < heap_p; ++i)
-        for (j = 0; j < heaps[i].ntasks; ++j)
+        for (j = 0; j < jl_atomic_load_relaxed(&heaps[i].ntasks); ++j)
             jl_gc_mark_queue_obj_explicit(gc_cache, sp, (jl_value_t *)heaps[i].tasks[j]);
 }
 
@@ -212,7 +221,7 @@ static int multiq_check_empty(void)
 {
     int32_t i;
     for (i = 0; i < heap_p; ++i) {
-        if (heaps[i].ntasks != 0)
+        if (jl_atomic_load_relaxed(&heaps[i].ntasks) != 0)
             return 0;
     }
     return 1;
@@ -224,15 +233,31 @@ static int multiq_check_empty(void)
 // ---
 
 // initialize the threading infrastructure
+// (used only by the main thread); also reads the sleep threshold from the environment and sets up the per-thread sleep locks
 void jl_init_threadinginfra(void)
 {
     /* initialize the synchronization trees pool and the multiqueue */
     multiq_init();
 
+    sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD;
+    char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME);
+    if (cp) {
+        if (!strncasecmp(cp, "infinite", 8)) // case-insensitive match on the first 8 characters
+            sleep_threshold = UINT64_MAX;
+        else
+            sleep_threshold = (uint64_t)strtoull(cp, NULL, 10); // parse at full 64-bit width; `long` is only 32 bits on LLP64 targets
+    }
+
     jl_ptls_t ptls = jl_current_task->ptls;
     jl_install_thread_signal_handler(ptls);
-    uv_mutex_init(&ptls->sleep_lock);
-    uv_cond_init(&ptls->wake_signal);
+
+    int16_t tid;
+    sleep_locks = (uv_mutex_t*)calloc(jl_n_threads, sizeof(uv_mutex_t)); // one lock per thread, indexed by tid
+    wake_signals = (uv_cond_t*)calloc(jl_n_threads, sizeof(uv_cond_t)); // one condition variable per thread, indexed by tid
+    for (tid = 0; tid < jl_n_threads; tid++) {
+        uv_mutex_init(&sleep_locks[tid]);
+        uv_cond_init(&wake_signals[tid]);
+    }
 }
 
 
@@ -247,13 +272,11 @@ void jl_threadfun(void *arg)
     jl_ptls_t ptls = jl_init_threadtls(targ->tid);
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(0, &stack_lo, &stack_hi);
-    jl_init_root_task(ptls, stack_lo, stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
     jl_install_thread_signal_handler(ptls);
 
-    // set up sleep mechanism for this thread
-    uv_mutex_init(&ptls->sleep_lock);
-    uv_cond_init(&ptls->wake_signal);
-
     // wait for all threads
     jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0);
     uv_barrier_wait(targ->barrier);
@@ -262,7 +285,7 @@ void jl_threadfun(void *arg)
     free(targ);
 
     (void)jl_gc_unsafe_enter(ptls);
-    jl_finish_task(jl_current_task); // noreturn
+    jl_finish_task(ct); // noreturn
 }
 
 
@@ -280,22 +303,20 @@ int jl_running_under_rr(int recheck)
 #ifdef _OS_LINUX_
 #define RR_CALL_BASE 1000
 #define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
-    static int checked_running_under_rr = 0;
-    static int is_running_under_rr = 0;
-    if (!checked_running_under_rr || recheck) {
+    static _Atomic(int) is_running_under_rr = 0; // cached result: 0 = not checked yet, 1 = running under rr, 2 = not under rr
+    int rr = jl_atomic_load_relaxed(&is_running_under_rr);
+    if (rr == 0 || recheck) {
         int ret = syscall(SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0);
-        if (ret == -1) {
+        if (ret == -1)
             // Should always be ENOSYS, but who knows what people do for
             // unknown syscalls with their seccomp filters, so just say
             // that we don't have rr.
-            is_running_under_rr = 0;
-        }
-        else {
-            is_running_under_rr = 1;
-        }
-        checked_running_under_rr = 1;
+            rr = 2;
+        else
+            rr = 1;
+        jl_atomic_store_relaxed(&is_running_under_rr, rr); // publish the result so later calls skip the syscall
     }
-    return is_running_under_rr;
+    return rr == 1;
 #else
     return 0;
 #endif
@@ -319,7 +340,7 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
         return 0;
     }
     uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles);
-    if (elapsed_cycles >= DEFAULT_THREAD_SLEEP_THRESHOLD) {
+    if (elapsed_cycles >= sleep_threshold) {
         *start_cycles = 0;
         return 1;
     }
@@ -330,12 +351,12 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
 static void wake_thread(int16_t tid)
 {
     jl_ptls_t other = jl_all_tls_states[tid];
-    uint8_t state = sleeping;
+    int8_t state = sleeping; // expected value for the compare-and-swap below
     jl_atomic_cmpswap(&other->sleep_check_state, &state, not_sleeping);
     if (state == sleeping) {
-        uv_mutex_lock(&other->sleep_lock);
-        uv_cond_signal(&other->wake_signal);
-        uv_mutex_unlock(&other->sleep_lock);
+        uv_mutex_lock(&sleep_locks[tid]); // the target thread's lock, taken from the global array indexed by tid
+        uv_cond_signal(&wake_signals[tid]); // signal the condition variable that thread waits on while sleeping
+        uv_mutex_unlock(&sleep_locks[tid]);
     }
 }
 
@@ -350,24 +371,24 @@ static void wake_libuv(void)
 /* ensure thread tid is awake if necessary */
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid)
 {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    jl_thread_t uvlock = jl_atomic_load(&jl_uv_mutex.owner);
-    int16_t self = ptls->tid;
-    jl_thread_t system_self = jl_all_tls_states[self]->system_id;
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    jl_task_t *uvlock = jl_atomic_load(&jl_uv_mutex.owner);
+    int16_t self = jl_atomic_load_relaxed(&ct->tid);
     JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
     if (tid == self || tid == -1) {
         // we're already awake, but make sure we'll exit uv_run
         if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping)
             jl_atomic_store(&ptls->sleep_check_state, not_sleeping);
-        if (uvlock == system_self)
+        if (uvlock == ct)
             uv_stop(jl_global_event_loop());
     }
     else {
         // something added to the sticky-queue: notify that thread
         wake_thread(tid);
         // check if we need to notify uv_run too
-        jl_thread_t system_tid = jl_all_tls_states[tid]->system_id;
-        if (uvlock != system_self && jl_atomic_load(&jl_uv_mutex.owner) == system_tid)
+        jl_task_t *system_tid = jl_atomic_load_relaxed(&jl_all_tls_states[tid]->current_task);
+        if (uvlock != ct && jl_atomic_load(&jl_uv_mutex.owner) == system_tid)
             wake_libuv();
     }
     // check if the other threads might be sleeping
@@ -380,7 +401,7 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid)
                 wake_thread(tid);
         }
         // check if we need to notify uv_run too
-        if (uvlock != system_self && jl_atomic_load(&jl_uv_mutex.owner) != 0)
+        if (uvlock != ct && jl_atomic_load(&jl_uv_mutex.owner) != NULL)
             wake_libuv();
     }
     JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
@@ -394,14 +415,14 @@ static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
     jl_value_t *args[2] = { trypoptask, q };
     jl_task_t *task = (jl_task_t*)jl_apply(args, 2);
     if (jl_typeis(task, jl_task_type)) {
-        int self = jl_current_task->tid;
+        int self = jl_atomic_load_relaxed(&jl_current_task->tid);
         jl_set_task_tid(task, self);
         return task;
     }
     return multiq_deletemin();
 }
 
-static int may_sleep(jl_ptls_t ptls)
+static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     // sleep_check_state is only transitioned from not_sleeping to sleeping
     // by the thread itself. As a result, if this returns false, it will
@@ -409,7 +430,7 @@ static int may_sleep(jl_ptls_t ptls)
     return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping;
 }
 
-extern volatile unsigned _threadedregion;
+extern _Atomic(unsigned) _threadedregion;
 
 JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
 {
@@ -430,7 +451,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
 
         jl_cpu_pause();
         jl_ptls_t ptls = ct->ptls;
-        if (sleep_check_after_threshold(&start_cycles) || (!_threadedregion && ptls->tid == 0)) {
+        if (sleep_check_after_threshold(&start_cycles) || (!jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0)) {
             jl_atomic_store(&ptls->sleep_check_state, sleeping); // acquire sleep-check lock
             if (!multiq_check_empty()) {
                 if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
@@ -439,7 +460,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
             }
             task = get_next_task(trypoptask, q); // WARNING: this should not yield
             if (ptls != ct->ptls)
-                continue;
+                continue; // oops, get_next_task did yield--start over
             if (task) {
                 if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
                     jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
@@ -452,7 +473,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
             // outside of threaded regions, all IO is permitted,
             // but only on thread 1
             int uvlock = 0;
-            if (_threadedregion) {
+            if (jl_atomic_load_relaxed(&_threadedregion)) {
                 uvlock = jl_mutex_trylock(&jl_uv_mutex);
             }
             else if (ptls->tid == 0) {
@@ -479,7 +500,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
                     JL_UV_UNLOCK();
                     // optimization: check again first if we may have work to do
                     if (!may_sleep(ptls)) {
-                        assert(ptls->sleep_check_state == not_sleeping);
+                        assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
                         start_cycles = 0;
                         continue;
                     }
@@ -494,7 +515,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
                         continue;
                     }
                 }
-                if (!_threadedregion && active && ptls->tid == 0) {
+                if (!jl_atomic_load_relaxed(&_threadedregion) && active && ptls->tid == 0) {
                     // thread 0 is the only thread permitted to run the event loop
                     // so it needs to stay alive
                     if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
@@ -507,13 +528,13 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
             // the other threads will just wait for on signal to resume
             JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
             int8_t gc_state = jl_gc_safe_enter(ptls);
-            uv_mutex_lock(&ptls->sleep_lock);
+            uv_mutex_lock(&sleep_locks[ptls->tid]);
             while (may_sleep(ptls)) {
-                uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
+                uv_cond_wait(&wake_signals[ptls->tid], &sleep_locks[ptls->tid]);
                 // TODO: help with gc work here, if applicable
             }
-            assert(ptls->sleep_check_state == not_sleeping);
-            uv_mutex_unlock(&ptls->sleep_lock);
+            assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
+            uv_mutex_unlock(&sleep_locks[ptls->tid]);
             JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
             jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
             start_cycles = 0;
diff --git a/src/precompile.c b/src/precompile.c
index 9f6fa1a79e8a6..df0a3aa897587 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -268,7 +268,7 @@ static void _compile_all_deq(jl_array_t *found)
         src = m->source;
         assert(src);
         // TODO: we could now enable storing inferred function pointers in the `unspecialized` cache
-        //src = jl_type_infer(mi, jl_world_counter, 1);
+        //src = jl_type_infer(mi, jl_atomic_load_acquire(&jl_world_counter), 1);
         //if (ucache->invoke != NULL)
         //    continue;
 
@@ -344,8 +344,8 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
 static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure)
 {
     jl_method_t *m = def->func.method;
-    if (m->name == jl_symbol("__init__") && jl_is_dispatch_tupletype(m->sig)) {
-        // ensure `__init__()` gets strongly-hinted, specialized, and compiled
+    if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) {
+        // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled
         jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec);
         jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
     }
@@ -387,7 +387,7 @@ static void *jl_precompile(int all)
             size_t min_world = 0;
             size_t max_world = ~(size_t)0;
             if (!jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->def.method))
-                mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_world_counter, &min_world, &max_world, 0);
+                mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0);
             if (mi)
                 jl_array_ptr_1d_push(m2, (jl_value_t*)mi);
         }
@@ -398,7 +398,7 @@ static void *jl_precompile(int all)
         }
     }
     m = NULL;
-    void *native_code = jl_create_native(m2, jl_default_cgparams, 0);
+    void *native_code = jl_create_native(m2, NULL, 0);
     JL_GC_POP();
     return native_code;
 }
diff --git a/src/processor.cpp b/src/processor.cpp
index c5e42368412e8..b9dfc2b7f0b4e 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -73,7 +73,7 @@
 //
 //     Optimize only for size. Clang's `-Oz`.
 
-bool jl_processor_print_help = false;
+JL_DLLEXPORT bool jl_processor_print_help = false;
 
 namespace {
 
diff --git a/src/processor.h b/src/processor.h
index a1509180ba24a..dbd51cc64f240 100644
--- a/src/processor.h
+++ b/src/processor.h
@@ -107,6 +107,8 @@ enum {
     JL_TARGET_OPTSIZE = 1 << 6,
     // Only optimize for size for this target
     JL_TARGET_MINSIZE = 1 << 7,
+    // Clone when the function queries CPU features
+    JL_TARGET_CLONE_CPU = 1 << 8,
 };
 
 #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) JL_FEATURE_DEF(name, bit, llvmver)
@@ -177,7 +179,7 @@ JL_DLLEXPORT int32_t jl_get_default_nans(void);
 #include <string>
 #include <vector>
 
-extern bool jl_processor_print_help;
+extern JL_DLLEXPORT bool jl_processor_print_help;
 
 /**
  * Returns the CPU name and feature string to be used by LLVM JIT.
@@ -185,14 +187,14 @@ extern bool jl_processor_print_help;
  * If the detected/specified CPU name is not available on the LLVM version specified,
  * a fallback CPU name will be used. Unsupported features will be ignored.
  */
-std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags);
+extern "C" JL_DLLEXPORT std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags);
 
 /**
  * Returns the CPU name and feature string to be used by LLVM disassembler.
  *
  * This will return a generic CPU name and a full feature string.
  */
-const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void);
+extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void);
 
 struct jl_target_spec_t {
     // LLVM target name
@@ -209,8 +211,7 @@ struct jl_target_spec_t {
 /**
  * Return the list of targets to clone
  */
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void);
+extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void);
 std::string jl_get_cpu_name_llvm(void);
 std::string jl_get_cpu_features_llvm(void);
-std::string jl_format_filename(llvm::StringRef output_pattern);
 #endif
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index a411314e34e9d..0804f77a3a889 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -1562,6 +1562,8 @@ static void ensure_jit_target(bool imaging)
         auto &t = jit_targets[i];
         if (t.en.flags & JL_TARGET_CLONE_ALL)
             continue;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
         // The most useful one in general...
         t.en.flags |= JL_TARGET_CLONE_LOOP;
 #ifdef _CPU_ARM_
diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp
index 564562f971f4a..1f314eb460f0f 100644
--- a/src/processor_fallback.cpp
+++ b/src/processor_fallback.cpp
@@ -117,7 +117,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
     return res;
 }
 
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp
index eab2c77ad91dc..f18c7069fa2c2 100644
--- a/src/processor_x86.cpp
+++ b/src/processor_x86.cpp
@@ -6,9 +6,6 @@
 
 extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
 {
-#if defined _MSC_VER
-    __cpuid(CPUInfo, InfoType);
-#else
     asm volatile (
 #if defined(__i386__) && defined(__PIC__)
         "xchg %%ebx, %%esi;"
@@ -24,14 +21,10 @@ extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
         "=d" (CPUInfo[3]) :
         "a" (InfoType)
         );
-#endif
 }
 
 extern "C" JL_DLLEXPORT void jl_cpuidex(int32_t CPUInfo[4], int32_t InfoType, int32_t subInfoType)
 {
-#if defined _MSC_VER
-    __cpuidex(CPUInfo, InfoType, subInfoType);
-#else
     asm volatile (
 #if defined(__i386__) && defined(__PIC__)
         "xchg %%ebx, %%esi;"
@@ -48,7 +41,6 @@ extern "C" JL_DLLEXPORT void jl_cpuidex(int32_t CPUInfo[4], int32_t InfoType, in
         "a" (InfoType),
         "c" (subInfoType)
         );
-#endif
 }
 
 namespace X86 {
@@ -298,13 +290,9 @@ const int SIG_AMD = 0x68747541; // Auth
 
 static uint64_t get_xcr0(void)
 {
-#if defined _MSC_VER
-    return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
     uint32_t eax, edx;
     asm volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
     return (uint64_t(edx) << 32) | eax;
-#endif
 }
 
 static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t brand_id,
@@ -889,6 +877,8 @@ static void ensure_jit_target(bool imaging)
         auto &t = jit_targets[i];
         if (t.en.flags & JL_TARGET_CLONE_ALL)
             continue;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
         // The most useful one in general...
         t.en.flags |= JL_TARGET_CLONE_LOOP;
         auto &features0 = jit_targets[t.base].en.features;
@@ -1011,21 +1001,21 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl)
     return parse_sysimg(hdl, sysimg_init_cb);
 }
 
-std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
+extern "C" JL_DLLEXPORT std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
 {
     ensure_jit_target(imaging);
     flags = jit_targets[0].en.flags;
     return get_llvm_target_vec(jit_targets[0]);
 }
 
-const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
+extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
 {
     static const auto res = get_llvm_target_str(TargetData<feature_sz>{"generic", "",
             {feature_masks, 0}, {{}, 0}, 0});
     return res;
 }
 
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
diff --git a/src/rtutils.c b/src/rtutils.c
index 67d17c39c67ec..b4432d8af3d0c 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -214,6 +214,18 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t)
         jl_type_error("typeassert", t, x);
 }
 
+#ifndef HAVE_SSP // provide the stack-protector symbols ourselves when HAVE_SSP is not defined
+JL_DLLEXPORT uintptr_t __stack_chk_guard = (uintptr_t)0xBAD57ACCBAD67ACC; // guard word; reads as 0xBADSTACKBADSTACK
+
+JL_DLLEXPORT void __stack_chk_fail(void)
+{
+    /* report the failure on stderr, then call jl_gc_debug_critical_error() before aborting */
+    fprintf(stderr, "fatal error: stack corruption detected\n");
+    jl_gc_debug_critical_error();
+    abort(); // must not return: stack corruption has already been detected, so there is nothing safe to go back to
+}
+#endif
+
 // exceptions -----------------------------------------------------------------
 
 JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
@@ -222,7 +234,7 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
     // Must have no safepoint
     eh->prev = ct->eh;
     eh->gcstack = ct->gcstack;
-    eh->gc_state = ct->ptls->gc_state;
+    eh->gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
     eh->locks_len = ct->ptls->locks.len;
     eh->defer_signal = ct->ptls->defer_signal;
     eh->world_age = ct->world_age;
@@ -250,7 +262,7 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
     // This function should **NOT** have any safepoint before the ones at the
     // end.
     sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
-    int8_t old_gc_state = ct->ptls->gc_state;
+    int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
     ct->eh = eh->prev;
     ct->gcstack = eh->gcstack;
     small_arraylist_t *locks = &ct->ptls->locks;
@@ -271,7 +283,8 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
     if (old_defer_signal && !eh->defer_signal) {
         jl_sigint_safepoint(ct->ptls);
     }
-    if (jl_gc_have_pending_finalizers && unlocks && eh->locks_len == 0) {
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) &&
+            unlocks && eh->locks_len == 0) {
         jl_gc_run_pending_finalizers(ct);
     }
 }
@@ -972,7 +985,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (vt == jl_expr_type) {
         jl_expr_t *e = (jl_expr_t*)v;
-        if (e->head == assign_sym && jl_array_len(e->args) == 2) {
+        if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) {
             n += jl_static_show_x(out, jl_exprarg(e,0), depth);
             n += jl_printf(out, " = ");
             n += jl_static_show_x(out, jl_exprarg(e,1), depth);
@@ -1001,12 +1014,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         n += jl_printf(out, ")}[");
         size_t j, tlen = jl_array_len(v);
         jl_array_t *av = (jl_array_t*)v;
-        jl_datatype_t *el_type = (jl_datatype_t*)jl_tparam0(vt);
+        jl_value_t *el_type = jl_tparam0(vt);
+        char *typetagdata = (!av->flags.ptrarray && jl_is_uniontype(el_type)) ? jl_array_typetagdata(av) : NULL;
         int nlsep = 0;
         if (av->flags.ptrarray) {
             // print arrays with newlines, unless the elements are probably small
             for (j = 0; j < tlen; j++) {
-                jl_value_t *p = jl_array_ptr_ref(av, j);
+                jl_value_t **ptr = ((jl_value_t**)av->data) + j;
+                jl_value_t *p = *ptr;
                 if (p != NULL && (uintptr_t)p >= 4096U) {
                     jl_value_t *p_ty = jl_typeof(p);
                     if ((uintptr_t)p_ty >= 4096U) {
@@ -1022,11 +1037,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             n += jl_printf(out, "\n  ");
         for (j = 0; j < tlen; j++) {
             if (av->flags.ptrarray) {
-                n += jl_static_show_x(out, jl_array_ptr_ref(v, j), depth);
+                jl_value_t **ptr = ((jl_value_t**)av->data) + j;
+                n += jl_static_show_x(out, *ptr, depth);
             }
             else {
                 char *ptr = ((char*)av->data) + j * av->elsize;
-                n += jl_static_show_x_(out, (jl_value_t*)ptr, el_type, depth);
+                n += jl_static_show_x_(out, (jl_value_t*)ptr,
+                        typetagdata ? (jl_datatype_t*)jl_nth_union_component(el_type, typetagdata[j]) : (jl_datatype_t*)el_type,
+                        depth);
             }
             if (j != tlen - 1)
                 n += jl_printf(out, nlsep ? ",\n  " : ", ");
@@ -1355,27 +1373,6 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
     JL_GC_POP();
 }
 
-#if 0
-void jl_depwarn(const char *msg, jl_value_t *sym)
-{
-    static jl_value_t *depwarn_func = NULL;
-    if (!depwarn_func && jl_base_module) {
-        depwarn_func = jl_get_global(jl_base_module, jl_symbol("depwarn"));
-    }
-    if (!depwarn_func) {
-        jl_safe_printf("WARNING: %s\n", msg);
-        return;
-    }
-    jl_value_t **depwarn_args;
-    JL_GC_PUSHARGS(depwarn_args, 3);
-    depwarn_args[0] = depwarn_func;
-    depwarn_args[1] = jl_cstr_to_string(msg);
-    depwarn_args[2] = sym;
-    jl_apply(depwarn_args, 3);
-    JL_GC_POP();
-}
-#endif
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index ba265eb67be76..02523abe73479 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -36,7 +36,7 @@ void *jl_get_library_(const char *f_lib, int throw_err)
         return jl_exe_handle;
     if (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME)
         return jl_libjulia_internal_handle;
-    if (strcmp(f_lib, JL_LIBJULIA_DL_LIBNAME) == 0)
+    if (f_lib == JL_LIBJULIA_DL_LIBNAME)
         return jl_libjulia_handle;
 #endif
     JL_LOCK(&libmap_lock);
@@ -54,7 +54,7 @@ void *jl_get_library_(const char *f_lib, int throw_err)
 }
 
 extern "C" JL_DLLEXPORT
-void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd)
+void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd)
 {
     void *handle = jl_atomic_load_acquire(hnd);
     if (!handle)
@@ -208,7 +208,7 @@ extern "C" JL_DLLEXPORT char *jl_format_filename(const char *output_pattern)
 }
 
 
-static jl_mutex_t trampoline_lock; // for accesses to the cache and freelist
+static uv_mutex_t trampoline_lock; // for accesses to the cache and freelist
 
 static void *trampoline_freelist;
 
@@ -261,14 +261,14 @@ static void trampoline_deleter(void **f)
     f[0] = NULL;
     f[2] = NULL;
     f[3] = NULL;
-    JL_LOCK_NOGC(&trampoline_lock);
+    uv_mutex_lock(&trampoline_lock);
     if (tramp)
         trampoline_free(tramp);
     if (fobj && cache)
         ptrhash_remove((htable_t*)cache, fobj);
     if (nval)
         free(nval);
-    JL_UNLOCK_NOGC(&trampoline_lock);
+    uv_mutex_unlock(&trampoline_lock);
 }
 
 typedef void *(*init_trampoline_t)(void *tramp, void **nval) JL_NOTSAFEPOINT;
@@ -288,7 +288,7 @@ jl_value_t *jl_get_cfunction_trampoline(
     jl_value_t **vals)
 {
     // lookup (fobj, vals) in cache
-    JL_LOCK_NOGC(&trampoline_lock);
+    uv_mutex_lock(&trampoline_lock);
     if (!cache->table)
         htable_new(cache, 1);
     if (fill != jl_emptysvec) {
@@ -300,7 +300,7 @@ jl_value_t *jl_get_cfunction_trampoline(
         }
     }
     void *tramp = ptrhash_get(cache, (void*)fobj);
-    JL_UNLOCK_NOGC(&trampoline_lock);
+    uv_mutex_unlock(&trampoline_lock);
     if (tramp != HT_NOTFOUND) {
         assert((jl_datatype_t*)jl_typeof(tramp) == result_type);
         return (jl_value_t*)tramp;
@@ -350,12 +350,18 @@ jl_value_t *jl_get_cfunction_trampoline(
         free(nval);
         jl_rethrow();
     }
-    JL_LOCK_NOGC(&trampoline_lock);
+    uv_mutex_lock(&trampoline_lock);
     tramp = trampoline_alloc();
     ((void**)result)[0] = tramp;
     tramp = init_trampoline(tramp, nval);
     ptrhash_put(cache, (void*)fobj, result);
-    JL_UNLOCK_NOGC(&trampoline_lock);
+    uv_mutex_unlock(&trampoline_lock);
     return result;
 }
 JL_GCC_IGNORE_STOP
+
+void jl_init_runtime_ccall(void)
+{
+    JL_MUTEX_INIT(&libmap_lock);
+    uv_mutex_init(&trampoline_lock);
+}
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 7cb58bc230294..d64e1945c52a1 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -14,6 +14,219 @@
 
 const unsigned int host_char_bit = 8;
 
+// float16 intrinsics
+// TODO: use LLVM's compiler-rt on all platforms (Xcode already links compiler-rt)
+
+#if !defined(_OS_DARWIN_)
+
+static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT
+{
+    uint32_t sign = (ival & 0x8000) >> 15; // bit 15
+    uint32_t exp = (ival & 0x7c00) >> 10; // bits 10-14 (5-bit exponent field)
+    uint32_t sig = (ival & 0x3ff) >> 0; // bits 0-9 (10-bit significand field)
+    uint32_t ret;
+
+    if (exp == 0) { // zero or subnormal input
+        if (sig == 0) { // signed zero: only the sign bit survives
+            sign = sign << 31;
+            ret = sign | exp | sig;
+        }
+        else { // subnormal: renormalize for the float result
+            int n_bit = 1;
+            uint16_t bit = 0x0200; // scan down from the top significand bit
+            while ((bit & sig) == 0) {
+                n_bit = n_bit + 1;
+                bit = bit >> 1;
+            }
+            sign = sign << 31;
+            exp = ((-14 - n_bit + 127) << 23);
+            sig = ((sig & (~bit)) << n_bit) << (23 - 10); // drop the leading 1 (now implicit), left-align the rest
+            ret = sign | exp | sig;
+        }
+    }
+    else if (exp == 0x1f) {
+        if (sig == 0) { // Inf
+            if (sign == 0)
+                ret = 0x7f800000;
+            else
+                ret = 0xff800000;
+        }
+        else // NaN
+            ret = 0x7fc00000 | (sign << 31) | (sig << (23 - 10)); // 0x7fc00000 sets the exponent and top mantissa bit
+    }
+    else { // normal number
+        sign = sign << 31;
+        exp = ((exp - 15 + 127) << 23); // rebias exponent from 15 to 127
+        sig = sig << (23 - 10); // widen the 10-bit significand field to 23 bits
+        ret = sign | exp | sig;
+    }
+
+    float fret;
+    memcpy(&fret, &ret, sizeof(float)); // reinterpret the assembled bits as a float
+    return fret;
+}
+
+// float to half algorithm from:
+//   "Fast Half Float Conversion" by Jeroen van der Zijp
+//   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+//
+// With adjustments for round-to-nearest, ties to even.
+
+static uint16_t basetable[512] = {
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
+    0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
+    0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
+    0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
+    0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
+    0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
+
+static uint8_t shifttable[512] = {
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
+    0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x0d, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
+    0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
+
+static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT
+{
+    uint32_t f;
+    memcpy(&f, &param, sizeof(float));
+    if (isnan(param)) {
+        uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10))); // 0x8000 iff the sign bit of f is clear
+        return t ^ ((uint16_t)(f >> 0xd)); // keep the top payload bits; XOR with t fixes up bit 15 (sign)
+    }
+    int i = ((f & ~0x007fffff) >> 23); // sign bit + 8 exponent bits: index into the 512-entry tables
+    uint8_t sh = shifttable[i];
+    f &= 0x007fffff;
+    // If `param` is subnormal, the tables are set up to force the
+    // result to 0, so the significand has an implicit `1` in the
+    // cases we care about.
+    f |= 0x007fffff + 0x1;
+    uint16_t h = (uint16_t)(basetable[i] + ((f >> sh) & 0x03ff));
+    // round to nearest, ties to even
+    // NOTE: NaNs already returned above; the 0x7C00 test below skips
+    // rounding when the exponent field of `h` is already all ones
+    int nextbit = (f >> (sh - 1)) & 1;
+    if (nextbit != 0 && (h & 0x7C00) != 0x7C00) {
+        // Round halfway to even or check lower bits
+        if ((h & 1) == 1 || (f & ((1 << (sh - 1)) - 1)) != 0)
+            h += UINT16_C(1);
+    }
+    return h;
+}
+
+JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
+{
+    return half_to_float(param);
+}
+
+JL_DLLEXPORT float __extendhfsf2(uint16_t param)
+{
+    return half_to_float(param);
+}
+
+JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
+{
+    return float_to_half(param);
+}
+
+JL_DLLEXPORT uint16_t __truncdfhf2(double param)
+{
+    float res = (float)param;
+    uint32_t resi;
+    memcpy(&resi, &res, sizeof(res));
+    if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
+        // shift so that the mantissa lines up where it would for normal Float16
+        uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
+        if (shift<23u) {
+            resi |= 0x00800000; // set implicit bit
+            resi >>= shift;
+        }
+    }
+    if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
+        memcpy(&resi, &res, sizeof(res));
+        // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
+        resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res));
+        memcpy(&res, &resi, sizeof(res));
+    }
+    return float_to_half(res);
+}
+
+#endif
+
 // run time version of bitcast intrinsic
 JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v)
 {
@@ -83,7 +296,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerref(jl_value_t *p, jl_value_t *order)
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
     if (ety == (jl_value_t*)jl_any_type) {
-        return jl_atomic_load((jl_value_t**)pp);
+        return jl_atomic_load((_Atomic(jl_value_t*)*)pp);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -103,7 +316,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerset(jl_value_t *p, jl_value_t *x, jl_v
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
     if (ety == (jl_value_t*)jl_any_type) {
-        jl_atomic_store((jl_value_t**)pp, x);
+        jl_atomic_store((_Atomic(jl_value_t*)*)pp, x);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -127,7 +340,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_
     jl_value_t *y;
     char *pp = (char*)jl_unbox_long(p);
     if (ety == (jl_value_t*)jl_any_type) {
-        y = jl_atomic_exchange((jl_value_t**)pp, x);
+        y = jl_atomic_exchange((_Atomic(jl_value_t*)*)pp, x);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -142,15 +355,25 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_
     return y;
 }
 
-JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order_sym)
+JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order)
 {
-    // n.b. we use seq_cst always here, but need to verify the order sym
-    // against the weaker load-only that happens first
-    if (order_sym == (jl_value_t*)acquire_release_sym)
-        order_sym = (jl_value_t*)acquire_sym;
-    jl_value_t *expected = jl_atomic_pointerref(p, order_sym);
+    JL_TYPECHK(atomic_pointermodify, pointer, p); // report type errors under this intrinsic's own name
+    JL_TYPECHK(atomic_pointermodify, symbol, order);
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 1); // result unused: presumably called only to validate `order`
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
+    jl_value_t *expected; // initial snapshot of *pp; passed to `f` and to the cmpswap loop
+    if (ety == (jl_value_t*)jl_any_type) {
+        expected = jl_atomic_load((_Atomic(jl_value_t*)*)pp);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointermodify: invalid pointer");
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) // size must be a power of two within the limit
+            jl_error("atomic_pointermodify: invalid pointer for atomic operation");
+        expected = jl_atomic_new_bits(ety, pp);
+    }
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 2);
     args[0] = expected;
@@ -159,7 +382,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, j
         jl_value_t *y = jl_apply_generic(f, args, 2);
         args[1] = y;
         if (ety == (jl_value_t*)jl_any_type) {
-            if (jl_atomic_cmpswap((jl_value_t**)pp, &expected, y))
+            if (jl_atomic_cmpswap((_Atomic(jl_value_t*)*)pp, &expected, y))
                 break;
         }
         else {
@@ -175,12 +398,16 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, j
         args[0] = expected;
         jl_gc_safepoint();
     }
-    // args[0] == expected (old); args[1] == y (new)
-    args[0] = jl_f_tuple(NULL, args, 2);
+    // args[0] == expected (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
     JL_GC_POP();
     return args[0];
 }
 
+
 JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *expected, jl_value_t *x, jl_value_t *success_order_sym, jl_value_t *failure_order_sym)
 {
     JL_TYPECHK(atomic_pointerreplace, pointer, p);
@@ -193,20 +420,21 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
     // TODO: filter other invalid orderings
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
     if (ety == (jl_value_t*)jl_any_type) {
-        jl_value_t **result;
-        JL_GC_PUSHARGS(result, 2);
-        result[0] = expected;
+        jl_value_t *result;
+        JL_GC_PUSH1(&result);
+        result = expected;
         int success;
         while (1) {
-            success = jl_atomic_cmpswap((jl_value_t**)pp, &result[0], x);
-            if (success || !jl_egal(result[0], expected))
+            success = jl_atomic_cmpswap((_Atomic(jl_value_t*)*)pp, &result, x);
+            if (success || !jl_egal(result, expected))
                 break;
         }
-        result[1] = success ? jl_true : jl_false;
-        result[0] = jl_f_tuple(NULL, result, 2);
+        result = jl_new_struct(rettyp, result, success ? jl_true : jl_false);
         JL_GC_POP();
-        return result[0];
+        return result;
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -216,14 +444,14 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
         size_t nb = jl_datatype_size(ety);
         if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
             jl_error("atomic_pointerreplace: invalid pointer for atomic operation");
-        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, pp, expected, x, nb);
+        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, rettyp, pp, expected, x, nb);
     }
 }
 
 JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order_sym)
 {
     JL_TYPECHK(fence, symbol, order_sym);
-    enum jl_memory_order order = jl_get_atomic_order_checked((jl_sym_t*)order_sym, 0, 0);
+    enum jl_memory_order order = jl_get_atomic_order_checked((jl_sym_t*)order_sym, 1, 1);
     if (order > jl_memory_order_monotonic)
         jl_fence();
     return jl_nothing;
@@ -1121,3 +1349,10 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
     JL_TYPECHK(arraylen, array, a);
     return jl_box_long(jl_array_len((jl_array_t*)a));
 }
+
+JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
+{
+    JL_TYPECHK(have_fma, datatype, typ);
+    // TODO: run-time feature check?
+    return jl_false;
+}
diff --git a/src/safepoint.c b/src/safepoint.c
index d54c7c62bec56..17c37a66c3a16 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -19,7 +19,7 @@ extern "C" {
 // 1: at least one sigint is pending, only the sigint page is enabled.
 // 2: at least one sigint is pending, both safepoint pages are enabled.
 JL_DLLEXPORT sig_atomic_t jl_signal_pending = 0;
-uint32_t jl_gc_running = 0;
+_Atomic(uint32_t) jl_gc_running = 0;
 char *jl_safepoint_pages = NULL;
 // The number of safepoints enabled on the three pages.
 // The first page, is the SIGINT page, only used by the master thread.
@@ -42,11 +42,9 @@ uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0};
 // Additionally accessing `jl_gc_running` should use acquire/release
 // load/store so that threads waiting for the GC doesn't have to also
 // fight on the safepoint lock...
-//
-// Acquiring and releasing this lock should use the `jl_mutex_*_nogc` functions
-jl_mutex_t safepoint_lock;
+uv_mutex_t safepoint_lock;
 
-static void jl_safepoint_enable(int idx)
+static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT
 {
     // safepoint_lock should be held
     assert(0 <= idx && idx < 3);
@@ -67,7 +65,7 @@ static void jl_safepoint_enable(int idx)
 #endif
 }
 
-static void jl_safepoint_disable(int idx)
+static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT
 {
     // safepoint_lock should be held
     assert(0 <= idx && idx < 3);
@@ -88,6 +86,7 @@ static void jl_safepoint_disable(int idx)
 
 void jl_safepoint_init(void)
 {
+    uv_mutex_init(&safepoint_lock);
     // jl_page_size isn't available yet.
     size_t pgsz = jl_getpagesize();
 #ifdef _OS_WINDOWS_
@@ -100,7 +99,7 @@ void jl_safepoint_init(void)
 #endif
     if (addr == NULL) {
         jl_printf(JL_STDERR, "could not allocate GC synchronization page\n");
-        gc_debug_critical_error();
+        jl_gc_debug_critical_error();
         abort();
     }
     // The signal page is for the gc safepoint.
@@ -111,36 +110,36 @@ void jl_safepoint_init(void)
 int jl_safepoint_start_gc(void)
 {
     if (jl_n_threads == 1) {
-        jl_gc_running = 1;
+        jl_atomic_store_relaxed(&jl_gc_running, 1);
         return 1;
     }
     // The thread should have set this already
-    assert(jl_current_task->ptls->gc_state == JL_GC_STATE_WAITING);
-    jl_mutex_lock_nogc(&safepoint_lock);
+    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING);
+    uv_mutex_lock(&safepoint_lock);
     // In case multiple threads enter the GC at the same time, only allow
     // one of them to actually run the collection. We can't just let the
     // master thread do the GC since it might be running unmanaged code
     // and can take arbitrarily long time before hitting a safe point.
     uint32_t running = 0;
     if (!jl_atomic_cmpswap(&jl_gc_running, &running, 1)) {
-        jl_mutex_unlock_nogc(&safepoint_lock);
+        uv_mutex_unlock(&safepoint_lock);
         jl_safepoint_wait_gc();
         return 0;
     }
     jl_safepoint_enable(1);
     jl_safepoint_enable(2);
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
     return 1;
 }
 
 void jl_safepoint_end_gc(void)
 {
-    assert(jl_gc_running);
+    assert(jl_atomic_load_relaxed(&jl_gc_running));
     if (jl_n_threads == 1) {
-        jl_gc_running = 0;
+        jl_atomic_store_relaxed(&jl_gc_running, 0);
         return;
     }
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Need to reset the page protection before resetting the flag since
     // the thread will trigger a segfault immediately after returning from
     // the signal handler.
@@ -151,13 +150,13 @@ void jl_safepoint_end_gc(void)
     // This wakes up other threads on mac.
     jl_mach_gc_end();
 #  endif
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
 }
 
 void jl_safepoint_wait_gc(void)
 {
     // The thread should have set this is already
-    assert(jl_current_task->ptls->gc_state != 0);
+    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) != 0);
     // Use normal volatile load in the loop for speed until GC finishes.
     // Then use an acquire load to make sure the GC result is visible on this thread.
     while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
@@ -167,7 +166,7 @@ void jl_safepoint_wait_gc(void)
 
 void jl_safepoint_enable_sigint(void)
 {
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Make sure both safepoints are enabled exactly once for SIGINT.
     switch (jl_signal_pending) {
     default:
@@ -183,24 +182,24 @@ void jl_safepoint_enable_sigint(void)
     case 2:
         jl_signal_pending = 2;
     }
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
 }
 
 void jl_safepoint_defer_sigint(void)
 {
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Make sure the GC safepoint is disabled for SIGINT.
     if (jl_signal_pending == 2) {
         jl_safepoint_disable(1);
         jl_signal_pending = 1;
     }
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
 }
 
 int jl_safepoint_consume_sigint(void)
 {
     int has_signal = 0;
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Make sure both safepoints are disabled for SIGINT.
     switch (jl_signal_pending) {
     default:
@@ -217,7 +216,7 @@ int jl_safepoint_consume_sigint(void)
     case 0:
         jl_signal_pending = 0;
     }
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
     return has_signal;
 }
 
diff --git a/src/signal-handling.c b/src/signal-handling.c
index 8011c62934d28..3762c838caba5 100644
--- a/src/signal-handling.c
+++ b/src/signal-handling.c
@@ -25,16 +25,26 @@ static volatile size_t bt_size_cur = 0;
 static volatile uint64_t nsecprof = 0;
 static volatile int running = 0;
 static const    uint64_t GIGA = 1000000000ULL;
+static uint64_t profile_cong_rng_seed = 0;
+static uint64_t profile_cong_rng_unbias = 0;
+static volatile uint64_t *profile_round_robin_thread_order = NULL;
 // Timers to take samples at intervals
 JL_DLLEXPORT void jl_profile_stop_timer(void);
 JL_DLLEXPORT int jl_profile_start_timer(void);
 void jl_lock_profile(void);
 void jl_unlock_profile(void);
+void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed);
 
 JL_DLLEXPORT int jl_profile_is_buffer_full(void)
 {
-    // the latter `+ 1` is for the block terminator `0`.
-    return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 1 > bt_size_max;
+    // declare buffer full if there isn't enough room to take samples across all threads
+    #if defined(_OS_WINDOWS_)
+        uint64_t nthreads = 1; // windows only profiles the main thread
+    #else
+        uint64_t nthreads = jl_n_threads;
+    #endif
+    // the `+ 6` is for the two block terminators `0` plus 4 metadata entries
+    return bt_size_cur + (((JL_BT_MAX_ENTRY_SIZE + 1) + 6) * nthreads) > bt_size_max;
 }
 
 static uint64_t jl_last_sigint_trigger = 0;
@@ -231,19 +241,19 @@ void jl_show_sigill(void *_ctx)
 }
 
 // what to do on a critical error on a thread
-void jl_critical_error(int sig, bt_context_t *context)
+void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct)
 {
-
-    jl_task_t *ct = jl_current_task;
-    jl_bt_element_t *bt_data = ct->ptls->bt_data;
-    size_t *bt_size = &ct->ptls->bt_size;
-    size_t i, n = *bt_size;
+    jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
+    size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
+    size_t i, n = ct ? *bt_size : 0;
     if (sig) {
         // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
         jl_set_safe_restore(NULL);
-        ct->gcstack = NULL;
-        ct->eh = NULL;
-        ct->excstack = NULL;
+        if (ct) {
+            ct->gcstack = NULL;
+            ct->eh = NULL;
+            ct->excstack = NULL;
+        }
 #ifndef _OS_WINDOWS_
         sigset_t sset;
         sigemptyset(&sset);
@@ -267,7 +277,7 @@ void jl_critical_error(int sig, bt_context_t *context)
         jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
     }
     jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno);
-    if (context) {
+    if (context && ct) {
         // Must avoid extended backtrace frames here unless we're sure bt_data
         // is properly rooted.
         *bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL);
@@ -275,8 +285,8 @@ void jl_critical_error(int sig, bt_context_t *context)
     for (i = 0; i < n; i += jl_bt_entry_size(bt_data + i)) {
         jl_print_bt_entry_codeloc(bt_data + i);
     }
-    gc_debug_print_status();
-    gc_debug_critical_error();
+    jl_gc_debug_print_status();
+    jl_gc_debug_critical_error();
 }
 
 ///////////////////////
@@ -288,6 +298,17 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
     nsecprof = delay_nsec;
     if (bt_data_prof != NULL)
         free((void*)bt_data_prof);
+    if (profile_round_robin_thread_order == NULL) {
+        // NOTE: We currently only allocate this once, since jl_n_threads cannot change
+        // during execution of a julia process. If/when this invariant changes in the
+        // future, this will have to be adjusted.
+        profile_round_robin_thread_order = (uint64_t*) calloc(jl_n_threads, sizeof(uint64_t));
+        for (int i = 0; i < jl_n_threads; i++) {
+            profile_round_robin_thread_order[i] = i;
+        }
+    }
+    seed_cong(&profile_cong_rng_seed);
+    unbias_cong(jl_n_threads, &profile_cong_rng_unbias);
     bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t));
     if (bt_data_prof == NULL && maxsize > 0)
         return -1;
@@ -295,6 +316,17 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
     return 0;
 }
 
+void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed) {
+    // Shuffle carray[0..size) in place using draws from cong(..., seed), modeled on the "modern Fisher–Yates shuffle" (O(n)); size 0 or 1 is a no-op.
+    // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+    for (size_t i = size; i-- > 1; ) { // visits i = size-1 .. 1; unlike `i = size - 1`, this cannot underflow when size == 0
+        size_t j = cong(i, profile_cong_rng_unbias, seed); // NOTE(review): Fisher–Yates needs j in [0, i]; confirm cong(i, ...) can return i itself
+        uint64_t tmp = carray[j];
+        carray[j] = carray[i];
+        carray[i] = tmp;
+    }
+}
+
 JL_DLLEXPORT uint8_t *jl_profile_get_data(void)
 {
     return (uint8_t*) bt_data_prof;
diff --git a/src/signals-mach.c b/src/signals-mach.c
index 3f133c3189b10..57dcc969068e8 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -54,12 +54,12 @@ void jl_mach_gc_end(void)
 static int jl_mach_gc_wait(jl_ptls_t ptls2,
                            mach_port_t thread, int16_t tid)
 {
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     if (!jl_atomic_load_relaxed(&jl_gc_running)) {
         // relaxed, since gets set to zero only while the safepoint_lock was held
         // this means we can tell if GC is done before we got the message or
         // the safepoint was enabled for SIGINT.
-        jl_mutex_unlock_nogc(&safepoint_lock);
+        uv_mutex_unlock(&safepoint_lock);
         return 0;
     }
     // Otherwise, set the gc state of the thread, suspend and record it
@@ -68,7 +68,7 @@ static int jl_mach_gc_wait(jl_ptls_t ptls2,
     uintptr_t item = tid | (((uintptr_t)gc_state) << 16);
     arraylist_push(&suspended_threads, (void*)item);
     thread_suspend(thread);
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
     return 1;
 }
 
@@ -302,7 +302,7 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
     if (msync((void*)(fault_addr & ~(jl_page_size - 1)), 1, MS_ASYNC) == 0) { // check if this was a valid address
 #endif
         jl_value_t *excpt;
-        if (is_addr_on_stack(ptls2->current_task, (void*)fault_addr)) {
+        if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) {
             excpt = jl_stackovf_exception;
         }
 #ifdef SEGV_EXCEPTION
@@ -402,7 +402,7 @@ static void jl_try_deliver_sigint(void)
 static void JL_NORETURN jl_exit_thread0_cb(int exitstate)
 {
 CFI_NORETURN
-    jl_critical_error(exitstate - 128, NULL);
+    jl_critical_error(exitstate - 128, NULL, jl_current_task); // exitstate - 128 is passed as the signal number; the context argument is NULL
     jl_exit(exitstate);
 }
 
@@ -524,7 +524,6 @@ static kern_return_t profiler_segv_handler
 void *mach_profile_listener(void *arg)
 {
     (void)arg;
-    int i;
     const int max_size = 512;
     attach_exception_port(mach_thread_self(), 1);
 #ifdef LLVMLIBUNWIND
@@ -541,7 +540,10 @@ void *mach_profile_listener(void *arg)
         jl_lock_profile();
         void *unused = NULL;
         int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0;
-        for (i = jl_n_threads; i-- > 0; ) {
+        jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
+        for (int idx = jl_n_threads; idx-- > 0; ) {
+            // Stop the threads in the random round-robin order.
+            int i = profile_round_robin_thread_order[idx];
             // if there is no space left, break early
             if (jl_profile_is_buffer_full()) {
                 jl_profile_stop_timer();
@@ -586,8 +588,22 @@ void *mach_profile_listener(void *arg)
 #else
                 bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
 #endif
+                jl_ptls_t ptls = jl_all_tls_states[i];
 
-                // Mark the end of this block with 0
+                // store threadid, offset by 1 because 0 is reserved to indicate end of block
+                bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+
+                // store task id
+                bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
+
+                // store cpu cycle clock
+                bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                // store whether thread is sleeping, offset by 1 because 0 is reserved to indicate end of block
+                bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
+
+                // Mark the end of this block with two 0's
+                bt_data_prof[bt_size_cur++].uintptr = 0;
                 bt_data_prof[bt_size_cur++].uintptr = 0;
             }
             // We're done! Resume the thread.
diff --git a/src/signals-unix.c b/src/signals-unix.c
index bb19e2bd65d78..9142da0c03ada 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -7,6 +7,7 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <pthread.h>
+#include <time.h>
 #include <errno.h>
 #if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
 #define MAP_ANONYMOUS MAP_ANON
@@ -44,7 +45,8 @@
 #include "julia_assert.h"
 
 // helper function for returning the unw_context_t inside a ucontext_t
-static bt_context_t *jl_to_bt_context(void *sigctx)
+// (also used by stackwalk.c)
+bt_context_t *jl_to_bt_context(void *sigctx)
 {
 #ifdef __APPLE__
     return (bt_context_t*)&((ucontext64_t*)sigctx)->uc_mcontext64->__ss;
@@ -83,8 +85,11 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
     const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
     return ctx->uc_mcontext64->__ss.__sp;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.mc_rsp;
 #else
-    // TODO Add support for FreeBSD and PowerPC(64)?
+    // TODO Add support for PowerPC(64)?
     return 0;
 #endif
 }
@@ -227,7 +232,7 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
     uv_tty_reset_mode();
     if (sig == SIGILL)
         jl_show_sigill(context);
-    jl_critical_error(sig, jl_to_bt_context(context));
+    jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task());
     if (sig != SIGSEGV &&
         sig != SIGBUS &&
         sig != SIGILL) {
@@ -321,7 +326,7 @@ static void segv_handler(int sig, siginfo_t *info, void *context)
     if (jl_addr_is_safepoint((uintptr_t)info->si_addr)) {
         jl_set_gc_and_wait();
         // Do not raise sigint on worker thread
-        if (ct->tid != 0)
+        if (jl_atomic_load_relaxed(&ct->tid) != 0)
             return;
         if (ct->ptls->defer_signal) {
             jl_safepoint_defer_sigint();
@@ -364,11 +369,25 @@ static pthread_cond_t signal_caught_cond;
 
 static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx)
 {
+    struct timespec ts;
+    clock_gettime(CLOCK_REALTIME, &ts);
+    ts.tv_sec += 1; // absolute deadline for the timed wait below: one second from now
     pthread_mutex_lock(&in_signal_lock);
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
     jl_atomic_store_release(&ptls2->signal_request, 1);
     pthread_kill(ptls2->system_id, SIGUSR2);
-    pthread_cond_wait(&signal_caught_cond, &in_signal_lock);  // wait for thread to acknowledge
+    // wait for thread to acknowledge, but only until the deadline in `ts`
+    int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts);
+    if (err == ETIMEDOUT) {
+        sig_atomic_t request = 1;
+        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { // request is still pending (nobody took it): withdraw it
+            *ctx = NULL; // tells the caller that no context was captured for this thread
+            pthread_mutex_unlock(&in_signal_lock); // only this early-out path unlocks; the normal path returns with in_signal_lock still held
+            return;
+        }
+        err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // request was already taken (presumably by the target's handler), so wait for the acknowledgement
+    }
+    assert(!err);
     assert(jl_atomic_load_acquire(&ptls2->signal_request) == 0);
     *ctx = signal_context;
 }
@@ -406,7 +425,7 @@ CFI_NORETURN
     // (unavoidable due to its async nature).
     // Try harder to exit each time if we get multiple exit requests.
     if (thread0_exit_count <= 1) {
-        jl_critical_error(thread0_exit_state - 128, NULL);
+        jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task);
         jl_exit(thread0_exit_state);
     }
     else if (thread0_exit_count == 2) {
@@ -448,6 +467,8 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
     if (ct == NULL)
         return;
     jl_ptls_t ptls = ct->ptls;
+    if (ptls == NULL)
+        return;
     int errno_save = errno;
     sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, 0);
 #if !defined(JL_DISABLE_LIBUNWIND)
@@ -743,53 +764,74 @@ static void *signal_listener(void *arg)
         unw_context_t *signal_context;
         // sample each thread, round-robin style in reverse order
         // (so that thread zero gets notified last)
-        if (critical || profile)
+        if (critical || profile) {
             jl_lock_profile();
-        for (int i = jl_n_threads; i-- > 0; ) {
-            // notify thread to stop
-            jl_thread_suspend_and_get_state(i, &signal_context);
-
-            // do backtrace on thread contexts for critical signals
-            // this part must be signal-handler safe
-            if (critical) {
-                bt_size += rec_backtrace_ctx(bt_data + bt_size,
-                        JL_MAX_BT_SIZE / jl_n_threads - 1,
-                        signal_context, NULL);
-                bt_data[bt_size++].uintptr = 0;
-            }
+            if (!critical)
+                jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
+            for (int idx = jl_n_threads; idx-- > 0; ) {
+                // Stop the threads in the random round-robin order.
+                int i = critical ? idx : profile_round_robin_thread_order[idx];
+                // notify thread to stop
+                jl_thread_suspend_and_get_state(i, &signal_context);
+                if (signal_context == NULL)
+                    continue;
 
-            // do backtrace for profiler
-            if (profile && running) {
-                if (jl_profile_is_buffer_full()) {
-                    // Buffer full: Delete the timer
-                    jl_profile_stop_timer();
+                // do backtrace on thread contexts for critical signals
+                // this part must be signal-handler safe
+                if (critical) {
+                    bt_size += rec_backtrace_ctx(bt_data + bt_size,
+                            JL_MAX_BT_SIZE / jl_n_threads - 1,
+                            signal_context, NULL);
+                    bt_data[bt_size++].uintptr = 0;
                 }
-                else {
-                    // unwinding can fail, so keep track of the current state
-                    // and restore from the SEGV handler if anything happens.
-                    jl_jmp_buf *old_buf = jl_get_safe_restore();
-                    jl_jmp_buf buf;
-
-                    jl_set_safe_restore(&buf);
-                    if (jl_setjmp(buf, 0)) {
-                        jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
-                    } else {
-                        // Get backtrace data
-                        bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
-                                bt_size_max - bt_size_cur - 1, signal_context, NULL);
-                    }
-                    jl_set_safe_restore(old_buf);
 
-                    // Mark the end of this block with 0
-                    bt_data_prof[bt_size_cur++].uintptr = 0;
+                // do backtrace for profiler
+                if (profile && running) {
+                    if (jl_profile_is_buffer_full()) {
+                        // Buffer full: Delete the timer
+                        jl_profile_stop_timer();
+                    }
+                    else {
+                        // unwinding can fail, so keep track of the current state
+                        // and restore from the SEGV handler if anything happens.
+                        jl_jmp_buf *old_buf = jl_get_safe_restore();
+                        jl_jmp_buf buf;
+
+                        jl_set_safe_restore(&buf);
+                        if (jl_setjmp(buf, 0)) {
+                            jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
+                        } else {
+                            // Get backtrace data
+                            bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
+                                    bt_size_max - bt_size_cur - 1, signal_context, NULL);
+                        }
+                        jl_set_safe_restore(old_buf);
+
+                        jl_ptls_t ptls2 = jl_all_tls_states[i];
+
+                        // store threadid, offset by 1 because 0 is reserved to indicate end of block
+                        bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1;
+
+                        // store task id
+                        bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
+
+                        // store cpu cycle clock
+                        bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                        // store whether thread is sleeping, offset by 1 because 0 is reserved to indicate end of block
+                        bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls2->sleep_check_state) + 1;
+
+                        // Mark the end of this block with two 0's
+                        bt_data_prof[bt_size_cur++].uintptr = 0;
+                        bt_data_prof[bt_size_cur++].uintptr = 0;
+                    }
                 }
-            }
 
-            // notify thread to resume
-            jl_thread_resume(i, sig);
-        }
-        if (critical || profile)
+                // notify thread to resume
+                jl_thread_resume(i, sig);
+            }
             jl_unlock_profile();
+        }
 #ifndef HAVE_MACH
         if (profile && running) {
 #if defined(HAVE_TIMER)
@@ -809,6 +851,15 @@ static void *signal_listener(void *arg)
                 jl_exit_thread0(128 + sig, bt_data, bt_size);
             }
             else {
+#ifndef SIGINFO // SIGINFO already prints this automatically
+                int nrunning = 0;
+                for (int idx = jl_n_threads; idx-- > 0; ) {
+                    jl_ptls_t ptls2 = jl_all_tls_states[idx];
+                    nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
+                }
+                jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", jl_getpid(), nrunning, jl_n_threads);
+#endif
+
                 jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
                 size_t i;
                 for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
diff --git a/src/signals-win.c b/src/signals-win.c
index 984330dc434dc..fefb29a529847 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -2,6 +2,7 @@
 
 // Windows
 // Note that this file is `#include`d by "signal-handling.c"
+#include <mmsystem.h> // hidden by LEAN_AND_MEAN
 
 #define sig_stack_size 131072 // 128k reserved for SEGV handling
 
@@ -59,7 +60,6 @@ static void jl_try_throw_sigint(void)
 
 void __cdecl crt_sig_handler(int sig, int num)
 {
-    jl_task_t *ct = jl_current_task;
     CONTEXT Context;
     switch (sig) {
     case SIGFPE:
@@ -92,16 +92,16 @@ void __cdecl crt_sig_handler(int sig, int num)
         RtlCaptureContext(&Context);
         if (sig == SIGILL)
             jl_show_sigill(&Context);
-        jl_critical_error(sig, &Context);
+        jl_critical_error(sig, &Context, jl_get_current_task());
         raise(sig);
     }
 }
 
 // StackOverflowException needs extra stack space to record the backtrace
 // so we keep one around, shared by all threads
-static jl_mutex_t backtrace_lock;
-static jl_ucontext_t collect_backtrace_fiber;
-static jl_ucontext_t error_return_fiber;
+static uv_mutex_t backtrace_lock;
+static win32_ucontext_t collect_backtrace_fiber;
+static win32_ucontext_t error_return_fiber;
 static PCONTEXT stkerror_ctx;
 static jl_ptls_t stkerror_ptls;
 static int have_backtrace_fiber;
@@ -141,11 +141,11 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
                                               ct->gcstack);
         }
         else if (have_backtrace_fiber) {
-            JL_LOCK(&backtrace_lock);
+            uv_mutex_lock(&backtrace_lock);
             stkerror_ctx = ctxThread;
             stkerror_ptls = ptls;
             jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
-            JL_UNLOCK_NOGC(&backtrace_lock);
+            uv_mutex_unlock(&backtrace_lock);
         }
         ptls->sig_exception = excpt;
     }
@@ -226,7 +226,8 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara
 
 LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
 {
-    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) {
         switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
             case EXCEPTION_INT_DIVIDE_BY_ZERO:
@@ -313,7 +314,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
         jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
         jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
 
-        jl_critical_error(0, ExceptionInfo->ContextRecord);
+        jl_critical_error(0, ExceptionInfo->ContextRecord, ct);
         static int recursion = 0;
         if (recursion++)
             exit(1);
@@ -343,7 +344,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
                 continue;
             }
             else {
-                JL_LOCK_NOGC(&jl_in_stackwalk);
+                uv_mutex_lock(&jl_in_stackwalk);
                 jl_lock_profile();
                 if ((DWORD)-1 == SuspendThread(hMainThread)) {
                     fputs("failed to suspend main thread. aborting profiling.", stderr);
@@ -360,22 +361,38 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
                     // Get backtrace data
                     bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
                             bt_size_max - bt_size_cur - 1, &ctxThread, NULL);
-                    // Mark the end of this block with 0
+
+                    jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread
+
+                    // store threadid, offset by 1 because 0 is reserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+
+                    // store task id
+                    bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
+
+                    // store cpu cycle clock
+                    bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                    // store whether thread is sleeping, offset by 1 because 0 is reserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
+
+                    // Mark the end of this block with two 0's
+                    bt_data_prof[bt_size_cur++].uintptr = 0;
                     bt_data_prof[bt_size_cur++].uintptr = 0;
                 }
                 jl_unlock_profile();
-                JL_UNLOCK_NOGC(&jl_in_stackwalk);
+                uv_mutex_unlock(&jl_in_stackwalk);
                 if ((DWORD)-1 == ResumeThread(hMainThread)) {
                     jl_profile_stop_timer();
                     fputs("failed to resume main thread! aborting.", stderr);
-                    gc_debug_critical_error();
+                    jl_gc_debug_critical_error();
                     abort();
                 }
             }
         }
     }
     jl_unlock_profile();
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     jl_profile_stop_timer();
     hBtThread = 0;
     return 0;
@@ -457,6 +474,6 @@ void jl_install_thread_signal_handler(jl_ptls_t ptls)
     collect_backtrace_fiber.uc_stack.ss_sp = (void*)stk;
     collect_backtrace_fiber.uc_stack.ss_size = ssize;
     jl_makecontext(&collect_backtrace_fiber, start_backtrace_fiber);
-    JL_MUTEX_INIT(&backtrace_lock);
+    uv_mutex_init(&backtrace_lock);
     have_backtrace_fiber = 1;
 }
diff --git a/src/simplevector.c b/src/simplevector.c
index 2b87eb92c41d1..fa21330b23ab4 100644
--- a/src/simplevector.c
+++ b/src/simplevector.c
@@ -7,7 +7,7 @@
 #include "julia_internal.h"
 #include "julia_assert.h"
 
-JL_DLLEXPORT jl_svec_t *(jl_svec)(size_t n, ...)
+JL_DLLEXPORT jl_svec_t *(ijl_svec)(size_t n, ...)
 {
     va_list args;
     if (n == 0) return jl_emptysvec;
diff --git a/src/smallintset.c b/src/smallintset.c
index 7598d8fd85ce4..54fdad616a758 100644
--- a/src/smallintset.c
+++ b/src/smallintset.c
@@ -130,14 +130,16 @@ static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1)
     return 0;
 }
 
-static void smallintset_rehash(jl_array_t **cache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np);
+static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np);
 
-void jl_smallintset_insert(jl_array_t **cache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data)
+void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data)
 {
-    if (val + 1 >  jl_max_int(*cache))
-        smallintset_rehash(cache, parent, hash, data, jl_array_len(*cache), val + 1);
+    jl_array_t *a = jl_atomic_load_relaxed(pcache);
+    if (val + 1 >  jl_max_int(a))
+        smallintset_rehash(pcache, parent, hash, data, jl_array_len(a), val + 1);
     while (1) {
-        if (smallintset_insert_(*cache, hash(val, data), val + 1))
+        a = jl_atomic_load_relaxed(pcache);
+        if (smallintset_insert_(a, hash(val, data), val + 1))
             return;
 
         /* table full */
@@ -145,20 +147,21 @@ void jl_smallintset_insert(jl_array_t **cache, jl_value_t *parent, smallintset_h
         /* it's important to grow the table really fast; otherwise we waste */
         /* lots of time rehashing all the keys over and over. */
         size_t newsz;
-        size_t sz = jl_array_len(*cache);
+        a = jl_atomic_load_relaxed(pcache);
+        size_t sz = jl_array_len(a);
         if (sz < HT_N_INLINE)
             newsz = HT_N_INLINE;
         else if (sz >= (1 << 19) || (sz <= (1 << 8)))
             newsz = sz << 1;
         else
             newsz = sz << 2;
-        smallintset_rehash(cache, parent, hash, data, newsz, 0);
+        smallintset_rehash(pcache, parent, hash, data, newsz, 0);
     }
 }
 
-static void smallintset_rehash(jl_array_t **cache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np)
+static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np)
 {
-    jl_array_t *a = *cache;
+    jl_array_t *a = jl_atomic_load_relaxed(pcache);
     size_t sz = jl_array_len(a);
     size_t i;
     for (i = 0; i < sz; i += 1) {
@@ -179,7 +182,7 @@ static void smallintset_rehash(jl_array_t **cache, jl_value_t *parent, smallints
         }
         JL_GC_POP();
         if (i == sz) {
-            *cache = newa;
+            jl_atomic_store_release(pcache, newa);
             jl_gc_wb(parent, newa);
             return;
         }
diff --git a/src/stackwalk.c b/src/stackwalk.c
index 2994e653cb462..766e318a46b7b 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -13,7 +13,7 @@
 // define `jl_unw_get` as a macro, since (like setjmp)
 // returning from the callee function will invalidate the context
 #ifdef _OS_WINDOWS_
-jl_mutex_t jl_in_stackwalk;
+uv_mutex_t jl_in_stackwalk;
 #define jl_unw_get(context) (RtlCaptureContext(context), 0)
 #elif !defined(JL_DISABLE_LIBUNWIND)
 #define jl_unw_get(context) unw_getcontext(context)
@@ -75,7 +75,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
     uintptr_t return_ip = 0;
     uintptr_t thesp = 0;
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     if (!from_signal_handler) {
         // Workaround 32-bit windows bug missing top frame
         // See for example https://bugs.chromium.org/p/crashpad/issues/detail?id=53
@@ -102,13 +102,18 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
                 // But sometimes the external unwinder doesn't check that.
                 have_more_frames = 0;
             }
+            if (return_ip == 0) {
+                // The return address is clearly wrong, and while the unwinder
+                // might try to continue (by popping another stack frame), that
+                // likely won't work well, and it'll confuse the stack frame
+                // separator detection logic (double-NULL).
+                have_more_frames = 0;
+            }
             if (skip > 0) {
                 skip--;
                 from_signal_handler = 0;
                 continue;
             }
-            if (sp)
-                sp[n] = thesp;
             // For the purposes of looking up debug info for functions, we want
             // to harvest addresses for the *call* instruction `call_ip` during
             // stack walking.  However, this information isn't directly
@@ -141,8 +146,9 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
             if (!from_signal_handler)
                 call_ip -= 1; // normal frame
             from_signal_handler = 0;
-            if (call_ip == JL_BT_NON_PTR_ENTRY) {
+            if (call_ip == JL_BT_NON_PTR_ENTRY || call_ip == 0) {
                 // Never leave special marker in the bt data as it can corrupt the GC.
+                have_more_frames = 0;
                 call_ip = 0;
             }
             jl_bt_element_t *bt_entry = bt_data + n;
@@ -160,6 +166,8 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
                 }
             }
             bt_entry->uintptr = call_ip;
+            if (sp)
+                sp[n] = thesp;
             n++;
         }
         // NOTE: if we have some pgcstack entries remaining (because the
@@ -177,7 +185,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
     jl_set_safe_restore(old_buf);
 #endif
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
 #endif
     *bt_size = n;
     return need_more_space;
@@ -251,8 +259,8 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
             jl_array_grow_end(ip, maxincr);
             uintptr_t *sp_ptr = NULL;
             if (returnsp) {
-                sp_ptr = (uintptr_t*)jl_array_data(sp) + offset;
                 jl_array_grow_end(sp, maxincr);
+                sp_ptr = (uintptr_t*)jl_array_data(sp) + offset;
             }
             size_t size_incr = 0;
             have_more_frames = jl_unw_stepn(&cursor, (jl_bt_element_t*)jl_array_data(ip) + offset,
@@ -336,7 +344,7 @@ JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int ma
 {
     JL_TYPECHK(current_exceptions, task, (jl_value_t*)task);
     jl_task_t *ct = jl_current_task;
-    if (task != ct && task->_state == JL_TASK_STATE_RUNNABLE) {
+    if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) {
         jl_error("Inspecting the exception stack of a task which might "
                  "be running concurrently isn't allowed.");
     }
@@ -384,9 +392,9 @@ static PVOID CALLBACK JuliaFunctionTableAccess64(
     PRUNTIME_FUNCTION fn = RtlLookupFunctionEntry(AddrBase, &ImageBase, &HistoryTable);
     if (fn)
         return fn;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     PVOID ftable = SymFunctionTableAccess64(hProcess, AddrBase);
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return ftable;
 #else
     return SymFunctionTableAccess64(hProcess, AddrBase);
@@ -402,9 +410,9 @@ static DWORD64 WINAPI JuliaGetModuleBase64(
     PRUNTIME_FUNCTION fn = RtlLookupFunctionEntry(dwAddr, &ImageBase, &HistoryTable);
     if (fn)
         return ImageBase;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     DWORD64 fbase = SymGetModuleBase64(hProcess, dwAddr);
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return fbase;
 #else
     if (dwAddr == HistoryTable.dwAddr)
@@ -423,7 +431,7 @@ static DWORD64 WINAPI JuliaGetModuleBase64(
 volatile int needsSymRefreshModuleList;
 BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
 
-void jl_refresh_dbg_module_list(void)
+JL_DLLEXPORT void jl_refresh_dbg_module_list(void)
 {
     if (needsSymRefreshModuleList && hSymRefreshModuleList != NULL) {
         hSymRefreshModuleList(GetCurrentProcess());
@@ -433,7 +441,7 @@ void jl_refresh_dbg_module_list(void)
 static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *Context)
 {
     int result;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     jl_refresh_dbg_module_list();
 #if !defined(_CPU_X86_64_)
     memset(&cursor->stackframe, 0, sizeof(cursor->stackframe));
@@ -451,7 +459,7 @@ static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *Context)
     *cursor = *Context;
     result = 1;
 #endif
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return result;
 }
 
@@ -587,12 +595,12 @@ JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC)
         if (frame.func_name)
             jl_svecset(r, 0, jl_symbol(frame.func_name));
         else
-            jl_svecset(r, 0, empty_sym);
+            jl_svecset(r, 0, jl_empty_sym);
         free(frame.func_name);
         if (frame.file_name)
             jl_svecset(r, 1, jl_symbol(frame.file_name));
         else
-            jl_svecset(r, 1, empty_sym);
+            jl_svecset(r, 1, jl_empty_sym);
         free(frame.file_name);
         jl_svecset(r, 2, jl_box_long(frame.line));
         jl_svecset(r, 3, frame.linfo != NULL ? (jl_value_t*)frame.linfo : jl_nothing);
@@ -709,7 +717,7 @@ void jl_rec_backtrace(jl_task_t *t)
 #if defined(_OS_WINDOWS_)
     bt_context_t c;
     memset(&c, 0, sizeof(c));
-    _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.uc_mcontext;
+    _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext;
 #if defined(_CPU_X86_64_)
     c.Rbx = mctx->Rbx;
     c.Rsp = mctx->Rsp;
@@ -729,9 +737,9 @@ void jl_rec_backtrace(jl_task_t *t)
 #endif
     context = &c;
 #elif defined(JL_HAVE_UNW_CONTEXT)
-    context = &t->ctx;
+    context = &t->ctx.ctx;
 #elif defined(JL_HAVE_UCONTEXT)
-    context = jl_to_bt_context(&t->ctx);
+    context = jl_to_bt_context(&t->ctx.ctx);
 #else
 #endif
     if (context)
diff --git a/src/staticdata.c b/src/staticdata.c
index d70e35542de2a..223d7b8e63427 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -16,11 +16,7 @@
 #include <dlfcn.h>
 #endif
 
-#ifndef _COMPILER_MICROSOFT_
 #include "valgrind.h"
-#else
-#define RUNNING_ON_VALGRIND 0
-#endif
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -30,7 +26,7 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-#define NUM_TAGS    150
+#define NUM_TAGS    151
 
 // An array of references that need to be restored from the sysimg
 // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
@@ -127,6 +123,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_floatingpoint_type);
         INSERT_TAG(jl_number_type);
         INSERT_TAG(jl_signed_type);
+        INSERT_TAG(jl_pair_type);
 
         // special typenames
         INSERT_TAG(jl_tuple_typename);
@@ -236,6 +233,9 @@ static arraylist_t ccallable_list;
 static htable_t fptr_to_id;
 void *native_functions;
 
+// table of struct field addresses to rewrite during saving
+static htable_t field_replace;
+
 // array of definitions for the predefined function pointers
 // (reverse of fptr_to_id)
 // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
@@ -269,6 +269,8 @@ static jl_typename_t *jl_idtable_typename = NULL;
 static jl_value_t *jl_bigint_type = NULL;
 static int gmp_limb_size = 0;
 
+static jl_sym_t *jl_docmeta_sym = NULL;
+
 enum RefTags {
     DataRef,
     ConstDataRef,
@@ -402,8 +404,11 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
             jl_serialize_value(s, (jl_value_t*)table[i]);
             jl_binding_t *b = (jl_binding_t*)table[i+1];
             jl_serialize_value(s, b->name);
-            jl_serialize_value(s, b->value);
-            jl_serialize_value(s, b->globalref);
+            if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata)
+                jl_serialize_value(s, jl_nothing);
+            else
+                jl_serialize_value(s, jl_atomic_load_relaxed(&b->value));
+            jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref));
             jl_serialize_value(s, b->owner);
         }
     }
@@ -413,6 +418,13 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
     }
 }
 
+// Value to serialize for the pointer field at `addr`: the replacement registered in
+// `field_replace` if there is one (it may be NULL), else the field's current value.
+static jl_value_t *get_replaceable_field(jl_value_t **addr)
+{
+    void *replacement = ptrhash_get(&field_replace, (void*)addr);
+    return replacement == HT_NOTFOUND ? *addr : (jl_value_t*)replacement;
+}
 
 #define NBOX_C 1024
 
@@ -513,7 +525,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recur
         size_t i, np = t->layout->npointers;
         for (i = 0; i < np; i++) {
             uint32_t ptr = jl_ptr_offset(t, i);
-            jl_value_t *fld = ((jl_value_t* const*)data)[ptr];
+            jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr]);
             jl_serialize_value(s, fld);
         }
     }
@@ -635,7 +647,7 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
     newm->parent = NULL;
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent)));
     arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent));
-    newm->primary_world = jl_world_counter;
+    newm->primary_world = jl_atomic_load_acquire(&jl_world_counter);
 
     // write out the bindings table as a list
     // immediately after jl_module_t
@@ -654,8 +666,11 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
             record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b),
                     ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset);
             write_pointerfield(s, (jl_value_t*)b->name);
-            write_pointerfield(s, b->value);
-            write_pointerfield(s, b->globalref);
+            if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata)
+                write_pointerfield(s, jl_nothing);
+            else
+                write_pointerfield(s, jl_atomic_load_relaxed(&b->value));
+            write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref));
             write_pointerfield(s, (jl_value_t*)b->owner);
             size_t flag_offset = offsetof(jl_binding_t, owner) + sizeof(b->owner);
             ios_write(s->s, (char*)b + flag_offset, sizeof(*b) - flag_offset);
@@ -939,7 +954,7 @@ static void jl_write_values(jl_serializer_state *s)
             size_t np = t->layout->npointers;
             for (i = 0; i < np; i++) {
                 size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
-                jl_value_t *fld = *(jl_value_t**)&data[offset];
+                jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset]);
                 if (fld != NULL) {
                     arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location
                     arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
@@ -1040,14 +1055,14 @@ static void jl_write_values(jl_serializer_state *s)
                 jl_typename_t *tn = (jl_typename_t*)v;
                 jl_typename_t *newtn = (jl_typename_t*)&s->s->buf[reloc_offset];
                 if (tn->atomicfields != NULL) {
-                    size_t nf = jl_svec_len(tn->names);
+                    size_t nb = (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t);
                     uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*));
                     write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream
                     newtn->atomicfields = NULL; // relocation offset
                     layout /= sizeof(void*);
                     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_typename_t, atomicfields))); // relocation location
                     arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target
-                    ios_write(s->const_data, (char*)tn->atomicfields, nf);
+                    ios_write(s->const_data, (char*)tn->atomicfields, nb);
                 }
             }
             else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
@@ -1522,6 +1537,117 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
     }
 }
 
+// Remove debug metadata from IR: zero codelocs, empty the linetable, rename every slot
+// but `jl_unused_sym` to `?`. `ci_` is a CodeInfo (edited in place) or compressed IR
+// (uncompressed, edited, re-compressed). If `orig`, `m->slot_syms` is rebuilt as well.
+static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig)
+{
+    jl_code_info_t *code = NULL;
+    JL_GC_PUSH1(&code);
+    const int was_compressed = !jl_is_code_info(ci_);
+    if (was_compressed)
+        code = jl_uncompress_ir(m, NULL, (jl_array_t*)ci_);
+    else
+        code = (jl_code_info_t*)ci_;
+    // the compiler may rely on the length of codelocs, so keep it and zero the entries
+    size_t codelocs_bytes = jl_array_len(code->codelocs) * sizeof(int32_t);
+    memset(jl_array_data(code->codelocs), 0, codelocs_bytes);
+    // drop every linetable entry
+    if (jl_is_array(code->linetable)) {
+        jl_array_t *linetable = (jl_array_t*)code->linetable;
+        jl_array_del_end(linetable, jl_array_len(linetable));
+    }
+    // the compiler looks at unused_sym, so slots carrying that name keep it
+    jl_sym_t *placeholder = jl_symbol("?");
+    int slot, nslots = jl_array_len(code->slotnames);
+    for (slot = 0; slot < nslots; slot++) {
+        if (jl_array_ptr_ref(code->slotnames, slot) != (jl_value_t*)jl_unused_sym)
+            jl_array_ptr_set(code->slotnames, slot, placeholder);
+    }
+    if (orig) {
+        m->slot_syms = jl_compress_argnames(code->slotnames);
+        jl_gc_wb(m, m->slot_syms);
+    }
+    jl_value_t *stripped = was_compressed ? (jl_value_t*)jl_compress_ir(m, code) : (jl_value_t*)code;
+    JL_GC_POP();
+    return stripped;
+}
+
+static void record_field_change(jl_value_t **addr, jl_value_t *newval)
+{   // Register `newval` as the value get_replaceable_field returns for the field at `addr`.
+    ptrhash_put(&field_replace, (void*)addr, newval); // keyed by the field's address; the field itself is not written
+}
+
+// Strip one method instance: each cached code instance has its inferred IR dropped
+// (strip_ir) or scrubbed of metadata (strip_metadata); strip_ir also drops `uninferred`.
+static void strip_specializations_(jl_method_instance_t *mi)
+{
+    assert(jl_is_method_instance(mi));
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); // atomic field, as in strip_all_codeinfos__
+    while (codeinst) {
+        if (codeinst->inferred && codeinst->inferred != jl_nothing) {
+            if (jl_options.strip_ir)
+                record_field_change(&codeinst->inferred, jl_nothing); // only the saved image loses the IR
+            else if (jl_options.strip_metadata) {
+                codeinst->inferred = strip_codeinfo_meta(mi->def.method, codeinst->inferred, 0);
+                jl_gc_wb(codeinst, codeinst->inferred);
+            }
+        }
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
+    }
+    if (jl_options.strip_ir)
+        record_field_change(&mi->uninferred, NULL);
+}
+
+// Typemap visitor: strip the source of the method in `def` and everything cached on its
+// specializations, as selected by jl_options.strip_ir / strip_metadata. Always returns 1.
+static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
+{
+    jl_method_t *m = def->func.method;
+    if (m->source) {
+        int drop_ir = 0;
+        if (jl_options.strip_ir) {
+            if (m->unspecialized) {
+                // a generic compiled version lets us remove the IR
+                jl_code_instance_t *generic = jl_atomic_load_relaxed(&m->unspecialized->cache);
+                drop_ir = generic && jl_atomic_load_relaxed(&generic->invoke);
+            }
+            if (!drop_ir) {
+                // if the method is declared not to be compiled, keep IR for interpreter
+                int compile_mode = jl_get_module_compile(m->module);
+                drop_ir = compile_mode != JL_OPTIONS_COMPILE_OFF && compile_mode != JL_OPTIONS_COMPILE_MIN;
+            }
+            if (drop_ir)
+                record_field_change(&m->source, jl_nothing);
+        }
+        if (jl_options.strip_metadata && !drop_ir) {
+            // the IR stays in the image: scrub it and (orig=1) refresh m->slot_syms
+            m->source = strip_codeinfo_meta(m, m->source, 1);
+            jl_gc_wb(m, m->source);
+        }
+    }
+    // entries of the specializations svec that are `nothing` are skipped
+    jl_svec_t *specs = m->specializations;
+    size_t idx, nspecs = jl_svec_len(specs);
+    for (idx = 0; idx < nspecs; idx++) {
+        jl_value_t *spec = jl_svecref(specs, idx);
+        if (spec != jl_nothing)
+            strip_specializations_((jl_method_instance_t*)spec);
+    }
+    if (m->unspecialized)
+        strip_specializations_(m->unspecialized);
+    return 1;
+}
+
+static void strip_all_codeinfos_(jl_methtable_t *mt, void *_env) // callback given to jl_foreach_reachable_mtable; `_env` is unused
+{
+    jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL); // visit `mt->defs` with strip_all_codeinfos__, passing no env
+}
+
+static void jl_strip_all_codeinfos(void) // run by jl_save_system_image_to_stream when strip_metadata or strip_ir is set
+{
+    jl_foreach_reachable_mtable(strip_all_codeinfos_, NULL); // hand method tables to strip_all_codeinfos_; no env is passed
+}
 
 // --- entry points ---
 
@@ -1533,6 +1659,12 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
     jl_gc_collect(JL_GC_FULL);
     jl_gc_collect(JL_GC_INCREMENTAL);   // sweep finalizers
     JL_TIMING(SYSIMG_DUMP);
+
+    htable_new(&field_replace, 10000);
+    // strip metadata and IR when requested
+    if (jl_options.strip_metadata || jl_options.strip_ir)
+        jl_strip_all_codeinfos();
+
     int en = jl_gc_enable(0);
     jl_init_serializer2(1);
     htable_reset(&backref_table, 250000);
@@ -1573,6 +1705,12 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
         gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")),
                                                     jl_symbol("BITS_PER_LIMB"))) / 8;
     }
+    if (jl_base_module) {
+        jl_value_t *docs = jl_get_global(jl_base_module, jl_symbol("Docs"));
+        if (docs && jl_is_module(docs)) {
+            jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META"));
+        }
+    }
 
     { // step 1: record values (recursively) that need to go in the image
         size_t i;
@@ -1660,7 +1798,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
         }
         jl_write_value(&s, s.ptls->root_task->tls);
         write_uint32(f, jl_get_gs_ctr());
-        write_uint32(f, jl_world_counter);
+        write_uint32(f, jl_atomic_load_acquire(&jl_world_counter));
         write_uint32(f, jl_typeinf_world);
         jl_finalize_serializer(&s, &reinit_list);
         jl_finalize_serializer(&s, &ccallable_list);
@@ -1671,6 +1809,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
     arraylist_free(&ccallable_list);
     arraylist_free(&s.relocs_list);
     arraylist_free(&s.gctags_list);
+    htable_free(&field_replace);
     jl_cleanup_serializer2();
 
     jl_gc_enable(en);
@@ -1790,7 +1929,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED
     jl_init_box_caches();
 
     uint32_t gs_ctr = read_uint32(f);
-    jl_world_counter = read_uint32(f);
+    jl_atomic_store_release(&jl_world_counter, read_uint32(f));
     jl_typeinf_world = read_uint32(f);
     jl_set_gs_ctr(gs_ctr);
     s.s = NULL;
@@ -1839,6 +1978,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED
     }
 
     s.s = &sysimg;
+    jl_init_codegen();
     jl_update_all_fptrs(&s); // fptr relocs and registration
     // reinit ccallables, which require codegen to be initialized
     s.s = f;
diff --git a/src/subtype.c b/src/subtype.c
index 0d87532e73c39..eb668645552d7 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -42,11 +42,19 @@ extern "C" {
 // TODO: the stack probably needs to be artificially large because of some
 // deeper problem (see #21191) and could be shrunk once that is fixed
 typedef struct {
-    int depth;
-    int more;
+    int16_t depth;
+    int16_t more;
+    int16_t used;
     uint32_t stack[100];  // stack of bits represented as a bit vector
 } jl_unionstate_t;
 
+typedef struct {    // snapshot of a jl_unionstate_t, written by push_unionstate and read by pop_unionstate
+    int16_t depth;  // copy of the source state's `depth`
+    int16_t more;   // copy of the source state's `more`
+    int16_t used;   // copy of the source state's `used`; fixes how many bytes of `stack` are valid
+    void *stack;    // alloca'd buffer holding the first (used+7)/8 bytes of the source bit vector
+} jl_saved_unionstate_t;
+
 // Linked list storing the type variable environment. A new jl_varbinding_t
 // is pushed for each UnionAll type we encounter. `lb` and `ub` are updated
 // during the computation.
@@ -60,22 +68,21 @@ typedef struct jl_varbinding_t {
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
-    // in covariant position, we need to try constraining a variable in different ways:
-    // 0 - unconstrained
-    // 1 - less than
-    // 2 - greater than
-    // 3 - inexpressible - occurs when the var has non-trivial overlap with another type,
-    //                     and we would need to return `intersect(var,other)`. in this case
-    //                     we choose to over-estimate the intersection by returning the var.
+    // constraintkind: in covariant position, we try three different ways to compute var ∩ type:
+    // let ub = var.ub ∩ type
+    // 0 - var.ub <: type ? var : ub
+    // 1 - var.ub = ub; return var
+    // 2 - either (var.ub = ub; return var), or return ub
     int8_t constraintkind;
-    int depth0;         // # of invariant constructors nested around the UnionAll type for this var
+    int8_t intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
+    int8_t limited;
+    int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
     // when this variable's integer value is compared to that of another,
     // it equals `other + offset`. used by vararg length parameters.
-    int offset;
+    int16_t offset;
     // array of typevars that our bounds depend on, whose UnionAlls need to be
     // moved outside ours.
     jl_array_t *innervars;
-    int intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
     struct jl_varbinding_t *prev;
 } jl_varbinding_t;
 
@@ -94,12 +101,13 @@ typedef struct jl_stenv_t {
     int ignore_free;          // treat free vars as black boxes; used during intersection
     int intersection;         // true iff subtype is being called from intersection
     int emptiness_only;       // true iff intersection only needs to test for emptiness
+    int triangular;           // when intersecting Ref{X} with Ref{<:Y}
 } jl_stenv_t;
 
 // state manipulation utilities
 
 // look up a type variable in an environment
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 static jl_varbinding_t *lookup(jl_stenv_t *e, jl_tvar_t *v) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #else
 static jl_varbinding_t *lookup(jl_stenv_t *e, jl_tvar_t *v) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
@@ -129,6 +137,23 @@ static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT
         st->stack[i>>5] &= ~(1u<<(i&31));
 }
 
+#define push_unionstate(saved, src) /* snapshot src into saved */       \
+    do {                                                                \
+        (saved)->depth = (src)->depth;                                  \
+        (saved)->more = (src)->more;                                    \
+        (saved)->used = (src)->used;                                    \
+        (saved)->stack = alloca(((src)->used+7)/8); /* caller frame */  \
+        memcpy((saved)->stack, &(src)->stack, ((src)->used+7)/8);       \
+    } while (0) /* no trailing ';': the call site supplies it */
+
+#define pop_unionstate(dst, saved) /* restore dst from saved */         \
+    do {                                                                \
+        (dst)->depth = (saved)->depth;                                  \
+        (dst)->more = (saved)->more;                                    \
+        (dst)->used = (saved)->used;                                    \
+        memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8);     \
+    } while (0) /* no trailing ';': the call site supplies it */
+
 typedef struct {
     int8_t *buf;
     int rdepth;
@@ -146,7 +171,7 @@ static void save_env(jl_stenv_t *e, jl_value_t **root, jl_savedenv_t *se)
     if (root)
         *root = (jl_value_t*)jl_alloc_svec(len * 3);
     se->buf = (int8_t*)(len > 8 ? malloc_s(len * 2) : &se->_space);
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
     memset(se->buf, 0, len * 2);
 #endif
     int i=0, j=0; v = e->vars;
@@ -486,6 +511,10 @@ static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv
 {
     jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
     do {
+        if (state->depth >= state->used) {
+            statestack_set(state, state->used, 0);
+            state->used++;
+        }
         int ui = statestack_get(state, state->depth);
         state->depth++;
         if (ui == 0) {
@@ -514,11 +543,10 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         return 1;
     if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y))
         return 0;
-    jl_unionstate_t oldLunions = e->Lunions;
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int sub;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Lunions.used = e->Runions.used = 0;
     e->Runions.depth = 0;
     e->Runions.more = 0;
     e->Lunions.depth = 0;
@@ -526,8 +554,8 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
     sub = forall_exists_subtype(x, y, e, 0);
 
-    e->Runions = oldRunions;
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Runions, &oldRunions);
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub;
 }
 
@@ -611,6 +639,8 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     return 1;
 }
 
+static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT;
+
 // check that type var `b` is >: `a`, and update b's lower bound.
 static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
 {
@@ -628,7 +658,10 @@ static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     }
     if (!((bb->ub == (jl_value_t*)jl_any_type && !jl_is_type(a) && !jl_is_typevar(a)) || subtype_ccheck(a, bb->ub, e)))
         return 0;
-    bb->lb = simple_join(bb->lb, a);
+    jl_value_t *lb = simple_join(bb->lb, a);
+    if (!e->intersection || !subtype_by_bounds(lb, (jl_value_t*)b, e))
+        bb->lb = lb;
+    // this bound should not be directly circular
     assert(bb->lb != (jl_value_t*)b);
     if (jl_is_typevar(a)) {
         jl_varbinding_t *aa = lookup(e, (jl_tvar_t*)a);
@@ -731,8 +764,8 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
     int ans;
@@ -1148,6 +1181,10 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // union against the variable before trying to take it apart to see if there are any
             // variables lurking inside.
             jl_unionstate_t *state = &e->Runions;
+            if (state->depth >= state->used) {
+                statestack_set(state, state->used, 0);
+                state->used++;
+            }
             ui = statestack_get(state, state->depth);
             state->depth++;
             if (ui == 0)
@@ -1310,13 +1347,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y)))
         return 0;
 
-    jl_unionstate_t oldLunions = e->Lunions;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    e->Lunions.used = 0;
     int sub;
 
     if (!jl_has_free_typevars(x) || !jl_has_free_typevars(y)) {
-        jl_unionstate_t oldRunions = e->Runions;
-        memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
+        e->Runions.used = 0;
         e->Runions.depth = 0;
         e->Runions.more = 0;
         e->Lunions.depth = 0;
@@ -1324,7 +1361,7 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
         sub = forall_exists_subtype(x, y, e, 2);
 
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
     }
     else {
         int lastset = 0;
@@ -1342,13 +1379,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
     }
 
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub && subtype(y, x, e, 0);
 }
 
 static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_t *saved, jl_savedenv_t *se, int param)
 {
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     int lastset = 0;
     while (1) {
         e->Runions.depth = 0;
@@ -1379,7 +1416,7 @@ static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, in
     JL_GC_PUSH1(&saved);
     save_env(e, &saved, &se);
 
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    e->Lunions.used = 0;
     int lastset = 0;
     int sub;
     while (1) {
@@ -1413,8 +1450,10 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
     e->ignore_free = 0;
     e->intersection = 0;
     e->emptiness_only = 0;
+    e->triangular = 0;
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
+    e->Lunions.used = 0;       e->Runions.used = 0;
 }
 
 // subtyping entry points
@@ -2084,14 +2123,14 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
     if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x))
         return x;
 
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int savedepth = e->invdepth, Rsavedepth = e->Rinvdepth;
     // TODO: this doesn't quite make sense
     e->invdepth = e->Rinvdepth = d;
 
     jl_value_t *res = intersect_all(x, y, e);
 
-    e->Runions = oldRunions;
+    pop_unionstate(&e->Runions, &oldRunions);
     e->invdepth = savedepth;
     e->Rinvdepth = Rsavedepth;
     return res;
@@ -2102,10 +2141,10 @@ static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t
     if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) {
         jl_value_t *a=NULL, *b=NULL;
         JL_GC_PUSH2(&a, &b);
-        jl_unionstate_t oldRunions = e->Runions;
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e);
         b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e);
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
         jl_value_t *i = simple_join(a,b);
         JL_GC_POP();
         return i;
@@ -2171,7 +2210,7 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
         return;
     jl_varbinding_t *btemp = e->vars;
     while (btemp != NULL) {
-        if (btemp->lb == (jl_value_t*)v && btemp->ub == (jl_value_t*)v &&
+        if ((btemp->lb == (jl_value_t*)v || btemp->ub == (jl_value_t*)v) &&
             in_union(val, (jl_value_t*)btemp->var))
             return;
         btemp = btemp->prev;
@@ -2210,13 +2249,44 @@ static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *
     return issub;
 }
 
+// Is var `y` reachable from `x` by following variable bounds in `e`? Used to avoid
+// cycles; a non-variable `x`, or a variable without a binding in `e`, ends the walk.
+static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e)
+{
+    if (in_union(x, (jl_value_t*)y))
+        return 1;
+    jl_varbinding_t *binding = jl_is_typevar(x) ? lookup(e, (jl_tvar_t*)x) : NULL;
+    if (binding == NULL)
+        return 0;
+    int via_upper = reachable_var(binding->ub, y, e);
+    return via_upper ? 1 : reachable_var(binding->lb, y, e);
+}
+
+// Check whether setting v == t implies v == SomeType{v}, which is unsatisfiable: true if
+// var_occurs_inside finds in `t` either `v` or a variable of `e` with lb and ub both `v`.
+static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    jl_varbinding_t *vb;
+    if (var_occurs_inside(t, v, 0, 0))
+        return 1;
+    for (vb = e->vars; vb != NULL; vb = vb->prev) {
+        int pinned_to_v = vb->lb == (jl_value_t*)v && vb->ub == (jl_value_t*)v;
+        if (pinned_to_v && var_occurs_inside(t, vb->var, 0, 0))
+            return 1;
+    }
+    return 0;
+}
+
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
     if (bb == NULL)
         return R ? intersect_aside(a, b->ub, e, 1, 0) : intersect_aside(b->ub, a, e, 0, 0);
-    if (bb->lb == bb->ub && jl_is_typevar(bb->lb) && bb->lb != (jl_value_t*)b)
+    if (reachable_var(bb->lb, b, e) || reachable_var(bb->ub, b, e))
+        return a;
+    if (bb->lb == bb->ub && jl_is_typevar(bb->lb)) {
         return intersect(a, bb->lb, e, param);
+    }
     if (!jl_is_type(a) && !jl_is_typevar(a))
         return set_var_to_const(bb, a, NULL);
     int d = bb->depth0;
@@ -2236,7 +2306,9 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
             ub = a;
         }
         else {
+            e->triangular++;
             ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
+            e->triangular--;
             save_env(e, &root, &se);
             int issub = subtype_in_env_existential(bb->lb, ub, e, 0, d);
             restore_env(e, root, &se);
@@ -2248,20 +2320,10 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         }
         if (ub != (jl_value_t*)b) {
             if (jl_has_free_typevars(ub)) {
-                // constraint X == Ref{X} is unsatisfiable. also check variables set equal to X.
-                if (var_occurs_inside(ub, b, 0, 0)) {
+                if (check_unsat_bound(ub, b, e)) {
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
-                jl_varbinding_t *btemp = e->vars;
-                while (btemp != NULL) {
-                    if (btemp->lb == (jl_value_t*)b && btemp->ub == (jl_value_t*)b &&
-                        var_occurs_inside(ub, btemp->var, 0, 0)) {
-                        JL_GC_POP();
-                        return jl_bottom_type;
-                    }
-                    btemp = btemp->prev;
-                }
             }
             bb->ub = ub;
             bb->lb = ub;
@@ -2269,67 +2331,33 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         JL_GC_POP();
         return ub;
     }
-    else if (bb->constraintkind == 0) {
-        if (!jl_is_typevar(bb->ub) && !jl_is_typevar(a)) {
-            if (try_subtype_in_env(bb->ub, a, e, 0, d))
-                return (jl_value_t*)b;
-        }
-        return R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-    }
-    else if (bb->concrete || bb->constraintkind == 1) {
-        jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-        if (ub == jl_bottom_type)
-            return jl_bottom_type;
-        JL_GC_PUSH1(&ub);
-        if (!R && !subtype_bounds_in_env(bb->lb, a, e, 0, d)) {
-            // this fixes issue #30122. TODO: better fix for R flag.
-            JL_GC_POP();
-            return jl_bottom_type;
-        }
-        JL_GC_POP();
-        set_bound(&bb->ub, ub, b, e);
-        return (jl_value_t*)b;
-    }
-    else if (bb->constraintkind == 2) {
-        // TODO: removing this case fixes many test_brokens in test/subtype.jl
-        // but breaks other tests.
-        if (!subtype_bounds_in_env(a, bb->ub, e, 1, d)) {
-            // mark var as unsatisfiable by making it circular
-            bb->lb = (jl_value_t*)b;
-            return jl_bottom_type;
-        }
-        jl_value_t *lb = simple_join(bb->lb, a);
-        set_bound(&bb->lb, lb, b, e);
-        return a;
-    }
-    assert(bb->constraintkind == 3);
     jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
-    if (jl_is_typevar(a))
+    if (bb->constraintkind == 1 || e->triangular) {
+        if (e->triangular && check_unsat_bound(ub, b, e))
+            return jl_bottom_type;
+        set_bound(&bb->ub, ub, b, e);
         return (jl_value_t*)b;
-    if (ub == a) {
-        if (bb->lb == jl_bottom_type) {
-            set_bound(&bb->ub, a, b, e);
+    }
+    else if (bb->constraintkind == 0) {
+        JL_GC_PUSH1(&ub);
+        if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) {
+            JL_GC_POP();
             return (jl_value_t*)b;
         }
+        JL_GC_POP();
         return ub;
     }
-    else if (bb->ub == bb->lb) {
-        return ub;
-    }
-    root = NULL;
-    JL_GC_PUSH2(&root, &ub);
-    save_env(e, &root, &se);
-    jl_value_t *ii = R ? intersect_aside(a, bb->lb, e, 1, d) : intersect_aside(bb->lb, a, e, 0, d);
-    if (ii == jl_bottom_type) {
-        restore_env(e, root, &se);
-        ii = (jl_value_t*)b;
+    assert(bb->constraintkind == 2);
+    if (!jl_is_typevar(a)) {
+        if (ub == a && bb->lb != jl_bottom_type)
+            return ub;
+        else if (jl_egal(bb->ub, bb->lb))
+            return ub;
         set_bound(&bb->ub, ub, b, e);
     }
-    free_env(&se);
-    JL_GC_POP();
-    return ii;
+    return (jl_value_t*)b;
 }
 
 // test whether `var` occurs inside constructors. `want_inv` tests only inside
@@ -2373,7 +2401,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want
 }
 
 // Caller might not have rooted `res`
-static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_stenv_t *e)
+static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e)
 {
     jl_value_t *varval = NULL;
     jl_tvar_t *newvar = vb->var;
@@ -2386,7 +2414,10 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         // given x<:T<:x, substitute x for T
         varval = vb->ub;
     }
-    else if (!vb->occurs_inv && is_leaf_bound(vb->ub)) {
+    // TODO: `vb.occurs_cov == 1` here allows substituting Tuple{<:X} => Tuple{X},
+    // which is valid but changes some ambiguity errors so we don't need to do it yet.
+    else if ((/*vb->occurs_cov == 1 || */is_leaf_bound(vb->ub)) &&
+             !var_occurs_invariant(u->body, u->var, 0)) {
         // replace T<:x with x in covariant position when possible
         varval = vb->ub;
     }
@@ -2404,9 +2435,8 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         }
     }
 
-    // prefer generating a fresh typevar, to avoid repeated renaming if the result
-    // is compared to one of the intersected types later.
-    if (!varval)
+    // TODO: this can prevent us from matching typevar identities later
+    if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
     // remove/replace/rewrap free occurrences of this var in the environment
@@ -2521,7 +2551,13 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     // if the var for this unionall (based on identity) already appears somewhere
     // in the environment, rename to get a fresh var.
     // TODO: might need to look inside types in btemp->lb and btemp->ub
+    int envsize = 0;
     while (btemp != NULL) {
+        envsize++;
+        if (envsize > 120) {
+            vb->limited = 1;
+            return t;
+        }
         if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
             btemp->ub == (jl_value_t*)u->var) {
             u = rename_unionall(u);
@@ -2571,46 +2607,37 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     }
     if (res != jl_bottom_type)
         // res is rooted by callee
-        res = finish_unionall(res, vb, e);
+        res = finish_unionall(res, vb, u, e);
     JL_GC_POP();
     return res;
 }
 
 static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    jl_value_t *res=NULL, *res2=NULL, *save=NULL, *save2=NULL;
-    jl_savedenv_t se, se2;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
-    JL_GC_PUSH6(&res, &save2, &vb.lb, &vb.ub, &save, &vb.innervars);
+    jl_value_t *res=NULL, *save=NULL;
+    jl_savedenv_t se;
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
+    JL_GC_PUSH5(&res, &vb.lb, &vb.ub, &save, &vb.innervars);
     save_env(e, &save, &se);
     res = intersect_unionall_(t, u, e, R, param, &vb);
-    if (res != jl_bottom_type) {
+    if (vb.limited) {
+        // if the environment got too big, avoid tree recursion and propagate the flag
+        if (e->vars)
+            e->vars->limited = 1;
+    }
+    else if (res != jl_bottom_type) {
         if (vb.concrete || vb.occurs_inv>1 || u->var->lb != jl_bottom_type || (vb.occurs_inv && vb.occurs_cov)) {
             restore_env(e, NULL, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.constraintkind = 3;
+            vb.constraintkind = vb.concrete ? 1 : 2;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
-        else if (vb.occurs_cov) {
-            save_env(e, &save2, &se2);
+        else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) {
             restore_env(e, save, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.lb = u->var->lb; vb.ub = u->var->ub;
             vb.constraintkind = 1;
-            res2 = intersect_unionall_(t, u, e, R, param, &vb);
-            if (res2 == jl_bottom_type) {
-                restore_env(e, save, &se);
-                vb.occurs_cov = vb.occurs_inv = 0;
-                vb.lb = u->var->lb; vb.ub = u->var->ub;
-                vb.constraintkind = 2;
-                res2 = intersect_unionall_(t, u, e, R, param, &vb);
-                if (res2 == jl_bottom_type)
-                    restore_env(e, save2, &se2);
-            }
-            if (res2 != jl_bottom_type)
-                res = res2;
-            free_env(&se2);
+            res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
     free_env(&se);
@@ -2896,7 +2923,7 @@ static jl_value_t *intersect_type_type(jl_value_t *x, jl_value_t *y, jl_stenv_t
 
 // cmp <= 0: is x already <= y in this environment
 // cmp >= 0: is x already >= y in this environment
-static int compareto_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e, int cmp)
+static int compareto_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e, int cmp) JL_NOTSAFEPOINT
 {
     if (x == (jl_value_t*)y)
         return 1;
@@ -2916,26 +2943,13 @@ static int compareto_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e, int cmp)
 // Check whether the environment already asserts x <: y via recorded bounds.
 // This is used to avoid adding redundant constraints that lead to cycles.
 // Note this is a semi-predicate: 1 => is a subtype, 0 => unknown
-static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
+static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT
 {
     if (!jl_is_typevar(x) || !jl_is_typevar(y))
         return 0;
     return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1);
 }
 
-// See if var y is reachable from x via bounds; used to avoid cycles.
-static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e)
-{
-    if (x == (jl_value_t*)y)
-        return 1;
-    if (!jl_is_typevar(x))
-        return 0;
-    jl_varbinding_t *xv = lookup(e, (jl_tvar_t*)x);
-    if (xv == NULL)
-        return 0;
-    return reachable_var(xv->ub, y, e) || reachable_var(xv->lb, y, e);
-}
-
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
 // this is used to record the positions where type variables occur for the
@@ -3009,14 +3023,13 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 jl_value_t *ub=NULL, *lb=NULL;
                 JL_GC_PUSH2(&lb, &ub);
                 ub = intersect_aside(xub, yub, e, 0, xx ? xx->depth0 : 0);
-                if (xlb == y)
+                if (reachable_var(xlb, (jl_tvar_t*)y, e))
                     lb = ylb;
                 else
                     lb = simple_join(xlb, ylb);
                 if (yy) {
-                    if (!subtype_by_bounds(lb, y, e))
-                        yy->lb = lb;
-                    if (!subtype_by_bounds(y, ub, e))
+                    yy->lb = lb;
+                    if (!reachable_var(ub, (jl_tvar_t*)y, e))
                         yy->ub = ub;
                     assert(yy->ub != y);
                     assert(yy->lb != y);
@@ -3152,7 +3165,7 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     e->Runions.depth = 0;
     e->Runions.more = 0;
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     jl_value_t **is;
     JL_GC_PUSHARGS(is, 3);
     jl_value_t **saved = &is[2];
@@ -3169,11 +3182,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         save_env(e, saved, &se);
     }
     while (e->Runions.more) {
-        if (e->emptiness_only && ii != jl_bottom_type) {
-            free_env(&se);
-            JL_GC_POP();
-            return ii;
-        }
+        if (e->emptiness_only && ii != jl_bottom_type)
+            break;
         e->Runions.depth = 0;
         int set = e->Runions.more - 1;
         e->Runions.more = 0;
@@ -3202,9 +3212,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
         total_iter++;
         if (niter > 3 || total_iter > 400000) {
-            free_env(&se);
-            JL_GC_POP();
-            return y;
+            ii = y;
+            break;
         }
     }
     free_env(&se);
diff --git a/src/support/END.h b/src/support/END.h
index 090bbc02eeb1c..3a7c3ac00a9ac 100644
--- a/src/support/END.h
+++ b/src/support/END.h
@@ -36,18 +36,13 @@
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__ELF__)
 .size CNAME, . - CNAME
 #else
-#ifdef _MSC_VER
-CNAME endp
-#else
 #ifdef _WIN64
 .seh_endproc
 #endif
 #endif
-#endif
 
 
 #undef CNAME
-#undef HIDENAME
 #undef STR
 #undef XSTR
 #undef _START_ENTRY
diff --git a/src/support/ENTRY.amd64.h b/src/support/ENTRY.amd64.h
index b8049f0711f89..d4decb98e973a 100644
--- a/src/support/ENTRY.amd64.h
+++ b/src/support/ENTRY.amd64.h
@@ -41,7 +41,6 @@
 #define EXT_(csym)          csym
 #define EXT(csym)           EXT_(csym)
 #endif
-#define HIDENAME(asmsym)    .asmsym
 .text
 _START_ENTRY
 .globl EXT(CNAME)
@@ -51,9 +50,7 @@ EXT(CNAME):
 #elif defined(_WIN32)
 #define EXT_(csym)          csym
 #define EXT(csym)           EXT_(csym)
-#define HIDENAME(asmsym)    .asmsym
 
-#ifndef _MSC_VER
 .intel_syntax noprefix
 .text
 _START_ENTRY
@@ -69,9 +66,5 @@ _START_ENTRY
 .seh_proc EXT(CNAME)
 EXT(CNAME):
 .seh_endprologue
-#else
-.code
-CNAME proc
-#endif
 
 #endif
diff --git a/src/support/ENTRY.i387.h b/src/support/ENTRY.i387.h
index d80038671247a..7a857f22f855b 100644
--- a/src/support/ENTRY.i387.h
+++ b/src/support/ENTRY.i387.h
@@ -41,7 +41,6 @@
 #define EXT_(csym)          csym
 #define EXT(csym)           EXT_(csym)
 #endif
-#define HIDENAME(asmsym)    .asmsym
 .text
 _START_ENTRY
 .globl EXT(CNAME)
@@ -51,9 +50,7 @@ EXT(CNAME):
 #elif defined(_WIN32)
 #define EXT_(csym)          _##csym
 #define EXT(csym)           EXT_(csym)
-#define HIDENAME(asmsym)    .asmsym
 
-#ifndef _MSC_VER
 .intel_syntax
 .text
 _START_ENTRY
@@ -66,11 +63,5 @@ _START_ENTRY
 .type 32
 .endef
 EXT(CNAME):
-#else
-.586
-.model small,C
-.code
-CNAME proc
-#endif
 
 #endif
diff --git a/src/support/Makefile b/src/support/Makefile
index 1ccfdeed3f3da..6083823e95408 100644
--- a/src/support/Makefile
+++ b/src/support/Makefile
@@ -11,16 +11,13 @@ JLDFLAGS += $(LDFLAGS)
 SRCS := hashing timefuncs ptrhash operators utf8 ios htable bitvector \
 	int2str libsupportinit arraylist strtod
 ifeq ($(OS),WINNT)
-SRCS += asprintf strptime win32_ucontext
+SRCS += asprintf strptime
 ifeq ($(ARCH),i686)
 SRCS += _setjmp.win32
 else ifeq ($(ARCH),x86_64)
 SRCS += _setjmp.win64
 endif
 endif
-ifeq ($(USEMSVC), 1)
-SRCS += dirname
-endif
 
 HEADERS := $(wildcard *.h) $(LIBUV_INC)/uv.h
 
@@ -28,10 +25,8 @@ OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
 FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS
-ifneq ($(USEMSVC), 1)
 FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
 JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
-endif
 
 DEBUGFLAGS += $(FLAGS)
 SHIPFLAGS += $(FLAGS)
@@ -45,19 +40,10 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
-ifneq ($(USEMSVC), 1)
 $(BUILDDIR)/%.o: $(SRCDIR)/%.S | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(SHIPFLAGS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.S | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(DEBUGFLAGS) -c $< -o $@)
-else
-$(BUILDDIR)/%.o: $(SRCDIR)/%.S | $(BUILDDIR)
-	@$(call PRINT_CC, $(CPP) -P $(JCPPFLAGS) $(SHIPFLAGS) $<)
-	@$(call PRINT_CC, $(AS) $(JCPPFLAGS) $(SHIPFLAGS) -Fo $@ -c $*.i)
-$(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.S | $(BUILDDIR)
-	@$(call PRINT_CC, $(CPP) -P $(JCPPFLAGS) $(DEBUGFLAGS) $<)
-	@$(call PRINT_CC, $(AS) $(JCPPFLAGS) $(DEBUGFLAGS) -Fo $@ -c $*.i)
-endif
 
 $(BUILDDIR)/host/Makefile:
 	mkdir -p $(BUILDDIR)/host
diff --git a/src/support/MurmurHash3.c b/src/support/MurmurHash3.c
index 94069eab02732..fce7351f90ffe 100644
--- a/src/support/MurmurHash3.c
+++ b/src/support/MurmurHash3.c
@@ -12,23 +12,6 @@
 //-----------------------------------------------------------------------------
 // Platform-specific functions and macros
 
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE    __forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y)     _rotl(x,y)
-#define ROTL64(x,y)     _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else   // defined(_MSC_VER)
-
 #define FORCE_INLINE inline __attribute__((always_inline))
 
 static inline uint32_t rotl32 ( uint32_t x, int8_t r )
@@ -46,8 +29,6 @@ static inline uint64_t rotl64 ( uint64_t x, int8_t r )
 
 #define BIG_CONSTANT(x) (x##LLU)
 
-#endif // !defined(_MSC_VER)
-
 //-----------------------------------------------------------------------------
 // Finalization mix - force all bits of a hash block to avalanche
 
diff --git a/src/support/_setjmp.win32.S b/src/support/_setjmp.win32.S
index 441872dd4261a..33ed50ed3deab 100644
--- a/src/support/_setjmp.win32.S
+++ b/src/support/_setjmp.win32.S
@@ -56,8 +56,10 @@
  * and update fs:[0xEOC] to contain the address of the stack
  */
 
-#define CNAME jl_setjmp
+#define CNAME ijl_setjmp
 #include "ENTRY.i387.h"
+.globl _jl_setjmp
+_jl_setjmp:
     mov    eax,DWORD PTR [esp+4] // arg 1
     mov    edx,DWORD PTR [esp+0] // rta
     mov    DWORD PTR [eax+0],ebp
@@ -73,8 +75,10 @@
 #include "END.h"
 
 
-#define CNAME jl_longjmp
+#define CNAME ijl_longjmp
 #include "ENTRY.i387.h"
+.globl _jl_longjmp
+_jl_longjmp:
     mov    edx,DWORD PTR [esp+4] // arg 1
     mov    eax,DWORD PTR [esp+8] // arg 2
     mov    ebp,DWORD PTR [edx+24] // seh registration
@@ -87,14 +91,16 @@
     mov    ebp,DWORD PTR [edx+0]
     mov    DWORD PTR [esp],ecx
     test   eax,eax
-    jne    a
+    jne    1f
     inc    eax
-a:  ret    // jmp ecx
+1:  ret    // jmp ecx
 #include "END.h"
 
 
-#define CNAME jl_swapcontext
+#define CNAME ijl_swapcontext
 #include "ENTRY.i387.h"
+.globl _jl_swapcontext
+_jl_swapcontext:
     mov    eax,DWORD PTR [esp+4]
     // save stack registers
     mov    edx,DWORD PTR fs:[8] // stack top (low)
@@ -118,8 +124,10 @@ a:  ret    // jmp ecx
 #include "END.h"
 
 
-#define CNAME jl_setcontext
+#define CNAME ijl_setcontext
 #include "ENTRY.i387.h"
+.globl _jl_setcontext
+_jl_setcontext:
     mov    eax,DWORD PTR [esp+4]
     // restore stack registers
     mov    edx,DWORD PTR [eax+0]
diff --git a/src/support/_setjmp.win64.S b/src/support/_setjmp.win64.S
index cb512cfe4ab3e..f5e5c69c7cff3 100644
--- a/src/support/_setjmp.win64.S
+++ b/src/support/_setjmp.win64.S
@@ -6,8 +6,10 @@
  * and update gs:[0x1478] to contain the address of the stack
  */
 
-#define CNAME jl_setjmp
+#define CNAME ijl_setjmp
 #include "ENTRY.amd64.h"
+.globl jl_setjmp
+jl_setjmp:
     mov    rdx,QWORD PTR [rsp] // rta
     mov    rax,QWORD PTR gs:[0] // SEH
     mov    QWORD PTR [rcx+0],rax
@@ -37,8 +39,10 @@
 #include "END.h"
 
 
-#define CNAME jl_longjmp
+#define CNAME ijl_longjmp
 #include "ENTRY.amd64.h"
+.globl jl_longjmp
+jl_longjmp:
     mov    rax,QWORD PTR [rcx+0]
     mov    rbx,QWORD PTR [rcx+8]
     mov    rsp,QWORD PTR [rcx+16]
@@ -63,15 +67,17 @@
     mov    QWORD PTR gs:[0],rax
     mov    eax,edx // move arg2 to return
     test   eax,eax
-    jne    a
+    jne    1f
     inc    eax
-a:  mov    QWORD PTR [rsp],r8
+1:  mov    QWORD PTR [rsp],r8
     ret
 #include "END.h"
 
 
-#define CNAME jl_swapcontext
+#define CNAME ijl_swapcontext
 #include "ENTRY.amd64.h"
+.globl jl_swapcontext
+jl_swapcontext:
     // save stack registers
     mov    r8,QWORD PTR gs:[16] // stack top (low)
     mov    rax,QWORD PTR gs:[8] // stack bottom (high)
@@ -109,8 +115,10 @@ a:  mov    QWORD PTR [rsp],r8
 #include "END.h"
 
 
-#define CNAME jl_setcontext
+#define CNAME ijl_setcontext
 #include "ENTRY.amd64.h"
+.globl jl_setcontext
+jl_setcontext:
     // restore stack registers
     mov    r8,QWORD PTR [rcx+0]
     mov    rax,QWORD PTR [rcx+8]
diff --git a/src/support/analyzer_annotations.h b/src/support/analyzer_annotations.h
index 1579584a572a9..70b5a273953f1 100644
--- a/src/support/analyzer_annotations.h
+++ b/src/support/analyzer_annotations.h
@@ -8,7 +8,7 @@
 #endif
 #define JL_NONNULL _Nonnull
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 #define JL_PROPAGATES_ROOT __attribute__((annotate("julia_propagates_root")))
 #define JL_NOTSAFEPOINT __attribute__((annotate("julia_not_safepoint")))
diff --git a/src/support/arraylist.c b/src/support/arraylist.c
index 343ee59ab6540..230c4ed3a16f5 100644
--- a/src/support/arraylist.c
+++ b/src/support/arraylist.c
@@ -104,7 +104,7 @@ void small_arraylist_free(small_arraylist_t *a)
     a->items = &a->_space[0];
 }
 
-void small_arraylist_grow(small_arraylist_t *a, uint32_t n)
+JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n)
 {
     size_t len = a->len;
     size_t newlen = len + n;
diff --git a/src/support/arraylist.h b/src/support/arraylist.h
index f996fb397c6e0..03bfd45f8f525 100644
--- a/src/support/arraylist.h
+++ b/src/support/arraylist.h
@@ -39,7 +39,7 @@ void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
 
 void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
 void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
-void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
 }
diff --git a/src/support/dirpath.h b/src/support/dirpath.h
index 0657cc26de0b1..36b24d0001c78 100644
--- a/src/support/dirpath.h
+++ b/src/support/dirpath.h
@@ -6,7 +6,7 @@
 #ifdef _OS_WINDOWS_
 #define PATHSEPSTRING "\\"
 #define PATHLISTSEPSTRING ";"
-#if defined(_MSC_VER) || defined(_COMPILER_CLANG_)
+#if defined(_COMPILER_CLANG_)
 #define PATH_MAX MAX_PATH
 #endif
 #else
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index 46780e8e64d4a..2df897c7ba554 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -10,11 +10,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <stdio.h>
-#if defined(_COMPILER_INTEL_)
-#include <mathimf.h>
-#else
-#include <math.h>
-#endif
+#include <math.h> // NAN and INF constants
 
 #include "platform.h"
 #include "analyzer_annotations.h"
@@ -28,6 +24,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
 
 #if !defined(_COMPILER_GCC_)
 
@@ -76,17 +74,6 @@
 #define JL_DLLIMPORT
 #endif
 
-/*
- * Debug builds include `-fstack-protector`, which adds a bit of extra prologue to
- * functions, even naked ones.  We don't want that, but we also don't want the
- * compiler warnings when `no_stack_protector` has no effect.
- */
-#ifdef JL_DEBUG_BUILD
-#define JL_NAKED __attribute__ ((naked,no_stack_protector))
-#else
-#define JL_NAKED __attribute__ ((naked))
-#endif
-
 #ifdef _OS_LINUX_
 #include <endian.h>
 #define LITTLE_ENDIAN  __LITTLE_ENDIAN
@@ -119,13 +106,7 @@
 #define LLT_REALLOC(p,n) realloc((p),(n))
 #define LLT_FREE(x) free(x)
 
-#if defined(_OS_WINDOWS_) && defined(_COMPILER_INTEL_)
-#  define STATIC_INLINE static
-#elif defined(_OS_WINDOWS_) && defined(_COMPILER_MICROSOFT_)
-#  define STATIC_INLINE static __inline
-#else
-#  define STATIC_INLINE static inline
-#endif
+#define STATIC_INLINE static inline
 
 #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
 #  define NOINLINE __declspec(noinline)
@@ -135,13 +116,7 @@
 #  define NOINLINE_DECL(f) f __attribute__((noinline))
 #endif
 
-#ifdef _COMPILER_MICROSOFT_
-# ifdef _P64
-#  define JL_ATTRIBUTE_ALIGN_PTRSIZE(x) __declspec(align(8)) x
-# else
-#  define JL_ATTRIBUTE_ALIGN_PTRSIZE(x) __declspec(align(4)) x
-# endif
-#elif defined(__GNUC__)
+#if defined(__GNUC__)
 #  define JL_ATTRIBUTE_ALIGN_PTRSIZE(x) x __attribute__ ((aligned (sizeof(void*))))
 #else
 #  define JL_ATTRIBUTE_ALIGN_PTRSIZE(x)
@@ -159,20 +134,6 @@
                 __builtin_assume(!!(cond_));            \
                 cond_;                                  \
             }))
-#elif defined(_COMPILER_MICROSOFT_) && defined(__cplusplus)
-template<typename T>
-static inline T
-jl_assume(T v)
-{
-    __assume(!!v);
-    return v;
-}
-#elif defined(_COMPILER_INTEL_)
-#define jl_assume(cond) (__extension__ ({               \
-                __typeof__(cond) cond_ = (cond);        \
-                __assume(!!(cond_));                    \
-                cond_;                                  \
-            }))
 #elif defined(__GNUC__)
 static inline void jl_assume_(int cond)
 {
@@ -191,12 +152,6 @@ static inline void jl_assume_(int cond)
 
 #if jl_has_builtin(__builtin_assume_aligned) || defined(_COMPILER_GCC_)
 #define jl_assume_aligned(ptr, align) __builtin_assume_aligned(ptr, align)
-#elif defined(_COMPILER_INTEL_)
-#define jl_assume_aligned(ptr, align) (__extension__ ({         \
-                __typeof__(ptr) ptr_ = (ptr);                   \
-                __assume_aligned(ptr_, align);                  \
-                ptr_;                                           \
-            }))
 #elif defined(__GNUC__)
 #define jl_assume_aligned(ptr, align) (__extension__ ({         \
                 __typeof__(ptr) ptr_ = (ptr);                   \
@@ -360,16 +315,12 @@ STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val) JL_NOTSAFEPOI
     memcpy(ptr, &val, 2);
 }
 
-#ifdef _OS_WINDOWS_
-#include <errhandlingapi.h>
-#endif
-
 STATIC_INLINE void *malloc_s(size_t sz) JL_NOTSAFEPOINT {
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    void *p = malloc(sz);
+    void *p = malloc(sz == 0 ? 1 : sz);
     if (p == NULL) {
         perror("(julia) malloc");
         abort();
@@ -386,7 +337,7 @@ STATIC_INLINE void *realloc_s(void *p, size_t sz) JL_NOTSAFEPOINT {
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    p = realloc(p, sz);
+    p = realloc(p, sz == 0 ? 1 : sz);
     if (p == NULL) {
         perror("(julia) realloc");
         abort();
diff --git a/src/support/hashing.h b/src/support/hashing.h
index 8686c746f4898..bed688e94f5b2 100644
--- a/src/support/hashing.h
+++ b/src/support/hashing.h
@@ -12,9 +12,9 @@ extern "C" {
 #endif
 
 uint_t nextipow2(uint_t i) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint32_t int32hash(uint32_t a) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint64_t int64hash(uint64_t key) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint32_t int64to32hash(uint64_t key) JL_NOTSAFEPOINT;
+uint32_t int32hash(uint32_t a) JL_NOTSAFEPOINT;
+uint64_t int64hash(uint64_t key) JL_NOTSAFEPOINT;
+uint32_t int64to32hash(uint64_t key) JL_NOTSAFEPOINT;
 #ifdef _P64
 #define inthash int64hash
 #else
diff --git a/src/support/htable.inc b/src/support/htable.inc
index fa59624a4998f..7a9be2514e2f0 100644
--- a/src/support/htable.inc
+++ b/src/support/htable.inc
@@ -13,67 +13,77 @@
 static void **HTNAME##_lookup_bp_r(htable_t *h, void *key, void *ctx)   \
 {                                                                       \
     uint_t hv;                                                          \
-    size_t i, orig, index, iter;                                        \
+    size_t i, orig, index, iter, empty_slot;                            \
     size_t newsz, sz = hash_size(h);                                    \
     size_t maxprobe = max_probe(sz);                                    \
     void **tab = h->table;                                              \
     void **ol;                                                          \
                                                                         \
     hv = HFUNC((uintptr_t)key, ctx);                                    \
- retry_bp:                                                              \
-    iter = 0;                                                           \
-    index = (size_t)(hv & (sz-1)) * 2;                                  \
-    sz *= 2;                                                            \
-    orig = index;                                                       \
-                                                                        \
-    do {                                                                \
-        if (tab[index+1] == HT_NOTFOUND) {                              \
-            tab[index] = key;                                           \
-            return &tab[index+1];                                       \
+    while (1) {                                                         \
+        iter = 0;                                                       \
+        index = (size_t)(hv & (sz-1)) * 2;                              \
+        sz *= 2;                                                        \
+        orig = index;                                                   \
+        empty_slot = -1;                                                \
+                                                                        \
+        do {                                                            \
+            if (tab[index] == HT_NOTFOUND) {                            \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+                break;                                                  \
+            }                                                           \
+            if (tab[index+1] == HT_NOTFOUND) {                          \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+            }                                                           \
+                                                                        \
+            if (EQFUNC(key, tab[index], ctx))                           \
+                return &tab[index+1];                                   \
+                                                                        \
+            index = (index+2) & (sz-1);                                 \
+            iter++;                                                     \
+            if (iter > maxprobe)                                        \
+                break;                                                  \
+        } while (index != orig);                                        \
+                                                                        \
+        if (empty_slot != -1) {                                         \
+            tab[empty_slot] = key;                                      \
+            return &tab[empty_slot+1];                                  \
         }                                                               \
                                                                         \
-        if (EQFUNC(key, tab[index], ctx))                               \
-            return &tab[index+1];                                       \
-                                                                        \
-        index = (index+2) & (sz-1);                                     \
-        iter++;                                                         \
-        if (iter > maxprobe)                                            \
-            break;                                                      \
-    } while (index != orig);                                            \
-                                                                        \
-    /* table full */                                                    \
-    /* quadruple size, rehash, retry the insert */                      \
-    /* it's important to grow the table really fast; otherwise we waste */ \
-    /* lots of time rehashing all the keys over and over. */            \
-    sz = h->size;                                                       \
-    ol = h->table;                                                      \
-    if (sz < HT_N_INLINE)                                              \
-        newsz = HT_N_INLINE;                                            \
-    else if (sz >= (1<<19) || (sz <= (1<<8)))                           \
-        newsz = sz<<1;                                                  \
-    else                                                                \
-        newsz = sz<<2;                                                  \
-    /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
-    tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                       \
-    if (tab == NULL)                                                    \
-        return NULL;                                                    \
-    for(i=0; i < newsz; i++)                                            \
-        tab[i] = HT_NOTFOUND;                                           \
-    h->table = tab;                                                     \
-    h->size = newsz;                                                    \
-    for(i=0; i < sz; i+=2) {                                            \
-        if (ol[i+1] != HT_NOTFOUND) {                                   \
-            (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];           \
+        /* table full */                                                \
+        /* quadruple size, rehash, retry the insert */                  \
+        /* it's important to grow the table really fast; otherwise we waste */ \
+        /* lots of time rehashing all the keys over and over. */        \
+        sz = h->size;                                                   \
+        ol = h->table;                                                  \
+        if (sz < HT_N_INLINE)                                           \
+            newsz = HT_N_INLINE;                                        \
+        else if (sz >= (1<<19) || (sz <= (1<<8)))                       \
+            newsz = sz<<1;                                              \
+        else                                                            \
+            newsz = sz<<2;                                              \
+        /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
+        tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                   \
+        if (tab == NULL)                                                \
+            return NULL;                                                \
+        for (i = 0; i < newsz; i++)                                     \
+            tab[i] = HT_NOTFOUND;                                       \
+        h->table = tab;                                                 \
+        h->size = newsz;                                                \
+        for (i = 0; i < sz; i += 2) {                                   \
+            if (ol[i+1] != HT_NOTFOUND) {                               \
+                (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];       \
+            }                                                           \
         }                                                               \
-    }                                                                   \
-    if (ol != &h->_space[0])                                            \
-        LLT_FREE(ol);                                                   \
+        if (ol != &h->_space[0])                                        \
+            LLT_FREE(ol);                                               \
                                                                         \
-    sz = hash_size(h);                                                  \
-    maxprobe = max_probe(sz);                                           \
-    tab = h->table;                                                     \
-                                                                        \
-    goto retry_bp;                                                      \
+        sz = hash_size(h);                                              \
+        maxprobe = max_probe(sz);                                       \
+        tab = h->table;                                                 \
+    }                                                                   \
                                                                         \
     return NULL;                                                        \
 }                                                                       \
diff --git a/src/support/ios.c b/src/support/ios.c
index 4ab093ff40e78..c0f1c92572b78 100644
--- a/src/support/ios.c
+++ b/src/support/ios.c
@@ -1105,9 +1105,12 @@ int ios_ungetc(int c, ios_t *s)
 {
     if (s->state == bst_wr)
         return IOS_EOF;
+    if (c == '\n') s->lineno--;
+    if (s->u_colno > 0) s->u_colno--;
     if (s->bpos > 0) {
         s->bpos--;
-        s->buf[s->bpos] = (char)c;
+        if (s->buf[s->bpos] != (char)c)
+            s->buf[s->bpos] = (char)c;
         s->_eof = 0;
         return c;
     }
@@ -1129,11 +1132,14 @@ int ios_getutf8(ios_t *s, uint32_t *pwc)
     char c0;
     char buf[8];
 
-    c = ios_getc(s);
-    if (c == IOS_EOF)
+    c = ios_peekc(s);
+    if (c == IOS_EOF) {
+        s->_eof = 1;
         return IOS_EOF;
+    }
     c0 = (char)c;
     if ((unsigned char)c0 < 0x80) {
+        (void)ios_getc(s); // consume peeked char, increment lineno
         *pwc = (uint32_t)(unsigned char)c0;
         if (c == '\n')
             s->u_colno = 0;
@@ -1141,13 +1147,12 @@ int ios_getutf8(ios_t *s, uint32_t *pwc)
             s->u_colno += utf8proc_charwidth(*pwc);
         return 1;
     }
-    if (ios_ungetc(c, s) == IOS_EOF)
-        return IOS_EOF;
     sz = u8_seqlen(&c0);
     if (!isutf(c0) || sz > 4)
         return 0;
     if (ios_readprep(s, sz) < sz)
-        // NOTE: this can return EOF even if some bytes are available
+        // NOTE: this returns EOF even though some bytes are available,
+        // so we do not set s->_eof on this code path
         return IOS_EOF;
     int valid = u8_isvalid(&s->buf[s->bpos], sz);
     if (valid) {
diff --git a/src/support/ios.h b/src/support/ios.h
index 3ba5ab4884284..e5d83ec974a2b 100644
--- a/src/support/ios.h
+++ b/src/support/ios.h
@@ -4,7 +4,6 @@
 #define JL_IOS_H
 
 #include <stdarg.h>
-#include "uv.h"
 #include "analyzer_annotations.h"
 
 #ifdef __cplusplus
@@ -16,7 +15,7 @@ extern "C" {
 // never moves out.
 
 //make it compatible with UV Handles
-typedef enum { bm_none=UV_HANDLE_TYPE_MAX+1, bm_line, bm_block, bm_mem } bufmode_t;
+typedef enum { bm_none=1000, bm_line, bm_block, bm_mem } bufmode_t;
 typedef enum { bst_none, bst_rd, bst_wr } bufstate_t;
 
 #define IOS_INLSIZE 54
diff --git a/src/support/libsupport.h b/src/support/libsupport.h
index 880c8560cd23c..043a1e6a426f9 100644
--- a/src/support/libsupport.h
+++ b/src/support/libsupport.h
@@ -8,15 +8,8 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include "dtypes.h"
-#include "utils.h"
 #include "utf8.h"
 #include "ios.h"
-#include "timefuncs.h"
-#include "hashing.h"
-#include "ptrhash.h"
-#include "bitvector.h"
-#include "dirpath.h"
-#include "strtod.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/support/platform.h b/src/support/platform.h
index 1bb46d3bc648c..bb960f54d3c4e 100644
--- a/src/support/platform.h
+++ b/src/support/platform.h
@@ -14,8 +14,6 @@
  *      Compiler:
  *          _COMPILER_CLANG_
  *          _COMPILER_GCC_
- *          _COMPILER_INTEL_
- *          _COMPILER_MICROSOFT_
  *      OS:
  *          _OS_FREEBSD_
  *          _OS_LINUX_
@@ -35,20 +33,32 @@
 *                               Compiler                                       *
 *******************************************************************************/
 
-/*
- * Note: Checking for Intel's compiler should be done before checking for
- * Microsoft's. On Windows Intel's compiler also defines _MSC_VER as the
- * acknowledgement of the fact that it is integrated with Visual Studio.
- */
 #if defined(__clang__)
 #define _COMPILER_CLANG_
-#elif defined(__INTEL_COMPILER) || defined(__ICC)
-#define _COMPILER_INTEL_
-#elif defined(_MSC_VER)
-#define _COMPILER_MICROSOFT_
 #elif defined(__GNUC__)
 #define _COMPILER_GCC_
+#else
+#error Unsupported compiler
+#endif
+
+#if defined(__has_feature) // Clang flavor
+#if __has_feature(address_sanitizer)
+#define _COMPILER_ASAN_ENABLED_
+#endif
+#if __has_feature(memory_sanitizer)
+#define _COMPILER_MSAN_ENABLED_
+#endif
+#if __has_feature(thread_sanitizer)
+#if __clang_major__ < 11
+#error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used
+#endif
+#define _COMPILER_TSAN_ENABLED_
+#endif
+#else // GCC flavor
+#if defined(__SANITIZE_ADDRESS__)
+#define _COMPILER_ASAN_ENABLED_
 #endif
+#endif // __has_feature
 
 /*******************************************************************************
 *                               OS                                             *
diff --git a/src/support/timefuncs.c b/src/support/timefuncs.c
index 1a8a8ebbd9b7b..b353ce8f49cec 100644
--- a/src/support/timefuncs.c
+++ b/src/support/timefuncs.c
@@ -6,7 +6,6 @@
 
 #if defined(_OS_WINDOWS_)
 #include <sys/timeb.h>
-#include <windows.h>
 #else
 #include <sys/time.h>
 #include <sys/select.h>
diff --git a/src/support/utils.h b/src/support/utils.h
index 4d77a1bb9fb9b..b7e9de2cfdb79 100644
--- a/src/support/utils.h
+++ b/src/support/utils.h
@@ -7,7 +7,7 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
+char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
 int str2int(char *str, size_t len, int64_t *res, uint32_t base);
 int isdigit_base(char c, int base);
 
@@ -33,14 +33,6 @@ int cmp_eq(void *a, numerictype_t atag, void *b, numerictype_t btag,
 #define bswap_16(x) __builtin_bswap16(x)
 #define bswap_32(x) __builtin_bswap32(x)
 #define bswap_64(x) __builtin_bswap64(x)
-#elif defined(_MSC_VER)
-#define bswap_16(x) _byteswap_ushort(x)
-#define bswap_32(x) _byteswap_ulong(x)
-#define bswap_64(x) _byteswap_uint64(x)
-#elif defined(__INTEL_COMPILER)
-#define bswap_16(x) _bswap16(x)
-#define bswap_32(x) _bswap(x)
-#define bswap_64(x) _bswap64(x)
 #else
 #define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
 #define bswap_32(x) \
diff --git a/src/symbol.c b/src/symbol.c
index fe8e975f8f525..14606c82b9778 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,7 +15,7 @@
 extern "C" {
 #endif
 
-static jl_sym_t *symtab = NULL;
+static _Atomic(jl_sym_t*) symtab = NULL;
 
 #define MAX_SYM_LEN ((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1)
 
@@ -41,16 +41,17 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
     tag->header = ((uintptr_t)jl_symbol_type) | GC_OLD_MARKED;
-    sym->left = sym->right = NULL;
+    jl_atomic_store_relaxed(&sym->left, NULL);
+    jl_atomic_store_relaxed(&sym->right, NULL);
     sym->hash = hash_symbol(str, len);
     memcpy(jl_symbol_name(sym), str, len);
     jl_symbol_name(sym)[len] = 0;
     return sym;
 }
 
-static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl_sym_t ***slot) JL_NOTSAFEPOINT
+static jl_sym_t *symtab_lookup(_Atomic(jl_sym_t*) *ptree, const char *str, size_t len, _Atomic(jl_sym_t*) **slot) JL_NOTSAFEPOINT
 {
-    jl_sym_t *node = jl_atomic_load_acquire(ptree); // consume
+    jl_sym_t *node = jl_atomic_load_relaxed(ptree); // consume
     uintptr_t h = hash_symbol(str, len);
 
     // Tree nodes sorted by major key of (int(hash)) and minor key of (str).
@@ -68,7 +69,7 @@ static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl
             ptree = &node->left;
         else
             ptree = &node->right;
-        node = jl_atomic_load_acquire(ptree); // consume
+        node = jl_atomic_load_relaxed(ptree); // consume
     }
     if (slot != NULL)
         *slot = ptree;
@@ -77,25 +78,25 @@ static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl
 
 jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw)
 {
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
     // Hide the error throwing from the analyser since there isn't a way to express
     // "safepoint only when throwing error" currently.
     if (len > MAX_SYM_LEN)
         jl_exceptionf(jl_argumenterror_type, "Symbol name too long");
 #endif
     assert(!memchr(str, 0, len));
-    jl_sym_t **slot;
+    _Atomic(jl_sym_t*) *slot;
     jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot);
     if (node == NULL) {
-        JL_LOCK_NOGC(&gc_perm_lock);
+        uv_mutex_lock(&gc_perm_lock);
         // Someone might have updated it, check and look up again
-        if (*slot != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
-            JL_UNLOCK_NOGC(&gc_perm_lock);
+        if (jl_atomic_load_relaxed(slot) != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
+            uv_mutex_unlock(&gc_perm_lock);
             return node;
         }
         node = mk_symbol(str, len);
         jl_atomic_store_release(slot, node);
-        JL_UNLOCK_NOGC(&gc_perm_lock);
+        uv_mutex_unlock(&gc_perm_lock);
     }
     return node;
 }
@@ -119,12 +120,12 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len)
 
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void)
 {
-    return symtab;
+    return jl_atomic_load_relaxed(&symtab);
 }
 
-static uint32_t gs_ctr = 0;  // TODO: per-thread
-uint32_t jl_get_gs_ctr(void) { return gs_ctr; }
-void jl_set_gs_ctr(uint32_t ctr) { gs_ctr = ctr; }
+static _Atomic(uint32_t) gs_ctr = 0;  // TODO: per-module?
+uint32_t jl_get_gs_ctr(void) { return jl_atomic_load_relaxed(&gs_ctr); }
+void jl_set_gs_ctr(uint32_t ctr) { jl_atomic_store_relaxed(&gs_ctr, ctr); }
 
 JL_DLLEXPORT jl_sym_t *jl_gensym(void)
 {
diff --git a/src/sys.c b/src/sys.c
index da5b7dabbc7f6..70dde30dc2089 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -13,7 +13,6 @@
 
 #include "julia.h"
 #include "julia_internal.h"
-#include "llvm-version.h"
 
 #ifdef _OS_WINDOWS_
 #include <psapi.h>
@@ -49,19 +48,12 @@
 #include <xmmintrin.h>
 #endif
 
-#if defined _MSC_VER
-#include <io.h>
-#include <intrin.h>
-#endif
-
-#ifdef JL_MSAN_ENABLED
+#ifdef _COMPILER_MSAN_ENABLED_
 #include <sanitizer/msan_interface.h>
 #endif
 
 #include "julia_assert.h"
 
-#include <llvm-c/Core.h>
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -232,7 +224,25 @@ JL_DLLEXPORT double jl_stat_ctime(char *statbuf)
     return (double)s->st_ctim.tv_sec + (double)s->st_ctim.tv_nsec * 1e-9;
 }
 
-JL_DLLEXPORT int jl_os_get_passwd(uv_passwd_t *pwd, size_t uid)
+JL_DLLEXPORT unsigned long jl_getuid(void) // user ID as reported by getuid()
+{
+#ifdef _OS_WINDOWS_
+    return -1; // getuid() is not called on Windows; -1 converts to ULONG_MAX
+#else
+    return getuid();
+#endif
+}
+
+JL_DLLEXPORT unsigned long jl_geteuid(void) // effective user ID as reported by geteuid()
+{
+#ifdef _OS_WINDOWS_
+    return -1; // geteuid() is not called on Windows; -1 converts to ULONG_MAX
+#else
+    return geteuid();
+#endif
+}
+
+JL_DLLEXPORT int jl_os_get_passwd(uv_passwd_t *pwd, unsigned long uid)
 {
 #ifdef _OS_WINDOWS_
   return UV_ENOTSUP;
@@ -345,11 +355,11 @@ JL_DLLEXPORT int jl_os_get_passwd(uv_passwd_t *pwd, size_t uid)
 
 typedef struct jl_group_s {
     char* groupname;
-    long gid;
+    unsigned long gid;
     char** members;
 } jl_group_t;
 
-JL_DLLEXPORT int jl_os_get_group(jl_group_t *grp, size_t gid)
+JL_DLLEXPORT int jl_os_get_group(jl_group_t *grp, unsigned long gid)
 {
 #ifdef _OS_WINDOWS_
   return UV_ENOTSUP;
@@ -587,6 +597,15 @@ typedef DWORD (WINAPI *GAPC)(WORD);
 #endif
 #endif
 
+// Apple's M1 processor is a big.LITTLE style processor, with 4x "performance"
+// cores, and 4x "efficiency" cores.  Because Julia expects to be able to run
+// things like heavy linear algebra workloads on all cores, it's best for us
+// to only spawn as many threads as there are performance cores.  Once macOS
+// 12 is released, we'll be able to query the multiple "perf levels" of the
+// cores of a CPU (see the related PR to pytorch/cpuinfo for an example) but
+// until it's released, we will just recognize the M1 by its CPU family
+// identifier, then subtract how many efficiency cores we know it has.
+
 JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 {
 #if defined(HW_AVAILCPU) && defined(HW_NCPU)
@@ -599,6 +618,19 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
         sysctl(nm, 2, &count, &len, NULL, 0);
         if (count < 1) { count = 1; }
     }
+
+#if defined(__APPLE__) && defined(_CPU_AARCH64_)
+    // Manually subtract efficiency cores for Apple's big.LITTLE cores
+    int32_t family = 0;
+    len = sizeof(family);
+    sysctlbyname("hw.cpufamily", &family, &len, NULL, 0);
+    if (family >= 1 && count > 4) { // keep count >= 1 after the subtraction below
+        if (family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
+            // We know the Apple M1 has 4 efficiency cores, so subtract them out.
+            count -= 4;
+        }
+    }
+#endif
     return count;
 #elif defined(_SC_NPROCESSORS_ONLN)
     long count = sysconf(_SC_NPROCESSORS_ONLN);
@@ -651,7 +683,7 @@ JL_DLLEXPORT jl_value_t *jl_environ(int i)
 
 // -- child process status --
 
-#if defined _MSC_VER || defined _OS_WINDOWS_
+#if defined _OS_WINDOWS_
 /* Native Woe32 API.  */
 #include <process.h>
 #define waitpid(pid,statusp,options) _cwait (statusp, pid, WAIT_CHILD)
@@ -796,7 +828,7 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 
     struct link_map *map;
     dlinfo(handle, RTLD_DI_LINKMAP, &map);
-#ifdef JL_MSAN_ENABLED
+#ifdef _COMPILER_MSAN_ENABLED_
     __msan_unpoison(&map,sizeof(struct link_map*));
     if (map) {
         __msan_unpoison(map, sizeof(struct link_map));
@@ -828,12 +860,11 @@ JL_DLLEXPORT int jl_dllist(jl_array_t *list)
     } while (cb < cbNeeded);
     for (i = 0; i < cbNeeded / sizeof(HMODULE); i++) {
         const char *path = jl_pathname_for_handle(hMods[i]);
-        // XXX: change to jl_arrayset if array storage allocation for Array{String,1} changes:
         if (path == NULL)
             continue;
         jl_array_grow_end((jl_array_t*)list, 1);
         jl_value_t *v = jl_cstr_to_string(path);
-        free(path);
+        free((char*)path);
         jl_array_ptr_set(list, jl_array_dim0(list) - 1, v);
     }
     free(hMods);
@@ -885,37 +916,6 @@ JL_DLLEXPORT size_t jl_maxrss(void)
 #endif
 }
 
-JL_DLLEXPORT int jl_threading_enabled(void)
-{
-    return 1;
-}
-
-JL_DLLEXPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT {
-#if defined(_OS_WINDOWS_)
-    HMODULE mod;
-    // FIXME: GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS on LLVMContextCreate,
-    //        but that just points to libjulia.dll
-#if JL_LLVM_VERSION <= 110000
-    const char* libLLVM = "LLVM";
-#else
-    const char* libLLVM = "libLLVM";
-#endif
-
-    if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, libLLVM, &mod))
-        return jl_nothing;
-
-    char path[MAX_PATH];
-    if (!GetModuleFileNameA(mod, path, sizeof(path)))
-        return jl_nothing;
-    return (jl_value_t*) jl_symbol(path);
-#else
-    Dl_info dli;
-    if (!dladdr(LLVMContextCreate, &dli))
-        return jl_nothing;
-    return (jl_value_t*) jl_symbol(dli.dli_fname);
-#endif
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/task.c b/src/task.c
index 43ab8db89cfee..9f55b9b64a833 100644
--- a/src/task.c
+++ b/src/task.c
@@ -29,19 +29,19 @@
 #include <stdlib.h>
 #include <string.h>
 #include <signal.h>
+#include <unistd.h>
 #include <errno.h>
 #include <inttypes.h>
 #include "julia.h"
 #include "julia_internal.h"
 #include "threading.h"
 #include "julia_assert.h"
-#include "support/hashing.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
 static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size) {
     __sanitizer_start_switch_fiber(NULL, bottom, size);
 }
@@ -53,22 +53,58 @@ static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size)
 static inline void sanitizer_finish_switch_fiber(void) {}
 #endif
 
-#if defined(JL_TSAN_ENABLED)
-static inline void tsan_destroy_ctx(jl_ptls_t ptls, void *state) {
-    if (state != &ptls->root_task->state) {
-        __tsan_destroy_fiber(ctx->state);
-    }
-    ctx->state = NULL;
-}
-static inline void tsan_switch_to_ctx(void *state)  {
-    __tsan_switch_to_fiber(state, 0);
-}
+#if defined(_COMPILER_TSAN_ENABLED_)
+// must be defined as macros, since the function containing them must not return before the longjmp
+#define tsan_destroy_ctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
+            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
+        } \
+        _tsan_macro_ctx->tsan_state = NULL; \
+    } while (0)
+#define tsan_switch_to_ctx(_ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
+    } while (0)
+#ifdef COPY_STACKS
+#define tsan_destroy_copyctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
+            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
+        } \
+        _tsan_macro_ctx->tsan_state = NULL; \
+    } while (0)
+#define tsan_switch_to_copyctx(_ctx) do { /* _ctx: jl_ucontext_t* carrying tsan_state */ \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
+    } while (0)
+#endif
+#else
+// just do minimal type-checking on the arguments
+#define tsan_destroy_ctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#define tsan_switch_to_ctx(_ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#ifdef COPY_STACKS
+#define tsan_destroy_copyctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#define tsan_switch_to_copyctx(_ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#endif
 #endif
 
 // empirically, jl_finish_task needs about 64k stack space to infer/run
 // and additionally, gc-stack reserves 64k for the guard pages
-#if defined(MINSIGSTKSZ) && MINSIGSTKSZ > 131072
-#define MINSTKSZ MINSIGSTKSZ
+#if defined(MINSIGSTKSZ)
+#define MINSTKSZ (MINSIGSTKSZ > 131072 ? MINSIGSTKSZ : 131072)
 #else
 #define MINSTKSZ 131072
 #endif
@@ -82,8 +118,7 @@ static inline void tsan_switch_to_ctx(void *state)  {
 #define STATIC_OR_JS static
 #endif
 
-extern size_t jl_page_size;
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
 STATIC_OR_JS void jl_set_fiber(jl_ucontext_t *t);
 STATIC_OR_JS void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t);
 STATIC_OR_JS void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t);
@@ -150,9 +185,9 @@ static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, cha
 
     sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
 #if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx);
+    jl_setcontext(&t->ctx.copy_ctx);
 #else
-    jl_longjmp(t->copy_stack_ctx.uc_mcontext, 1);
+    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
 #endif
     abort(); // unreachable
 }
@@ -167,34 +202,35 @@ static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt)
     memcpy_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
 #if defined(JL_HAVE_UNW_CONTEXT)
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(&lastt->ctx.ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
 #elif defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK) || defined(_OS_WINDOWS_)
-    if (jl_setjmp(lastt->copy_stack_ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0))
         return;
 #else
 #error COPY_STACKS is incompatible with this platform
 #endif
     sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
+    tsan_switch_to_copyctx(&t->ctx);
 #if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx);
+    jl_setcontext(&t->ctx.copy_ctx);
 #else
-    jl_longjmp(t->copy_stack_ctx.uc_mcontext, 1);
+    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
 #endif
 }
 #endif
 
 /* Rooted by the base module */
-static jl_function_t *task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
+static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
 
 void JL_NORETURN jl_finish_task(jl_task_t *t)
 {
     jl_task_t *ct = jl_current_task;
     JL_SIGATOMIC_BEGIN();
-    if (t->_isexception)
+    if (jl_atomic_load_relaxed(&t->_isexception))
         jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED);
     else
         jl_atomic_store_release(&t->_state, JL_TASK_STATE_DONE);
@@ -203,7 +239,7 @@ void JL_NORETURN jl_finish_task(jl_task_t *t)
     // ensure that state is cleared
     ct->ptls->in_finalizer = 0;
     ct->ptls->in_pure_callback = 0;
-    ct->world_age = jl_world_counter;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     // let the runtime know this task is dead and find a new task to run
     jl_function_t *done = jl_atomic_load_relaxed(&task_done_hook_func);
     if (done == NULL) {
@@ -220,7 +256,7 @@ void JL_NORETURN jl_finish_task(jl_task_t *t)
             jl_no_exc_handler(jl_current_exception());
         }
     }
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     abort();
 }
 
@@ -239,7 +275,7 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid
     jl_ptls_t ptls2 = task->ptls;
     *ptid = -1;
     if (ptls2) {
-        *ptid = task->tid;
+        *ptid = jl_atomic_load_relaxed(&task->tid);
 #ifdef COPY_STACKS
         if (task->copy_stack) {
             *size = ptls2->stacksize;
@@ -321,7 +357,7 @@ JL_DLLEXPORT jl_task_t *jl_get_next_task(void) JL_NOTSAFEPOINT
     return ct;
 }
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 const char tsan_state_corruption[] = "TSAN state corrupted. Exiting HARD!\n";
 #endif
 
@@ -334,7 +370,7 @@ static void ctx_switch(jl_task_t *lastt)
     // none of these locks should be held across a task switch
     assert(ptls->locks.len == 0);
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     if (lastt->ctx.tsan_state != __tsan_get_current_fiber()) {
         // Something went really wrong - don't even assume that we can
         // use assert/abort which involve lots of signal handling that
@@ -344,11 +380,11 @@ static void ctx_switch(jl_task_t *lastt)
     }
 #endif
 
-    int killed = lastt->_state != JL_TASK_STATE_RUNNABLE;
+    int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE;
     if (!t->started && !t->copy_stack) {
         // may need to allocate the stack
         if (t->stkbuf == NULL) {
-            t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
+            t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
             if (t->stkbuf == NULL) {
 #ifdef COPY_STACKS
                 // fall back to stack copying if mmap fails
@@ -356,9 +392,9 @@ static void ctx_switch(jl_task_t *lastt)
                 t->sticky = 1;
                 t->bufsz = 0;
                 if (always_copy_stacks)
-                    memcpy(&t->copy_stack_ctx, &ptls->copy_stack_ctx, sizeof(t->copy_stack_ctx));
+                    memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
                 else
-                    memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx));
+                    memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx));
 #else
                 jl_throw(jl_memory_exception);
 #endif
@@ -378,7 +414,7 @@ static void ctx_switch(jl_task_t *lastt)
 #ifdef COPY_STACKS
         if (lastt->copy_stack) { // save the old copy-stack
             save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
-            if (jl_setjmp(lastt->copy_stack_ctx.uc_mcontext, 0)) {
+            if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) {
                 sanitizer_finish_switch_fiber();
                 // TODO: mutex unlock the thread we just switched from
                 return;
@@ -387,34 +423,36 @@ static void ctx_switch(jl_task_t *lastt)
         else
 #endif
         *pt = NULL; // can't fail after here: clear the gc-root for the target task now
-        lastt->ptls = NULL;
     }
 
     // set up global state for new task and clear global state for old task
     t->ptls = ptls;
-    ptls->current_task = t;
+    jl_atomic_store_relaxed(&ptls->current_task, t);
     JL_GC_PROMISE_ROOTED(t);
+    jl_signal_fence();
+    jl_set_pgcstack(&t->gcstack);
+    jl_signal_fence();
     lastt->ptls = NULL;
 #ifdef MIGRATE_TASKS
     ptls->previous_task = lastt;
 #endif
-    jl_set_pgcstack(&t->gcstack);
 
-#if defined(JL_TSAN_ENABLED)
-    tsan_switch_to_ctx(&t->tsan_state);
-    if (killed)
-        tsan_destroy_ctx(ptls, &lastt->tsan_state);
-#endif
     if (t->started) {
 #ifdef COPY_STACKS
         if (t->copy_stack) {
             if (!killed && !lastt->copy_stack)
                 restore_stack2(t, ptls, lastt);
-            else if (lastt->copy_stack) {
-                restore_stack(t, ptls, NULL); // (doesn't return)
-            }
             else {
-                restore_stack(t, ptls, (char*)1); // (doesn't return)
+                tsan_switch_to_copyctx(&t->ctx);
+                if (killed)
+                    tsan_destroy_copyctx(ptls, &lastt->ctx);
+
+                if (lastt->copy_stack) {
+                    restore_stack(t, ptls, NULL); // (doesn't return)
+                }
+                else {
+                    restore_stack(t, ptls, (char*)1); // (doesn't return)
+                }
             }
         }
         else
@@ -422,6 +460,8 @@ static void ctx_switch(jl_task_t *lastt)
         {
             sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
             if (killed) {
+                tsan_switch_to_ctx(&t->ctx);
+                tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_set_fiber(&t->ctx); // (doesn't return)
                 abort(); // unreachable
             }
@@ -429,6 +469,7 @@ static void ctx_switch(jl_task_t *lastt)
                 if (lastt->copy_stack) {
                     // Resume at the jl_setjmp earlier in this function,
                     // don't do a full task swap
+                    tsan_switch_to_ctx(&t->ctx);
                     jl_set_fiber(&t->ctx); // (doesn't return)
                 }
                 else {
@@ -440,22 +481,29 @@ static void ctx_switch(jl_task_t *lastt)
     else {
         sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
         if (t->copy_stack && always_copy_stacks) {
+            tsan_switch_to_ctx(&t->ctx);
+            if (killed) {
+                tsan_destroy_ctx(ptls, &lastt->ctx);
+            }
 #ifdef COPY_STACKS
 #if defined(_OS_WINDOWS_)
-            jl_setcontext(&t->ctx);
+            jl_setcontext(&t->ctx.copy_ctx);
 #else
-            jl_longjmp(t->copy_stack_ctx.uc_mcontext, 1);
+            jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
 #endif
 #endif
             abort(); // unreachable
         }
         else {
             if (killed) {
+                tsan_switch_to_ctx(&t->ctx);
+                tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
             }
             else if (lastt->copy_stack) {
                 // Resume at the jl_setjmp earlier in this function
+                tsan_switch_to_ctx(&t->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
             }
@@ -481,7 +529,7 @@ JL_DLLEXPORT void jl_switch(void)
         jl_error("task switch not allowed from inside gc finalizer");
     if (ptls->in_pure_callback)
         jl_error("task switch not allowed from inside staged nor pure functions");
-    if (!jl_set_task_tid(t, ptls->tid)) // manually yielding to a task
+    if (!jl_set_task_tid(t, jl_atomic_load_relaxed(&ct->tid))) // manually yielding to a task
         jl_error("cannot switch to task running on another thread");
 
     // Store old values on the stack and reset
@@ -504,7 +552,7 @@ JL_DLLEXPORT void jl_switch(void)
     t = ptls->previous_task;
     ptls->previous_task = NULL;
     assert(t != ct);
-    assert(t->tid == ptls->tid);
+    assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid);
     if (!t->sticky && !t->copy_stack)
         jl_atomic_store_release(&t->tid, -1);
 #else
@@ -559,9 +607,6 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
     assert(!jl_get_safe_restore());
     jl_ptls_t ptls = ct->ptls;
     ptls->io_wait = 0;
-    // @time needs its compile timer disabled on error,
-    // and cannot use a try-finally as it would break scope for assignments
-    jl_measure_compile_time[ptls->tid] = 0;
     JL_GC_PUSH1(&exception);
     jl_gc_unsafe_enter(ptls);
     if (exception) {
@@ -663,7 +708,7 @@ JL_DLLEXPORT uint64_t jl_tasklocal_genrandom(jl_task_t *task) JL_NOTSAFEPOINT
     uint64_t s2 = task->rngState2;
     uint64_t s3 = task->rngState3;
 
-    uint64_t t = s0 << 17;
+    uint64_t t = s1 << 17;
     uint64_t tmp = s0 + s3;
     uint64_t res = ((tmp << 23) | (tmp >> 41)) + s0;
     s2 ^= s0;
@@ -725,18 +770,18 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
         if (ssize < MINSTKSZ)
             ssize = MINSTKSZ;
         t->bufsz = ssize;
-        t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
+        t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
         if (t->stkbuf == NULL)
             jl_throw(jl_memory_exception);
     }
     t->next = jl_nothing;
     t->queue = jl_nothing;
     t->tls = jl_nothing;
-    t->_state = JL_TASK_STATE_RUNNABLE;
+    jl_atomic_store_relaxed(&t->_state, JL_TASK_STATE_RUNNABLE);
     t->start = start;
     t->result = jl_nothing;
     t->donenotify = completion_future;
-    t->_isexception = 0;
+    jl_atomic_store_relaxed(&t->_isexception, 0);
     // Inherit logger state from parent task
     t->logstate = ct->logstate;
     // Fork task-local random state from parent
@@ -748,9 +793,9 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->excstack = NULL;
     t->started = 0;
     t->prio = -1;
-    t->tid = t->copy_stack ? ct->tid : -1; // copy_stacks are always pinned since they can't be moved
+    jl_atomic_store_relaxed(&t->tid, t->copy_stack ? jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved
     t->ptls = NULL;
-    t->world_age = 0;
+    t->world_age = ct->world_age;
 
 #ifdef COPY_STACKS
     if (!t->copy_stack) {
@@ -760,13 +805,13 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     }
     else {
         if (always_copy_stacks)
-            memcpy(&t->copy_stack_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->copy_stack_ctx));
+            memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
         else
-            memcpy(&t->ctx, &ct->ptls->base_ctx, sizeof(t->ctx));
+            memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx));
     }
 #endif
-#ifdef JL_TSAN_ENABLED
-    t->tsan_state = __tsan_create_fiber(0);
+#ifdef _COMPILER_TSAN_ENABLED_
+    t->ctx.tsan_state = __tsan_create_fiber(0);
 #endif
     return t;
 }
@@ -781,7 +826,7 @@ JL_DLLEXPORT jl_task_t *jl_get_current_task(void)
 #ifdef JL_HAVE_ASYNCIFY
 JL_DLLEXPORT jl_ucontext_t *task_ctx_ptr(jl_task_t *t)
 {
-    return &t->ctx;
+    return &t->ctx.ctx;
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_root_task(void)
@@ -845,7 +890,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
 CFI_NORETURN
     // this runs the first time we switch to a task
     sanitizer_finish_switch_fiber();
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
     jl_task_t *ct = jl_get_current_task();
     JL_GC_PROMISE_ROOTED(ct);
 #else
@@ -863,7 +908,7 @@ CFI_NORETURN
 #endif
 
     ct->started = 1;
-    if (ct->_isexception) {
+    if (jl_atomic_load_relaxed(&ct->_isexception)) {
         record_backtrace(ptls, 0);
         jl_push_excstack(&ct->excstack, ct->result,
                          ptls->bt_data, ptls->bt_size);
@@ -876,12 +921,11 @@ CFI_NORETURN
                 jl_sigint_safepoint(ptls);
             }
             JL_TIMING(ROOT);
-            ct->world_age = jl_world_counter;
             res = jl_apply(&ct->start, 1);
         }
         JL_CATCH {
             res = jl_current_exception();
-            ct->_isexception = 1;
+            jl_atomic_store_relaxed(&ct->_isexception, 1);
             goto skip_pop_exception;
         }
 skip_pop_exception:;
@@ -889,7 +933,7 @@ skip_pop_exception:;
     ct->result = res;
     jl_gc_wb(ct, ct->result);
     jl_finish_task(ct);
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     abort();
 }
 
@@ -900,7 +944,7 @@ skip_pop_exception:;
 #define swapcontext jl_swapcontext
 #define makecontext jl_makecontext
 #endif
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
 {
 #ifndef _OS_WINDOWS_
     int r = getcontext(t);
@@ -922,30 +966,32 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) J
 }
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    setcontext(t);
+    setcontext(&t->ctx);
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
-    swapcontext(lastt, t);
+    tsan_switch_to_ctx(t);
+    swapcontext(&lastt->ctx, &t->ctx);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    swapcontext(lastt, t);
+    tsan_switch_to_ctx(t);
+    swapcontext(&lastt->ctx, &t->ctx);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    setcontext(t);
+    setcontext(&t->ctx);
 }
 #endif
 
 #if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM)
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
 {
     char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
     if (stkbuf == NULL)
         return NULL;
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
     ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
     ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
 #endif
@@ -967,15 +1013,15 @@ static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, t);
+    int r = unw_init_local(&c, &t->ctx);
     if (r < 0)
         abort();
-    jl_unw_swapcontext(lastt, &c);
+    jl_unw_swapcontext(&lastt->ctx, &c);
 }
-static void jl_set_fiber(unw_context_t *t)
+static void jl_set_fiber(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, t);
+    int r = unw_init_local(&c, &t->ctx);
     if (r < 0)
         abort();
     unw_resume(&c);
@@ -983,13 +1029,14 @@ static void jl_set_fiber(unw_context_t *t)
 #elif defined(JL_HAVE_ASM)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (jl_setjmp(lastt->uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
         return;
+    tsan_switch_to_ctx(t);
     jl_set_fiber(t); // doesn't return
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    jl_longjmp(t->uc_mcontext, 1);
+    jl_longjmp(t->ctx.uc_mcontext, 1);
 }
 #endif
 
@@ -1007,21 +1054,21 @@ static void jl_set_fiber(jl_ucontext_t *t)
 #else
 #error please define how to simulate a CALL on this platform
 #endif
-static void jl_start_fiber_set(unw_context_t *t)
+static void jl_start_fiber_set(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    char *stk = ((char**)t)[0];
-    size_t ssize = ((size_t*)t)[1];
+    char *stk = ((char**)&t->ctx)[0];
+    size_t ssize = ((size_t*)&t->ctx)[1];
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
-    int r = unw_getcontext(t);
+    int r = unw_getcontext(&t->ctx);
     if (r)
         abort();
-    if (unw_init_local(&c, t))
+    if (unw_init_local(&c, &t->ctx))
         abort();
     PUSH_RET(&c, stk);
 #if defined __linux__
-#error savannah nongnu libunwind is not capable of setting UNW_REG_SP, as required
+#error savannah nongnu libunwind is incapable of setting UNW_REG_SP, as required
 #endif
     if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
         abort();
@@ -1029,31 +1076,31 @@ static void jl_start_fiber_set(unw_context_t *t)
         abort();
     unw_resume(&c); // (doesn't return)
 }
-static void jl_start_fiber_swap(unw_context_t *lastt, unw_context_t *t)
+static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
     unw_cursor_t c;
-    char *stk = ((char**)t)[0];
-    size_t ssize = ((size_t*)t)[1];
+    char *stk = ((char**)&t->ctx)[0];
+    size_t ssize = ((size_t*)&t->ctx)[1];
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
     volatile int returns = 0;
-    int r = unw_getcontext(lastt);
+    int r = unw_getcontext(&lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-    int r = unw_getcontext(t);
+    r = unw_getcontext(&t->ctx);
     if (r != 0)
         abort();
-    if (unw_init_local(&c, t))
+    if (unw_init_local(&c, &t->ctx))
         abort();
     PUSH_RET(&c, stk);
     if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
         abort();
     if (unw_set_reg(&c, UNW_REG_IP, fn))
         abort();
-    jl_unw_swapcontext(lastt, &c);
+    jl_unw_swapcontext(&lastt->ctx, &c);
 }
 #endif
 
@@ -1063,21 +1110,22 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
     assert(lastt);
 #ifdef JL_HAVE_UNW_CONTEXT
     volatile int returns = 0;
-    int r = unw_getcontext(lastt);
+    int r = unw_getcontext(&lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
 #else
-    if (jl_setjmp(lastt->uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
         return;
 #endif
+    tsan_switch_to_ctx(t);
     jl_start_fiber_set(t); // doesn't return
 }
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    char *stk = ((char**)t)[0];
-    size_t ssize = ((size_t*)t)[1];
+    char *stk = ((char**)&t->ctx)[0];
+    size_t ssize = ((size_t*)&t->ctx)[1];
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
 #ifdef _CPU_X86_64_
@@ -1147,7 +1195,7 @@ static void jl_start_fiber_set(jl_ucontext_t *t)
 #endif
 
 #if defined(JL_HAVE_SIGALTSTACK)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN support not currently implemented for this tasking model
 #endif
 
@@ -1157,7 +1205,7 @@ static void start_basefiber(int sig)
     if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
         start_task(); // sanitizer_finish_switch_fiber is part of start_task
 }
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
 {
     stack_t uc_stack, osigstk;
     struct sigaction sa, osa;
@@ -1167,8 +1215,8 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
         return NULL;
     // setup
     jl_ptls_t ptls = jl_current_task->ptls;
-    jl_ucontext_t base_ctx;
-    memcpy(&base_ctx, &ptls->base_ctx, sizeof(ptls->base_ctx));
+    _jl_ucontext_t base_ctx;
+    memcpy(&base_ctx, &ptls->base_ctx, sizeof(base_ctx));
     sigfillset(&set);
     if (sigprocmask(SIG_BLOCK, &set, &oset) != 0) {
        jl_free_stack(stk, *ssize);
@@ -1209,39 +1257,41 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
        jl_error("sigprocmask failed");
     }
     if (&ptls->base_ctx != t) {
-        memcpy(t, &ptls->base_ctx, sizeof(ptls->base_ctx));
-        memcpy(&ptls->base_ctx, &base_ctx, sizeof(ptls->base_ctx)); // restore COPY_STACKS context
+        memcpy(t, &ptls->base_ctx, sizeof(base_ctx)); // fill the caller's context (*t); `&t` would overwrite the local pointer and smash the stack
+        memcpy(&ptls->base_ctx, &base_ctx, sizeof(base_ctx)); // restore COPY_STACKS context
     }
     return (char*)stk;
 }
 static void jl_start_fiber_set(jl_ucontext_t *t) {
-    jl_longjmp(t->uc_mcontext, 1); // (doesn't return)
+    jl_longjmp(t->ctx.uc_mcontext, 1); // (doesn't return)
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
-    if (lastt && jl_setjmp(lastt->uc_mcontext, 0))
+    if (lastt && jl_setjmp(lastt->ctx.uc_mcontext, 0))
         return;
+    tsan_switch_to_ctx(t);
     jl_start_fiber_set(t);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (jl_setjmp(lastt->uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
         return;
+    tsan_switch_to_ctx(t);
     jl_start_fiber_set(t); // doesn't return
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    jl_longjmp(t->uc_mcontext, 1);
+    jl_longjmp(t->ctx.uc_mcontext, 1);
 }
 #endif
 
 #if defined(JL_HAVE_ASYNCIFY)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN support not currently implemented for this tasking model
 #endif
 
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
 {
     void *stk = jl_malloc_stack(ssize, owner);
     if (stk == NULL)
@@ -1254,7 +1304,7 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) J
 #endif
 
 // Initialize a root task using the given stack.
-void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
+jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 {
     assert(ptls->root_task == NULL);
     // We need `gcstack` in `Task` to allocate Julia objects; *including* the `Task` type.
@@ -1294,27 +1344,27 @@ void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     ct->next = jl_nothing;
     ct->queue = jl_nothing;
     ct->tls = jl_nothing;
-    ct->_state = JL_TASK_STATE_RUNNABLE;
+    jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
     ct->start = NULL;
     ct->result = jl_nothing;
     ct->donenotify = jl_nothing;
-    ct->_isexception = 0;
+    jl_atomic_store_relaxed(&ct->_isexception, 0);
     ct->logstate = jl_nothing;
     ct->eh = NULL;
     ct->gcstack = NULL;
     ct->excstack = NULL;
-    ct->tid = ptls->tid;
+    jl_atomic_store_relaxed(&ct->tid, ptls->tid);
     ct->sticky = 1;
     ct->ptls = ptls;
     ct->world_age = 1; // OK to run Julia code on this task
     ptls->root_task = ct;
-    ptls->current_task = ct;
+    jl_atomic_store_relaxed(&ptls->current_task, ct);
     JL_GC_PROMISE_ROOTED(ct);
     jl_set_pgcstack(&ct->gcstack);
     assert(jl_current_task == ct);
 
-#ifdef JL_TSAN_ENABLED
-    ct->tsan_state = __tsan_get_current_fiber();
+#ifdef _COMPILER_TSAN_ENABLED_
+    ct->ctx.tsan_state = __tsan_get_current_fiber();
 #endif
 
 #ifdef COPY_STACKS
@@ -1330,13 +1380,14 @@ void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 #endif
         if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0))
             start_task(); // sanitizer_finish_switch_fiber is part of start_task
-        return;
+        return ct;
     }
     ssize = JL_STACK_SIZE;
     char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
     ptls->stackbase = stkbuf + ssize;
     ptls->stacksize = ssize;
 #endif
+    return ct;
 }
 
 JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
@@ -1346,7 +1397,7 @@ JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT
 {
-    return t->tid;
+    return jl_atomic_load_relaxed(&t->tid);
 }
 
 
diff --git a/src/threading.c b/src/threading.c
index 235bb9f870ba1..f10612016ef8a 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -88,7 +88,7 @@ __attribute__((constructor)) void jl_init_tls(void)
 
 JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
 {
-    return pthread_getspecific(jl_pgcstack_key);
+    return (jl_gcframe_t**)pthread_getspecific(jl_pgcstack_key);
 }
 
 void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
@@ -176,7 +176,7 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
 {
     SAVE_ERRNO;
-    jl_gcframe_t **pgcstack = (jl_ptls_t)TlsGetValue(jl_pgcstack_key);
+    jl_gcframe_t **pgcstack = (jl_gcframe_t**)TlsGetValue(jl_pgcstack_key);
     LOAD_ERRNO;
     return pgcstack;
 }
@@ -271,7 +271,7 @@ JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, jl_pgcstack_key_t
 
 JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED
 {
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
     return jl_get_pgcstack_cb();
 #endif
 }
@@ -287,21 +287,21 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
 #endif
 
 jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
-uint8_t *jl_measure_compile_time = NULL;
-uint64_t *jl_cumulative_compile_time = NULL;
+JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
+JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
 
 // return calling thread's ID
 // Also update the suspended_threads list in signals-mach when changing the
 // type of the thread id.
 JL_DLLEXPORT int16_t jl_threadid(void)
 {
-    return jl_current_task->tid;
+    return jl_atomic_load_relaxed(&jl_current_task->tid);
 }
 
 jl_ptls_t jl_init_threadtls(int16_t tid)
 {
     jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t));
-    ptls->system_id = jl_thread_self();
+    ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self();
     seed_cong(&ptls->rngseed);
 #ifdef _OS_WINDOWS_
     if (tid == 0) {
@@ -314,7 +314,7 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     }
 #endif
     ptls->tid = tid;
-    ptls->gc_state = 0; // GC unsafe
+    jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe
     // Conditionally initialize the safepoint address. See comment in
     // `safepoint.c`
     if (tid == 0) {
@@ -336,14 +336,13 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     return ptls;
 }
 
-// lock for code generation
-jl_mutex_t codegen_lock;
+JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
 jl_mutex_t typecache_lock;
 
-ssize_t jl_tls_offset = -1;
+JL_DLLEXPORT ssize_t jl_tls_offset = -1;
 
 #ifdef JL_ELF_TLS_VARIANT
-const int jl_tls_elf_support = 1;
+JL_DLLEXPORT const int jl_tls_elf_support = 1;
 // Optimize TLS access in codegen if the TLS buffer is using a IE or LE model.
 // To detect such case, we find the size of the TLS segment in the main
 // executable and the thread pointer (TP) and then see if the TLS pointer on the
@@ -439,7 +438,8 @@ static void jl_check_tls(void)
     jl_tls_offset = offset;
 }
 #else
-const int jl_tls_elf_support = 0;
+// !JL_ELF_TLS_VARIANT
+JL_DLLEXPORT const int jl_tls_elf_support = 0;
 #endif
 
 // interface to Julia; sets up to make the runtime thread-safe
@@ -467,9 +467,7 @@ void jl_init_threading(void)
     }
     if (jl_n_threads <= 0)
         jl_n_threads = 1;
-    jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
-    jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
     jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
 #endif
 }
@@ -502,7 +500,7 @@ void jl_start_threads(void)
         }
         memset(mask, 0, cpumasksize);
         mask[0] = 1;
-        uvtid = (uv_thread_t)uv_thread_self();
+        uvtid = uv_thread_self();
         uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
         mask[0] = 0;
     }
@@ -529,21 +527,22 @@ void jl_start_threads(void)
     uv_barrier_wait(&thread_init_done);
 }
 
-unsigned volatile _threadedregion; // HACK: keep track of whether it is safe to do IO
+_Atomic(unsigned) _threadedregion; // HACK: keep track of whether to prioritize IO or threading
 
 JL_DLLEXPORT int jl_in_threaded_region(void)
 {
-    return _threadedregion != 0;
+    return jl_atomic_load_relaxed(&jl_current_task->tid) != 0 ||
+        jl_atomic_load_relaxed(&_threadedregion) != 0;
 }
 
 JL_DLLEXPORT void jl_enter_threaded_region(void)
 {
-    _threadedregion += 1;
+    jl_atomic_fetch_add(&_threadedregion, 1);
 }
 
 JL_DLLEXPORT void jl_exit_threaded_region(void)
 {
-    _threadedregion -= 1;
+    jl_atomic_fetch_add(&_threadedregion, -1);
     jl_wake_libuv();
     // make sure no more callbacks will run while user code continues
     // outside thread region and might touch an I/O object.
diff --git a/src/timing.c b/src/timing.c
index 12093d2e142c0..929a09305f993 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -18,7 +18,7 @@ extern "C" {
 #endif
 
 static uint64_t t0;
-uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0};
+JL_DLLEXPORT uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0};
 const char *jl_timing_names[(int)JL_TIMING_LAST] =
     {
 #define X(name) #name
diff --git a/src/toplevel.c b/src/toplevel.c
index c11dea57c8489..363d44b3cd642 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -17,7 +17,6 @@
 #endif
 #include "julia.h"
 #include "julia_internal.h"
-#include "uv.h"
 #include "julia_assert.h"
 #include "intrinsics.h"
 #include "builtin_proto.h"
@@ -69,7 +68,7 @@ void jl_module_run_initializer(jl_module_t *m)
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
     JL_TRY {
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_apply(&f, 1);
         ct->world_age = last_age;
     }
@@ -117,7 +116,7 @@ static int jl_is__toplevel__mod(jl_module_t *mod)
 static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
 {
     jl_task_t *ct = jl_current_task;
-    assert(ex->head == module_sym);
+    assert(ex->head == jl_module_sym);
     if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) {
         jl_error("syntax: malformed module expression");
     }
@@ -190,12 +189,12 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args;
     for (int i = 0; i < jl_array_len(exprs); i++) {
         // process toplevel form
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno);
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         (void)jl_toplevel_eval_flex(newm, form, 1, 1);
     }
-    newm->primary_world = jl_world_counter;
+    newm->primary_world = jl_atomic_load_acquire(&jl_world_counter);
     ct->world_age = last_age;
 
 #if 0
@@ -280,7 +279,7 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f
     else {
         args[0] = jl_eval_global_var(jl_base_relative_to(m), jl_symbol("getproperty"));
         size_t last_age = ct->world_age;
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         args[0] = jl_apply(args, 3);
         ct->world_age = last_age;
     }
@@ -310,36 +309,36 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, i
         return;
     jl_expr_t *e = (jl_expr_t*)v;
     jl_sym_t *head = e->head;
-    if (head == toplevel_sym || head == thunk_sym) {
+    if (head == jl_toplevel_sym || head == jl_thunk_sym) {
         return;
     }
-    else if (head == global_sym) {
+    else if (head == jl_global_sym) {
         // this could be considered has_defs, but loops that assign to globals
         // might still need to be optimized.
         return;
     }
-    else if (head == const_sym || head == copyast_sym) {
+    else if (head == jl_const_sym || head == jl_copyast_sym) {
         // Note: `copyast` is included here since it indicates the presence of
         // `quote` and probably `eval`.
         *has_defs = 1;
         return;
     }
-    else if (head == method_sym || jl_is_toplevel_only_expr(v)) {
+    else if (head == jl_method_sym || jl_is_toplevel_only_expr(v)) {
         *has_defs = 1;
     }
-    else if (head == cfunction_sym) {
+    else if (head == jl_cfunction_sym) {
         *has_intrinsics = 1;
         return;
     }
-    else if (head == foreigncall_sym) {
+    else if (head == jl_foreigncall_sym) {
         *has_intrinsics = 1;
         return;
     }
-    else if (head == new_opaque_closure_sym) {
+    else if (head == jl_new_opaque_closure_sym) {
         *has_opaque = 1;
         return;
     }
-    else if (head == call_sym && jl_expr_nargs(e) > 0) {
+    else if (head == jl_call_sym && jl_expr_nargs(e) > 0) {
         jl_value_t *called = NULL;
         jl_value_t *f = jl_exprarg(e, 0);
         if (jl_is_globalref(f)) {
@@ -378,6 +377,8 @@ int jl_code_requires_compiler(jl_code_info_t *src)
     assert(jl_typeis(body, jl_array_any_type));
     size_t i;
     int has_intrinsics = 0, has_defs = 0, has_opaque = 0;
+    if (jl_has_meta(body, jl_force_compile_sym))
+        return 1;
     for(i=0; i < jl_array_len(body); i++) {
         jl_value_t *stmt = jl_array_ptr_ref(body,i);
         expr_attributes(stmt, &has_intrinsics, &has_defs, &has_opaque);
@@ -387,7 +388,7 @@ int jl_code_requires_compiler(jl_code_info_t *src)
     return 0;
 }
 
-static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs, int *has_loops, int *has_opaque)
+static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs, int *has_loops, int *has_opaque, int *forced_compile)
 {
     size_t i;
     *has_loops = 0;
@@ -405,6 +406,7 @@ static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs
         }
         expr_attributes(stmt, has_intrinsics, has_defs, has_opaque);
     }
+    *forced_compile = jl_has_meta(body, jl_force_compile_sym);
 }
 
 static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED
@@ -418,7 +420,7 @@ static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_RO
     }
     if (require_func != NULL) {
         size_t last_age = ct->world_age;
-        ct->world_age = (build_mode ? jl_base_module->primary_world : jl_world_counter);
+        ct->world_age = (build_mode ? jl_base_module->primary_world : jl_atomic_load_acquire(&jl_world_counter));
         jl_value_t *reqargs[3];
         reqargs[0] = require_func;
         reqargs[1] = (jl_value_t*)mod;
@@ -451,7 +453,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
         m = from;
         i = 0;
     }
-    else if (var != dot_sym) {
+    else if (var != jl_dot_sym) {
         // `A.B`: call the loader to obtain the root A in the current environment.
         if (jl_core_module && var == jl_core_module->name) {
             m = jl_core_module;
@@ -472,7 +474,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
             if (i >= jl_array_len(args))
                 jl_error("invalid module path");
             var = (jl_sym_t*)jl_array_ptr_ref(args, i);
-            if (var != dot_sym)
+            if (var != jl_dot_sym)
                 break;
             i++;
             assert(m);
@@ -484,7 +486,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
         var = (jl_sym_t*)jl_array_ptr_ref(args, i);
         if (!jl_is_symbol(var))
             jl_type_error(keyword, (jl_value_t*)jl_symbol_type, (jl_value_t*)var);
-        if (var == dot_sym)
+        if (var == jl_dot_sym)
             jl_errorf("invalid %s path: \".\" in identifier path", keyword);
         if (i == jl_array_len(args)-1)
             break;
@@ -501,16 +503,16 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
 int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT
 {
     return jl_is_expr(e) &&
-        (((jl_expr_t*)e)->head == module_sym ||
-         ((jl_expr_t*)e)->head == import_sym ||
-         ((jl_expr_t*)e)->head == using_sym ||
-         ((jl_expr_t*)e)->head == export_sym ||
-         ((jl_expr_t*)e)->head == thunk_sym ||
-         ((jl_expr_t*)e)->head == global_sym ||
-         ((jl_expr_t*)e)->head == const_sym ||
-         ((jl_expr_t*)e)->head == toplevel_sym ||
-         ((jl_expr_t*)e)->head == error_sym ||
-         ((jl_expr_t*)e)->head == incomplete_sym);
+        (((jl_expr_t*)e)->head == jl_module_sym ||
+         ((jl_expr_t*)e)->head == jl_import_sym ||
+         ((jl_expr_t*)e)->head == jl_using_sym ||
+         ((jl_expr_t*)e)->head == jl_export_sym ||
+         ((jl_expr_t*)e)->head == jl_thunk_sym ||
+         ((jl_expr_t*)e)->head == jl_global_sym ||
+         ((jl_expr_t*)e)->head == jl_const_sym ||
+         ((jl_expr_t*)e)->head == jl_toplevel_sym ||
+         ((jl_expr_t*)e)->head == jl_error_sym ||
+         ((jl_expr_t*)e)->head == jl_incomplete_sym);
 }
 
 int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT
@@ -519,12 +521,12 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT
         return 0;
     jl_expr_t *ex = (jl_expr_t*)e;
     jl_sym_t *head = ex->head;
-    if (head == module_sym || head == import_sym || head == using_sym ||
-        head == export_sym || head == thunk_sym || head == toplevel_sym ||
-        head == error_sym || head == incomplete_sym || head == method_sym) {
+    if (head == jl_module_sym || head == jl_import_sym || head == jl_using_sym ||
+        head == jl_export_sym || head == jl_thunk_sym || head == jl_toplevel_sym ||
+        head == jl_error_sym || head == jl_incomplete_sym || head == jl_method_sym) {
         return 0;
     }
-    if (head == global_sym || head == const_sym) {
+    if (head == jl_global_sym || head == jl_const_sym) {
         size_t i, l = jl_array_len(ex->args);
         for (i = 0; i < l; i++) {
             jl_value_t *a = jl_exprarg(ex, i);
@@ -573,10 +575,10 @@ static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_
 {
     if (jl_expr_nargs(ex) == 1 && jl_is_expr(jl_exprarg(ex, 0))) {
         jl_expr_t *fr = (jl_expr_t*)jl_exprarg(ex, 0);
-        if (fr->head == colon_sym) {
+        if (fr->head == jl_colon_sym) {
             if (jl_expr_nargs(fr) > 0 && jl_is_expr(jl_exprarg(fr, 0))) {
                 jl_expr_t *path = (jl_expr_t*)jl_exprarg(fr, 0);
-                if (((jl_expr_t*)path)->head == dot_sym) {
+                if (((jl_expr_t*)path)->head == jl_dot_sym) {
                     jl_sym_t *name = NULL;
                     jl_module_t *from = eval_import_path(m, NULL, path->args, &name, keyword);
                     if (name != NULL) {
@@ -607,7 +609,7 @@ static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword
 // location in julia code gets into the backtrace.
 static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...)
 {
-    jl_value_t *throw_ex = (jl_value_t*)jl_exprn(call_sym, 2);
+    jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2);
     JL_GC_PUSH1(&throw_ex);
     jl_exprargset(throw_ex, 0, jl_builtin_throw);
     va_list args;
@@ -642,7 +644,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     jl_expr_t *ex = (jl_expr_t*)e;
 
-    if (ex->head == dot_sym && jl_expr_nargs(ex) != 1) {
+    if (ex->head == jl_dot_sym && jl_expr_nargs(ex) != 1) {
         if (jl_expr_nargs(ex) != 2)
             jl_eval_errorf(m, "syntax: malformed \".\" expression");
         jl_value_t *lhs = jl_exprarg(ex, 0);
@@ -663,18 +665,18 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     size_t last_age = ct->world_age;
     if (!expanded && jl_needs_lowering(e)) {
-        ct->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno);
         ct->world_age = last_age;
     }
     jl_sym_t *head = jl_is_expr(ex) ? ex->head : NULL;
 
-    if (head == module_sym) {
+    if (head == jl_module_sym) {
         jl_value_t *val = jl_eval_module_expr(m, ex);
         JL_GC_POP();
         return val;
     }
-    else if (head == using_sym) {
+    else if (head == jl_using_sym) {
         jl_sym_t *name = NULL;
         jl_module_t *from = eval_import_from(m, ex, "using");
         size_t i = 0;
@@ -684,7 +686,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         }
         for (; i < jl_expr_nargs(ex); i++) {
             jl_value_t *a = jl_exprarg(ex, i);
-            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == dot_sym) {
+            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
                 name = NULL;
                 jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "using");
                 jl_module_t *u = import;
@@ -708,8 +710,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
                 }
                 continue;
             }
-            else if (from && jl_is_expr(a) && ((jl_expr_t*)a)->head == as_sym && jl_expr_nargs(a) == 2 &&
-                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == dot_sym) {
+            else if (from && jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_as_sym && jl_expr_nargs(a) == 2 &&
+                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == jl_dot_sym) {
                 jl_sym_t *asname = (jl_sym_t*)jl_exprarg(a, 1);
                 if (jl_is_symbol(asname)) {
                     jl_expr_t *path = (jl_expr_t*)jl_exprarg(a, 0);
@@ -727,7 +729,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == import_sym) {
+    else if (head == jl_import_sym) {
         jl_sym_t *name = NULL;
         jl_module_t *from = eval_import_from(m, ex, "import");
         size_t i = 0;
@@ -737,7 +739,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         }
         for (; i < jl_expr_nargs(ex); i++) {
             jl_value_t *a = jl_exprarg(ex, i);
-            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == dot_sym) {
+            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
                 name = NULL;
                 jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "import");
                 if (name == NULL) {
@@ -750,8 +752,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
                 }
                 continue;
             }
-            else if (jl_is_expr(a) && ((jl_expr_t*)a)->head == as_sym && jl_expr_nargs(a) == 2 &&
-                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == dot_sym) {
+            else if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_as_sym && jl_expr_nargs(a) == 2 &&
+                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == jl_dot_sym) {
                 jl_sym_t *asname = (jl_sym_t*)jl_exprarg(a, 1);
                 if (jl_is_symbol(asname)) {
                     jl_expr_t *path = (jl_expr_t*)jl_exprarg(a, 0);
@@ -774,7 +776,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == export_sym) {
+    else if (head == jl_export_sym) {
         for (size_t i = 0; i < jl_array_len(ex->args); i++) {
             jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i);
             if (!jl_is_symbol(name))
@@ -784,7 +786,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == global_sym) {
+    else if (head == jl_global_sym) {
         // create uninitialized mutable binding for "global x" decl
         size_t i, l = jl_array_len(ex->args);
         for (i = 0; i < l; i++) {
@@ -805,7 +807,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == const_sym) {
+    else if (head == jl_const_sym) {
         jl_sym_t *arg = (jl_sym_t*)jl_exprarg(ex, 0);
         jl_module_t *gm;
         jl_sym_t *gs;
@@ -823,7 +825,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == toplevel_sym) {
+    else if (head == jl_toplevel_sym) {
         jl_value_t *res = jl_nothing;
         int i;
         for (i = 0; i < jl_array_len(ex->args); i++) {
@@ -832,7 +834,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return res;
     }
-    else if (head == error_sym || head == incomplete_sym) {
+    else if (head == jl_error_sym || head == jl_incomplete_sym) {
         if (jl_expr_nargs(ex) == 0)
             jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head));
         if (jl_is_string(jl_exprarg(ex, 0)))
@@ -848,19 +850,20 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         return (jl_value_t*)ex;
     }
 
-    int has_intrinsics = 0, has_defs = 0, has_loops = 0, has_opaque = 0;
-    assert(head == thunk_sym);
+    int has_intrinsics = 0, has_defs = 0, has_loops = 0, has_opaque = 0, forced_compile = 0;
+    assert(head == jl_thunk_sym);
     thk = (jl_code_info_t*)jl_exprarg(ex, 0);
     assert(jl_is_code_info(thk));
     assert(jl_typeis(thk->code, jl_array_any_type));
-    body_attributes((jl_array_t*)thk->code, &has_intrinsics, &has_defs, &has_loops, &has_opaque);
+    body_attributes((jl_array_t*)thk->code, &has_intrinsics, &has_defs, &has_loops, &has_opaque, &forced_compile);
 
     jl_value_t *result;
-    if (has_intrinsics || (!has_defs && fast && has_loops &&
-                           jl_options.compile_enabled != JL_OPTIONS_COMPILE_OFF &&
-                           jl_options.compile_enabled != JL_OPTIONS_COMPILE_MIN &&
-                           jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF &&
-                           jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) {
+    if (forced_compile || has_intrinsics ||
+            (!has_defs && fast && has_loops &&
+            jl_options.compile_enabled != JL_OPTIONS_COMPILE_OFF &&
+            jl_options.compile_enabled != JL_OPTIONS_COMPILE_MIN &&
+            jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF &&
+            jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) {
         // use codegen
         mfunc = method_instance_for_thunk(thk, m);
         jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
@@ -868,7 +871,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         // worthwhile and also unsound (see #24316).
         // TODO: This is still not correct since an `eval` can happen elsewhere, but it
         // helps in common cases.
-        size_t world = jl_world_counter;
+        size_t world = jl_atomic_load_acquire(&jl_world_counter);
         ct->world_age = world;
         if (!has_defs && jl_get_module_infer(m) != 0) {
             (void)jl_type_infer(mfunc, world, 0);
@@ -991,8 +994,8 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     JL_GC_PUSH3(&ast, &result, &expression);
 
     ast = jl_svecref(jl_parse(jl_string_data(text), jl_string_len(text),
-                              filename, 0, (jl_value_t*)all_sym), 0);
-    if (!jl_is_expr(ast) || ((jl_expr_t*)ast)->head != toplevel_sym) {
+                              filename, 0, (jl_value_t*)jl_all_sym), 0);
+    if (!jl_is_expr(ast) || ((jl_expr_t*)ast)->head != jl_toplevel_sym) {
         jl_errorf("jl_parse_all() must generate a top level expression");
     }
 
@@ -1015,7 +1018,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
             }
             expression = jl_expand_with_loc_warn(expression, module,
                                                  jl_string_data(filename), lineno);
-            ct->world_age = jl_world_counter;
+            ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
             result = jl_toplevel_eval_flex(module, expression, 1, 1);
         }
     }
diff --git a/src/typemap.c b/src/typemap.c
index 58dd2b8b13069..dfa8ac67f6abc 100644
--- a/src/typemap.c
+++ b/src/typemap.c
@@ -259,28 +259,28 @@ static int is_cache_leaf(jl_value_t *ty, int tparam)
     return (jl_is_concrete_type(ty) && (tparam || !jl_is_kind(ty)));
 }
 
-static jl_typemap_t **mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
+static _Atomic(jl_typemap_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
 {
     if (cache == (jl_array_t*)jl_an_empty_vec_any)
         return NULL;
-    jl_typemap_t **pml = jl_table_peek_bp(cache, ty);
+    _Atomic(jl_typemap_t*) *pml = jl_table_peek_bp(cache, ty);
     JL_GC_PROMISE_ROOTED(pml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim
     return pml;
 }
 
-static void mtcache_hash_insert(jl_array_t **cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val)
+static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val)
 {
     int inserted = 0;
-    jl_array_t *a = *cache;
+    jl_array_t *a = jl_atomic_load_relaxed(cache);
     if (a == (jl_array_t*)jl_an_empty_vec_any) {
         a = jl_alloc_vec_any(16);
-        *cache = a;
+        jl_atomic_store_release(cache, a);
         jl_gc_wb(parent, a);
     }
     a = jl_eqtable_put(a, key, val, &inserted);
     assert(inserted);
-    if (a != *cache) {
-        *cache = a;
+    if (a != jl_atomic_load_relaxed(cache)) {
+        jl_atomic_store_release(cache, a);
         jl_gc_wb(parent, a);
     }
 }
@@ -299,7 +299,7 @@ static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, j
 static int jl_typemap_array_visitor(jl_array_t *a, jl_typemap_visitor_fptr fptr, void *closure)
 {
     size_t i, l = jl_array_len(a);
-    jl_typemap_t **data = (jl_typemap_t **)jl_array_data(a);
+    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
     for (i = 1; i < l; i += 2) {
         jl_value_t *d = jl_atomic_load_relaxed(&data[i]);
         JL_GC_PROMISE_ROOTED(d);
@@ -394,7 +394,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
 {
     JL_GC_PUSH1(&a);
     size_t i, l = jl_array_len(a);
-    jl_typemap_t **data = (jl_typemap_t **)jl_array_data(a);
+    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
     unsigned height = tparam & 2 ? jl_supertype_height((jl_datatype_t*)ty) : 0;
     for (i = 0; i < l; i += 2) {
         jl_value_t *t = jl_atomic_load_relaxed(&data[i]);
@@ -402,7 +402,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
         if (t == jl_nothing || t == NULL)
             continue;
         if (tparam & 2) {
-            jl_typemap_t *ml = data[i + 1];
+            jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i + 1]);
             JL_GC_PROMISE_ROOTED(ml);
             if (ty == (jl_value_t*)jl_any_type || // easy case: Any always matches
                 tname_intersection((jl_datatype_t*)ty, (jl_typename_t*)t, height)) {
@@ -845,7 +845,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                     if (!ty || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) {
                         // couldn't figure out unique `a0` initial point, so scan all for matches
                         size_t i, l = jl_array_len(tname);
-                        jl_typemap_t **data = (jl_typemap_t **)jl_array_ptr_data(tname);
+                        _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname);
                         JL_GC_PUSH1(&tname);
                         for (i = 1; i < l; i += 2) {
                             jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -884,7 +884,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 else {
                     // doing subtype, but couldn't figure out unique `ty`, so scan all for supertypes
                     size_t i, l = jl_array_len(name1);
-                    jl_typemap_t **data = (jl_typemap_t **)jl_array_ptr_data(name1);
+                    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(name1);
                     JL_GC_PUSH1(&name1);
                     for (i = 1; i < l; i += 2) {
                         jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -1034,10 +1034,10 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
             else {
                 // couldn't figure out unique `name` initial point, so must scan all for matches
                 size_t i, l = jl_array_len(tname);
-                jl_typemap_t **data = (jl_typemap_t **)jl_array_ptr_data(tname);
+                _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname);
                 JL_GC_PUSH1(&tname);
                 for (i = 1; i < l; i += 2) {
-                    jl_typemap_t *ml_or_cache = data[i];
+                    jl_typemap_t *ml_or_cache = jl_atomic_load_relaxed(&data[i]);
                     if (ml_or_cache == NULL || ml_or_cache == jl_nothing)
                         continue;
                     jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs + 1, world);
@@ -1082,7 +1082,7 @@ static unsigned jl_typemap_list_count_locked(jl_typemap_entry_t *ml) JL_NOTSAFEP
     unsigned count = 0;
     while (ml != (void*)jl_nothing) {
         count++;
-        ml = ml->next;
+        ml = jl_atomic_load_relaxed(&ml->next);
     }
     return count;
 }
@@ -1095,12 +1095,12 @@ static jl_typemap_level_t *jl_new_typemap_level(void)
     jl_typemap_level_t *cache =
         (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t),
                                          jl_typemap_level_type);
-    cache->arg1 = (jl_array_t*)jl_an_empty_vec_any;
-    cache->targ = (jl_array_t*)jl_an_empty_vec_any;
-    cache->name1 = (jl_array_t*)jl_an_empty_vec_any;
-    cache->tname = (jl_array_t*)jl_an_empty_vec_any;
-    cache->linear = (jl_typemap_entry_t*)jl_nothing;
-    cache->any = jl_nothing;
+    jl_atomic_store_relaxed(&cache->arg1, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->targ, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->name1, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->tname, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->linear, (jl_typemap_entry_t*)jl_nothing);
+    jl_atomic_store_relaxed(&cache->any, jl_nothing);
     return cache;
 }
 
@@ -1111,8 +1111,9 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache(
     jl_typemap_entry_t *next = NULL;
     JL_GC_PUSH3(&cache, &next, &ml);
     while (ml != (void*)jl_nothing) {
-        next = ml->next;
-        ml->next = (jl_typemap_entry_t*)jl_nothing;
+        next = jl_atomic_load_relaxed(&ml->next);
+        jl_atomic_store_relaxed(&ml->next, (jl_typemap_entry_t*)jl_nothing);
+        // n.b. this is being done concurrently with lookups!
         // TODO: is it safe to be doing this concurrently with lookups?
         jl_typemap_level_insert_(map, cache, ml, offs);
         ml = next;
@@ -1122,56 +1123,58 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache(
 }
 
 static void jl_typemap_list_insert_(
-        jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
+        jl_typemap_t *map, _Atomic(jl_typemap_entry_t*) *pml, jl_value_t *parent,
         jl_typemap_entry_t *newrec)
 {
-    jl_typemap_entry_t *l = *pml;
+    jl_typemap_entry_t *l = jl_atomic_load_relaxed(pml);
     while ((jl_value_t*)l != jl_nothing) {
         if (newrec->isleafsig || !l->isleafsig)
             if (newrec->issimplesig || !l->issimplesig)
                 break;
         pml = &l->next;
         parent = (jl_value_t*)l;
-        l = l->next;
+        l = jl_atomic_load_relaxed(&l->next);
     }
-    newrec->next = l;
-    jl_gc_wb(newrec, newrec->next);
+    jl_atomic_store_relaxed(&newrec->next, l);
+    jl_gc_wb(newrec, l);
     jl_atomic_store_release(pml, newrec);
     jl_gc_wb(parent, newrec);
 }
 
 static void jl_typemap_insert_generic(
-        jl_typemap_t *map, jl_typemap_t **pml, jl_value_t *parent,
+        jl_typemap_t *map, _Atomic(jl_typemap_t*) *pml, jl_value_t *parent,
         jl_typemap_entry_t *newrec, int8_t offs)
 {
-    if (jl_typeof(*pml) == (jl_value_t*)jl_typemap_level_type) {
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs);
+    jl_typemap_t *ml = jl_atomic_load_relaxed(pml);
+    if (jl_typeof(ml) == (jl_value_t*)jl_typemap_level_type) {
+        jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
     }
 
-    unsigned count = jl_typemap_list_count_locked((jl_typemap_entry_t*)*pml);
+    unsigned count = jl_typemap_list_count_locked((jl_typemap_entry_t*)ml);
     if (count > MAX_METHLIST_COUNT) {
-        *pml = (jl_typemap_t*)jl_method_convert_list_to_cache(
-            map, (jl_typemap_entry_t *)*pml,
-            offs);
-        jl_gc_wb(parent, *pml);
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs);
+        ml = (jl_typemap_t*)jl_method_convert_list_to_cache(
+            map, (jl_typemap_entry_t*)ml, offs);
+        jl_atomic_store_release(pml, ml);
+        jl_gc_wb(parent, ml);
+        jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
     }
 
-    jl_typemap_list_insert_(map, (jl_typemap_entry_t **)pml,
+    jl_typemap_list_insert_(map, (_Atomic(jl_typemap_entry_t*)*)pml,
         parent, newrec);
 }
 
 static void jl_typemap_array_insert_(
-        jl_typemap_t *map, jl_array_t **cache, jl_value_t *key, jl_typemap_entry_t *newrec,
+        jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
         jl_value_t *parent, int8_t offs)
 {
-    jl_typemap_t **pml = mtcache_hash_lookup_bp(*cache, key);
+    jl_array_t *cache = jl_atomic_load_relaxed(pcache);
+    _Atomic(jl_typemap_t*) *pml = mtcache_hash_lookup_bp(cache, key);
     if (pml != NULL)
-        jl_typemap_insert_generic(map, pml, (jl_value_t*)*cache, newrec, offs+1);
+        jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, offs+1);
     else
-        mtcache_hash_insert(cache, parent, key, (jl_typemap_t*)newrec);
+        mtcache_hash_insert(pcache, parent, key, (jl_typemap_t*)newrec);
 }
 
 static void jl_typemap_level_insert_(
@@ -1276,7 +1279,7 @@ jl_typemap_entry_t *jl_typemap_alloc(
     newrec->simplesig = simpletype;
     newrec->func.value = newvalue;
     newrec->guardsigs = guardsigs;
-    newrec->next = (jl_typemap_entry_t*)jl_nothing;
+    jl_atomic_store_relaxed(&newrec->next, (jl_typemap_entry_t*)jl_nothing);
     newrec->min_world = min_world;
     newrec->max_world = max_world;
     newrec->va = isva;
@@ -1285,10 +1288,11 @@ jl_typemap_entry_t *jl_typemap_alloc(
     return newrec;
 }
 
-void jl_typemap_insert(jl_typemap_t **cache, jl_value_t *parent,
+void jl_typemap_insert(_Atomic(jl_typemap_t *) *pcache, jl_value_t *parent,
         jl_typemap_entry_t *newrec, int8_t offs)
 {
-    jl_typemap_insert_generic(*cache, cache, parent, newrec, offs);
+    jl_typemap_t *cache = jl_atomic_load_relaxed(pcache);
+    jl_typemap_insert_generic(cache, pcache, parent, newrec, offs);
 }
 
 #ifdef __cplusplus
diff --git a/src/uprobes.d b/src/uprobes.d
new file mode 100644
index 0000000000000..a583daf8c58f4
--- /dev/null
+++ b/src/uprobes.d
@@ -0,0 +1,18 @@
+/* Julia DTrace provider: declares the gc__* probes below; DTrace exposes each "__" in a probe name as "-" (e.g. gc__begin is traced as gc-begin). */
+
+provider julia {
+    probe gc__begin(int collection);
+    probe gc__stop_the_world();
+    probe gc__mark__begin();
+    probe gc__mark__end(int64_t scanned_bytes, int64_t perm_scanned_bytes);
+    probe gc__sweep__begin(int full);
+    probe gc__sweep__end();
+    probe gc__end();
+    probe gc__finalizer();
+};
+
+#pragma D attributes Evolving/Evolving/Common provider julia provider
+#pragma D attributes Evolving/Evolving/Common provider julia module
+#pragma D attributes Evolving/Evolving/Common provider julia function
+#pragma D attributes Evolving/Evolving/Common provider julia name
+#pragma D attributes Evolving/Evolving/Common provider julia args
diff --git a/src/utils.scm b/src/utils.scm
index c1a893102053c..7be6b2999a90c 100644
--- a/src/utils.scm
+++ b/src/utils.scm
@@ -79,6 +79,7 @@
 
 (define (caddddr x) (car (cdr (cdr (cdr (cdr x))))))
 (define (cdddddr x) (cdr (cdr (cdr (cdr (cdr x))))))
+(define (cadddddr x) (car (cdddddr x)))
 
 (define (table.clone t)
   (let ((nt (table)))
diff --git a/src/support/win32_ucontext.c b/src/win32_ucontext.c
similarity index 99%
rename from src/support/win32_ucontext.c
rename to src/win32_ucontext.c
index df50eb209341e..de3cec5b778e5 100644
--- a/src/support/win32_ucontext.c
+++ b/src/win32_ucontext.c
@@ -26,11 +26,9 @@ JL_DLLEXPORT EXCEPTION_DISPOSITION NTAPI __julia_personality(
 
     EXCEPTION_DISPOSITION rval;
     switch (jl_exception_handler(&ExceptionInfo)) {
-#ifndef _MSC_VER
         case EXCEPTION_EXECUTE_HANDLER:
             rval = ExceptionExecuteHandler;
             break;
-#endif
         case EXCEPTION_CONTINUE_EXECUTION:
             rval = ExceptionContinueExecution;
             break;
diff --git a/src/support/win32_ucontext.h b/src/win32_ucontext.h
similarity index 95%
rename from src/support/win32_ucontext.h
rename to src/win32_ucontext.h
index 6730cb96ee873..b856abdc26eef 100644
--- a/src/support/win32_ucontext.h
+++ b/src/win32_ucontext.h
@@ -16,7 +16,7 @@ typedef struct {
         size_t ss_size;
     } uc_stack;
     jmp_buf uc_mcontext;
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     void *tsan_state;
 #endif
 } win32_ucontext_t;
diff --git a/stdlib/.gitignore b/stdlib/.gitignore
index 891eda58c689d..d90aaa993d049 100644
--- a/stdlib/.gitignore
+++ b/stdlib/.gitignore
@@ -15,5 +15,7 @@
 /NetworkOptions
 /SuiteSparse-*
 /SuiteSparse
+/SHA-*
+/SHA
 /*_jll/StdlibArtifacts.toml
 /*/Manifest.toml
diff --git a/stdlib/ArgTools.version b/stdlib/ArgTools.version
index c9026ce76af7b..0ae273bb18db6 100644
--- a/stdlib/ArgTools.version
+++ b/stdlib/ArgTools.version
@@ -1,2 +1,4 @@
 ARGTOOLS_BRANCH = master
-ARGTOOLS_SHA1 = fa878696ff2ae4ba7ca9942bf9544556c0d86ce4
+ARGTOOLS_SHA1 = 08b11b2707593d4d7f92e5f1b9dba7668285ff82
+ARGTOOLS_GIT_URL := https://github.com/JuliaIO/ArgTools.jl.git
+ARGTOOLS_TAR_URL = https://api.github.com/repos/JuliaIO/ArgTools.jl/tarball/$1
diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml
index 43e90ebb89e01..15ca525723c07 100644
--- a/stdlib/CompilerSupportLibraries_jll/Project.toml
+++ b/stdlib/CompilerSupportLibraries_jll/Project.toml
@@ -1,5 +1,9 @@
 name = "CompilerSupportLibraries_jll"
 uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+
+# NOTE: When updating this, also make sure to update the value
+# `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable
+# automatic usage of BB-built CSLs on extremely up-to-date systems!
 version = "0.5.0+0"
 
 [deps]
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index b8fdad56dc78c..4975f175bbf16 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -579,6 +579,26 @@ julia> Dates.value(Dates.Millisecond(10))
 10
 ```
 
+Representing periods or durations that are not integer multiples of the basic types can be achieved
+with the [`Dates.CompoundPeriod`](@ref) type. Compound periods may be constructed manually from simple
+[`Period`](@ref) types. Additionally, the [`canonicalize`](@ref) function can be used to break down a
+period into a [`Dates.CompoundPeriod`](@ref). This is particularly useful to convert a duration, e.g.,
+the difference between two `DateTime` values, into a more convenient representation.
+
+```jldoctest
+julia> cp = Dates.CompoundPeriod(Day(1),Minute(1))
+1 day, 1 minute
+
+julia> t1 = DateTime(2018,8,8,16,58,00)
+2018-08-08T16:58:00
+
+julia> t2 = DateTime(2021,6,23,10,00,00)
+2021-06-23T10:00:00
+
+julia> canonicalize(t2-t1) # creates a CompoundPeriod
+149 weeks, 6 days, 17 hours, 2 minutes
+```
+
 ## Rounding
 
 [`Date`](@ref) and [`DateTime`](@ref) values can be rounded to a specified resolution (e.g., 1
@@ -784,6 +804,7 @@ Dates.toprev(::Function, ::Dates.TimeType)
 ```@docs
 Dates.Period(::Any)
 Dates.CompoundPeriod(::Vector{<:Dates.Period})
+Dates.canonicalize
 Dates.value
 Dates.default
 Dates.periods
diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl
index 22c792cb2f333..61df01302521b 100644
--- a/stdlib/Dates/src/periods.jl
+++ b/stdlib/Dates/src/periods.jl
@@ -357,6 +357,13 @@ function Base.string(x::CompoundPeriod)
 end
 Base.show(io::IO,x::CompoundPeriod) = print(io, string(x))
 
+# Convert a CompoundPeriod to a single concrete Period type `T` by converting each
+# component to `T` and summing. `init = zero(T)` makes an empty CompoundPeriod convert
+# to `zero(T)` instead of failing on an empty reduction. Abstract targets (e.g. `Period`)
+# throw a MethodError; components not convertible to `T` propagate their own error.
+Base.convert(::Type{T}, x::CompoundPeriod) where T<:Period =
+    isconcretetype(T) ? sum(T, x.periods; init = zero(T)) : throw(MethodError(convert,(T,x)))
+
 # E.g. Year(1) + Day(1)
 (+)(x::Period,y::Period) = CompoundPeriod(Period[x, y])
 (+)(x::CompoundPeriod, y::Period) = CompoundPeriod(vcat(x.periods, y))
diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl
index a1a6884d4b0c3..485fea5624066 100644
--- a/stdlib/Dates/test/arithmetic.jl
+++ b/stdlib/Dates/test/arithmetic.jl
@@ -508,4 +508,13 @@ end
     end
 end
 
+@testset "Diff of dates" begin
+    for t ∈ [Day, Week, Hour, Minute]
+        a = DateTime(2021,1,1):t(1):DateTime(2021,2,1)
+        d = diff(a)
+        @test d == diff(collect(a))
+        @test eltype(d) === typeof(a[1] - a[2])
+    end
+end
+
 end
diff --git a/stdlib/Dates/test/periods.jl b/stdlib/Dates/test/periods.jl
index 81aacd1a9e54b..3bb310be4ee84 100644
--- a/stdlib/Dates/test/periods.jl
+++ b/stdlib/Dates/test/periods.jl
@@ -519,5 +519,18 @@ end
     #Test combined Fixed and Other Periods
     @test (1m + 1d < 1m + 1s) == false
 end
+
+@testset "Convert CompoundPeriod to Period" begin
+    @test convert(Month, Year(1) + Month(1)) === Month(13)
+    @test convert(Second, Minute(1) + Second(30)) === Second(90)
+    @test convert(Minute, Minute(1) + Second(60)) === Minute(2)
+    @test convert(Millisecond, Minute(1) + Second(30)) === Millisecond(90_000)
+    @test_throws InexactError convert(Minute, Minute(1) + Second(30))
+    @test_throws MethodError convert(Month, Minute(1) + Second(30))
+    @test_throws MethodError convert(Second, Month(1) + Second(30))
+    @test_throws MethodError convert(Period, Minute(1) + Second(30))
+    @test_throws MethodError convert(Dates.FixedPeriod, Minute(1) + Second(30))
+end
+
 end
 
diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl
index dd9101fa1b4ce..d428a6df0e683 100644
--- a/stdlib/Distributed/src/Distributed.jl
+++ b/stdlib/Distributed/src/Distributed.jl
@@ -84,15 +84,15 @@ function _require_callback(mod::Base.PkgId)
     end
 end
 
-const REF_ID = Ref(1)
-next_ref_id() = (id = REF_ID[]; REF_ID[] = id+1; id)
+const REF_ID = Threads.Atomic{Int}(1)
+next_ref_id() = Threads.atomic_add!(REF_ID, 1)
 
 struct RRID
     whence::Int
     id::Int
 
-    RRID() = RRID(myid(),next_ref_id())
-    RRID(whence, id) = new(whence,id)
+    RRID() = RRID(myid(), next_ref_id())
+    RRID(whence, id) = new(whence, id)
 end
 
 hash(r::RRID, h::UInt) = hash(r.whence, hash(r.id, h))
diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl
index 591ce3f850551..cea8258f36939 100644
--- a/stdlib/Distributed/src/cluster.jl
+++ b/stdlib/Distributed/src/cluster.jl
@@ -96,13 +96,13 @@ end
 mutable struct Worker
     id::Int
     msg_lock::Threads.ReentrantLock # Lock for del_msgs, add_msgs, and gcflag
-    del_msgs::Array{Any,1}
+    del_msgs::Array{Any,1} # XXX: Could del_msgs and add_msgs be Channels?
     add_msgs::Array{Any,1}
-    gcflag::Bool
+    @atomic gcflag::Bool
     state::WorkerState
-    c_state::Threads.Condition # wait for state changes, lock for state
-    ct_time::Float64           # creation time
-    conn_func::Any             # used to setup connections lazily
+    c_state::Condition      # wait for state changes
+    ct_time::Float64        # creation time
+    conn_func::Any          # used to setup connections lazily
 
     r_stream::IO
     w_stream::IO
@@ -134,7 +134,7 @@ mutable struct Worker
         if haskey(map_pid_wrkr, id)
             return map_pid_wrkr[id]
         end
-        w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Threads.Condition(), time(), conn_func)
+        w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Condition(), time(), conn_func)
         w.initialized = Event()
         register_worker(w)
         w
@@ -144,16 +144,12 @@ mutable struct Worker
 end
 
 function set_worker_state(w, state)
-    lock(w.c_state) do
-        w.state = state
-        notify(w.c_state; all=true)
-    end
+    w.state = state
+    notify(w.c_state; all=true)
 end
 
 function check_worker_state(w::Worker)
-    lock(w.c_state)
     if w.state === W_CREATED
-        unlock(w.c_state)
         if !isclusterlazy()
             if PGRP.topology === :all_to_all
                 # Since higher pids connect with lower pids, the remote worker
@@ -173,8 +169,6 @@ function check_worker_state(w::Worker)
             errormonitor(t)
             wait_for_conn(w)
         end
-    else
-        unlock(w.c_state)
     end
 end
 
@@ -193,25 +187,13 @@ function exec_conn_func(w::Worker)
 end
 
 function wait_for_conn(w)
-    lock(w.c_state)
     if w.state === W_CREATED
-        unlock(w.c_state)
         timeout =  worker_timeout() - (time() - w.ct_time)
         timeout <= 0 && error("peer $(w.id) has not connected to $(myid())")
 
-        T = Threads.@spawn begin
-            sleep($timeout)
-            lock(w.c_state) do
-                notify(w.c_state; all=true)
-            end
-        end
-        errormonitor(T)
-        lock(w.c_state) do
-            wait(w.c_state)
-            w.state === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds")
-        end
-    else
-        unlock(w.c_state)
+        @async (sleep(timeout); notify(w.c_state; all=true))
+        wait(w.c_state)
+        w.state === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds")
     end
     nothing
 end
@@ -506,10 +488,7 @@ function addprocs_locked(manager::ClusterManager; kwargs...)
         while true
             if isempty(launched)
                 istaskdone(t_launch) && break
-                @async begin
-                    sleep(1)
-                    notify(launch_ntfy)
-                end
+                @async (sleep(1); notify(launch_ntfy))
                 wait(launch_ntfy)
             end
 
@@ -662,12 +641,7 @@ function create_worker(manager, wconfig)
         # require the value of config.connect_at which is set only upon connection completion
         for jw in PGRP.workers
             if (jw.id != 1) && (jw.id < w.id)
-                # wait for wl to join
-                lock(jw.c_state) do
-                    if jw.state === W_CREATED
-                        wait(jw.c_state)
-                    end
-                end
+                (jw.state === W_CREATED) && wait(jw.c_state)
                 push!(join_list, jw)
             end
         end
@@ -690,12 +664,7 @@ function create_worker(manager, wconfig)
         end
 
         for wl in wlist
-            if wl.state === W_CREATED
-                # wait for wl to join
-                lock(wl.c_state) do
-                   wait(wl.c_state)
-                end
-            end
+            (wl.state === W_CREATED) && wait(wl.c_state)
             push!(join_list, wl)
         end
     end
@@ -712,11 +681,7 @@ function create_worker(manager, wconfig)
     @async manage(w.manager, w.id, w.config, :register)
     # wait for rr_ntfy_join with timeout
     timedout = false
-    @async begin
-        sleep($timeout)
-        timedout = true
-        put!(rr_ntfy_join, 1)
-    end
+    @async (sleep($timeout); timedout = true; put!(rr_ntfy_join, 1))
     wait(rr_ntfy_join)
     if timedout
         error("worker did not connect within $timeout seconds")
diff --git a/stdlib/Distributed/src/macros.jl b/stdlib/Distributed/src/macros.jl
index 24a24f4c08ed4..0a62fdd5439f0 100644
--- a/stdlib/Distributed/src/macros.jl
+++ b/stdlib/Distributed/src/macros.jl
@@ -343,6 +343,9 @@ macro distributed(args...)
     var = loop.args[1].args[1]
     r = loop.args[1].args[2]
     body = loop.args[2]
+    if Meta.isexpr(body, :block) && body.args[end] isa LineNumberNode
+        resize!(body.args, length(body.args) - 1)
+    end
     if na==1
         syncvar = esc(Base.sync_varname)
         return quote
diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl
index 5b4f016c63a78..91a27aa95cb98 100644
--- a/stdlib/Distributed/src/managers.jl
+++ b/stdlib/Distributed/src/managers.jl
@@ -163,7 +163,7 @@ function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy:
     # Wait for all launches to complete.
     @sync for (i, (machine, cnt)) in enumerate(manager.machines)
         let machine=machine, cnt=cnt
-            @async try
+             @async try
                 launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy)
             catch e
                 print(stderr, "exception launching on machine $(machine) : $(e)\n")
@@ -183,7 +183,7 @@ function parse_machine(machine::AbstractString)
 
     if machine[begin] == '['  # ipv6 bracket notation (RFC 2732)
         ipv6_end = findlast(']', machine)
-        if ipv6_end == nothing
+        if ipv6_end === nothing
             throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine_def\""))
         end
         hoststr = machine[begin+1 : prevind(machine,ipv6_end)]
@@ -201,7 +201,7 @@ function parse_machine(machine::AbstractString)
         portstr = machine_def[2]
 
         portnum = tryparse(Int, portstr)
-        if portnum == nothing
+        if portnum === nothing
             msg = "invalid machine definition format string: invalid port format \"$machine_def\""
             throw(ArgumentError(msg))
         end
diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl
index fcba709b4db4a..fe3e5ab90b028 100644
--- a/stdlib/Distributed/src/messages.jl
+++ b/stdlib/Distributed/src/messages.jl
@@ -126,21 +126,30 @@ function flush_gc_msgs(w::Worker)
     if !isdefined(w, :w_stream)
         return
     end
-    lock(w.msg_lock) do
-        w.gcflag || return # early exit if someone else got to this
-        w.gcflag = false
-        msgs = w.add_msgs
-        w.add_msgs = Any[]
-        if !isempty(msgs)
-            remote_do(add_clients, w, msgs)
+    add_msgs = nothing
+    del_msgs = nothing
+    @lock w.msg_lock begin
+        if !w.gcflag # No work needed for this worker
+            return
+        end
+        @atomic w.gcflag = false
+        if !isempty(w.add_msgs)
+            add_msgs = w.add_msgs
+            w.add_msgs = Any[]
         end
 
-        msgs = w.del_msgs
-        w.del_msgs = Any[]
-        if !isempty(msgs)
-            remote_do(del_clients, w, msgs)
+        if !isempty(w.del_msgs)
+            del_msgs = w.del_msgs
+            w.del_msgs = Any[]
         end
     end
+    if add_msgs !== nothing
+        remote_do(add_clients, w, add_msgs)
+    end
+    if del_msgs !== nothing
+        remote_do(del_clients, w, del_msgs)
+    end
+    return
 end
 
 # Boundary inserted between messages on the wire, used for recovering
@@ -185,7 +194,7 @@ end
 function flush_gc_msgs()
     try
         for w in (PGRP::ProcessGroup).workers
-            if isa(w,Worker) && w.gcflag && (w.state == W_CONNECTED)
+            if isa(w,Worker) && (w.state == W_CONNECTED) && w.gcflag
                 flush_gc_msgs(w)
             end
         end
diff --git a/stdlib/Distributed/src/process_messages.jl b/stdlib/Distributed/src/process_messages.jl
index 8d5dac5af571e..a093ffff01d34 100644
--- a/stdlib/Distributed/src/process_messages.jl
+++ b/stdlib/Distributed/src/process_messages.jl
@@ -57,7 +57,7 @@ function showerror(io::IO, re::RemoteException)
     showerror(io, re.captured)
 end
 
-function run_work_thunk(thunk, print_error)
+function run_work_thunk(thunk::Function, print_error::Bool)
     local result
     try
         result = thunk()
@@ -230,8 +230,8 @@ function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool)
             deregister_worker(wpid)
         end
 
-        isopen(r_stream) && close(r_stream)
-        isopen(w_stream) && close(w_stream)
+        close(r_stream)
+        close(w_stream)
 
         if (myid() == 1) && (wpid > 1)
             if oldstate != W_TERMINATING
@@ -271,11 +271,11 @@ function process_hdr(s, validate_cookie)
 end
 
 function handle_msg(msg::CallMsg{:call}, header, r_stream, w_stream, version)
-    schedule_call(header.response_oid, ()->msg.f(msg.args...; msg.kwargs...))
+    schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...))
 end
 function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, version)
     errormonitor(@async begin
-        v = run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), false)
+        v = run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), false)
         if isa(v, SyncTake)
             try
                 deliver_result(w_stream, :call_fetch, header.notify_oid, v.v)
@@ -291,14 +291,14 @@ end
 
 function handle_msg(msg::CallWaitMsg, header, r_stream, w_stream, version)
     errormonitor(@async begin
-        rv = schedule_call(header.response_oid, ()->msg.f(msg.args...; msg.kwargs...))
+        rv = schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...))
         deliver_result(w_stream, :call_wait, header.notify_oid, fetch(rv.c))
         nothing
     end)
 end
 
 function handle_msg(msg::RemoteDoMsg, header, r_stream, w_stream, version)
-    errormonitor(@async run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), true))
+    errormonitor(@async run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), true))
 end
 
 function handle_msg(msg::ResultMsg, header, r_stream, w_stream, version)
diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl
index 5ac397656ce44..e314df589ae2a 100644
--- a/stdlib/Distributed/src/remotecall.jl
+++ b/stdlib/Distributed/src/remotecall.jl
@@ -26,12 +26,13 @@ mutable struct Future <: AbstractRemoteRef
     where::Int
     whence::Int
     id::Int
-    v::Union{Some{Any}, Nothing}
+    lock::ReentrantLock
+    @atomic v::Union{Some{Any}, Nothing}
 
     Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) =
-        (r = new(w,rrid.whence,rrid.id,v); return test_existing_ref(r))
+        (r = new(w,rrid.whence,rrid.id,ReentrantLock(),v); return test_existing_ref(r))
 
-    Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],t[4])  # Useful for creating dummy, zeroed-out instances
+    Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],ReentrantLock(),t[4])  # Useful for creating dummy, zeroed-out instances
 end
 
 """
@@ -69,10 +70,17 @@ function test_existing_ref(r::AbstractRemoteRef)
     found = getkey(client_refs, r, nothing)
     if found !== nothing
         @assert r.where > 0
-        if isa(r, Future) && found.v === nothing && r.v !== nothing
-            # we have recd the value from another source, probably a deserialized ref, send a del_client message
-            send_del_client(r)
-            found.v = r.v
+        if isa(r, Future)
+            # this is only for copying the reference from Future to RemoteRef (just created)
+            fv_cache = @atomic :acquire found.v
+            rv_cache = @atomic :monotonic r.v
+            if fv_cache === nothing && rv_cache !== nothing
+                # we have received the value from another source, probably a deserialized ref, so send a del_client message
+                send_del_client(r)
+                @lock found.lock begin
+                    @atomicreplace found.v nothing => rv_cache
+                end
+            end
         end
         return found::typeof(r)
     end
@@ -84,20 +92,25 @@ end
 
 function finalize_ref(r::AbstractRemoteRef)
     if r.where > 0 # Handle the case of the finalizer having been called manually
-        if islocked(client_refs)
-            # delay finalizer for later, when it's not already locked
+        if trylock(client_refs.lock) # trylock doesn't call wait which causes yields
+            try
+                delete!(client_refs.ht, r) # direct removal avoiding locks
+                if isa(r, RemoteChannel)
+                    send_del_client_no_lock(r)
+                else
+                    # send_del_client only if the reference has not been set
+                    v_cache = @atomic :monotonic r.v
+                    v_cache === nothing && send_del_client_no_lock(r)
+                    @atomic :monotonic r.v = nothing
+                end
+                r.where = 0
+            finally
+                unlock(client_refs.lock)
+            end
+        else
             finalizer(finalize_ref, r)
             return nothing
         end
-        delete!(client_refs, r)
-        if isa(r, RemoteChannel)
-            send_del_client(r)
-        else
-            # send_del_client only if the reference has not been set
-            r.v === nothing && send_del_client(r)
-            r.v = nothing
-        end
-        r.where = 0
     end
     nothing
 end
@@ -197,7 +210,8 @@ isready(f)  # will not block
 ```
 """
 function isready(rr::Future)
-    rr.v === nothing || return true
+    v_cache = @atomic rr.v
+    v_cache === nothing || return true
 
     rid = remoteref_id(rr)
     return if rr.where == myid()
@@ -229,13 +243,18 @@ del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid())
 del_client(id, client) = del_client(PGRP, id, client)
 function del_client(pg, id, client)
     lock(client_refs) do
-        rv = get(pg.refs, id, false)
-        if rv !== false
-            delete!(rv.clientset, client)
-            if isempty(rv.clientset)
-                delete!(pg.refs, id)
-                #print("$(myid()) collected $id\n")
-            end
+        _del_client(pg, id, client)
+    end
+    nothing
+end
+
+function _del_client(pg, id, client)
+    rv = get(pg.refs, id, false)
+    if rv !== false
+        delete!(rv.clientset, client)
+        if isempty(rv.clientset)
+            delete!(pg.refs, id)
+            #print("$(myid()) collected $id\n")
         end
     end
     nothing
@@ -258,9 +277,13 @@ function start_gc_msgs_task()
         Threads.@spawn begin
             while true
                 lock(any_gc_flag) do
+                    # this might miss events
                     wait(any_gc_flag)
-                    flush_gc_msgs() # handles throws internally
                 end
+                # Use invokelatest() so that custom message transport streams
+                # for workers can be defined in a newer world age than the Task
+                # which runs the loop here.
+                invokelatest(flush_gc_msgs) # handles throws internally
             end
         end
     )
@@ -271,21 +294,42 @@ function send_del_client(rr)
     if rr.where == myid()
         del_client(rr)
     elseif id_in_procs(rr.where) # process only if a valid worker
-        w = worker_from_id(rr.where)::Worker
-        msg = (remoteref_id(rr), myid())
-        # We cannot acquire locks from finalizers
-        Threads.@spawn begin
-            lock(w.msg_lock) do
-                push!(w.del_msgs, msg)
-                w.gcflag = true
-            end
-            lock(any_gc_flag) do
-                notify(any_gc_flag)
-            end
-        end
+        process_worker(rr)
+    end
+end
+
+function send_del_client_no_lock(rr)
+    # for gc context to avoid yields
+    if rr.where == myid()
+        _del_client(PGRP, remoteref_id(rr), myid())
+    elseif id_in_procs(rr.where) # process only if a valid worker
+        process_worker(rr)
     end
 end
 
+function publish_del_msg!(w::Worker, msg)
+    lock(w.msg_lock) do
+        push!(w.del_msgs, msg)
+        @atomic w.gcflag = true
+    end
+    lock(any_gc_flag) do
+        notify(any_gc_flag)
+    end
+end
+
+function process_worker(rr)
+    w = worker_from_id(rr.where)::Worker
+    msg = (remoteref_id(rr), myid())
+
+    # Needs to acquire a lock on the del_msg queue
+    T = Threads.@spawn begin
+        publish_del_msg!($w, $msg)
+    end
+    Base.errormonitor(T)
+
+    return
+end
+
 function add_client(id, client)
     lock(client_refs) do
         rv = lookup_ref(id)
@@ -310,7 +354,7 @@ function send_add_client(rr::AbstractRemoteRef, i)
         w = worker_from_id(rr.where)
         lock(w.msg_lock) do
             push!(w.add_msgs, (remoteref_id(rr), i))
-            w.gcflag = true
+            @atomic w.gcflag = true
         end
         lock(any_gc_flag) do
             notify(any_gc_flag)
@@ -320,26 +364,33 @@ end
 
 channel_type(rr::RemoteChannel{T}) where {T} = T
 
-serialize(s::ClusterSerializer, f::Future) = serialize(s, f, f.v === nothing)
-serialize(s::ClusterSerializer, rr::RemoteChannel) = serialize(s, rr, true)
-function serialize(s::ClusterSerializer, rr::AbstractRemoteRef, addclient)
-    if addclient
+function serialize(s::ClusterSerializer, f::Future)
+    v_cache = @atomic f.v
+    if v_cache === nothing
         p = worker_id_from_socket(s.io)
-        (p !== rr.where) && send_add_client(rr, p)
+        (p !== f.where) && send_add_client(f, p)
     end
+    fc = Future((f.where, f.whence, f.id, v_cache)) # copy to be used for serialization (contains a reset lock)
+    invoke(serialize, Tuple{ClusterSerializer, Any}, s, fc)
+end
+
+function serialize(s::ClusterSerializer, rr::RemoteChannel)
+    p = worker_id_from_socket(s.io)
+    (p !== rr.where) && send_add_client(rr, p)
     invoke(serialize, Tuple{ClusterSerializer, Any}, s, rr)
 end
 
 function deserialize(s::ClusterSerializer, t::Type{<:Future})
-    f = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t)
-    f2 = Future(f.where, RRID(f.whence, f.id), f.v) # ctor adds to client_refs table
+    fc = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) # deserialized copy
+    f2 = Future(fc.where, RRID(fc.whence, fc.id), fc.v) # ctor adds to client_refs table
 
     # 1) send_add_client() is not executed when the ref is being serialized
     #    to where it exists, hence do it here.
     # 2) If we have received a 'fetch'ed Future or if the Future ctor found an
     #    already 'fetch'ed instance in client_refs (Issue #25847), we should not
     #    track it in the backing RemoteValue store.
-    if f2.where == myid() && f2.v === nothing
+    f2v_cache = @atomic f2.v
+    if f2.where == myid() && f2v_cache === nothing
         add_client(remoteref_id(f2), myid())
     end
     f2
@@ -372,10 +423,7 @@ end
 # make a thunk to call f on args in a way that simulates what would happen if
 # the function were sent elsewhere
 function local_remotecall_thunk(f, args, kwargs)
-    if isempty(args) && isempty(kwargs)
-        return f
-    end
-    return ()->f(args...; kwargs...)
+    return ()->invokelatest(f, args...; kwargs...)
 end
 
 function remotecall(f, w::LocalProcess, args...; kwargs...)
@@ -536,7 +584,7 @@ end
 
 Wait for a value to become available for the specified [`Future`](@ref).
 """
-wait(r::Future) = (r.v !== nothing && return r; call_on_owner(wait_ref, r, myid()); r)
+wait(r::Future) = (v_cache = @atomic r.v; v_cache !== nothing && return r; call_on_owner(wait_ref, r, myid()); r)
 
 """
     wait(r::RemoteChannel, args...)
@@ -553,11 +601,41 @@ Further calls to `fetch` on the same reference return the cached value. If the r
 is an exception, throws a [`RemoteException`](@ref) which captures the remote exception and backtrace.
 """
 function fetch(r::Future)
-    r.v !== nothing && return something(r.v)
-    v = call_on_owner(fetch_ref, r)
-    r.v = Some(v)
+    v_cache = @atomic r.v
+    v_cache !== nothing && return something(v_cache)
+
+    if r.where == myid()
+        rv, v_cache = @lock r.lock begin
+            v_cache = @atomic :monotonic r.v
+            rv = v_cache === nothing ? lookup_ref(remoteref_id(r)) : nothing
+            rv, v_cache
+        end
+
+        if v_cache !== nothing
+            return something(v_cache)
+        else
+            v_local = fetch(rv.c)
+        end
+    else
+        v_local = call_on_owner(fetch_ref, r)
+    end
+
+    v_cache = @atomic r.v
+
+    if v_cache === nothing # call_on_owner case
+        v_old, status = @lock r.lock begin
+            @atomicreplace r.v nothing => Some(v_local)
+        end
+        # status == true - when value obtained through call_on_owner
+        # status == false - any other situation: atomicreplace fails, because by the time the lock is obtained cache will be populated
+        # why? local put! performs caching and putting into channel under r.lock
+
+        # for local put! use the cached value, for call_on_owner cases just take the v_local as it was just cached in r.v
+        v_cache = status ? v_local : v_old
+    end
+
     send_del_client(r)
-    v
+    something(v_cache)
 end
 
 fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...)
@@ -581,12 +659,30 @@ A `put!` on an already set `Future` throws an `Exception`.
 All asynchronous remote calls return `Future`s and set the
 value to the return value of the call upon completion.
 """
-function put!(rr::Future, v)
-    rr.v !== nothing && error("Future can be set only once")
-    call_on_owner(put_future, rr, v, myid())
-    rr.v = Some(v)
-    rr
+function put!(r::Future, v)
+    if r.where == myid()
+        rid = remoteref_id(r)
+        rv = lookup_ref(rid)
+        isready(rv) && error("Future can be set only once")
+        @lock r.lock begin
+            put!(rv, v) # this notifies the tasks waiting on the channel in fetch
+            set_future_cache(r, v) # set the cache before leaving the lock, so that the notified tasks already see it cached
+        end
+        del_client(rid, myid())
+    else
+        @lock r.lock begin # same idea as above if there were any local tasks fetching on this Future
+            call_on_owner(put_future, r, v, myid())
+            set_future_cache(r, v)
+        end
+    end
+    r
 end
+
+function set_future_cache(r::Future, v)
+    _, ok = @atomicreplace r.v nothing => Some(v)
+    ok || error("internal consistency error detected for Future")
+end
+
 function put_future(rid, v, caller)
     rv = lookup_ref(rid)
     isready(rv) && error("Future can be set only once")
diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl
index 3b99afac8cc15..fd85bca0ca511 100644
--- a/stdlib/Distributed/test/distributed_exec.jl
+++ b/stdlib/Distributed/test/distributed_exec.jl
@@ -132,6 +132,39 @@ end
 testf(id_me)
 testf(id_other)
 
+function poll_while(f::Function; timeout_seconds::Integer = 120)
+    start_time = time_ns()
+    while f()
+        sleep(1)
+        if ( ( time_ns() - start_time )/1e9 ) > timeout_seconds
+            @error "Timed out" timeout_seconds
+            return false
+        end
+    end
+    return true
+end
+
+function _getenv_include_thread_unsafe()
+    environment_variable_name = "JULIA_TEST_INCLUDE_THREAD_UNSAFE"
+    default_value = "false"
+    environment_variable_value = strip(get(ENV, environment_variable_name, default_value))
+    b = parse(Bool, environment_variable_value)::Bool
+    return b
+end
+const _env_include_thread_unsafe = _getenv_include_thread_unsafe()
+function include_thread_unsafe()
+    if Threads.nthreads() > 1
+        if _env_include_thread_unsafe
+            return true
+        end
+        msg = "Skipping a thread-unsafe test because `Threads.nthreads() > 1`"
+        @warn msg Threads.nthreads()
+        Test.@test_broken false
+        return false
+    end
+    return true
+end
+
 # Distributed GC tests for Futures
 function test_futures_dgc(id)
     f = remotecall(myid, id)
@@ -143,8 +176,7 @@ function test_futures_dgc(id)
     @test fetch(f) == id
     @test f.v !== nothing
     yield(); # flush gc msgs
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == false
-
+    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid))
 
     # if unfetched, it should be deleted after a finalize
     f = remotecall(myid, id)
@@ -153,7 +185,7 @@ function test_futures_dgc(id)
     @test f.v === nothing
     finalize(f)
     yield(); # flush gc msgs
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == false
+    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid))
 end
 
 test_futures_dgc(id_me)
@@ -243,7 +275,7 @@ function test_remoteref_dgc(id)
     @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true
     finalize(rr)
     yield(); # flush gc msgs
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == false
+    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid))
 end
 test_remoteref_dgc(id_me)
 test_remoteref_dgc(id_other)
@@ -256,10 +288,14 @@ let wid1 = workers()[1],
     fstore = RemoteChannel(wid2)
 
     put!(fstore, rr)
-    @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    if include_thread_unsafe()
+        @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    end
     finalize(rr) # finalize locally
     yield() # flush gc msgs
-    @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    if include_thread_unsafe()
+        @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    end
     remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely
     sleep(0.5) # to ensure that wid2 messages have been executed on wid1
     @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == false
@@ -314,6 +350,9 @@ function test_regular_io_ser(ref::Distributed.AbstractRemoteRef)
         v = getfield(ref2, fld)
         if isa(v, Number)
             @test v === zero(typeof(v))
+        elseif fld == :lock
+            @test v isa ReentrantLock
+            @test !islocked(v)
         elseif v !== nothing
             error(string("Add test for field ", fld))
         end
@@ -1031,7 +1070,6 @@ function test_add_procs_threaded_blas()
         @warn "Skipping blas num threads tests due to unsupported blas version"
         return
     end
-    @test master_blas_thread_count <= 8 # check that Base set the environment variable in __init__ before LinearAlgebra dlopen'd it
 
     # Test with default enable_threaded_blas false
     processes_added = addprocs_with_testenv(2)
@@ -1696,5 +1734,4 @@ include("splitrange.jl")
 # Run topology tests last after removing all workers, since a given
 # cluster at any time only supports a single topology.
 rmprocs(workers())
-include("threads.jl")
 include("topology.jl")
diff --git a/stdlib/Distributed/test/threads.jl b/stdlib/Distributed/test/threads.jl
deleted file mode 100644
index 57d99b7ea056c..0000000000000
--- a/stdlib/Distributed/test/threads.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-using Test
-using Distributed, Base.Threads
-using Base.Iterators: product
-
-exeflags = ("--startup-file=no",
-            "--check-bounds=yes",
-            "--depwarn=error",
-            "--threads=2")
-
-function call_on(f, wid, tid)
-    remotecall(wid) do
-        t = Task(f)
-        ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid - 1)
-        schedule(t)
-        @assert threadid(t) == tid
-        t
-    end
-end
-
-# Run function on process holding the data to only serialize the result of f.
-# This becomes useful for things that cannot be serialized (e.g. running tasks)
-# or that would be unnecessarily big if serialized.
-fetch_from_owner(f, rr) = remotecall_fetch(f ∘ fetch, rr.where, rr)
-
-isdone(rr) = fetch_from_owner(istaskdone, rr)
-isfailed(rr) = fetch_from_owner(istaskfailed, rr)
-
-@testset "RemoteChannel allows put!/take! from thread other than 1" begin
-    ws = ts = product(1:2, 1:2)
-    @testset "from worker $w1 to $w2 via 1" for (w1, w2) in ws
-        @testset "from thread $w1.$t1 to $w2.$t2" for (t1, t2) in ts
-          # We want (the default) lazyness, so that we wait for `Worker.c_state`!
-            procs_added = addprocs(2; exeflags, lazy=true)
-            @everywhere procs_added using Base.Threads
-
-            p1 = procs_added[w1]
-            p2 = procs_added[w2]
-            chan_id = first(procs_added)
-            chan = RemoteChannel(chan_id)
-            send = call_on(p1, t1) do
-                put!(chan, nothing)
-            end
-            recv = call_on(p2, t2) do
-                take!(chan)
-            end
-
-            # Wait on the spawned tasks on the owner
-            @sync begin
-                Threads.@spawn fetch_from_owner(wait, recv)
-                Threads.@spawn fetch_from_owner(wait, send)
-            end
-
-            # Check the tasks
-            @test isdone(send)
-            @test isdone(recv)
-
-            @test !isfailed(send)
-            @test !isfailed(recv)
-
-            rmprocs(procs_added)
-        end
-    end
-end
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index 8ac1bc22f228a..4220d3288fda1 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,2 +1,4 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = cd002c3c6936d144ae668d70e18337931706c63a
+DOWNLOADS_SHA1 = a55825b0815fb22d0e3fe47903a180e9ffa23eab
+DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git
+DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl
index d929d1ebfb98d..fd26b62132047 100644
--- a/stdlib/FileWatching/src/FileWatching.jl
+++ b/stdlib/FileWatching/src/FileWatching.jl
@@ -21,15 +21,17 @@ export
 import Base: @handle_as, wait, close, eventloop, notify_error, IOError,
     _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
     iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct,
-    preserve_handle, unpreserve_handle, isreadable, iswritable, |
+    preserve_handle, unpreserve_handle, isreadable, iswritable, isopen,
+    |, getproperty, propertynames
 import Base.Filesystem.StatStruct
 if Sys.iswindows()
     import Base.WindowsRawSocket
 end
 
+
 # libuv file watching event flags
-const UV_RENAME = 1
-const UV_CHANGE = 2
+const UV_RENAME = Int32(1)
+const UV_CHANGE = Int32(2)
 struct FileEvent
     renamed::Bool
     changed::Bool
@@ -45,30 +47,33 @@ FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0,
               a.changed | b.changed,
               a.timedout | b.timedout)
 
+# libuv file descriptor event flags
+const UV_READABLE = Int32(1)
+const UV_WRITABLE = Int32(2)
+const UV_DISCONNECT = Int32(4)
+const UV_PRIORITIZED = Int32(8)
 struct FDEvent
-    readable::Bool
-    writable::Bool
-    disconnect::Bool
-    timedout::Bool
-    FDEvent(r::Bool, w::Bool, d::Bool, t::Bool) = new(r, w, d, t)
+    events::Int32
+    FDEvent(flags::Integer=0) = new(flags)
 end
-# libuv file descriptor event flags
-const UV_READABLE = 1
-const UV_WRITABLE = 2
-const UV_DISCONNECT = 4
+
+FDEvent(r::Bool, w::Bool, d::Bool, t::Bool) = FDEvent((UV_READABLE * r) | (UV_WRITABLE * w) | (UV_DISCONNECT * d)) # deprecated method
+
+function getproperty(f::FDEvent, field::Symbol)
+    events = getfield(f, :events)
+    field === :readable && return (events & UV_READABLE) != 0
+    field === :writable && return (events & UV_WRITABLE) != 0
+    field === :disconnect && return (events & UV_DISCONNECT) != 0
+    field === :prioritized && return (events & UV_PRIORITIZED) != 0
+    field === :timedout && return events == 0
+    field === :events && return Int(events)
+    getfield(f, field)::Union{}
+end
+propertynames(f::FDEvent) = (:readable, :writable, :disconnect, :prioritized, :timedout, :events)
 
 isreadable(f::FDEvent) = f.readable
 iswritable(f::FDEvent) = f.writable
-FDEvent() = FDEvent(false, false, false, true)
-FDEvent(flags::Integer) = FDEvent((flags & UV_READABLE) != 0,
-                                  (flags & UV_WRITABLE) != 0,
-                                  (flags & UV_DISCONNECT) != 0,
-                                  false)
-|(a::FDEvent, b::FDEvent) =
-    FDEvent(a.readable | b.readable,
-            a.writable | b.writable,
-            a.disconnect | b.disconnect,
-            a.timedout | b.timedout)
+|(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events))
 
 mutable struct FileMonitor
     handle::Ptr{Cvoid}
@@ -93,15 +98,13 @@ mutable struct FileMonitor
     end
 end
 
-
 mutable struct FolderMonitor
     handle::Ptr{Cvoid}
     notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError}
-    open::Bool
     FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder))
     function FolderMonitor(folder::String)
         handle = Libc.malloc(_sizeof_uv_fs_event)
-        this = new(handle, Channel(Inf), false)
+        this = new(handle, Channel(Inf))
         associate_julia_struct(handle, this)
         iolock_begin()
         err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
@@ -109,7 +112,6 @@ mutable struct FolderMonitor
             Libc.free(handle)
             throw(_UVError("FolderMonitor", err))
         end
-        this.open = true
         finalizer(uvfinalize, this)
         uv_error("FolderMonitor (start)",
                  ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
@@ -152,9 +154,10 @@ mutable struct _FDWatcher
     events::Int32
     active::Tuple{Bool, Bool}
 
-    let FDWatchers = Vector{Any}()
+    let FDWatchers = Vector{Any}() # XXX: this structure and refcount need thread-safety locks
         global _FDWatcher, uvfinalize
         @static if Sys.isunix()
+            _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
             function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
                 if !readable && !writable
                     throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher"))
@@ -181,7 +184,7 @@ mutable struct _FDWatcher
                     fdnum,
                     (Int(readable), Int(writable)),
                     Base.ThreadSynchronizer(),
-                    0,
+                    Int32(0),
                     (false, false))
                 associate_julia_struct(handle, this)
                 err = ccall(:uv_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, RawFD), eventloop(), handle, fd)
@@ -212,7 +215,7 @@ mutable struct _FDWatcher
                         FDWatchers[t.fdnum] = nothing
                     end
                 end
-                notify(t.notify, FDEvent())
+                notify(t.notify, Int32(0))
             finally
                 unlock(t.notify)
             end
@@ -222,10 +225,12 @@ mutable struct _FDWatcher
     end
 
     @static if Sys.iswindows()
+        _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
         function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
             handle = Libc._get_osfhandle(fd)
             return _FDWatcher(handle, readable, writable)
         end
+        _FDWatcher(fd::WindowsRawSocket, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
         function _FDWatcher(fd::WindowsRawSocket, readable::Bool, writable::Bool)
             if !readable && !writable
                 throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher"))
@@ -254,30 +259,39 @@ mutable struct _FDWatcher
     end
 end
 
-function iswaiting(fwd::_FDWatcher, t::Task)
-    return fwd.notify.waitq === t.queue
-end
-
 mutable struct FDWatcher
-    watcher::_FDWatcher
-    readable::Bool
-    writable::Bool
     # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd`
+    watcher::_FDWatcher  # underlying watcher, constructed from `fd` and `mask`
+    mask::FDEvent  # events this watcher waits for; `close` replaces it with `FDEvent()`
     function FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
-        this = new(_FDWatcher(fd, readable, writable), readable, writable)
+        return FDWatcher(fd, FDEvent(readable, writable, false, false))  # forward to the mask-based constructor
+    end
+    function FDWatcher(fd::RawFD, mask::FDEvent)
+        this = new(_FDWatcher(fd, mask), mask)
         finalizer(close, this)
         return this
     end
     @static if Sys.iswindows()
         function FDWatcher(fd::WindowsRawSocket, readable::Bool, writable::Bool)
-            this = new(_FDWatcher(fd, readable, writable), readable, writable)
+            return FDWatcher(fd, FDEvent(readable, writable, false, false))  # forward to the mask-based constructor
+        end
+        function FDWatcher(fd::WindowsRawSocket, mask::FDEvent)
+            this = new(_FDWatcher(fd, mask), mask)
             finalizer(close, this)
             return this
         end
     end
 end
 
+function getproperty(fdw::FDWatcher, s::Symbol)
+    # `readable`/`writable` were struct fields before `mask` replaced them; keep `fdw.readable`/`fdw.writable` working
+    s === :readable && return fdw.mask.readable
+    s === :writable && return fdw.mask.writable
+    return getfield(fdw, s)  # every other name is a real field
+end
+
 
+close(t::_FDWatcher, mask::FDEvent) = close(t, mask.readable, mask.writable)
 function close(t::_FDWatcher, readable::Bool, writable::Bool)
     iolock_begin()
     if t.refcount != (0, 0)
@@ -285,15 +299,17 @@ function close(t::_FDWatcher, readable::Bool, writable::Bool)
     end
     if t.refcount == (0, 0)
         uvfinalize(t)
+    else
+        @lock t.notify notify(t.notify, Int32(0))
     end
     iolock_end()
     nothing
 end
 
 function close(t::FDWatcher)
-    r, w = t.readable, t.writable
-    t.readable = t.writable = false
-    close(t.watcher, r, w)
+    mask = t.mask  # remember what was registered before clearing it
+    t.mask = FDEvent()  # cleared before closing, so a repeated close passes this empty mask instead
+    close(t.watcher, mask)  # forwards to close(::_FDWatcher, readable, writable)
 end
 
 function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
@@ -339,12 +355,17 @@ function _uv_hook_close(uv::FileMonitor)
 end
 
 function _uv_hook_close(uv::FolderMonitor)
-    uv.open = false
     uv.handle = C_NULL
     close(uv.notify)
     nothing
 end
 
+isopen(fm::FileMonitor) = fm.handle != C_NULL
+isopen(fm::FolderMonitor) = fm.handle != C_NULL  # _uv_hook_close(::FolderMonitor) resets handle to C_NULL
+isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL
+isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0)  # (0, 0) is the state in which close(::_FDWatcher, ...) calls uvfinalize
+isopen(pfw::FDWatcher) = !pfw.mask.timedout  # close(::FDWatcher) resets mask to FDEvent(); presumably that reports timedout — confirm
+
 function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
     t = @handle_as handle FileMonitor
     lock(t.notify)
@@ -388,7 +409,7 @@ function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
                     ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
                 end
             end
-            notify(t.notify, FDEvent(events))
+            notify(t.notify, events)
         end
     finally
         unlock(t.notify)
@@ -499,67 +520,78 @@ function stop_watching(t::FileMonitor)
     nothing
 end
 
-function wait(fdw::FDWatcher)
-    GC.@preserve fdw begin
-        return wait(fdw.watcher, readable = fdw.readable, writable = fdw.writable)
-    end
-end
-
-function wait(fdw::_FDWatcher; readable=true, writable=true)
-    events = FDEvent(Int32(0))
+# n.b. this _wait may return spuriously early with a timedout event
+function _wait(fdw::_FDWatcher, mask::FDEvent)
     iolock_begin()
     preserve_handle(fdw)
     lock(fdw.notify)
     try
-        while true
-            haveevent = false
-            events |= FDEvent(fdw.events)
-            if readable && isreadable(events)
-                fdw.events &= ~UV_READABLE
-                haveevent = true
-            end
-            if writable && iswritable(events)
-                fdw.events &= ~UV_WRITABLE
-                haveevent = true
-            end
-            if haveevent
-                break
-            end
-            if fdw.refcount == (0, 0) # !open
-                throw(EOFError())
-            else
-                start_watching(fdw) # make sure the poll is active
-                iolock_end()
-                events = wait(fdw.notify)::FDEvent
-                unlock(fdw.notify)
-                iolock_begin()
-                lock(fdw.notify)
-            end
+        events = FDEvent(fdw.events & mask.events)  # already-recorded events, restricted to the requested mask
+        if !isopen(fdw) # !open
+            throw(EOFError())  # NOTE(review): this path leaves without calling iolock_end() — confirm that is intended
+        elseif events.timedout  # presumably: none of the requested events is recorded yet — confirm
+            start_watching(fdw) # make sure the poll is active
+            iolock_end()
+            return FDEvent(wait(fdw.notify)::Int32)  # raw value handed to notify(); the close paths notify with Int32(0)
+        else
+            iolock_end()
+            return events  # something requested was already pending; callers clear the bits they consume
         end
     finally
         unlock(fdw.notify)
         unpreserve_handle(fdw)
     end
-    iolock_end()
-    return events
 end
 
-function wait(fd::RawFD; readable=false, writable=false)
-    fdw = _FDWatcher(fd, readable, writable)
+function wait(fdw::_FDWatcher; readable=true, writable=true)
+    return wait(fdw, FDEvent(readable, writable, false, false))  # keyword form forwards to the mask form
+end
+function wait(fdw::_FDWatcher, mask::FDEvent)
+    while true
+        mask.timedout && return mask  # presumably an empty mask, i.e. nothing to wait for — confirm
+        events = _wait(fdw, mask)  # may come back as a timedout event; if so, loop and wait again
+        if !events.timedout
+            @lock fdw.notify fdw.events &= ~events.events  # clear the bits being returned from the recorded events
+            return events
+        end
+    end
+end
+
+function wait(fdw::FDWatcher)
+    isopen(fdw) || throw(EOFError())  # already closed before waiting
+    while true
+        events = GC.@preserve fdw _wait(fdw.watcher, fdw.mask)  # keep `fdw` rooted while blocked on its inner watcher
+        isopen(fdw) || throw(EOFError())  # closed while we were waiting
+        if !events.timedout
+            @lock fdw.watcher.notify fdw.watcher.events &= ~events.events  # clear the bits being returned from the recorded events
+            return events
+        end
+    end
+end
+
+function wait(socket::RawFD; readable=false, writable=false)
+    return wait(socket, FDEvent(readable, writable, false, false))  # keyword form forwards to the mask form
+end
+function wait(fd::RawFD, mask::FDEvent)
+    fdw = _FDWatcher(fd, mask)  # temporary watcher, closed again in the finally block
     try
-        return wait(fdw, readable=readable, writable=writable)
+        return wait(fdw, mask)
     finally
-        close(fdw, readable, writable)
+        close(fdw, mask)  # release with the same mask used to create it
     end
 end
 
+
 if Sys.iswindows()
     function wait(socket::WindowsRawSocket; readable=false, writable=false)
-        fdw = _FDWatcher(socket, readable, writable)
+        return wait(socket, FDEvent(readable, writable, false, false))  # keyword form forwards to the mask form
+    end
+    function wait(socket::WindowsRawSocket, mask::FDEvent)
+        fdw = _FDWatcher(socket, mask)  # temporary watcher, closed again in the finally block
         try
-            return wait(fdw, readable=readable, writable=writable)
+            return wait(fdw, mask)
         finally
-            close(fdw, readable, writable)
+            close(fdw, mask)  # release with the same mask used to create it
         end
     end
 end
@@ -651,38 +683,44 @@ The returned value is an object with boolean fields `readable`, `writable`, and
 giving the result of the polling.
 """
 function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false)
-    wt = Condition()
-    fdw = _FDWatcher(s, readable, writable)
+    mask = FDEvent(readable, writable, false, false)
+    mask.timedout && return mask  # presumably: nothing was requested, so return without creating a watcher — confirm
+    fdw = _FDWatcher(s, mask)
     local timer
+    timedout = false # TODO: make this atomic
     try
         if timeout_s >= 0
-            result::FDEvent = FDEvent()
-            t = @async begin
-                timer = Timer(timeout_s) do t
-                    notify(wt)
-                end
-                try
-                    result = wait(fdw, readable=readable, writable=writable)
-                catch e
-                    notify_error(wt, e)
-                    return
+            # delay creating the timer until shortly before we start the poll wait
+            timer = Timer(timeout_s) do t
+                timedout && return  # the finally block below already did the cleanup
+                timedout = true
+                close(fdw, mask)  # wakes the waiter below: the close paths notify with Int32(0)
+            end
+            try
+                while true
+                    events = _wait(fdw, mask)
+                    if timedout || !events.timedout  # a real event, or whatever we got once the timer has fired
+                        @lock fdw.notify fdw.events &= ~events.events  # clear the bits being returned from the recorded events
+                        return events
+                    end
                 end
-                notify(wt)
+            catch ex  # _wait throws EOFError once the watcher is no longer open
+                ex isa EOFError || rethrow()  # only a closed watcher counts as a timeout; anything else propagates
+                return FDEvent()
             end
-            wait(wt)
-            # It's possible that both the timer and the poll fired on the same
-            # libuv loop. In that case, which event we see here first depends
-            # on task schedule order. If we can see that the task isn't waiting
-            # on the file watcher anymore, just let it finish so we can see
-            # the modification to `result`
-            iswaiting(fdw, t) || wait(t)
-            return result
         else
-            return wait(fdw, readable=readable, writable=writable)
+            return wait(fdw, mask)  # negative timeout: wait without a timer
         end
     finally
-        close(fdw, readable, writable)
-        @isdefined(timer) && close(timer)
+        if @isdefined(timer)
+            if !timedout  # timer has not fired yet, so cleanup happens here
+                timedout = true
+                close(timer)
+                close(fdw, mask)
+            end
+        else
+            close(fdw, mask)  # no timer was created; just release the watcher
+        end
     end
 end
 
diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl
index 345ffce07482f..f302f28295a01 100644
--- a/stdlib/FileWatching/test/runtests.jl
+++ b/stdlib/FileWatching/test/runtests.jl
@@ -22,7 +22,7 @@ for i in 1:n
         uv_error("pipe", ccall(:uv_pipe, Cint, (Ptr{NTuple{2, Base.OS_HANDLE}}, Cint, Cint), Ref(pipe_fds, i), 0, 0))
     end
     Ctype = Sys.iswindows() ? Ptr{Cvoid} : Cint
-    FDmax = Sys.iswindows() ? 0x7fff : (n + 60) # expectations on reasonable values
+    FDmax = Sys.iswindows() ? 0x7fff : (n + 60 + (isdefined(Main, :Revise) * 30)) # expectations on reasonable values
     fd_in_limits =
         0 <= Int(Base.cconvert(Ctype, pipe_fds[i][1])) <= FDmax &&
         0 <= Int(Base.cconvert(Ctype, pipe_fds[i][2])) <= FDmax
diff --git a/stdlib/GMP_jll/Project.toml b/stdlib/GMP_jll/Project.toml
index a4d989a410199..0fc262e562da7 100644
--- a/stdlib/GMP_jll/Project.toml
+++ b/stdlib/GMP_jll/Project.toml
@@ -1,10 +1,10 @@
 name = "GMP_jll"
 uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d"
-version = "6.2.1+0"
+version = "6.2.1+1"
 
 [deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
 [compat]
 julia = "1.6"
diff --git a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md
index 71499744ecb1d..9ad4b5a7cea80 100644
--- a/stdlib/InteractiveUtils/docs/src/index.md
+++ b/stdlib/InteractiveUtils/docs/src/index.md
@@ -26,5 +26,6 @@ InteractiveUtils.code_llvm
 InteractiveUtils.@code_llvm
 InteractiveUtils.code_native
 InteractiveUtils.@code_native
+InteractiveUtils.@time_imports
 InteractiveUtils.clipboard
 ```
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 6f8ba9ea0b080..51a5dcd69cd56 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -6,7 +6,7 @@ Base.Experimental.@optlevel 1
 
 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo,
     versioninfo, subtypes, supertypes, @which, @edit, @less, @functionloc, @code_warntype,
-    @code_typed, @code_lowered, @code_llvm, @code_native, clipboard
+    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard
 
 import Base.Docs.apropos
 
@@ -21,7 +21,7 @@ include("macros.jl")
 include("clipboard.jl")
 
 """
-    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name)
+    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, minsize::Int = 0)
 
 Return a markdown table giving information about exported global variables in a module, optionally restricted
 to those matching `pattern`.
@@ -32,42 +32,45 @@ The memory consumption estimate is an approximate lower bound on the size of the
 - `imported` : also list objects explicitly imported from other modules.
 - `recursive` : recursively include objects in sub-modules, observing the same settings in each.
 - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`.
+- `minsize` : only includes objects with size at least `minsize` bytes. Defaults to `0`.
 """
-function varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false)
-    @assert sortby in [:name, :size, :summary] "Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"
-    function _populate_rows(m2::Module, allrows, include_self::Bool, prep::String)
-        newrows = Any[
-            let
-                value = getfield(m2, v)
-                ssize_str, ssize = if value===Base || value===Main || value===Core
+function varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0)
+    sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"))
+    rows = Vector{Any}[]
+    workqueue = [(m, ""),]
+    while !isempty(workqueue)
+        m2, prep = popfirst!(workqueue)
+        for v in names(m2; all, imported)
+            if !isdefined(m2, v) || !occursin(pattern, string(v))
+                continue
+            end
+            value = getfield(m2, v)
+            isbuiltin = value === Base || value === Main || value === Core
+            if recursive && !isbuiltin && isa(value, Module) && value !== m2 && nameof(value) === v && parentmodule(value) === m2
+                push!(workqueue, (value, "$prep$v."))
+            end
+            ssize_str, ssize = if isbuiltin
                     ("", typemax(Int))
                 else
                     ss = summarysize(value)
                     (format_bytes(ss), ss)
                 end
-                Any[string(prep, v), ssize_str, summary(value), ssize]
-            end
-            for v in names(m2; all, imported)
-            if (string(v) != split(string(m2), ".")[end] || include_self) && isdefined(m2, v) && occursin(pattern, string(v)) ]
-        append!(allrows, newrows)
-        if recursive
-            for row in newrows
-                if row[3] == "Module" && !in(split(row[1], ".")[end], [split(string(m2), ".")[end], "Base", "Main", "Core"])
-                    _populate_rows(getfield(m2, Symbol(split(row[1], ".")[end])), allrows, false, prep * "$(row[1]).")
-                end
+            if ssize >= minsize
+                push!(rows, Any[string(prep, v), ssize_str, summary(value), ssize])
             end
         end
-        return allrows
     end
-    rows = _populate_rows(m, Vector{Any}[], true, "")
-    if sortby == :name
-        col, reverse = 1, false
-    elseif sortby == :size
-        col, reverse = 4, true
-    elseif sortby == :summary
-        col, reverse = 3, false
+    let (col, rev) = if sortby == :name
+            1, false
+        elseif sortby == :size
+            4, true
+        elseif sortby == :summary
+            3, false
+        else
+            @assert "unreachable"
+        end
+        sort!(rows; by=r->r[col], rev)
     end
-    rows = sort!(rows, by=r->r[col], rev=reverse)
     pushfirst!(rows, Any["name", "size", "summary"])
 
     return Markdown.MD(Any[Markdown.Table(map(r->r[1:3], rows), Symbol[:l, :r, :l])])
@@ -208,54 +211,35 @@ function methodswith(t::Type; supertypes::Bool=false)
 end
 
 # subtypes
-function _subtypes(m::Module, x::Type, sts=Base.IdSet{Any}(), visited=Base.IdSet{Module}())
-    push!(visited, m)
+function _subtypes_in!(mods::Array, x::Type)
     xt = unwrap_unionall(x)
-    if !isa(xt, DataType)
-        return sts
+    if !isabstracttype(x) || !isa(xt, DataType)
+        # Fast path
+        return Type[]
     end
-    xt = xt::DataType
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if isa(t, DataType)
-                t = t::DataType
-                if t.name.name === s && supertype(t).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
-                end
-            elseif isa(t, UnionAll)
-                t = t::UnionAll
-                tt = unwrap_unionall(t)
-                isa(tt, DataType) || continue
-                tt = tt::DataType
-                if tt.name.name === s && supertype(tt).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
+    sts = Vector{Any}()
+    while !isempty(mods)
+        m = pop!(mods)  # `mods` doubles as the work stack and is consumed by this loop
+        xt = xt::DataType
+        for s in names(m, all = true)
+            if isdefined(m, s) && !isdeprecated(m, s)
+                t = getfield(m, s)
+                dt = isa(t, UnionAll) ? unwrap_unionall(t) : t
+                if isa(dt, DataType)
+                    if dt.name.name === s && dt.name.module == m && supertype(dt).name == xt.name  # binding must carry the type's own name and module
+                        ti = typeintersect(t, x)
+                        ti != Bottom && push!(sts, ti)
+                    end
+                elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m && t !== m
+                    t === Base || push!(mods, t) # exclude Base, since it is also parented by Main
                 end
-            elseif isa(t, Module)
-                t = t::Module
-                in(t, visited) || _subtypes(t, x, sts, visited)
             end
         end
     end
-    return sts
+    return permute!(sts, sortperm(map(string, sts)))  # order the results by their string representation
 end
 
-function _subtypes_in(mods::Array, x::Type)
-    if !isabstracttype(x)
-        # Fast path
-        return Type[]
-    end
-    sts = Base.IdSet{Any}()
-    visited = Base.IdSet{Module}()
-    for m in mods
-        _subtypes(m, x, sts, visited)
-    end
-    return sort!(collect(sts), by=string)
-end
-
-subtypes(m::Module, x::Type) = _subtypes_in([m], x)
+subtypes(m::Module, x::Type) = _subtypes_in!([m], x)  # fresh one-element array: _subtypes_in! pops from its first argument
 
 """
     subtypes(T::DataType)
@@ -274,7 +258,7 @@ julia> subtypes(Integer)
  Unsigned
 ```
 """
-subtypes(x::Type) = _subtypes_in(Base.loaded_modules_array(), x)
+subtypes(x::Type) = _subtypes_in!(Base.loaded_modules_array(), x)
 
 """
     supertypes(T::Type)
@@ -298,74 +282,6 @@ function supertypes(T::Type)
     return S === T ? (T,) : (T, supertypes(S)...)
 end
 
-# dumptype is for displaying abstract type hierarchies,
-# based on Jameson Nash's typetree.jl in https://github.com/JuliaArchive/Examples
-function dumptype(io::IO, @nospecialize(x), n::Int, indent)
-    print(io, x)
-    n == 0 && return  # too deeply nested
-    isa(x, DataType) && x.name.abstract && dumpsubtypes(io, x, Main, n, indent)
-    nothing
-end
-
-directsubtype(a::DataType, b::DataType) = supertype(a).name === b.name
-directsubtype(a::UnionAll, b::DataType) = directsubtype(a.body, b)
-directsubtype(a::Union, b::DataType) = directsubtype(a.a, b) || directsubtype(a.b, b)
-# Fallback to handle TypeVar's
-directsubtype(a, b::DataType) = false
-function dumpsubtypes(io::IO, x::DataType, m::Module, n::Int, indent)
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if t === x || t === m
-                continue
-            elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m
-                # recurse into primary module bindings
-                dumpsubtypes(io, x, t, n, indent)
-            elseif isa(t, UnionAll) && directsubtype(t::UnionAll, x)
-                dt = unwrap_unionall(t)
-                println(io)
-                if isa(dt, DataType) && dt.name.wrapper === t
-                    # primary type binding
-                    print(io, indent, "  ")
-                    dumptype(io, dt, n - 1, string(indent, "  "))
-                else
-                    # aliases to types
-                    print(io, indent, "  ", m, ".", s, "{")
-                    tvar_io::IOContext = io
-                    tp = t
-                    while true
-                        show(tvar_io, tp.var)
-                        tvar_io = IOContext(tvar_io, :unionall_env => tp.var)
-                        tp = tp.body
-                        if isa(tp, UnionAll)
-                            print(io, ", ")
-                        else
-                            print(io, "} = ")
-                            break
-                        end
-                    end
-                    show(tvar_io, tp)
-                end
-            elseif isa(t, Union) && directsubtype(t::Union, x)
-                println(io)
-                print(io, indent, "  ", m, ".", s, " = ", t)
-            elseif isa(t, DataType) && directsubtype(t::DataType, x)
-                println(io)
-                if t.name.module !== m || t.name.name != s
-                    # aliases to types
-                    print(io, indent, "  ", m, ".", s, " = ")
-                    show(io, t)
-                else
-                    # primary type binding
-                    print(io, indent, "  ")
-                    dumptype(io, t, n - 1, string(indent, "  "))
-                end
-            end
-        end
-    end
-    nothing
-end
-
 # TODO: @deprecate peakflops to LinearAlgebra
 export peakflops
 """
diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl
index d6f7cd9daa24d..7c749f0af27c8 100644
--- a/stdlib/InteractiveUtils/src/codeview.jl
+++ b/stdlib/InteractiveUtils/src/codeview.jl
@@ -57,7 +57,7 @@ Keyword argument `debuginfo` may be one of `:source` or `:none` (default), to sp
 
 See [`@code_warntype`](@ref man-code-warntype) for more information.
 """
-function code_warntype(io::IO, @nospecialize(f), @nospecialize(t);
+function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt(f));
                        debuginfo::Symbol=:default, optimize::Bool=false, kwargs...)
     debuginfo = Base.IRShow.debuginfo(debuginfo)
     lineprinter = Base.IRShow.__debuginfo[debuginfo]
@@ -134,7 +134,7 @@ function code_warntype(io::IO, @nospecialize(f), @nospecialize(t);
     end
     nothing
 end
-code_warntype(@nospecialize(f), @nospecialize(t); kwargs...) =
+code_warntype(@nospecialize(f), @nospecialize(t=Base.default_tt(f)); kwargs...) =
     code_warntype(stdout, f, t; kwargs...)
 
 import Base.CodegenParams
@@ -143,7 +143,7 @@ import Base.CodegenParams
 function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool,
                         strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol,
                         optimize::Bool, debuginfo::Symbol, binary::Bool,
-                        params::CodegenParams=CodegenParams())
+                        params::CodegenParams=CodegenParams(debug_info_kind=Cint(0)))
     ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
     if isa(f, Core.Builtin)
         throw(ArgumentError("argument is not a generic function"))
@@ -153,8 +153,20 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
     match = Base._which(signature_type(f, t), world)
     linfo = Core.Compiler.specialize_method(match)
     # get the code for it
+    if debuginfo === :default
+        debuginfo = :source
+    elseif debuginfo !== :source && debuginfo !== :none
+        throw(ArgumentError("'debuginfo' must be either :source or :none"))
+    end
     if native
-        str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary)
+        if syntax !== :att && syntax !== :intel
+            throw(ArgumentError("'syntax' must be either :intel or :att"))
+        end
+        if dump_module
+            str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary, params)
+        else
+            str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary)
+        end
     else
         str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params)
     end
@@ -164,17 +176,18 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
 end
 
 function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool)
-    if syntax !== :att && syntax !== :intel
-        throw(ArgumentError("'syntax' must be either :intel or :att"))
-    end
-    if debuginfo === :default
-        debuginfo = :source
-    elseif debuginfo !== :source && debuginfo !== :none
-        throw(ArgumentError("'debuginfo' must be either :source or :none"))
-    end
     str = ccall(:jl_dump_method_asm, Ref{String},
-                (Any, UInt, Cint, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
-                linfo, world, 0, wrapper, syntax, debuginfo, binary)
+                (Any, UInt, Bool, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
+                linfo, world, false, wrapper, syntax, debuginfo, binary)  # syntax/debuginfo are validated by _dump_function before this call
+    return str
+end
+
+function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, params::CodegenParams)
+    llvmf = ccall(:jl_get_llvmf_defn, Ptr{Cvoid}, (Any, UInt, Bool, Bool, CodegenParams), linfo, world, wrapper, true, params)  # `true` fills the slot where _dump_function_linfo_llvm passes `optimize`
+    llvmf == C_NULL && error("could not compile the specified method")
+    str = ccall(:jl_dump_function_asm, Ref{String},
+                (Ptr{Cvoid}, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
+                llvmf, false, syntax, debuginfo, binary)
     return str
 end
 
@@ -183,11 +196,6 @@ function _dump_function_linfo_llvm(
         strip_ir_metadata::Bool, dump_module::Bool,
         optimize::Bool, debuginfo::Symbol,
         params::CodegenParams)
-    if debuginfo === :default
-        debuginfo = :source
-    elseif debuginfo !== :source && debuginfo !== :none
-        throw(ArgumentError("'debuginfo' must be either :source or :none"))
-    end
     llvmf = ccall(:jl_get_llvmf_defn, Ptr{Cvoid}, (Any, UInt, Bool, Bool, CodegenParams), linfo, world, wrapper, optimize, params)
     llvmf == C_NULL && error("could not compile the specified method")
     str = ccall(:jl_dump_function_ir, Ref{String},
@@ -216,14 +224,13 @@ function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool,
         print(io, d)
     end
 end
-code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Tuple); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
+code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
     code_llvm(io, f, types, raw, dump_module, optimize, debuginfo)
-code_llvm(@nospecialize(f), @nospecialize(types=Tuple); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
-    code_llvm(stdout, f, types; raw=raw, dump_module=dump_module, optimize=optimize, debuginfo=debuginfo)
-
+code_llvm(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
+    code_llvm(stdout, f, types; raw, dump_module, optimize, debuginfo)  # keyword shorthand: `raw` is `raw=raw`, etc.
 
 """
-    code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default, binary=false)
+    code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default, binary=false, dump_module=true)
 
 Prints the native assembly instructions generated for running the method matching the given
 generic function and type signature to `io`.
@@ -231,18 +238,18 @@ Switch assembly syntax using `syntax` symbol parameter set to `:att` for AT&T sy
 Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments.
 If `binary` is `true`, it also prints the binary machine code for each instruction precedented by an abbreviated address.
 """
-function code_native(io::IO, @nospecialize(f), @nospecialize(types=Tuple);
-                     syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false)
-    d = _dump_function(f, types, true, false, false, false, syntax, true, debuginfo, binary)
+function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f));
+                     dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false)
+    d = _dump_function(f, types, true, false, false, dump_module, syntax, true, debuginfo, binary)  # positional flags: native=true, wrapper=false, strip_ir_metadata=false, dump_module, syntax, optimize=true
     if highlighting[:native] && get(io, :color, false)
         print_native(io, d)
     else
         print(io, d)
     end
 end
-code_native(@nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) =
-    code_native(stdout, f, types; syntax=syntax, debuginfo=debuginfo, binary=binary)
-code_native(::IO, ::Any, ::Symbol) = error("illegal code_native call") # resolve ambiguous call
+code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) =
+    code_native(stdout, f, types; dump_module, syntax, debuginfo, binary)  # keyword shorthand: `syntax` is `syntax=syntax`, etc.
+code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call
 
 ## colorized IR and assembly printing
 
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index 98c47f02f2707..cb7dbde144906 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -232,6 +232,17 @@ macro code_lowered(ex0...)
     end
 end
 
+macro time_imports(ex)
+    quote
+        try
+            Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1)  # counter stays raised while `ex` runs
+            $(esc(ex))  # the user's expression, escaped so it is evaluated in the caller's scope
+        finally
+            Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1)  # always undone, even if `ex` throws
+        end
+    end
+end
+
 """
     @functionloc
 
@@ -332,3 +343,36 @@ Set the optional keyword argument `debuginfo` by putting it before the function
 `debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments.
 """
 :@code_native
+
+"""
+    @time_imports
+
+A macro to execute an expression and produce a report of any time spent importing packages and their
+dependencies.
+
+If a package's dependencies have already been imported, either globally or by another dependency, they will
+not appear under that package, and the package will accurately report a faster load time than if it were to
+be loaded in isolation.
+
+```julia-repl
+julia> @time_imports using CSV
+      3.5 ms    ┌ IteratorInterfaceExtensions
+     27.4 ms  ┌ TableTraits
+    614.0 ms  ┌ SentinelArrays
+    138.6 ms  ┌ Parsers
+      2.7 ms  ┌ DataValueInterfaces
+      3.4 ms    ┌ DataAPI
+     59.0 ms  ┌ WeakRefStrings
+     35.4 ms  ┌ Tables
+     49.5 ms  ┌ PooledArrays
+    972.1 ms  CSV
+```
+
+!!! note
+    During the load process, a package sequentially imports all of its dependencies as necessary, not just
+    its direct dependencies. The same is true of the dependencies themselves, so nested importing will likely
+    occur, but not always. Therefore, the nesting shown in this output report is not equivalent to the dependency
+    tree, but it does indicate where import time has accumulated.
+
+"""
+:@time_imports
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index f9e3a4ce71e9f..8372fb16d3a13 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -199,6 +199,10 @@ end
 let v = repr(varinfo(_test_varinfo_, all = true, recursive = true))
     @test occursin("inner_x", v)
 end
+let v = repr(varinfo(_test_varinfo_, all = true, minsize = 9))
+    @test !occursin("x_exported", v) # excluded: 8 bytes
+    @test occursin("a_smaller", v)
+end
 
 # Issue 14173
 module Tmp14173
@@ -331,7 +335,7 @@ let err = tempname(),
         redirect_stderr(new_stderr)
         println(new_stderr, "start")
         flush(new_stderr)
-        @eval @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
+        @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
         Libc.flush_cstdio()
         println(new_stderr, "end")
         flush(new_stderr)
@@ -341,10 +345,11 @@ let err = tempname(),
         close(new_stderr)
         let errstr = read(err, String)
             @test startswith(errstr, """start
+                end
                 Internal error: encountered unexpected error during compilation of f_broken_code:
                 ErrorException(\"unsupported or misplaced expression \"invalid\" in function f_broken_code\")
                 """) || errstr
-            @test endswith(errstr, "\nend\n") || errstr
+            @test !endswith(errstr, "\nend\n") || errstr
         end
         rm(err)
     end
@@ -444,12 +449,12 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))
     buf = IOBuffer()
     #test that the string output is at&t syntax by checking for occurrences of '%'s
     code_native(buf, linear_foo, (), syntax = :att, debuginfo = :none)
-    output = String(take!(buf))
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test occursin(rgx, output)
 
     #test that the code output is intel syntax by checking it has no occurrences of '%'
     code_native(buf, linear_foo, (), syntax = :intel, debuginfo = :none)
-    output = String(take!(buf))
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test !occursin(rgx, output)
 
     code_native(buf, linear_foo, ())
@@ -461,13 +466,13 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))
         ret = r"^; [0-9a-f]{4}: c3$"m
 
         # without binary flag (default)
-        code_native(buf, linear_foo, ())
+        code_native(buf, linear_foo, (), dump_module=false)
         output = String(take!(buf))
         @test !occursin(ret, output)
 
         # with binary flag
         for binary in false:true
-            code_native(buf, linear_foo, (), binary = binary)
+            code_native(buf, linear_foo, (); binary, dump_module=false)
             output = String(take!(buf))
             @test occursin(ret, output) == binary
         end
@@ -563,6 +568,31 @@ file, ln = functionloc(versioninfo, Tuple{})
 @test isfile(pathof(InteractiveUtils))
 @test isdir(pkgdir(InteractiveUtils))
 
+@testset "buildbot path updating" begin
+    file, ln = functionloc(versioninfo, Tuple{})
+    @test isfile(file)
+
+    e = try versioninfo("wat")
+    catch e
+        e
+    end
+    @test e isa MethodError
+    m = @which versioninfo()
+    s = sprint(showerror, e)
+    m = match(Regex("at (.*?):$(m.line)"), s)
+    @test isfile(expanduser(m.captures[1]))
+
+    g() = x
+    e, bt = try code_llvm(g, Tuple{Int})
+    catch e
+        e, catch_backtrace()
+    end
+    @test e isa Exception
+    s = sprint(showerror, e, bt)
+    m = match(r"(\S*InteractiveUtils[\/\\]src\S*):", s)
+    @test isfile(expanduser(m.captures[1]))
+end
+
 @testset "Issue #34434" begin
     io = IOBuffer()
     code_native(io, eltype, Tuple{Int})
@@ -586,3 +616,57 @@ let
     opt = false
     @test !(first(@code_typed optimize=opt sum(1:10)).inferred)
 end
+
+@testset "@time_imports" begin
+    mktempdir() do dir
+        cd(dir) do
+            try
+                pushfirst!(LOAD_PATH, dir)
+                foo_file = joinpath(dir, "Foo3242.jl")
+                write(foo_file,
+                    """
+                    module Foo3242
+                    foo() = 1
+                    end
+                    """)
+
+                Base.compilecache(Base.PkgId("Foo3242"))
+
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stdout(f) do
+                    @eval @time_imports using Foo3242
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("ms  Foo3242\n", String(buf))
+
+            finally
+                filter!((≠)(dir), LOAD_PATH)
+            end
+        end
+    end
+end
+
+let # `default_tt` should work with any function with one method
+    @test (code_warntype(devnull, function ()
+        sin(42)
+    end); true)
+    @test (code_warntype(devnull, function (a::Int)
+        sin(a)
+    end); true)
+    @test (code_llvm(devnull, function ()
+        sin(42)
+    end); true)
+    @test (code_llvm(devnull, function (a::Int)
+        sin(a)
+    end); true)
+    @test (code_native(devnull, function ()
+        sin(42)
+    end); true)
+    @test (code_native(devnull, function (a::Int)
+        sin(a)
+    end); true)
+end
diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml
index a079140667184..36c24111d4d31 100644
--- a/stdlib/LLVMLibUnwind_jll/Project.toml
+++ b/stdlib/LLVMLibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLVMLibUnwind_jll"
 uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
-version = "11.0.1+1"
+version = "12.0.1+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LibCURL.version b/stdlib/LibCURL.version
index 92a70ccc9ee18..2281042bda4a4 100644
--- a/stdlib/LibCURL.version
+++ b/stdlib/LibCURL.version
@@ -1,2 +1,4 @@
 LIBCURL_BRANCH = master
-LIBCURL_SHA1 = cddeb7f4a7d5718a4a1be602ffcbe68299a1a37e
+LIBCURL_SHA1 = 04c450c17024d5b49cb30013f1409306efd35203
+LIBCURL_GIT_URL := https://github.com/JuliaWeb/LibCURL.jl.git
+LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1
diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl
index 8151217b3950b..51b52ef006c38 100644
--- a/stdlib/LibGit2/src/rebase.jl
+++ b/stdlib/LibGit2/src/rebase.jl
@@ -83,7 +83,7 @@ function commit(rb::GitRebase, sig::GitSignature)
                       oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL)
     catch err
         # TODO: return current HEAD instead
-        err.code === Error.EAPPLIED && return nothing
+        err isa GitError && err.code === Error.EAPPLIED && return nothing
         rethrow()
     end
     return oid_ptr[]
diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl
index 9ffcaa3646127..b68dbb7c0bf02 100644
--- a/stdlib/LibGit2/src/types.jl
+++ b/stdlib/LibGit2/src/types.jl
@@ -248,7 +248,7 @@ distinct payload. Each callback, when called, will receive `Dict` which will hol
 callback's custom payload which can be accessed using the callback name.
 
 # Examples
-```julia
+```julia-repl
 julia> c = LibGit2.Callbacks(:credentials => (LibGit2.credentials_cb(), LibGit2.CredentialPayload()));
 
 julia> LibGit2.clone(url, callbacks=c);
diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml
index 241a2a16edb61..ec084417b7744 100644
--- a/stdlib/LibUV_jll/Project.toml
+++ b/stdlib/LibUV_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUV_jll"
 uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
-version = "2.0.1+2"
+version = "2.0.1+5"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
index baafe762cea3f..38c48bfe6d8d2 100644
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ b/stdlib/LinearAlgebra/docs/src/index.md
@@ -414,7 +414,7 @@ Base.cis(::AbstractMatrix)
 Base.:^(::AbstractMatrix, ::Number)
 Base.:^(::Number, ::AbstractMatrix)
 LinearAlgebra.log(::StridedMatrix)
-LinearAlgebra.sqrt(::StridedMatrix{<:Real})
+LinearAlgebra.sqrt(::StridedMatrix)
 LinearAlgebra.cos(::StridedMatrix{<:Real})
 LinearAlgebra.sin(::StridedMatrix{<:Real})
 LinearAlgebra.sincos(::StridedMatrix{<:Real})
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index f0f13776146d1..ae8b9d461ffc2 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -9,12 +9,12 @@ module LinearAlgebra
 
 import Base: \, /, *, ^, +, -, ==
 import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech,
-    asin, asinh, atan, atanh, axes, big, broadcast, ceil, conj, convert, copy, copyto!, cos,
+    asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, conj, convert, copy, copyto!, cos,
     cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat,
     getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, kron, kron!, length, log, map, ndims,
-    oneunit, parent, power_by_squaring, print_matrix, promote_rule, real, round, sec, sech,
+    one, oneunit, parent, power_by_squaring, print_matrix, promote_rule, real, round, sec, sech,
     setindex!, show, similar, sin, sincos, sinh, size, sqrt,
-    strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec
+    strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec, zero
 using Base: IndexLinear, promote_eltype, promote_op, promote_typeof,
     @propagate_inbounds, @pure, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
     splat
@@ -262,10 +262,8 @@ function sym_uplo(uplo::Char)
     end
 end
 
-
 @noinline throw_uplo() = throw(ArgumentError("uplo argument must be either :U (upper) or :L (lower)"))
 
-
 """
     ldiv!(Y, A, B) -> Y
 
@@ -291,14 +289,14 @@ julia> ldiv!(Y, qr(A), X);
 julia> Y
 3-element Vector{Float64}:
   0.7128099173553719
- -0.051652892561983806
-  0.10020661157024781
+ -0.051652892561983674
+  0.10020661157024757
 
 julia> A\\X
 3-element Vector{Float64}:
   0.7128099173553719
- -0.05165289256198342
-  0.1002066115702479
+ -0.05165289256198333
+  0.10020661157024785
 ```
 """
 ldiv!(Y, A, B)
@@ -328,14 +326,14 @@ julia> ldiv!(qr(A), X);
 julia> X
 3-element Vector{Float64}:
   0.7128099173553719
- -0.051652892561983806
-  0.10020661157024781
+ -0.051652892561983674
+  0.10020661157024757
 
 julia> A\\Y
 3-element Vector{Float64}:
   0.7128099173553719
- -0.05165289256198342
-  0.1002066115702479
+ -0.05165289256198333
+  0.10020661157024785
 ```
 """
 ldiv!(A, B)
@@ -398,7 +396,7 @@ The resulting array is mutable. It can be used, for example, to pass the data of
 `A` to an efficient in-place method for a matrix factorization such as `lu!`, in
 cases where a more specific implementation of `lu!` (or `lu`) is not available.
 
-See also: `copy_oftype`, `copy_similar`
+See also: `copy_oftype`, `copy_similar`.
 """
 copy_to_array(A::AbstractArray, ::Type{T}) where {T} = copyto!(Array{T}(undef, size(A)...), A)
 
@@ -454,6 +452,12 @@ export ⋅, ×
 _cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
 _cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X
 
+# SymTridiagonal ev can be the same length as dv, but the last element is
+# ignored. However, some methods can fail if they read the entire ev
+# rather than just the meaningful elements. This is a helper function
+# for getting only the meaningful elements of ev. See #41089
+_evview(S::SymTridiagonal) = @view S.ev[begin:length(S.dv) - 1]
+
 ## append right hand side with zeros if necessary
 _zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
 _zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
@@ -586,9 +590,6 @@ function __init__()
             BLAS.lbt_forward(liblapack_path)
         end
         BLAS.check()
-        Threads.resize_nthreads!(Abuf)
-        Threads.resize_nthreads!(Bbuf)
-        Threads.resize_nthreads!(Cbuf)
     catch ex
         Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra")
     end
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
index 29f91e6edaed4..f5903f380ee53 100644
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Base: @propagate_inbounds, @_inline_meta
+using Base: @propagate_inbounds
 import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, convert, similar
 
 ### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar)
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
index 1a75c6a9287f0..57f9c3fefe647 100644
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ b/stdlib/LinearAlgebra/src/bidiag.jl
@@ -10,6 +10,7 @@ struct Bidiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
         if length(ev) != max(length(dv)-1, 0)
             throw(DimensionMismatch("length of diagonal vector is $(length(dv)), length of off-diagonal vector is $(length(ev))"))
         end
+        (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,V}(dv, ev, uplo)
     end
 end
@@ -62,7 +63,7 @@ julia> Bl = Bidiagonal(dv, ev, :L) # ev is on the first subdiagonal
 ```
 """
 function Bidiagonal(dv::V, ev::V, uplo::Symbol) where {T,V<:AbstractVector{T}}
-    Bidiagonal{T,V}(dv, ev, char_uplo(uplo))
+    Bidiagonal{T,V}(dv, ev, uplo)
 end
 function Bidiagonal(dv::V, ev::V, uplo::AbstractChar) where {T,V<:AbstractVector{T}}
     Bidiagonal{T,V}(dv, ev, uplo)
@@ -109,26 +110,37 @@ function Bidiagonal(A::AbstractMatrix, uplo::Symbol)
     Bidiagonal(diag(A, 0), diag(A, uplo === :U ? 1 : -1), uplo)
 end
 
+
 Bidiagonal(A::Bidiagonal) = A
 Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A
 Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo)
 
-function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T
-    if !((1 <= i <= size(A,2)) && (1 <= j <= size(A,2)))
-        throw(BoundsError(A,(i,j)))
+bidiagzero(::Bidiagonal{T}, i, j) where {T} = zero(T)
+function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j)
+    Tel = eltype(eltype(A.dv))
+    if i < j && A.uplo == 'U' #= top right zeros =#
+        return zeros(Tel, size(A.ev[i], 1), size(A.ev[j-1], 2))
+    elseif j < i && A.uplo == 'L' #= bottom left zeros =#
+        return zeros(Tel, size(A.ev[i-1], 1), size(A.ev[j], 2))
+    else
+        return zeros(Tel, size(A.dv[i], 1), size(A.dv[j], 2))
     end
+end
+
+@inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T
+    @boundscheck checkbounds(A, i, j)
     if i == j
-        return A.dv[i]
+        return @inbounds A.dv[i]
     elseif A.uplo == 'U' && (i == j - 1)
-        return A.ev[i]
+        return @inbounds A.ev[i]
     elseif A.uplo == 'L' && (i == j + 1)
-        return A.ev[j]
+        return @inbounds A.ev[j]
     else
-        return zero(T)
+        return bidiagzero(A, i, j)
     end
 end
 
-function setindex!(A::Bidiagonal, x, i::Integer, j::Integer)
+@inline function setindex!(A::Bidiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
@@ -298,45 +310,45 @@ function istril(M::Bidiagonal, k::Integer=0)
 end
 isdiag(M::Bidiagonal) = iszero(M.ev)
 
-function tril!(M::Bidiagonal, k::Integer=0)
+function tril!(M::Bidiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif M.uplo == 'U' && k < 0
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif k < -1
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'U' && k == 0
-        fill!(M.ev,0)
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'L' && k == -1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
     end
     return M
 end
 
-function triu!(M::Bidiagonal, k::Integer=0)
+function triu!(M::Bidiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif M.uplo == 'L' && k > 0
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif k > 1
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'L' && k == 0
-        fill!(M.ev,0)
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'U' && k == 1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
     end
     return M
 end
 
-function diag(M::Bidiagonal, n::Integer=0)
+function diag(M::Bidiagonal{T}, n::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     if n == 0
@@ -344,7 +356,7 @@ function diag(M::Bidiagonal, n::Integer=0)
     elseif (n == 1 && M.uplo == 'U') ||  (n == -1 && M.uplo == 'L')
         return copyto!(similar(M.ev, length(M.ev)), M.ev)
     elseif -size(M,1) <= n <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-abs(n)), 0)
+        return fill!(similar(M.dv, size(M,1)-abs(n)), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
             "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
@@ -735,7 +747,7 @@ function ldiv!(A::Bidiagonal, b::AbstractVector)
     end
 
     if N == 0
-        return x
+        return b
     end
 
     @inbounds begin
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 327beb020901b..19edc52cfff17 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -6,8 +6,8 @@ Interface to BLAS subroutines.
 module BLAS
 
 import ..axpy!, ..axpby!
-import Base: copyto!, USE_BLAS64
-using Base: require_one_based_indexing
+import Base: copyto!
+using Base: require_one_based_indexing, USE_BLAS64
 
 export
 # Level 1
@@ -33,6 +33,7 @@ export
     sbmv!,
     sbmv,
     spmv!,
+    spr!,
     symv!,
     symv,
     trsv!,
@@ -74,12 +75,12 @@ const libblas = libblastrampoline
 const liblapack = libblastrampoline
 
 import LinearAlgebra
-import LinearAlgebra: BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1, axpy!
+using LinearAlgebra: BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1
 
 include("lbt.jl")
 
 """
-get_config()
+    get_config()
 
 Return an object representing the current `libblastrampoline` configuration.
 
@@ -148,7 +149,7 @@ function check()
     config = get_config()
 
     # Ensure that one of our loaded libraries satisfies our interface requirement
-    interface = Base.USE_BLAS64 ? :ilp64 : :lp64
+    interface = USE_BLAS64 ? :ilp64 : :lp64
     if !any(lib.interface == interface for lib in config.loaded_libs)
         interfacestr = uppercase(string(interface))
         @error("No loaded BLAS libraries were built with $(interfacestr) support")
@@ -157,6 +158,13 @@ function check()
     end
 end
 
+"Check that upper/lower (for special matrices) is correctly specified"
+function chkuplo(uplo::AbstractChar)
+    if !(uplo == 'U' || uplo == 'L')
+        throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
+    end
+    uplo
+end
 
 # Level 1
 ## copy
@@ -375,7 +383,7 @@ for (elty, f) in ((Float32, :dot), (Float64, :dot),
             xstride = stride(x,1)
             ystride = stride(y,1)
             x_delta = xstride < 0 ? n : 1
-            GC.@preserve x $f(n,pointer(x,x_delta),xstride,y,ystride)
+            GC.@preserve x $f(n, pointer(x, x_delta), xstride, y, ystride)
         end
 
         function $f(x::DenseArray{$elty}, y::StridedVector{$elty})
@@ -383,7 +391,7 @@ for (elty, f) in ((Float32, :dot), (Float64, :dot),
             xstride = stride(x,1)
             ystride = stride(y,1)
             y_delta = ystride < 0 ? n : 1
-            GC.@preserve y $f(n,x,xstride,pointer(y,y_delta),ystride)
+            GC.@preserve y $f(n, x, xstride, pointer(y, y_delta), ystride)
         end
 
         function $f(x::StridedVector{$elty}, y::StridedVector{$elty})
@@ -392,7 +400,7 @@ for (elty, f) in ((Float32, :dot), (Float64, :dot),
             ystride = stride(y,1)
             x_delta = xstride < 0 ? n : 1
             y_delta = ystride < 0 ? n : 1
-            GC.@preserve x y $f(n,pointer(x,x_delta),xstride,pointer(y,y_delta),ystride)
+            GC.@preserve x y $f(n, pointer(x, x_delta), xstride, pointer(y, y_delta), ystride)
         end
     end
 end
@@ -664,13 +672,19 @@ for (fname, elty) in ((:dgemv_,:Float64),
                 throw(DimensionMismatch("the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            lda = stride(A,2)
+            lda >= max(1, size(A,1)) || error("`stride(A,2)` must be at least `max(1, size(A,1))`")
+            sX = stride(X,1)
+            pX = pointer(X, sX > 0 ? firstindex(X) : lastindex(X))
+            sY = stride(Y,1)
+            pY = pointer(Y, sY > 0 ? firstindex(Y) : lastindex(Y))
+            GC.@preserve X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  trans, size(A,1), size(A,2), alpha,
-                 A, max(1,stride(A,2)), X, stride(X,1),
-                 beta, Y, stride(Y,1), 1)
+                 A, lda, pX, sX,
+                 beta, pY, sY, 1)
             Y
         end
         function gemv(trans::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, X::AbstractVector{$elty})
@@ -792,6 +806,7 @@ for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline),
         function symv!(uplo::AbstractChar, alpha::Union{($elty), Bool},
                        A::AbstractMatrix{$elty}, x::AbstractVector{$elty},
                        beta::Union{($elty), Bool}, y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             m, n = size(A)
             if m != n
@@ -853,6 +868,7 @@ for (fname, elty) in ((:zhemv_,:ComplexF64),
                       (:chemv_,:ComplexF32))
     @eval begin
         function hemv!(uplo::AbstractChar, α::Union{$elty, Bool}, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, β::Union{$elty, Bool}, y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             m, n = size(A)
             if m != n
@@ -954,6 +970,7 @@ end
 function hpmv!(uplo::AbstractChar,
                α::Number, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}},
                β::Number, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasComplex}
+    chkuplo(uplo)
     require_one_based_indexing(AP, x, y)
     N = length(x)
     if N != length(y)
@@ -962,6 +979,7 @@ function hpmv!(uplo::AbstractChar,
     if 2*length(AP) < N*(N + 1)
         throw(DimensionMismatch("Packed Hermitian matrix A has size smaller than length(x) =  $(N)."))
     end
+    chkstride1(AP)
     return hpmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1))
 end
 
@@ -1001,6 +1019,7 @@ for (fname, elty) in ((:dsbmv_,:Float64),
              # *     .. Array Arguments ..
              #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
         function sbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             chkstride1(A)
             ccall((@blasfunc($fname), libblastrampoline), Cvoid,
@@ -1104,6 +1123,7 @@ end
 function spmv!(uplo::AbstractChar,
                α::Real, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}},
                β::Real, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal}
+    chkuplo(uplo)
     require_one_based_indexing(AP, x, y)
     N = length(x)
     if N != length(y)
@@ -1112,6 +1132,7 @@ function spmv!(uplo::AbstractChar,
     if 2*length(AP) < N*(N + 1)
         throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N)."))
     end
+    chkstride1(AP)
     return spmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1))
 end
 
@@ -1139,6 +1160,73 @@ Return the updated `y`.
 """
 spmv!
 
+### spr!, (SP) symmetric packed matrix-vector operation defined as A := alpha*x*x' + A
+for (fname, elty) in ((:dspr_, :Float64),
+                      (:sspr_, :Float32))
+    @eval begin
+        function spr!(uplo::AbstractChar,
+                      n::Integer,
+                      α::$elty,
+                      x::Union{Ptr{$elty}, AbstractArray{$elty}},
+                      incx::Integer,
+                      AP::Union{Ptr{$elty}, AbstractArray{$elty}})
+
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                  (Ref{UInt8},     # uplo,
+                   Ref{BlasInt},   # n,
+                   Ref{$elty},     # α,
+                   Ptr{$elty},     # x,
+                   Ref{BlasInt},   # incx,
+                   Ptr{$elty},     # AP,
+                   Clong),         # length of uplo
+                  uplo,
+                  n,
+                  α,
+                  x,
+                  incx,
+                  AP,
+                  1)
+            return AP
+        end
+    end
+end
+
+function spr!(uplo::AbstractChar,
+              α::Real, x::Union{DenseArray{T}, AbstractVector{T}},
+              AP::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal}
+    chkuplo(uplo)
+    require_one_based_indexing(AP, x)
+    N = length(x)
+    if 2*length(AP) < N*(N + 1)
+        throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N)."))
+    end
+    chkstride1(AP)
+    return spr!(uplo, N, convert(T, α), x, stride(x, 1), AP)
+end
+
+"""
+    spr!(uplo, α, x, AP)
+
+Update matrix `A` as `A+α*x*x'`, where `A` is a symmetric matrix provided
+in packed format `AP` and `x` is a vector.
+
+With `uplo = 'U'`, the array AP must contain the upper triangular part of the
+symmetric matrix packed sequentially, column by column, so that `AP[1]`
+contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]`
+respectively, and so on.
+
+With `uplo = 'L'`, the array AP must contain the lower triangular part of the
+symmetric matrix packed sequentially, column by column, so that `AP[1]`
+contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]`
+respectively, and so on.
+
+The scalar input `α` must be real.
+
+The array inputs `x` and `AP` must all be of `Float32` or `Float64` type.
+Return the updated `AP`.
+"""
+spr!
+
 ### hbmv, (HB) Hermitian banded matrix-vector multiplication
 for (fname, elty) in ((:zhbmv_,:ComplexF64),
                       (:chbmv_,:ComplexF32))
@@ -1151,6 +1239,7 @@ for (fname, elty) in ((:zhbmv_,:ComplexF64),
              # *     .. Array Arguments ..
              #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
         function hbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             chkstride1(A)
             ccall((@blasfunc($fname), libblastrampoline), Cvoid,
@@ -1207,6 +1296,7 @@ for (fname, elty) in ((:dtrmv_,:Float64),
                 # *     .. Array Arguments ..
                 #       DOUBLE PRECISION A(LDA,*),X(*)
         function trmv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if n != length(x)
@@ -1262,6 +1352,7 @@ for (fname, elty) in ((:dtrsv_,:Float64),
                 #       .. Array Arguments ..
                 #       DOUBLE PRECISION A(LDA,*),X(*)
         function trsv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if n != length(x)
@@ -1330,6 +1421,7 @@ for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline),
                            (:csyr_,:ComplexF32,libblastrampoline))
     @eval begin
         function syr!(uplo::AbstractChar, α::$elty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if length(x) != n
@@ -1360,6 +1452,7 @@ for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64),
                              (:cher_,:ComplexF32, :Float32))
     @eval begin
         function her!(uplo::AbstractChar, α::$relty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if length(x) != n
@@ -1469,6 +1562,7 @@ for (mfname, elty) in ((:dsymm_,:Float64),
         function symm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool},
                        A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty},
                        beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B, C)
             m, n = size(C)
             j = checksquare(A)
@@ -1542,6 +1636,7 @@ for (mfname, elty) in ((:zhemm_,:ComplexF64),
         function hemm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool},
                        A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty},
                        beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B, C)
             m, n = size(C)
             j = checksquare(A)
@@ -1625,31 +1720,32 @@ for (fname, elty) in ((:dsyrk_,:Float64),
                       (:ssyrk_,:Float32),
                       (:zsyrk_,:ComplexF64),
                       (:csyrk_,:ComplexF32))
-   @eval begin
-       # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-       # *     .. Scalar Arguments ..
-       #       REAL ALPHA,BETA
-       #       INTEGER K,LDA,LDC,N
-       #       CHARACTER TRANS,UPLO
-       # *     .. Array Arguments ..
-       #       REAL A(LDA,*),C(LDC,*)
-       function syrk!(uplo::AbstractChar, trans::AbstractChar,
+    @eval begin
+        # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
+        # *     .. Scalar Arguments ..
+        #       REAL ALPHA,BETA
+        #       INTEGER K,LDA,LDC,N
+        #       CHARACTER TRANS,UPLO
+        # *     .. Array Arguments ..
+        #       REAL A(LDA,*),C(LDC,*)
+        function syrk!(uplo::AbstractChar, trans::AbstractChar,
                       alpha::Union{($elty), Bool}, A::AbstractVecOrMat{$elty},
                       beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
-           require_one_based_indexing(A, C)
-           n = checksquare(C)
-           nn = size(A, trans == 'N' ? 1 : 2)
-           if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
-           k  = size(A, trans == 'N' ? 2 : 1)
-           chkstride1(A)
-           chkstride1(C)
-           ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                  Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
-                 uplo, trans, n, k,
-                 alpha, A, max(1,stride(A,2)), beta,
-                 C, max(1,stride(C,2)), 1, 1)
+            chkuplo(uplo)
+            require_one_based_indexing(A, C)
+            n = checksquare(C)
+            nn = size(A, trans == 'N' ? 1 : 2)
+            if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
+            k  = size(A, trans == 'N' ? 2 : 1)
+            chkstride1(A)
+            chkstride1(C)
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
+                   Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
+                   Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
+                  uplo, trans, n, k,
+                  alpha, A, max(1,stride(A,2)), beta,
+                  C, max(1,stride(C,2)), 1, 1)
             C
         end
     end
@@ -1680,42 +1776,43 @@ function herk end
 
 for (fname, elty, relty) in ((:zherk_, :ComplexF64, :Float64),
                              (:cherk_, :ComplexF32, :Float32))
-   @eval begin
-       # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-       # *     .. Scalar Arguments ..
-       #       REAL ALPHA,BETA
-       #       INTEGER K,LDA,LDC,N
-       #       CHARACTER TRANS,UPLO
-       # *     ..
-       # *     .. Array Arguments ..
-       #       COMPLEX A(LDA,*),C(LDC,*)
-       function herk!(uplo::AbstractChar, trans::AbstractChar,
-                      α::Union{$relty, Bool}, A::AbstractVecOrMat{$elty},
-                      β::Union{$relty, Bool}, C::AbstractMatrix{$elty})
-           require_one_based_indexing(A, C)
-           n = checksquare(C)
-           nn = size(A, trans == 'N' ? 1 : 2)
-           if nn != n
-               throw(DimensionMismatch("the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))"))
-           end
-           chkstride1(A)
-           chkstride1(C)
-           k  = size(A, trans == 'N' ? 2 : 1)
-           ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                  Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty},
-                  Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
-                 uplo, trans, n, k,
-                 α, A, max(1,stride(A,2)), β,
-                 C, max(1,stride(C,2)), 1, 1)
-           C
-       end
-       function herk(uplo::AbstractChar, trans::AbstractChar, α::$relty, A::AbstractVecOrMat{$elty})
-           n = size(A, trans == 'N' ? 1 : 2)
-           herk!(uplo, trans, α, A, zero($relty), similar(A, (n,n)))
-       end
-       herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A)
-   end
+    @eval begin
+        # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
+        # *     .. Scalar Arguments ..
+        #       REAL ALPHA,BETA
+        #       INTEGER K,LDA,LDC,N
+        #       CHARACTER TRANS,UPLO
+        # *     ..
+        # *     .. Array Arguments ..
+        #       COMPLEX A(LDA,*),C(LDC,*)
+        function herk!(uplo::AbstractChar, trans::AbstractChar,
+                        α::Union{$relty, Bool}, A::AbstractVecOrMat{$elty},
+                        β::Union{$relty, Bool}, C::AbstractMatrix{$elty})
+            chkuplo(uplo)
+            require_one_based_indexing(A, C)
+            n = checksquare(C)
+            nn = size(A, trans == 'N' ? 1 : 2)
+            if nn != n
+                throw(DimensionMismatch("the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))"))
+            end
+            chkstride1(A)
+            chkstride1(C)
+            k  = size(A, trans == 'N' ? 2 : 1)
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
+                    Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty},
+                    Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
+                    uplo, trans, n, k,
+                    α, A, max(1,stride(A,2)), β,
+                    C, max(1,stride(C,2)), 1, 1)
+            C
+        end
+        function herk(uplo::AbstractChar, trans::AbstractChar, α::$relty, A::AbstractVecOrMat{$elty})
+            n = size(A, trans == 'N' ? 1 : 2)
+            herk!(uplo, trans, α, A, zero($relty), similar(A, (n,n)))
+        end
+        herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A)
+    end
 end
 
 ## syr2k
@@ -1724,18 +1821,19 @@ for (fname, elty) in ((:dsyr2k_,:Float64),
                       (:zsyr2k_,:ComplexF64),
                       (:csyr2k_,:ComplexF32))
     @eval begin
-             #       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-             #
-             #       .. Scalar Arguments ..
-             #       REAL PRECISION ALPHA,BETA
-             #       INTEGER K,LDA,LDB,LDC,N
-             #       CHARACTER TRANS,UPLO
-             #       ..
-             #       .. Array Arguments ..
-             #       REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
+            #       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+            #
+            #       .. Scalar Arguments ..
+            #       REAL PRECISION ALPHA,BETA
+            #       INTEGER K,LDA,LDB,LDC,N
+            #       CHARACTER TRANS,UPLO
+            #       ..
+            #       .. Array Arguments ..
+            #       REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
         function syr2k!(uplo::AbstractChar, trans::AbstractChar,
                         alpha::($elty), A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty},
                         beta::($elty), C::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B, C)
             n = checksquare(C)
             nn = size(A, trans == 'N' ? 1 : 2)
@@ -1789,43 +1887,45 @@ or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-tr
 syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B)
 
 for (fname, elty1, elty2) in ((:zher2k_,:ComplexF64,:Float64), (:cher2k_,:ComplexF32,:Float32))
-   @eval begin
-       # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-       #
-       #       .. Scalar Arguments ..
-       #       COMPLEX ALPHA
-       #       REAL BETA
-       #       INTEGER K,LDA,LDB,LDC,N
-       #       CHARACTER TRANS,UPLO
-       #       ..
-       #       .. Array Arguments ..
-       #       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-       function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1),
-                       A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1},
-                       beta::($elty2), C::AbstractMatrix{$elty1})
-           require_one_based_indexing(A, B, C)
-           n = checksquare(C)
-           nn = size(A, trans == 'N' ? 1 : 2)
-           if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
-           chkstride1(A)
-           chkstride1(B)
-           chkstride1(C)
-           k  = size(A, trans == 'N' ? 2 : 1)
-           ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                  Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt},
-                  Ref{$elty2},  Ptr{$elty1}, Ref{BlasInt}, Clong, Clong),
-                 uplo, trans, n, k,
-                 alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
-                 beta, C, max(1,stride(C,2)), 1, 1)
-           C
-       end
-       function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1})
-           n = size(A, trans == 'N' ? 1 : 2)
-           her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n)))
-       end
-       her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) = her2k(uplo, trans, one($elty1), A, B)
-   end
+    @eval begin
+        # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+        #
+        #       .. Scalar Arguments ..
+        #       COMPLEX ALPHA
+        #       REAL BETA
+        #       INTEGER K,LDA,LDB,LDC,N
+        #       CHARACTER TRANS,UPLO
+        #       ..
+        #       .. Array Arguments ..
+        #       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
+        function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1),
+                        A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1},
+                        beta::($elty2), C::AbstractMatrix{$elty1})
+            chkuplo(uplo)
+            require_one_based_indexing(A, B, C)
+            n = checksquare(C)
+            nn = size(A, trans == 'N' ? 1 : 2)
+            if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
+            chkstride1(A)
+            chkstride1(B)
+            chkstride1(C)
+            k  = size(A, trans == 'N' ? 2 : 1)
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
+                    Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt},
+                    Ref{$elty2},  Ptr{$elty1}, Ref{BlasInt}, Clong, Clong),
+                    uplo, trans, n, k,
+                    alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
+                    beta, C, max(1,stride(C,2)), 1, 1)
+            C
+        end
+        function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1})
+            n = size(A, trans == 'N' ? 1 : 2)
+            her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n)))
+        end
+        her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) =
+            her2k(uplo, trans, one($elty1), A, B)
+    end
 end
 
 """
@@ -1917,6 +2017,7 @@ for (mmname, smname, elty) in
         #       DOUBLE PRECISION A(LDA,*),B(LDB,*)
         function trmm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::Number,
                        A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B)
             m, n = size(B)
             nA = checksquare(A)
@@ -1947,6 +2048,7 @@ for (mmname, smname, elty) in
         #       DOUBLE PRECISION A(LDA,*),B(LDB,*)
         function trsm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar,
                        alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B)
             m, n = size(B)
             k = checksquare(A)
@@ -1956,14 +2058,14 @@ for (mmname, smname, elty) in
             chkstride1(A)
             chkstride1(B)
             ccall((@blasfunc($smname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
-                 Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Clong, Clong, Clong, Clong),
-                 side, uplo, transa, diag,
-                 m, n, alpha, A,
-                 max(1,stride(A,2)), B, max(1,stride(B,2)),
-                 1, 1, 1, 1)
+                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
+                    Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
+                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                    Clong, Clong, Clong, Clong),
+                   side, uplo, transa, diag,
+                   m, n, alpha, A,
+                   max(1,stride(A,2)), B, max(1,stride(B,2)),
+                   1, 1, 1, 1)
             B
         end
         function trsm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
index 82406812f8fde..22e9fcfdbca83 100644
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ b/stdlib/LinearAlgebra/src/cholesky.jl
@@ -690,6 +690,8 @@ function logdet(C::CholeskyPivoted)
     end
 end
 
+logabsdet(C::Union{Cholesky, CholeskyPivoted}) = logdet(C), one(eltype(C)) # since C is p.s.d.
+
 inv!(C::Cholesky{<:BlasFloat,<:StridedMatrix}) =
     copytri!(LAPACK.potri!(C.uplo, C.factors), C.uplo, true)
 
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
index 461d38c1e8759..ffcd9e64e0752 100644
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ b/stdlib/LinearAlgebra/src/dense.jl
@@ -13,7 +13,7 @@ const NRM2_CUTOFF = 32
 # This constant should ideally be determined by the actual CPU cache size
 const ISONE_CUTOFF = 2^21 # 2M
 
-function isone(A::StridedMatrix)
+function isone(A::AbstractMatrix)
     m, n = size(A)
     m != n && return false # only square matrices can satisfy x == one(x)
     if sizeof(A) < ISONE_CUTOFF
@@ -23,7 +23,7 @@ function isone(A::StridedMatrix)
     end
 end
 
-@inline function _isone_triacheck(A::StridedMatrix, m::Int)
+@inline function _isone_triacheck(A::AbstractMatrix, m::Int)
     @inbounds for i in 1:m, j in i:m
         if i == j
             isone(A[i,i]) || return false
@@ -35,7 +35,7 @@ end
 end
 
 # Inner loop over rows to be friendly to the CPU cache
-@inline function _isone_cachefriendly(A::StridedMatrix, m::Int)
+@inline function _isone_cachefriendly(A::AbstractMatrix, m::Int)
     @inbounds for i in 1:m, j in 1:m
         if i == j
             isone(A[i,i]) || return false
@@ -565,7 +565,10 @@ exp(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(exp(parent(A)))
 """
     cis(A::AbstractMatrix)
 
-Compute ``\\exp(i A)`` for a square matrix ``A``.
+More efficient method for `exp(im*A)` of a square matrix `A`
+(especially if `A` is `Hermitian` or real-`Symmetric`).
+
+See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref).
 
 !!! compat "Julia 1.7"
     Support for using `cis` with matrices was added in Julia 1.7.
@@ -576,8 +579,8 @@ julia> cis([π 0; 0 π]) ≈ -I
 true
 ```
 """
-Base.cis(A::AbstractMatrix) = exp(im * A)  # fallback
-Base.cis(A::AbstractMatrix{<:Base.HWNumber}) = exp_maybe_inplace(float.(im .* A))
+cis(A::AbstractMatrix) = exp(im * A)  # fallback
+cis(A::AbstractMatrix{<:Base.HWNumber}) = exp_maybe_inplace(float.(im .* A))
 
 exp_maybe_inplace(A::StridedMatrix{<:Union{ComplexF32, ComplexF64}}) = exp!(A)
 exp_maybe_inplace(A) = exp(A)
@@ -638,22 +641,24 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         P = A2
         U = mul!(C[4]*P, true, C[2]*I, true, true) #U = C[2]*I + C[4]*P
         V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P
-        for k in 2:(div(size(C, 1), 2) - 1)
-            k2 = 2 * k
+        for k in 2:(div(length(C), 2) - 1)
             P *= A2
-            mul!(U, C[k2 + 2], P, true, true) # U += C[k2+2]*P
-            mul!(V, C[k2 + 1], P, true, true) # V += C[k2+1]*P
+            mul!(U, C[2k + 2], P, true, true) # U += C[2k+2]*P
+            mul!(V, C[2k + 1], P, true, true) # V += C[2k+1]*P
         end
 
         U = A * U
-        X = V + U
+
         # Padé approximant:  (V-U)\(V+U)
-        LAPACK.gesv!(V-U, X)
+        tmp1, tmp2 = A, A2 # Reuse already allocated arrays
+        tmp1 .= V .- U
+        tmp2 .= V .+ U
+        X = LAPACK.gesv!(tmp1, tmp2)[1]
     else
         s  = log2(nA/5.4)               # power of 2 later reversed by squaring
         if s > 0
             si = ceil(Int,s)
-            A /= convert(T,2^si)
+            A ./= convert(T,2^si)
         end
         CC = T[64764752532480000.,32382376266240000.,7771770303897600.,
                 1187353796428800.,  129060195264000.,  10559470521600.,
@@ -663,32 +668,35 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         A2 = A * A
         A4 = A2 * A2
         A6 = A2 * A4
-        Ut = mul!(CC[4]*A2, true,CC[2]*I, true, true); # Ut = CC[4]*A2+CC[2]*I
+        tmp1, tmp2 = similar(A6), similar(A6)
+
         # Allocation economical version of:
-        #U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
-        #          CC[8].*A6 .+ CC[6].*A4 .+ Ut)
-        U = mul!(CC[8].*A6 .+ CC[6].*A4 .+ Ut,
-                 A6,
-                 CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2,
-                 true, true)
-        U = A*U
-
-        # Allocation economical version of: Vt = CC[3]*A2 (recycle Ut)
-        Vt = mul!(Ut, CC[3], A2, true, false)
-        mul!(Vt, true, CC[1]*I, true, true); # Vt += CC[1]*I
+        # U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
+        #           CC[8].*A6 .+ CC[6].*A4 .+ CC[4]*A2+CC[2]*I)
+        tmp1 .= CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2
+        tmp2 .= CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2
+        mul!(tmp2, true,CC[2]*I, true, true) # tmp2 .+= CC[2]*I
+        U = mul!(tmp2, A6, tmp1, true, true)
+        U, tmp1 = mul!(tmp1, A, U), A # U = A * U0, written into the old tmp1; A's storage is recycled as the new tmp1
+
         # Allocation economical version of:
-        #V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
-        #           CC[7].*A6 .+ CC[5].*A4 .+ Vt
-        V = mul!(CC[7].*A6 .+ CC[5].*A4 .+ Vt,
-                 A6,
-                 CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2,
-                 true, true)
-
-        X = V + U
-        LAPACK.gesv!(V-U, X)
-
-        if s > 0            # squaring to reverse dividing by power of 2
-            for t=1:si; X *= X end
+        # V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
+        #           CC[7].*A6 .+ CC[5].*A4 .+ CC[3]*A2 .+ CC[1]*I
+        tmp1 .= CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2
+        tmp2 .= CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2
+        mul!(tmp2, true, CC[1]*I, true, true) # tmp2 .+= CC[1]*I
+        V = mul!(tmp2, A6, tmp1, true, true)
+
+        tmp1 .= V .+ U
+        tmp2 .= V .- U # tmp2 already contained V but this seems more readable
+        X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008
+
+        if s > 0
+            # Repeated squaring to compute X = r_13^(2^si)
+            for t=1:si
+                mul!(tmp2, X, X)
+                X, tmp2 = tmp2, X
+            end
         end
     end
 
@@ -794,7 +802,7 @@ that is the unique matrix ``X`` with eigenvalues having positive real part such
 
 If `A` is real-symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is
 used to compute the square root.   For such matrices, eigenvalues λ that
-appear to be slightly negative due to roundoff errors are treated as if they were zero
+appear to be slightly negative due to roundoff errors are treated as if they were zero.
 More precisely, matrices with all eigenvalues `≥ -rtol*(max |λ|)` are treated as semidefinite
 (yielding a Hermitian square root), with negative eigenvalues taken to be zero.
 `rtol` is a keyword argument to `sqrt` (in the Hermitian/real-symmetric case only) that
@@ -831,6 +839,8 @@ julia> sqrt(A)
  0.0  2.0
 ```
 """
+sqrt(::StridedMatrix)
+
 function sqrt(A::StridedMatrix{T}) where {T<:Union{Real,Complex}}
     if ishermitian(A)
         sqrtHermA = sqrt(Hermitian(A))
@@ -1373,6 +1383,7 @@ function factorize(A::StridedMatrix{T}) where T
 end
 factorize(A::Adjoint)   =   adjoint(factorize(parent(A)))
 factorize(A::Transpose) = transpose(factorize(parent(A)))
+factorize(a::Number)    = a # same as how factorize behaves on Diagonal types
 
 ## Moore-Penrose pseudoinverse
 
@@ -1457,7 +1468,7 @@ end
     nullspace(M, rtol::Real) = nullspace(M; rtol=rtol) # to be deprecated in Julia 2.0
 
 Computes a basis for the nullspace of `M` by including the singular
-vectors of `M` whose singular values have magnitudes greater than `max(atol, rtol*σ₁)`,
+vectors of `M` whose singular values have magnitudes smaller than `max(atol, rtol*σ₁)`,
 where `σ₁` is `M`'s largest singular value.
 
 By default, the relative tolerance `rtol` is `n*ϵ`, where `n`
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index ce155acb34c5c..ef01bb6cd3ba9 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -10,9 +10,17 @@ struct Diagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
         new{T,V}(diag)
     end
 end
+Diagonal{T,V}(d::Diagonal) where {T,V<:AbstractVector{T}} = Diagonal{T,V}(d.diag)
 Diagonal(v::AbstractVector{T}) where {T} = Diagonal{T,typeof(v)}(v)
 Diagonal{T}(v::AbstractVector) where {T} = Diagonal(convert(AbstractVector{T}, v)::AbstractVector{T})
 
+function Base.promote_rule(A::Type{<:Diagonal{<:Any,V}}, B::Type{<:Diagonal{<:Any,W}}) where {V,W} # V, W: the two diag-vector storage types
+    X = promote_type(V, W) # promoted storage type for the diagonal
+    T = eltype(X)
+    isconcretetype(T) && return Diagonal{T,X} # only commit to Diagonal{T,X} when the element type is concrete
+    return typejoin(A, B) # otherwise fall back to the typejoin of the two Diagonal types
+end
+
 """
     Diagonal(V::AbstractVector)
 
@@ -88,7 +96,7 @@ similar(D::Diagonal, ::Type{T}) where {T} = Diagonal(similar(D.diag, T))
 
 copyto!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1)
 
-size(D::Diagonal) = (length(D.diag),length(D.diag))
+size(D::Diagonal) = (n = length(D.diag); (n,n))
 
 function size(D::Diagonal,d::Integer)
     if d<1
@@ -145,24 +153,24 @@ isdiag(D::Diagonal) = all(isdiag, D.diag)
 isdiag(D::Diagonal{<:Number}) = true
 istriu(D::Diagonal, k::Integer=0) = k <= 0 || iszero(D.diag) ? true : false
 istril(D::Diagonal, k::Integer=0) = k >= 0 || iszero(D.diag) ? true : false
-function triu!(D::Diagonal,k::Integer=0)
+function triu!(D::Diagonal{T}, k::Integer=0) where T
     n = size(D,1)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 0
-        fill!(D.diag,0)
+        fill!(D.diag, zero(T))
     end
     return D
 end
 
-function tril!(D::Diagonal,k::Integer=0)
+function tril!(D::Diagonal{T}, k::Integer=0) where T
     n = size(D,1)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < 0
-        fill!(D.diag,0)
+        fill!(D.diag, zero(T))
     end
     return D
 end
@@ -198,83 +206,49 @@ Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) =
     Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed
 Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation
 
+function _muldiag_size_check(A, B) # require size(A, 2) == size(B, 1); throws DimensionMismatch otherwise
+    nA = size(A, 2)
+    mB = size(B, 1)
+    @noinline throw_dimerr(::AbstractMatrix, nA, mB) = throw(DimensionMismatch("second dimension of A, $nA, does not match first dimension of B, $mB"))
+    @noinline throw_dimerr(::AbstractVector, nA, mB) = throw(DimensionMismatch("second dimension of D, $nA, does not match length of V, $mB"))
+    nA == mB || throw_dimerr(B, nA, mB) # the error text is selected by whether B is a matrix or a vector
+    return nothing
+end
+# the output must have the same size as the non-diagonal input (matrix or vector), or as the first input when both are Diagonal
+@noinline throw_dimerr(szC, szA) = throw(DimensionMismatch("output matrix has size: $szC, but should have size $szA"))
+_size_check_out(C, ::Diagonal, A) = _size_check_out(C, A) # Diagonal on the left: compare C with the right factor
+_size_check_out(C, A, ::Diagonal) = _size_check_out(C, A) # Diagonal on the right: compare C with the left factor
+_size_check_out(C, A::Diagonal, ::Diagonal) = _size_check_out(C, A) # both Diagonal: compare C with the first one
+function _size_check_out(C, A)
+    szA = size(A)
+    szC = size(C)
+    szA == szC || throw_dimerr(szC, szA) # sizes are compared as whole tuples
+    return nothing
+end
+function _muldiag_size_check(C, A, B) # 3-arg form: also validates the output C
+    _muldiag_size_check(A, B) # size(A, 2) must equal size(B, 1)
+    _size_check_out(C, A, B) # C must have the size required by _size_check_out for these factors
+end
+
 function (*)(Da::Diagonal, Db::Diagonal)
-    nDa, mDb = size(Da, 2), size(Db, 1)
-    if nDa != mDb
-        throw(DimensionMismatch("second dimension of Da, $nDa, does not match first dimension of Db, $mDb"))
-    end
+    _muldiag_size_check(Da, Db)
     return Diagonal(Da.diag .* Db.diag)
 end
 
 function (*)(D::Diagonal, V::AbstractVector)
-    nD = size(D, 2)
-    if nD != length(V)
-        throw(DimensionMismatch("second dimension of D, $nD, does not match length of V, $(length(V))"))
-    end
+    _muldiag_size_check(D, V)
     return D.diag .* V
 end
 
-(*)(A::AbstractTriangular, D::Diagonal) =
-    rmul!(copy_oftype(A, promote_op(*, eltype(A), eltype(D.diag))), D)
-(*)(D::Diagonal, B::AbstractTriangular) =
-    lmul!(D, copy_oftype(B, promote_op(*, eltype(B), eltype(D.diag))))
-
 (*)(A::AbstractMatrix, D::Diagonal) =
-    rmul!(copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))), D)
+    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A, D)
 (*)(D::Diagonal, A::AbstractMatrix) =
-    lmul!(D, copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))))
-
-function rmul!(A::AbstractMatrix, D::Diagonal)
-    require_one_based_indexing(A)
-    nA, nD = size(A, 2), length(D.diag)
-    if nA != nD
-        throw(DimensionMismatch("second dimension of A, $nA, does not match the first of D, $nD"))
-    end
-    A .= A .* permutedims(D.diag)
-    return A
-end
+    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), D, A)
 
-function lmul!(D::Diagonal, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    nB, nD = size(B, 1), length(D.diag)
-    if nB != nD
-        throw(DimensionMismatch("second dimension of D, $nD, does not match the first of B, $nB"))
-    end
-    B .= D.diag .* B
-    return B
-end
-
-rmul!(A::Union{LowerTriangular,UpperTriangular}, D::Diagonal) = typeof(A)(rmul!(A.data, D))
-function rmul!(A::UnitLowerTriangular, D::Diagonal)
-    rmul!(A.data, D)
-    for i = 1:size(A, 1)
-        A.data[i,i] = D.diag[i]
-    end
-    LowerTriangular(A.data)
-end
-function rmul!(A::UnitUpperTriangular, D::Diagonal)
-    rmul!(A.data, D)
-    for i = 1:size(A, 1)
-        A.data[i,i] = D.diag[i]
-    end
-    UpperTriangular(A.data)
-end
-
-function lmul!(D::Diagonal, B::UnitLowerTriangular)
-    lmul!(D, B.data)
-    for i = 1:size(B, 1)
-        B.data[i,i] = D.diag[i]
-    end
-    LowerTriangular(B.data)
-end
-function lmul!(D::Diagonal, B::UnitUpperTriangular)
-    lmul!(D, B.data)
-    for i = 1:size(B, 1)
-        B.data[i,i] = D.diag[i]
-    end
-    UpperTriangular(B.data)
-end
+rmul!(A::AbstractMatrix, D::Diagonal) = mul!(A, A, D)
+lmul!(D::Diagonal, B::AbstractVecOrMat) = mul!(B, D, B)
 
+#TODO: It seems better to call (D' * adjA')' directly?
 function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal)
     A = adjA.parent
     Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -306,37 +280,66 @@ function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix})
     lmul!(D, At)
 end
 
-rmul!(A::Diagonal, B::Diagonal) = Diagonal(A.diag .*= B.diag)
-lmul!(A::Diagonal, B::Diagonal) = Diagonal(B.diag .= A.diag .* B.diag)
+@inline function __muldiag!(out, D::Diagonal, B, alpha, beta) # Diagonal on the left: D.diag .* B multiplies row i of B by D.diag[i]
+    if iszero(beta)
+        out .= (D.diag .* B) .*ₛ alpha # beta is zero: `out` is overwritten without being read
+    else
+        out .= (D.diag .* B) .*ₛ alpha .+ out .* beta
+    end
+    return out
+end
+
+@inline function __muldiag!(out, A, D::Diagonal, alpha, beta) # Diagonal on the right: permutedims(D.diag) is a row, so column j of A is multiplied by D.diag[j]
+    if iszero(beta)
+        out .= (A .* permutedims(D.diag)) .*ₛ alpha # beta is zero: `out` is overwritten without being read
+    else
+        out .= (A .* permutedims(D.diag)) .*ₛ alpha .+ out .* beta
+    end
+    return out
+end
+
+@inline function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, alpha, beta) # all three Diagonal: only the diag vectors are touched
+    if iszero(beta)
+        out.diag .= (D1.diag .* D2.diag) .*ₛ alpha # beta is zero: `out.diag` is overwritten without being read
+    else
+        out.diag .= (D1.diag .* D2.diag) .*ₛ alpha .+ out.diag .* beta
+    end
+    return out
+end
+
+# only needed for ambiguity resolution, as mul! is explicitly defined for these arguments
+@inline __muldiag!(out, D1::Diagonal, D2::Diagonal, alpha, beta) =
+    mul!(out, D1, D2, alpha, beta)
+
+@inline function _muldiag!(out, A, B, alpha, beta) # size-checked entry point shared by the mul! methods
+    _muldiag_size_check(out, A, B) # validate sizes before anything is written to `out`
+    __muldiag!(out, A, B, alpha, beta)
+    return out
+end
 
 # Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat
-@inline mul!(out::AbstractVector, A::Diagonal, in::AbstractVector, alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::AbstractMatrix, alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::Adjoint{<:Any,<:AbstractVecOrMat},
-             alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::Transpose{<:Any,<:AbstractVecOrMat},
-             alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, in::AbstractMatrix, A::Diagonal, alpha::Number, beta::Number) =
-    out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:AbstractVecOrMat}, A::Diagonal,
-             alpha::Number, beta::Number) =
-    out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:AbstractVecOrMat}, A::Diagonal,
-             alpha::Number, beta::Number) =
-    out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
+@inline mul!(out::AbstractVector, D::Diagonal, V::AbstractVector, alpha::Number, beta::Number) =
+    _muldiag!(out, D, V, alpha, beta)
+@inline mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, alpha::Number, beta::Number) =
+    _muldiag!(out, D, B, alpha, beta)
+@inline mul!(out::AbstractMatrix, D::Diagonal, B::Adjoint{<:Any,<:AbstractVecOrMat},
+             alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta)
+@inline mul!(out::AbstractMatrix, D::Diagonal, B::Transpose{<:Any,<:AbstractVecOrMat},
+             alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta)
+
+@inline mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, alpha::Number, beta::Number) =
+    _muldiag!(out, A, D, alpha, beta)
+@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, D::Diagonal,
+             alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta)
+@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, D::Diagonal,
+             alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta)
+@inline mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) =
+    _muldiag!(C, Da, Db, alpha, beta)
 
 function mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number)
-    mA = size(Da, 1)
-    mB = size(Db, 1)
-    mA == mB || throw(DimensionMismatch("A has dimensions ($mA,$mA) but B has dimensions ($mB,$mB)"))
-    mC, nC = size(C)
-    mC == nC == mA || throw(DimensionMismatch("output matrix has size: ($mC,$nC), but should have size ($mA,$mA)"))
+    _muldiag_size_check(C, Da, Db)
     require_one_based_indexing(C)
+    mA = size(Da, 1)
     da = Da.diag
     db = Db.diag
     _rmul_or_fill!(C, beta)
@@ -352,41 +355,182 @@ function mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, alpha::Number, beta
     return C
 end
 
-(/)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag ./ Db.diag)
-
-ldiv!(x::AbstractArray, A::Diagonal, b::AbstractArray) = (x .= A.diag .\ b)
+/(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(similar(A, promote_op(/, eltype(A), eltype(D)), size(A)), A, D)
 
-function ldiv!(D::Diagonal, A::Union{LowerTriangular,UpperTriangular})
-    broadcast!(\, parent(A), D.diag, parent(A))
-    A
-end
-
-function rdiv!(A::AbstractMatrix, D::Diagonal)
+rdiv!(A::AbstractVecOrMat, D::Diagonal) = @inline _rdiv!(A, A, D)
+# avoid copy when possible via internal 3-arg backend
+function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal)
     require_one_based_indexing(A)
     dd = D.diag
-    m, n = size(A)
-    if (k = length(dd)) ≠ n
+    m, n = size(A, 1), size(A, 2)
+    if (k = length(dd)) != n
         throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
     end
     @inbounds for j in 1:n
         ddj = dd[j]
-        if iszero(ddj)
-            throw(SingularException(j))
-        end
+        iszero(ddj) && throw(SingularException(j))
         for i in 1:m
-            A[i, j] /= ddj
+            B[i, j] = A[i, j] / ddj
         end
     end
-    A
+    B
 end
 
-function rdiv!(A::Union{LowerTriangular,UpperTriangular}, D::Diagonal)
-    broadcast!(/, parent(A), parent(A), permutedims(D.diag))
-    A
+\(D::Diagonal, B::AbstractVecOrMat) = ldiv!(similar(B, promote_op(\, eltype(D), eltype(B)), size(B)), D, B)
+
+ldiv!(D::Diagonal, B::AbstractVecOrMat) = @inline ldiv!(B, D, B)
+function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat)
+    require_one_based_indexing(A, B)
+    dd = D.diag
+    d = length(dd)
+    m, n = size(A, 1), size(A, 2)
+    m′, n′ = size(B, 1), size(B, 2)
+    m == d || throw(DimensionMismatch("right hand side has $m rows but D is $d by $d"))
+    (m, n) == (m′, n′) || throw(DimensionMismatch("expect output to be $m by $n, but got $m′ by $n′"))
+    j = findfirst(iszero, D.diag)
+    isnothing(j) || throw(SingularException(j))
+    @inbounds for j = 1:n, i = 1:m
+        B[i, j] = dd[i] \ A[i, j]
+    end
+    B
+end
+
+# Optimizations for \, / between Diagonals
+\(D::Diagonal, B::Diagonal) = ldiv!(similar(B, promote_op(\, eltype(D), eltype(B))), D, B)
+/(A::Diagonal, D::Diagonal) = _rdiv!(similar(A, promote_op(/, eltype(A), eltype(D))), A, D)
+function _rdiv!(Dc::Diagonal, Db::Diagonal, Da::Diagonal)
+    n, k = length(Db.diag), length(Da.diag)
+    n == k || throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+    j = findfirst(iszero, Da.diag)
+    isnothing(j) || throw(SingularException(j))
+    Dc.diag .= Db.diag ./ Da.diag
+    Dc
+end
+ldiv!(Dc::Diagonal, Da::Diagonal, Db::Diagonal) = (ldiv!(Dc.diag, Da, Db.diag); Dc)
+
+# optimizations for (Sym)Tridiagonal and Diagonal
+@propagate_inbounds _getudiag(T::Tridiagonal, i) = T.du[i]
+@propagate_inbounds _getudiag(S::SymTridiagonal, i) = S.ev[i]
+@propagate_inbounds _getdiag(T::Tridiagonal, i) = T.d[i]
+@propagate_inbounds _getdiag(S::SymTridiagonal, i) = symmetric(S.dv[i], :U)::symmetric_type(eltype(S.dv))
+@propagate_inbounds _getldiag(T::Tridiagonal, i) = T.dl[i]
+@propagate_inbounds _getldiag(S::SymTridiagonal, i) = transpose(S.ev[i])
+
+function (\)(D::Diagonal, S::SymTridiagonal)
+    T = promote_op(\, eltype(D), eltype(S))
+    du = similar(S.ev, T, max(length(S.dv)-1, 0))
+    d  = similar(S.dv, T, length(S.dv))
+    dl = similar(S.ev, T, max(length(S.dv)-1, 0))
+    ldiv!(Tridiagonal(dl, d, du), D, S)
+end
+(\)(D::Diagonal, T::Tridiagonal) = ldiv!(similar(T, promote_op(\, eltype(D), eltype(T))), D, T)
+function ldiv!(T::Tridiagonal, D::Diagonal, S::Union{SymTridiagonal,Tridiagonal})
+    m = size(S, 1)
+    dd = D.diag
+    if (k = length(dd)) != m
+        throw(DimensionMismatch("diagonal matrix is $k by $k but right hand side has $m rows"))
+    end
+    if length(T.d) != m
+        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
+    end
+    m == 0 && return T
+    j = findfirst(iszero, dd)
+    isnothing(j) || throw(SingularException(j))
+    ddj = dd[1]
+    T.d[1] = ddj \ _getdiag(S, 1)
+    @inbounds if m > 1
+        T.du[1] = ddj \ _getudiag(S, 1)
+        for j in 2:m-1
+            ddj = dd[j]
+            T.dl[j-1] = ddj \ _getldiag(S, j-1)
+            T.d[j]  = ddj \ _getdiag(S, j)
+            T.du[j] = ddj \ _getudiag(S, j)
+        end
+        ddj = dd[m]
+        T.dl[m-1] = ddj \ _getldiag(S, m-1)
+        T.d[m] = ddj \ _getdiag(S, m)
+    end
+    return T
 end
 
-(/)(A::Union{StridedMatrix, AbstractTriangular}, D::Diagonal) =
-    rdiv!((typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A), D)
+function (/)(S::SymTridiagonal, D::Diagonal)
+    T = promote_op(/, eltype(S), eltype(D))
+    du = similar(S.ev, T, max(length(S.dv)-1, 0))
+    d  = similar(S.dv, T, length(S.dv))
+    dl = similar(S.ev, T, max(length(S.dv)-1, 0))
+    _rdiv!(Tridiagonal(dl, d, du), S, D)
+end
+(/)(T::Tridiagonal, D::Diagonal) = _rdiv!(similar(T, promote_op(/, eltype(T), eltype(D))), T, D)
+function _rdiv!(T::Tridiagonal, S::Union{SymTridiagonal,Tridiagonal}, D::Diagonal)
+    n = size(S, 2)
+    dd = D.diag
+    if (k = length(dd)) != n
+        throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+    end
+    if length(T.d) != n
+        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
+    end
+    n == 0 && return T
+    j = findfirst(iszero, dd)
+    isnothing(j) || throw(SingularException(j))
+    ddj = dd[1]
+    T.d[1] = _getdiag(S, 1) / ddj
+    @inbounds if n > 1
+        T.dl[1] = _getldiag(S, 1) / ddj
+        for j in 2:n-1
+            ddj = dd[j]
+            T.dl[j] = _getldiag(S, j) / ddj
+            T.d[j] = _getdiag(S, j) / ddj
+            T.du[j-1] = _getudiag(S, j-1) / ddj
+        end
+        ddj = dd[n]
+        T.d[n] = _getdiag(S, n) / ddj
+        T.du[n-1] = _getudiag(S, n-1) / ddj
+    end
+    return T
+end
+
+# Optimizations for [l/r]mul!, l/rdiv!, *, / and \ between Triangular and Diagonal.
+# These functions are generally more efficient if we calculate the whole data field.
+# The following code implements them in a unified pattern to avoid missing any combination.
+@inline function _setdiag!(data, f, diag, diag′ = nothing)
+    @inbounds for i in 1:length(diag)
+        data[i,i] = isnothing(diag′) ? f(diag[i]) : f(diag[i],diag′[i])
+    end
+    data
+end
+for Tri in (:UpperTriangular, :LowerTriangular)
+    UTri = Symbol(:Unit, Tri)
+    # 2 args
+    for (fun, f) in zip((:*, :rmul!, :rdiv!, :/), (:identity, :identity, :inv, :inv))
+        @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D))
+        @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag))
+    end
+    for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv))
+        @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data))
+        @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag))
+    end
+    # 3-arg ldiv!
+    @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data))
+    @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag))
+    # 3-arg mul!: invoke 5-arg mul! rather than lmul!
+    @eval mul!(C::$Tri, A::Union{$Tri,$UTri}, D::Diagonal) = mul!(C, A, D, true, false)
+    # 5-arg mul!
+    @eval @inline mul!(C::$Tri, D::Diagonal, A::$Tri, α::Number, β::Number) = $Tri(mul!(C.data, D, A.data, α, β))
+    @eval @inline function mul!(C::$Tri, D::Diagonal, A::$UTri, α::Number, β::Number)
+        iszero(α) && return _rmul_or_fill!(C, β)
+        diag′ = iszero(β) ? nothing : diag(C)
+        data = mul!(C.data, D, A.data, α, β)
+        $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′))
+    end
+    @eval @inline mul!(C::$Tri, A::$Tri, D::Diagonal, α::Number, β::Number) = $Tri(mul!(C.data, A.data, D, α, β))
+    @eval @inline function mul!(C::$Tri, A::$UTri, D::Diagonal, α::Number, β::Number)
+        iszero(α) && return _rmul_or_fill!(C, β)
+        diag′ = iszero(β) ? nothing : diag(C)
+        data = mul!(C.data, A.data, D, α, β)
+        $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′))
+    end
+end
 
 @inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal)
     valA = A.diag; nA = length(valA)
@@ -405,7 +549,7 @@ end
 kron(A::Diagonal{<:Number}, B::Diagonal{<:Number}) = Diagonal(kron(A.diag, B.diag))
 
 @inline function kron!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix)
-    Base.require_one_based_indexing(B)
+    require_one_based_indexing(B)
     (mA, nA) = size(A)
     (mB, nB) = size(B)
     (mC, nC) = size(C)
@@ -458,13 +602,13 @@ adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
 Base.permutedims(D::Diagonal) = D
 Base.permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
 
-function diag(D::Diagonal, k::Integer=0)
+function diag(D::Diagonal{T}, k::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of k
     if k == 0
         return copyto!(similar(D.diag, length(D.diag)), D.diag)
     elseif -size(D,1) <= k <= size(D,1)
-        return fill!(similar(D.diag, size(D,1)-abs(k)), 0)
+        return fill!(similar(D.diag, size(D,1)-abs(k)), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $k, must be at least $(-size(D, 1)) ",
             "and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
@@ -486,30 +630,6 @@ for f in (:exp, :cis, :log, :sqrt,
     @eval $f(D::Diagonal) = Diagonal($f.(D.diag))
 end
 
-(\)(D::Diagonal, A::AbstractMatrix) =
-    ldiv!(D, (typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A))
-
-(\)(D::Diagonal, b::AbstractVector) = D.diag .\ b
-(\)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag .\ Db.diag)
-
-function ldiv!(D::Diagonal, B::AbstractVecOrMat)
-    m, n = size(B, 1), size(B, 2)
-    if m != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $m rows"))
-    end
-    (m == 0 || n == 0) && return B
-    for j = 1:n
-        for i = 1:m
-            di = D.diag[i]
-            if di == 0
-                throw(SingularException(i))
-            end
-            B[i,j] = di \ B[i,j]
-        end
-    end
-    return B
-end
-
 function inv(D::Diagonal{T}) where T
     Di = similar(D.diag, typeof(inv(zero(T))))
     for i = 1:length(D.diag)
@@ -556,21 +676,27 @@ end
 #Singular system
 svdvals(D::Diagonal{<:Number}) = sort!(abs.(D.diag), rev = true)
 svdvals(D::Diagonal) = [svdvals(v) for v in D.diag]
-function svd(D::Diagonal{<:Number})
+function svd(D::Diagonal{T}) where T<:Number
     S   = abs.(D.diag)
     piv = sortperm(S, rev = true)
     U   = Diagonal(D.diag ./ S)
     Up  = hcat([U[:,i] for i = 1:length(D.diag)][piv]...)
-    V   = Diagonal(fill!(similar(D.diag), one(eltype(D.diag))))
+    V   = Diagonal(fill!(similar(D.diag), one(T)))
     Vp  = hcat([V[:,i] for i = 1:length(D.diag)][piv]...)
     return SVD(Up, S[piv], copy(Vp'))
 end
 
-# disambiguation methods: * of Diagonal and Adj/Trans AbsVec
-*(x::Adjoint{<:Any,<:AbstractVector}, D::Diagonal) = Adjoint(map((t,s) -> t'*s, D.diag, parent(x)))
-*(x::Transpose{<:Any,<:AbstractVector}, D::Diagonal) = Transpose(map((t,s) -> transpose(t)*s, D.diag, parent(x)))
-*(x::Adjoint{<:Any,<:AbstractVector},   D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
-*(x::Transpose{<:Any,<:AbstractVector}, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
+# disambiguation methods: * and / of Diagonal and Adj/Trans AbsVec
+*(x::AdjointAbsVec, D::Diagonal) = Adjoint(map((t,s) -> t'*s, D.diag, parent(x)))
+*(x::TransposeAbsVec, D::Diagonal) = Transpose(map((t,s) -> transpose(t)*s, D.diag, parent(x)))
+*(x::AdjointAbsVec,   D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
+*(x::TransposeAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
+/(u::AdjointAbsVec, D::Diagonal) = adjoint(adjoint(D) \ u.parent)
+/(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) \ u.parent)
+# disambiguation methods: Call unoptimized version for user defined AbstractTriangular.
+*(A::AbstractTriangular, D::Diagonal) = Base.@invoke *(A::AbstractMatrix, D::Diagonal)
+*(D::Diagonal, A::AbstractTriangular) = Base.@invoke *(D::Diagonal, A::AbstractMatrix)
+
 dot(x::AbstractVector, D::Diagonal, y::AbstractVector) = _mapreduce_prod(dot, x, D, y)
 
 dot(A::Diagonal, B::Diagonal) = dot(A.diag, B.diag)
@@ -583,13 +709,12 @@ dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A))
 
 function _mapreduce_prod(f, x, D::Diagonal, y)
     if isempty(x) && isempty(D) && isempty(y)
-        return zero(Base.promote_op(f, eltype(x), eltype(D), eltype(y)))
+        return zero(promote_op(f, eltype(x), eltype(D), eltype(y)))
     else
         return mapreduce(t -> f(t[1], t[2], t[3]), +, zip(x, D.diag, y))
     end
 end
 
-
 function cholesky!(A::Diagonal, ::NoPivot = NoPivot(); check::Bool = true)
     info = 0
     for (i, di) in enumerate(A.diag)
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
index cf7e474468785..5db2b525ee584 100644
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ b/stdlib/LinearAlgebra/src/generic.jl
@@ -8,7 +8,8 @@
 # inside this function.
 function *ₛ end
 Broadcast.broadcasted(::typeof(*ₛ), out, beta) =
-    iszero(beta::Number) ? false : broadcasted(*, out, beta)
+    iszero(beta::Number) ? false :
+    isone(beta::Number) ? broadcasted(identity, out) : broadcasted(*, out, beta)
 
 """
     MulAddMul(alpha, beta)
@@ -448,45 +449,11 @@ diag(A::AbstractVector) = throw(ArgumentError("use diagm instead of diag to cons
 # Dot products and norms
 
 # special cases of norm; note that they don't need to handle isempty(x)
-function generic_normMinusInf(x)
-    (v, s) = iterate(x)::Tuple
-    minabs = norm(v)
-    while true
-        y = iterate(x, s)
-        y === nothing && break
-        (v, s) = y
-        vnorm = norm(v)
-        minabs = ifelse(isnan(minabs) | (minabs < vnorm), minabs, vnorm)
-    end
-    return float(minabs)
-end
+generic_normMinusInf(x) = float(mapreduce(norm, min, x))
 
-function generic_normInf(x)
-    (v, s) = iterate(x)::Tuple
-    maxabs = norm(v)
-    while true
-        y = iterate(x, s)
-        y === nothing && break
-        (v, s) = y
-        vnorm = norm(v)
-        maxabs = ifelse(isnan(maxabs) | (maxabs > vnorm), maxabs, vnorm)
-    end
-    return float(maxabs)
-end
+generic_normInf(x) = float(mapreduce(norm, max, x))
 
-function generic_norm1(x)
-    (v, s) = iterate(x)::Tuple
-    av = float(norm(v))
-    T = typeof(av)
-    sum::promote_type(Float64, T) = av
-    while true
-        y = iterate(x, s)
-        y === nothing && break
-        (v, s) = y
-        sum += norm(v)
-    end
-    return convert(T, sum)
-end
+generic_norm1(x) = mapreduce(float ∘ norm, +, x)
 
 # faster computation of norm(x)^2, avoiding overflow for integers
 norm_sqr(x) = norm(x)^2
@@ -1515,9 +1482,9 @@ end
 end
 
 # apply reflector from left
-@inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractMatrix)
+@inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat)
     require_one_based_indexing(x)
-    m, n = size(A)
+    m, n = size(A, 1), size(A, 2)
     if length(x) != m
         throw(DimensionMismatch("reflector has length $(length(x)), which must match the first dimension of matrix A, $m"))
     end
@@ -1594,6 +1561,8 @@ julia> logabsdet(B)
 """
 logabsdet(A::AbstractMatrix) = logabsdet(lu(A, check=false))
 
+logabsdet(a::Number) = log(abs(a)), sign(a)
+
 """
     logdet(M)
 
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
index 9a864da2b0a37..e79786da925aa 100644
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -483,9 +483,9 @@ Q factor:
  0.0  -0.707107   0.707107
 H factor:
 3×3 UpperHessenberg{Float64, Matrix{Float64}}:
-  4.0      -11.3137      -1.41421
- -5.65685    5.0          2.0
-   ⋅        -1.0444e-15   1.0
+  4.0      -11.3137       -1.41421
+ -5.65685    5.0           2.0
+   ⋅        -8.88178e-16   1.0
 
 julia> F.Q * F.H * F.Q'
 3×3 Matrix{Float64}:
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
index 05e080e00450d..0aa8f1689f23c 100644
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ b/stdlib/LinearAlgebra/src/lapack.jl
@@ -12,12 +12,10 @@ const libblastrampoline = "libblastrampoline"
 # of BLAS.get_config()
 const liblapack = libblastrampoline
 
-import ..LinearAlgebra.BLAS.@blasfunc
+using ..LinearAlgebra.BLAS: @blasfunc, chkuplo
 
-import ..LinearAlgebra: BlasFloat, BlasInt, LAPACKException,
-    DimensionMismatch, SingularException, PosDefException, chkstride1, checksquare
-
-using ..LinearAlgebra: triu, tril, dot
+using ..LinearAlgebra: BlasFloat, BlasInt, LAPACKException, DimensionMismatch,
+    SingularException, PosDefException, chkstride1, checksquare,triu, tril, dot
 
 using Base: iszero, require_one_based_indexing
 
@@ -56,14 +54,6 @@ function chkposdef(ret::BlasInt)
     end
 end
 
-"Check that upper/lower (for special matrices) is correctly specified"
-function chkuplo(uplo::AbstractChar)
-    if !(uplo == 'U' || uplo == 'L')
-        throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
-    end
-    uplo
-end
-
 "Check that {c}transpose is correctly specified"
 function chktrans(trans::AbstractChar)
     if !(trans == 'N' || trans == 'C' || trans == 'T')
@@ -523,7 +513,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
                       m, n, A, max(1,stride(A,2)), tau, work, lwork, info)
                 chklapackerror(info[])
                 if i == 1
-                    lwork = BlasInt(real(work[1]))
+                    lwork = max(BlasInt(1),BlasInt(real(work[1])))
                     resize!(work, lwork)
                 end
             end
@@ -552,7 +542,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
                       m, n, A, max(1,stride(A,2)), tau, work, lwork, info)
                 chklapackerror(info[])
                 if i == 1
-                    lwork = BlasInt(real(work[1]))
+                    lwork = max(BlasInt(m), BlasInt(real(work[1])))
                     resize!(work, lwork)
                 end
             end
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index 67ce521a9aa7e..26b3a1210a3f9 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ## This file contains libblastrampoline-specific APIs
 
 # Keep these in sync with `src/libblastrampoline_internal.h`
@@ -17,6 +19,7 @@ const LBT_INTERFACE_MAP = Dict(
     LBT_INTERFACE_ILP64   => :ilp64,
     LBT_INTERFACE_UNKNOWN => :unknown,
 )
+const LBT_INV_INTERFACE_MAP = Dict(v => k for (k, v) in LBT_INTERFACE_MAP)
 
 const LBT_F2C_PLAIN         =  0
 const LBT_F2C_REQUIRED      =  1
@@ -26,6 +29,7 @@ const LBT_F2C_MAP = Dict(
     LBT_F2C_REQUIRED => :required,
     LBT_F2C_UNKNOWN  => :unknown,
 )
+const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP)
 
 struct LBTLibraryInfo
     libname::String
@@ -164,14 +168,74 @@ function lbt_get_default_func()
     return ccall((:lbt_get_default_func, libblastrampoline), Ptr{Cvoid}, ())
 end
 
-#=
-Don't define footgun API (yet)
+"""
+    lbt_find_backing_library(symbol_name, interface; config::LBTConfig = lbt_get_config())
 
-function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
-    return ccall((:lbt_get_forward, libblastrampoline), Ptr{Cvoid}, (Cstring, Int32, Int32), symbol_name, interface, f2c)
+Return the `LBTLibraryInfo` that represents the backing library for the given symbol
+exported from libblastrampoline.  This allows us to discover which library will service
+a particular BLAS call from Julia code.  This method returns `nothing` if either of the
+following conditions is met:
+
+ * No loaded library exports the desired symbol (the default function will be called)
+ * The symbol was set via `lbt_set_forward()`, which does not track library provenance.
+
+If the given `symbol_name` is not contained within the list of exported symbols, an
+`ArgumentError` will be thrown.
+"""
+function lbt_find_backing_library(symbol_name, interface::Symbol;
+                                  config::LBTConfig = lbt_get_config())
+    if interface ∉ (:ilp64, :lp64)
+        throw(ArgumentError("Invalid interface specification: '$(interface)'"))
+    end
+    symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols)
+    if symbol_idx === nothing
+        throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'"))
+    end
+    # Convert to zero-indexed
+    symbol_idx -= 1
+
+    forward_byte_offset = div(symbol_idx, 8)
+    forward_byte_mask = 1 << mod(symbol_idx, 8)
+    for lib in filter(l -> l.interface == interface, config.loaded_libs)
+        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
+            return lib
+        end
+    end
+
+    # No backing library was found
+    return nothing
 end
 
+
+## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
+## bizarre and complex setups to be created.  If you run into strange errors while using
+## it, the first thing you should ask yourself is whether you've set things up properly.
 function lbt_set_forward(symbol_name, addr, interface, f2c = LBT_F2C_PLAIN; verbose::Bool = false)
-    return ccall((:lbt_set_forward, libblastrampoline), Int32, (Cstring, Ptr{Cvoid}, Int32, Int32, Int32), symbol_name, addr, interface, f2c, verbose ? 1 : 0)
+    return ccall(
+        (:lbt_set_forward, libblastrampoline),
+        Int32,
+        (Cstring, Ptr{Cvoid}, Int32, Int32, Int32),
+        string(symbol_name),
+        addr,
+        Int32(interface),
+        Int32(f2c),
+        verbose ? Int32(1) : Int32(0),
+    )
+end
+function lbt_set_forward(symbol_name, addr, interface::Symbol, f2c::Symbol = :plain; kwargs...)
+    return lbt_set_forward(symbol_name, addr, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c]; kwargs...)
+end
+
+function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
+    return ccall(
+        (:lbt_get_forward, libblastrampoline),
+        Ptr{Cvoid},
+        (Cstring, Int32, Int32),
+        string(symbol_name),
+        Int32(interface),
+        Int32(f2c),
+    )
+end
+function lbt_get_forward(symbol_name, interface::Symbol, f2c::Symbol = :plain)
+    return lbt_get_forward(symbol_name, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c])
 end
-=#
\ No newline at end of file
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index fb75669e647fa..0cbfeaf9ed3ea 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -9,7 +9,7 @@ matprod(x, y) = x*y + x*y
 dot(x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where {T<:BlasReal} = BLAS.dot(x, y)
 dot(x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where {T<:BlasComplex} = BLAS.dotc(x, y)
 
-function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector{T}, ry::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasReal,TI<:Integer}
+function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasReal,TI<:Integer}
     if length(rx) != length(ry)
         throw(DimensionMismatch("length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
     end
@@ -22,7 +22,7 @@ function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector
     GC.@preserve x y BLAS.dot(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry))
 end
 
-function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector{T}, ry::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasComplex,TI<:Integer}
+function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasComplex,TI<:Integer}
     if length(rx) != length(ry)
         throw(DimensionMismatch("length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
     end
@@ -625,7 +625,7 @@ end
 
 lapack_size(t::AbstractChar, M::AbstractVecOrMat) = (size(M, t=='N' ? 1 : 2), size(M, t=='N' ? 2 : 1))
 
-function copyto!(B::AbstractVecOrMat, ir_dest::UnitRange{Int}, jr_dest::UnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::UnitRange{Int}, jr_src::UnitRange{Int})
+function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
     if tM == 'N'
         copyto!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
@@ -635,7 +635,7 @@ function copyto!(B::AbstractVecOrMat, ir_dest::UnitRange{Int}, jr_dest::UnitRang
     B
 end
 
-function copy_transpose!(B::AbstractMatrix, ir_dest::UnitRange{Int}, jr_dest::UnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::UnitRange{Int}, jr_src::UnitRange{Int})
+function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
     if tM == 'N'
         LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
@@ -726,10 +726,6 @@ function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S})
 end
 
 const tilebufsize = 10800  # Approximately 32k/3
-# per-thread arrays of buffers resized by __init__ if needed
-const Abuf = [Vector{UInt8}(undef, tilebufsize)]
-const Bbuf = [Vector{UInt8}(undef, tilebufsize)]
-const Cbuf = [Vector{UInt8}(undef, tilebufsize)]
 
 function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
                             _add::MulAddMul=MulAddMul())
@@ -775,9 +771,8 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat
     @inbounds begin
     if tile_size > 0
         sz = (tile_size, tile_size)
-        # FIXME: This code is completely invalid!!!
-        Atile = unsafe_wrap(Array, convert(Ptr{T}, pointer(Abuf[Threads.threadid()])), sz)
-        Btile = unsafe_wrap(Array, convert(Ptr{S}, pointer(Bbuf[Threads.threadid()])), sz)
+        Atile = Array{T}(undef, sz)
+        Btile = Array{S}(undef, sz)
 
         z1 = zero(A[1, 1]*B[1, 1] + A[1, 1]*B[1, 1])
         z = convert(promote_type(typeof(z1), R), z1)
@@ -797,8 +792,7 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat
                 end
             end
         else
-            # FIXME: This code is completely invalid!!!
-            Ctile = unsafe_wrap(Array, convert(Ptr{R}, pointer(Cbuf[Threads.threadid()])), sz)
+            Ctile = Array{R}(undef, sz)
             for jb = 1:tile_size:nB
                 jlim = min(jb+tile_size-1,nB)
                 jlen = jlim-jb+1
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
index 15bc61e1b1774..671abc00a6cfe 100644
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ b/stdlib/LinearAlgebra/src/qr.jl
@@ -270,13 +270,13 @@ function qrfactPivotedUnblocked!(A::AbstractMatrix)
 
         # Compute reflector of columns j
         x = view(A, j:m, j)
-        τj = LinearAlgebra.reflector!(x)
+        τj = reflector!(x)
         τ[j] = τj
 
         # Update trailing submatrix with reflector
-        LinearAlgebra.reflectorApply!(x, τj, view(A, j:m, j+1:n))
+        reflectorApply!(x, τj, view(A, j:m, j+1:n))
     end
-    return LinearAlgebra.QRPivoted{eltype(A), typeof(A)}(A, τ, piv)
+    return QRPivoted{eltype(A), typeof(A)}(A, τ, piv)
 end
 
 # LAPACK version
@@ -567,14 +567,18 @@ size(F::Union{QR,QRCompactWY,QRPivoted}) = size(getfield(F, :factors))
 size(Q::AbstractQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim)
 size(Q::AbstractQ) = size(Q, 1), size(Q, 2)
 
-function getindex(Q::AbstractQ, i::Integer, j::Integer)
-    x = zeros(eltype(Q), size(Q, 1))
-    x[i] = 1
+copy(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q)))
+getindex(Q::AbstractQ, inds...) = copy(Q)[inds...]
+getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q)
+
+function getindex(Q::AbstractQ, ::Colon, j::Int)
     y = zeros(eltype(Q), size(Q, 2))
     y[j] = 1
-    return dot(x, lmul!(Q, y))
+    lmul!(Q, y)
 end
 
+getindex(Q::AbstractQ, i::Int, j::Int) = Q[:, j][i]
+
 # specialization avoiding the fallback using slow `getindex`
 function copyto!(dest::AbstractMatrix, src::AbstractQ)
     copyto!(dest, I)
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index 5c25c0993e9cc..b71e588b87feb 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -28,6 +28,9 @@ Tridiagonal(A::Bidiagonal) =
 
 # conversions from SymTridiagonal to other special matrix types
 Diagonal(A::SymTridiagonal) = Diagonal(A.dv)
+
+# These can fail when ev has the same length as dv
+# TODO: Revisit when a good solution for #42477 is found
 Bidiagonal(A::SymTridiagonal) =
     iszero(A.ev) ? Bidiagonal(A.dv, A.ev, :U) :
         throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
@@ -154,10 +157,10 @@ end
 
 # this set doesn't have the aforementioned problem
 
-+(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+B.ev, A.d+B.dv, A.du+B.ev)
--(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-B.ev, A.d-B.dv, A.du-B.ev)
-+(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(A.ev+B.dl, A.dv+B.d, A.ev+B.du)
--(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(A.ev-B.dl, A.dv-B.d, A.ev-B.du)
++(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B))
+-(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B))
++(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)+B.dl, A.dv+B.d, _evview(A)+B.du)
+-(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du)
 
 
 function (+)(A::Diagonal, B::Tridiagonal)
@@ -202,22 +205,22 @@ end
 
 function (+)(A::Bidiagonal, B::SymTridiagonal)
     newdv = A.dv+B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(B.ev), A.dv+B.dv, A.ev+B.ev) : (A.ev+B.ev, A.dv+B.dv, typeof(newdv)(B.ev)))...)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...)
 end
 
 function (-)(A::Bidiagonal, B::SymTridiagonal)
     newdv = A.dv-B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.ev), newdv, A.ev-B.ev) : (A.ev-B.ev, newdv, typeof(newdv)(-B.ev)))...)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...)
 end
 
 function (+)(A::SymTridiagonal, B::Bidiagonal)
     newdv = A.dv+B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.ev), newdv, A.ev+B.ev) : (A.ev+B.ev, newdv, typeof(newdv)(A.ev)))...)
+    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)+B.ev) : (_evview(A)+B.ev, newdv, typeof(newdv)(_evview(A))))...)
 end
 
 function (-)(A::SymTridiagonal, B::Bidiagonal)
     newdv = A.dv-B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.ev), newdv, A.ev-B.ev) : (A.ev-B.ev, newdv, typeof(newdv)(A.ev)))...)
+    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
 end
 
 # fixing uniform scaling problems from #28994
@@ -308,17 +311,20 @@ function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, x)
     not be filled with $x, since some of its entries are constrained."))
 end
 
-one(A::Diagonal{T}) where T = Diagonal(fill!(similar(A.diag, typeof(one(T))), one(T)))
+one(D::Diagonal) = Diagonal(one.(D.diag))
 one(A::Bidiagonal{T}) where T = Bidiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))), A.uplo)
 one(A::Tridiagonal{T}) where T = Tridiagonal(fill!(similar(A.du, typeof(one(T))), zero(one(T))), fill!(similar(A.d, typeof(one(T))), one(T)), fill!(similar(A.dl, typeof(one(T))), zero(one(T))))
 one(A::SymTridiagonal{T}) where T = SymTridiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))))
+
+zero(D::Diagonal) = Diagonal(zero.(D.diag))
+oneunit(D::Diagonal) = Diagonal(oneunit.(D.diag))
+
 # equals and approx equals methods for structured matrices
 # SymTridiagonal == Tridiagonal is already defined in tridiag.jl
 
-# SymTridiagonal and Bidiagonal have the same field names
-==(A::Diagonal, B::Union{SymTridiagonal, Bidiagonal}) = iszero(B.ev) && A.diag == B.dv
+==(A::Diagonal, B::Bidiagonal) = iszero(B.ev) && A.diag == B.dv
+==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == B.dv
 ==(B::Bidiagonal, A::Diagonal) = A == B
-
 ==(A::Diagonal, B::Tridiagonal) = iszero(B.dl) && iszero(B.du) && A.diag == B.d
 ==(B::Tridiagonal, A::Diagonal) = A == B
 
@@ -331,5 +337,5 @@ function ==(A::Bidiagonal, B::Tridiagonal)
 end
 ==(B::Tridiagonal, A::Bidiagonal) = A == B
 
-==(A::Bidiagonal, B::SymTridiagonal) = iszero(B.ev) && iszero(A.ev) && A.dv == B.dv
+==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv
 ==(B::SymTridiagonal, A::Bidiagonal) = A == B
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
index d8e7b6609c055..7347dd6f78639 100644
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ b/stdlib/LinearAlgebra/src/symmetric.jl
@@ -7,6 +7,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
 
     function Symmetric{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}}
         require_one_based_indexing(data)
+        (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,S}(data, uplo)
     end
 end
@@ -88,6 +89,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
 
     function Hermitian{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}}
         require_one_based_indexing(data)
+        (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,S}(data, uplo)
     end
 end
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
index 0f18d290b4d62..f19ab20866923 100644
--- a/stdlib/LinearAlgebra/src/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl
@@ -1,7 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Eigensolvers for symmetric and Hermitian matrices
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) = Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
+eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) =
+    Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
 
 function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
     T = eltype(A)
@@ -9,7 +10,8 @@ function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothin
     eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby)
 end
 
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) = Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
+eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
+    Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
 
 """
     eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> Eigen
@@ -60,13 +62,16 @@ function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real)
     eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh)
 end
 
-eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}) =
-    LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
+function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing)
+    vals = LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
+    isnothing(sortby) || sort!(vals, by=sortby)  # LAPACK order is kept unless a sort key is given
+    return vals
+end
 
-function eigvals(A::RealHermSymComplexHerm)
+function eigvals(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
     T = eltype(A)
     S = eigtype(T)
-    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A))
+    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby)
 end
 
 """
@@ -263,9 +268,14 @@ function _UtiAsymUi_diag!(uplo, A, U)
     return A
 end
 
-eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}) where {T<:BlasReal,S<:StridedMatrix} =
-    LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
-eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}) where {T<:BlasComplex,S<:StridedMatrix} =
-    LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
-
+function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix}
+    vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
+    isnothing(sortby) || sort!(vals, by=sortby)
+    return vals
+end
+function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix}
+    vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
+    isnothing(sortby) || sort!(vals, by=sortby)
+    return vals
+end
 eigvecs(A::HermOrSym) = eigvecs(eigen(A))
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
index cc551e4911acf..ee8e553fd3aaa 100644
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ b/stdlib/LinearAlgebra/src/tridiag.jl
@@ -125,17 +125,13 @@ AbstractMatrix{T}(S::SymTridiagonal) where {T} =
 function Matrix{T}(M::SymTridiagonal) where T
     n = size(M, 1)
     Mf = zeros(T, n, n)
-    if n == 0
-        return Mf
-    end
-    @inbounds begin
-        @simd for i = 1:n-1
-            Mf[i,i] = M.dv[i]
-            Mf[i+1,i] = M.ev[i]
-            Mf[i,i+1] = M.ev[i]
-        end
-        Mf[n,n] = M.dv[n]
+    n == 0 && return Mf
+    @inbounds for i = 1:n-1
+        Mf[i,i] = symmetric(M.dv[i], :U)
+        Mf[i+1,i] = transpose(M.ev[i])
+        Mf[i,i+1] = M.ev[i]
     end
+    Mf[n,n] = symmetric(M.dv[n], :U)
     return Mf
 end
 Matrix(M::SymTridiagonal{T}) where {T} = Matrix{T}(M)
@@ -160,7 +156,7 @@ similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T
 # similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
 
 copyto!(dest::SymTridiagonal, src::SymTridiagonal) =
-    (copyto!(dest.dv, src.dv); copyto!(dest.ev, src.ev); dest)
+    (copyto!(dest.dv, src.dv); copyto!(dest.ev, _evview(src)); dest)
 
 #Elementary operations
 for func in (:conj, :copy, :real, :imag)
@@ -172,19 +168,19 @@ adjoint(S::SymTridiagonal{<:Real}) = S
 adjoint(S::SymTridiagonal) = Adjoint(S)
 Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...)
 
-ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(@view S.ev[begin:length(S.dv) - 1])
+ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S))
 issymmetric(S::SymTridiagonal) = true
 
-function diag(M::SymTridiagonal{<:Number}, n::Integer=0)
+function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     absn = abs(n)
     if absn == 0
         return copyto!(similar(M.dv, length(M.dv)), M.dv)
     elseif absn == 1
-        return copyto!(similar(M.ev, length(M.ev)), M.ev)
+        return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
     elseif absn <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-absn), 0)
+        return fill!(similar(M.dv, size(M,1)-absn), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
             "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
@@ -196,9 +192,9 @@ function diag(M::SymTridiagonal, n::Integer=0)
     if n == 0
         return copyto!(similar(M.dv, length(M.dv)), symmetric.(M.dv, :U))
     elseif n == 1
-        return copyto!(similar(M.ev, length(M.ev)), M.ev)
+        return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
     elseif n == -1
-        return copyto!(similar(M.ev, length(M.ev)), transpose.(M.ev))
+        return copyto!(similar(M.ev, length(M.dv)-1), transpose.(_evview(M)))
     elseif n <= size(M,1)
         throw(ArgumentError("requested diagonal contains undefined zeros of an array type"))
     else
@@ -207,14 +203,14 @@ function diag(M::SymTridiagonal, n::Integer=0)
     end
 end
 
-+(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv+B.dv, A.ev+B.ev)
--(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv-B.dv, A.ev-B.ev)
++(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv+B.dv, _evview(A)+_evview(B))
+-(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv-B.dv, _evview(A)-_evview(B))
 -(A::SymTridiagonal) = SymTridiagonal(-A.dv, -A.ev)
 *(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv*B, A.ev*B)
 *(B::Number, A::SymTridiagonal) = SymTridiagonal(B*A.dv, B*A.ev)
 /(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv/B, A.ev/B)
 \(B::Number, A::SymTridiagonal) = SymTridiagonal(B\A.dv, B\A.ev)
-==(A::SymTridiagonal, B::SymTridiagonal) = (A.dv==B.dv) && (A.ev==B.ev)
+==(A::SymTridiagonal, B::SymTridiagonal) = (A.dv==B.dv) && (_evview(A)==_evview(B))
 
 @inline mul!(A::StridedVecOrMat, B::SymTridiagonal, C::StridedVecOrMat,
              alpha::Number, beta::Number) =
@@ -359,33 +355,34 @@ function svdvals!(A::SymTridiagonal)
     return sort!(map!(abs, vals, vals); rev=true)
 end
 
-#tril and triu
+# tril and triu
 
 function istriu(M::SymTridiagonal, k::Integer=0)
     if k <= -1
         return true
     elseif k == 0
-        return iszero(M.ev)
+        return iszero(_evview(M))
     else # k >= 1
-        return iszero(M.ev) && iszero(M.dv)
+        return iszero(_evview(M)) && iszero(M.dv)
     end
 end
 istril(M::SymTridiagonal, k::Integer) = istriu(M, -k)
-iszero(M::SymTridiagonal) = iszero(M.ev) && iszero(M.dv)
-isone(M::SymTridiagonal) = iszero(M.ev) && all(isone, M.dv)
-isdiag(M::SymTridiagonal) = iszero(M.ev)
+iszero(M::SymTridiagonal) = iszero(_evview(M)) && iszero(M.dv)
+isone(M::SymTridiagonal) = iszero(_evview(M)) && all(isone, M.dv)
+isdiag(M::SymTridiagonal) = iszero(_evview(M))
+
 
-function tril!(M::SymTridiagonal, k::Integer=0)
+function tril!(M::SymTridiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < -1
-        fill!(M.ev,0)
-        fill!(M.dv,0)
+        fill!(M.ev, zero(T))
+        fill!(M.dv, zero(T))
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     elseif k == -1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
     elseif k == 0
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
@@ -394,17 +391,17 @@ function tril!(M::SymTridiagonal, k::Integer=0)
     end
 end
 
-function triu!(M::SymTridiagonal, k::Integer=0)
+function triu!(M::SymTridiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 1
-        fill!(M.ev,0)
-        fill!(M.dv,0)
+        fill!(M.ev, zero(T))
+        fill!(M.dv, zero(T))
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     elseif k == 1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
     elseif k == 0
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
@@ -447,22 +444,20 @@ end
 det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift)
 logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift))
 
-function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T
-    if !(1 <= i <= size(A,2) && 1 <= j <= size(A,2))
-        throw(BoundsError(A, (i,j)))
-    end
+@inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T
+    @boundscheck checkbounds(A, i, j)
     if i == j
-        return symmetric(A.dv[i], :U)::symmetric_type(eltype(A.dv))
+        return symmetric((@inbounds A.dv[i]), :U)::symmetric_type(eltype(A.dv))
     elseif i == j + 1
-        return copy(transpose(A.ev[j])) # materialized for type stability
+        return copy(transpose(@inbounds A.ev[j])) # materialized for type stability
     elseif i + 1 == j
-        return A.ev[i]
+        return @inbounds A.ev[i]
     else
         return zero(T)
     end
 end
 
-function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
+@inline function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
@@ -613,12 +608,12 @@ transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl)
 Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...))
 Base.copy(tS::Transpose{<:Any,<:Tridiagonal}) = (S = tS.parent; Tridiagonal(map(x -> copy.(transpose.(x)), (S.du, S.d, S.dl))...))
 
-ishermitian(S::Tridiagonal) = isreal(S.d) && S.du == adjoint.(S.dl)
-issymmetric(S::Tridiagonal) = S.du == S.dl
+ishermitian(S::Tridiagonal) = all(ishermitian, S.d) && all(Iterators.map((x, y) -> x == y', S.du, S.dl))
+issymmetric(S::Tridiagonal) = all(issymmetric, S.d) && all(Iterators.map((x, y) -> x == transpose(y), S.du, S.dl))
 
 \(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:StridedVecOrMat}) = copy(A) \ B
 
-function diag(M::Tridiagonal, n::Integer=0)
+function diag(M::Tridiagonal{T}, n::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     if n == 0
@@ -628,29 +623,27 @@ function diag(M::Tridiagonal, n::Integer=0)
     elseif n == 1
         return copyto!(similar(M.du, length(M.du)), M.du)
     elseif abs(n) <= size(M,1)
-        return fill!(similar(M.d, size(M,1)-abs(n)), 0)
+        return fill!(similar(M.d, size(M,1)-abs(n)), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
             "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
     end
 end
 
-function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T
-    if !(1 <= i <= size(A,2) && 1 <= j <= size(A,2))
-        throw(BoundsError(A, (i,j)))
-    end
+@inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T
+    @boundscheck checkbounds(A, i, j)
     if i == j
-        return A.d[i]
+        return @inbounds A.d[i]
     elseif i == j + 1
-        return A.dl[j]
+        return @inbounds A.dl[j]
     elseif i + 1 == j
-        return A.du[i]
+        return @inbounds A.du[i]
     else
         return zero(T)
     end
 end
 
-function setindex!(A::Tridiagonal, x, i::Integer, j::Integer)
+@inline function setindex!(A::Tridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.d[i] = x
@@ -699,38 +692,38 @@ function istril(M::Tridiagonal, k::Integer=0)
 end
 isdiag(M::Tridiagonal) = iszero(M.dl) && iszero(M.du)
 
-function tril!(M::Tridiagonal, k::Integer=0)
+function tril!(M::Tridiagonal{T}, k::Integer=0) where T
     n = length(M.d)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < -1
-        fill!(M.dl,0)
-        fill!(M.d,0)
-        fill!(M.du,0)
+        fill!(M.dl, zero(T))
+        fill!(M.d, zero(T))
+        fill!(M.du, zero(T))
     elseif k == -1
-        fill!(M.d,0)
-        fill!(M.du,0)
+        fill!(M.d, zero(T))
+        fill!(M.du, zero(T))
     elseif k == 0
-        fill!(M.du,0)
+        fill!(M.du, zero(T))
     end
     return M
 end
 
-function triu!(M::Tridiagonal, k::Integer=0)
+function triu!(M::Tridiagonal{T}, k::Integer=0) where T
     n = length(M.d)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 1
-        fill!(M.dl,0)
-        fill!(M.d,0)
-        fill!(M.du,0)
+        fill!(M.dl, zero(T))
+        fill!(M.d, zero(T))
+        fill!(M.du, zero(T))
     elseif k == 1
-        fill!(M.dl,0)
-        fill!(M.d,0)
+        fill!(M.dl, zero(T))
+        fill!(M.d, zero(T))
     elseif k == 0
-        fill!(M.dl,0)
+        fill!(M.dl, zero(T))
     end
     return M
 end
@@ -747,8 +740,12 @@ end
 \(B::Number, A::Tridiagonal) = Tridiagonal(B\A.dl, B\A.d, B\A.du)
 
 ==(A::Tridiagonal, B::Tridiagonal) = (A.dl==B.dl) && (A.d==B.d) && (A.du==B.du)
-==(A::Tridiagonal, B::SymTridiagonal) = (A.dl==A.du==B.ev) && (A.d==B.dv)
-==(A::SymTridiagonal, B::Tridiagonal) = (B.dl==B.du==A.ev) && (B.d==A.dv)
+function ==(A::Tridiagonal, B::SymTridiagonal)
+    size(A) == size(B) || return false  # the zipped comparisons below stop at the shorter vector (0x0 vs 1x1)
+    iseq = all(Iterators.map((x, y) -> x == transpose(y), A.du, A.dl)) && A.du == _evview(B)
+    iseq && all(Iterators.map((x, y) -> x == symmetric(y, :U), A.d, B.dv))
+end
+==(A::SymTridiagonal, B::Tridiagonal) = B == A
 
 det(A::Tridiagonal) = det_usmani(A.dl, A.d, A.du)
 
@@ -763,7 +760,10 @@ function SymTridiagonal{T}(M::Tridiagonal) where T
 end
 
 Base._sum(A::Tridiagonal, ::Colon) = sum(A.d) + sum(A.dl) + sum(A.du)
-Base._sum(A::SymTridiagonal, ::Colon) = sum(A.dv) + 2sum(A.ev)
+function Base._sum(A::SymTridiagonal, ::Colon)
+    se = sum(_evview(A))
+    symmetric(sum(A.dv), :U) + se + transpose(se)
+end
 
 function Base._sum(A::Tridiagonal, dims::Integer)
     res = Base.reducedim_initarray(A, dims, zero(eltype(A)))
@@ -810,24 +810,24 @@ function Base._sum(A::SymTridiagonal, dims::Integer)
     end
     @inbounds begin
         if dims == 1
-            res[1] = A.ev[1] + A.dv[1]
+            res[1] = transpose(A.ev[1]) + symmetric(A.dv[1], :U)
             for i = 2:n-1
-                res[i] = A.ev[i] + A.dv[i] + A.ev[i-1]
+                res[i] = transpose(A.ev[i]) + symmetric(A.dv[i], :U) + A.ev[i-1]
             end
-            res[n] = A.dv[n] + A.ev[n-1]
+            res[n] = symmetric(A.dv[n], :U) + A.ev[n-1]
         elseif dims == 2
-            res[1] = A.dv[1] + A.ev[1]
+            res[1] = symmetric(A.dv[1], :U) + A.ev[1]
             for i = 2:n-1
-                res[i] = A.ev[i-1] + A.dv[i] + A.ev[i]
+                res[i] = transpose(A.ev[i-1]) + symmetric(A.dv[i], :U) + A.ev[i]
             end
-            res[n] = A.ev[n-1] + A.dv[n]
+            res[n] = transpose(A.ev[n-1]) + symmetric(A.dv[n], :U)
         elseif dims >= 3
             for i = 1:n-1
                 res[i,i+1] = A.ev[i]
-                res[i,i]   = A.dv[i]
-                res[i+1,i] = A.ev[i]
+                res[i,i]   = symmetric(A.dv[i], :U)
+                res[i+1,i] = transpose(A.ev[i])
             end
-            res[n,n] = A.dv[n]
+            res[n,n] = symmetric(A.dv[n], :U)
         end
     end
     res
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index bbcade43c5569..94394c9ba1752 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -88,23 +88,22 @@ ndims(J::UniformScaling) = 2
 Base.has_offset_axes(::UniformScaling) = false
 getindex(J::UniformScaling, i::Integer,j::Integer) = ifelse(i==j,J.λ,zero(J.λ))
 
-getindex(x::UniformScaling, n::Integer, m::AbstractRange{<:Integer}) = getindex(x, m, n)
-function getindex(x::UniformScaling{T}, n::AbstractRange{<:Integer}, m::Integer) where T
-    v = zeros(T, length(n))
-    @inbounds for (i,ii) in enumerate(n)
+getindex(J::UniformScaling, n::Integer, m::AbstractVector{<:Integer}) = getindex(J, m, n)
+function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::Integer) where T
+    v = zeros(T, axes(n))
+    @inbounds for (i,ii) in pairs(n)
         if ii == m
-            v[i] = x.λ
+            v[i] = J.λ
         end
     end
     return v
 end
 
-
-function getindex(x::UniformScaling{T}, n::AbstractRange{<:Integer}, m::AbstractRange{<:Integer}) where T
-    A = zeros(T, length(n), length(m))
-    @inbounds for (j,jj) in enumerate(m), (i,ii) in enumerate(n)
+function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::AbstractVector{<:Integer}) where T
+    A = zeros(T, axes(n)..., axes(m)...)
+    @inbounds for (j,jj) in pairs(m), (i,ii) in pairs(n)
         if ii == jj
-            A[i,j] = x.λ
+            A[i,j] = J.λ
         end
     end
     return A
@@ -404,9 +403,11 @@ promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} =
     (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...)
 promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix
 
-for (f,dim,name) in ((:hcat,1,"rows"), (:vcat,2,"cols"))
+for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols"))
     @eval begin
-        function $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...)
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...)
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $_f(A...)
+        function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...)
             n = -1
             for a in A
                 if !isa(a, UniformScaling)
@@ -424,8 +425,9 @@ for (f,dim,name) in ((:hcat,1,"rows"), (:vcat,2,"cols"))
     end
 end
 
-
-function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...)
+hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...) = _hvcat(rows, A...)
+hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...) = _hvcat(rows, A...)
+function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...)
     require_one_based_indexing(A...)
     nr = length(rows)
     sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments"))
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
index d9efdc1fd3ee6..7aef50b446c85 100644
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ b/stdlib/LinearAlgebra/test/bidiag.jl
@@ -43,6 +43,7 @@ Random.seed!(1)
             @test ubd.dv === x
             @test lbd.ev === y
             @test_throws ArgumentError Bidiagonal(x, y, :R)
+            @test_throws ArgumentError Bidiagonal(x, y, 'R')
             x == dv0 || @test_throws DimensionMismatch Bidiagonal(x, x, :U)
             @test_throws MethodError Bidiagonal(x, y)
             # from matrix
@@ -269,6 +270,11 @@ Random.seed!(1)
                     end
                 end
             end
+            zdv = Vector{elty}(undef, 0)
+            zev = Vector{elty}(undef, 0)
+            zA  = Bidiagonal(zdv, zev, :U)
+            zb  = Vector{elty}(undef, 0)
+            @test ldiv!(zA, zb) === zb
         end
 
         if elty <: BlasReal
@@ -549,6 +555,14 @@ end
             B = Bidiagonal(dv, ev, uplo)
             @test dot(x, B, y) ≈ dot(B'x, y) ≈ dot(x, Matrix(B), y)
         end
+        dv = Vector{elty}(undef, 0)
+        ev = Vector{elty}(undef, 0)
+        x = Vector{elty}(undef, 0)
+        y = Vector{elty}(undef, 0)
+        for uplo in (:U, :L)
+            B = Bidiagonal(dv, ev, uplo)
+            @test dot(x, B, y) ≈ dot(zero(elty), zero(elty), zero(elty))
+        end
     end
 end
 
@@ -665,4 +679,28 @@ using .Main.ImmutableArrays
     @test convert(AbstractMatrix{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
 end
 
+@testset "block-bidiagonal matrix indexing" begin
+    dv = [ones(4,3), ones(2,2).*2, ones(2,3).*3, ones(4,4).*4]
+    evu = [ones(4,2), ones(2,3).*2, ones(2,4).*3]
+    evl = [ones(2,3), ones(2,2).*2, ones(4,3).*3]
+    BU = Bidiagonal(dv, evu, :U)
+    BL = Bidiagonal(dv, evl, :L)
+    # check that all the matrices along a column have the same number of columns,
+    # and the matrices along a row have the same number of rows
+    for j in axes(BU, 2), i in 2:size(BU, 1)
+        @test size(BU[i,j], 2) == size(BU[1,j], 2)
+        @test size(BU[i,j], 1) == size(BU[i,1], 1)
+        if j < i || j > i + 1
+            @test iszero(BU[i,j])
+        end
+    end
+    for j in axes(BL, 2), i in 2:size(BL, 1)
+        @test size(BL[i,j], 2) == size(BL[1,j], 2)
+        @test size(BL[i,j], 1) == size(BL[i,1], 1)
+        if j < i-1 || j > i
+            @test iszero(BL[i,j])
+        end
+    end
+end
+
 end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl
index 911b684b1793b..11e7b89fdb723 100644
--- a/stdlib/LinearAlgebra/test/blas.jl
+++ b/stdlib/LinearAlgebra/test/blas.jl
@@ -314,6 +314,49 @@ Random.seed!(100)
             end
         end
 
+        # spr!
+        if elty in (Float32, Float64)
+            @testset "spr! $elty" begin
+                α = rand(elty)
+                M = rand(elty, n, n)
+                AL = Symmetric(M, :L)
+                AU = Symmetric(M, :U)
+                x = rand(elty, n)
+
+                function pack(A, uplo)
+                    AP = elty[]
+                    for j in 1:n
+                        for i in (uplo==:L ? (j:n) : (1:j))
+                            push!(AP, A[i,j])
+                        end
+                    end
+                    return AP
+                end
+
+                ALP_result_julia_lower = pack(α*x*x' + AL, :L)
+                ALP_result_blas_lower = pack(AL, :L)
+                BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                @test ALP_result_julia_lower ≈ ALP_result_blas_lower
+                ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10))
+                BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10]
+                ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1)
+                BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower)
+
+                AUP_result_julia_upper = pack(α*x*x' + AU, :U)
+                AUP_result_blas_upper = pack(AU, :U)
+                BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                @test AUP_result_julia_upper ≈ AUP_result_blas_upper
+                AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10))
+                BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10]
+                AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1)
+                BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper)
+            end
+        end
+
         #trsm
         A = triu(rand(elty,n,n))
         B = rand(elty,(n,n))
@@ -370,6 +413,41 @@ Random.seed!(100)
         @test all(o4cp .== z4)
         @test all(BLAS.gemv('N', U4, o4) .== v41)
         @test all(BLAS.gemv('N', U4, o4) .== v41)
+        @testset "non-standard strides" begin
+            if elty <: Complex
+                A = elty[1+2im 3+4im 5+6im 7+8im; 2+3im 4+5im 6+7im 8+9im; 3+4im 5+6im 7+8im 9+10im]
+                v = elty[1+2im, 2+3im, 3+4im, 4+5im, 5+6im]
+                dest = view(ones(elty, 7), 6:-2:2)
+                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(v, 1:3:4), elty(3), dest) == elty[-31+154im, -35+178im, -39+202im]
+                @test BLAS.gemv('N', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[15-41im, 17-49im]
+                @test BLAS.gemv('N', view(A, 1:0, 1:2), view(v, 1:2)) == elty[]
+                dest = view(ones(elty, 5), 4:-2:2)
+                @test BLAS.gemv!('T', elty(2), view(A, :, 2:2:4), view(v, 1:2:5), elty(3), dest) == elty[-45+202im, -69+370im]
+                @test BLAS.gemv('T', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[14-38im, 18-54im]
+                @test BLAS.gemv('T', view(A, 2:3, 2:1), view(v, 1:2)) == elty[]
+                dest = view(ones(elty, 5), 4:-2:2)
+                @test BLAS.gemv!('C', elty(2), view(A, :, 2:2:4), view(v, 5:-2:1), elty(3), dest) == elty[179+6im, 347+30im]
+                @test BLAS.gemv('C', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[-40-6im, -56-10im]
+                @test BLAS.gemv('C', view(A, 2:3, 2:1), view(v, 1:2)) == elty[]
+            else
+                A = elty[1 2 3 4; 5 6 7 8; 9 10 11 12]
+                v = elty[1, 2, 3, 4, 5]
+                dest = view(ones(elty, 7), 6:-2:2)
+                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(v, 1:3:4), elty(3), dest) == elty[39, 79, 119]
+                @test BLAS.gemv('N', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[-19, -31]
+                @test BLAS.gemv('N', view(A, 1:0, 1:2), view(v, 1:2)) == elty[]
+                for trans = ('T', 'C')
+                    dest = view(ones(elty, 5), 4:-2:2)
+                    @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(v, 1:2:5), elty(3), dest) == elty[143, 179]
+                    @test BLAS.gemv(trans, elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[-22, -25]
+                    @test BLAS.gemv(trans, view(A, 2:3, 2:1), view(v, 1:2)) == elty[]
+                end
+            end
+            for trans = ('N', 'T', 'C')
+                @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2:3, 1:2), view(v, 1:2))
+                @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2, 2:-1:1), view(v, 1:2))
+            end
+        end
     end
     @testset "gemm" begin
         @test all(BLAS.gemm('N', 'N', I4, I4) .== I4)
@@ -459,6 +537,7 @@ Base.setindex!(A::WrappedArray{T, N}, v, I::Vararg{Int, N}) where {T, N} = setin
 Base.unsafe_convert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.unsafe_convert(Ptr{T}, A.A)
 
 Base.strides(A::WrappedArray) = strides(A.A)
+Base.elsize(::Type{WrappedArray{T,N}}) where {T,N} = Base.elsize(Array{T,N})
 
 @testset "strided interface adjtrans" begin
     x = WrappedArray([1, 2, 3, 4])
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
index c8a566953b6d2..8a83ba768f2f6 100644
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ b/stdlib/LinearAlgebra/test/cholesky.jl
@@ -12,6 +12,10 @@ function unary_ops_tests(a, ca, tol; n=size(a, 1))
     @test abs((det(ca) - det(a))/det(ca)) <= tol # Ad hoc, but statistically verified, revisit
     @test logdet(ca) ≈ logdet(a)
     @test logdet(ca) ≈ log(det(ca))  # logdet is less likely to overflow
+    logabsdet_ca = logabsdet(ca)
+    logabsdet_a = logabsdet(a)
+    @test logabsdet_ca[1] ≈ logabsdet_a[1]
+    @test logabsdet_ca[2] ≈ logabsdet_a[2]
     @test isposdef(ca)
     @test_throws ErrorException ca.Z
     @test size(ca) == size(a)
@@ -507,6 +511,7 @@ end
     @test det(B)  ==  0.0
     @test det(B)  ≈  det(A) atol=eps()
     @test logdet(B)  ==  -Inf
+    @test logabsdet(B)[1] == -Inf
  end
 
 end # module TestCholesky
diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl
index 57cb06786e994..f03bf4a953ac6 100644
--- a/stdlib/LinearAlgebra/test/dense.jl
+++ b/stdlib/LinearAlgebra/test/dense.jl
@@ -18,27 +18,39 @@ n2 = 2*n1
 Random.seed!(1234323)
 
 @testset "Matrix condition number" begin
-    ainit = rand(n,n)
+    ainit = rand(n, n)
     @testset "for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
         ainit = convert(Matrix{elty}, ainit)
         for a in (copy(ainit), view(ainit, 1:n, 1:n))
-            @test cond(a,1) ≈ 50.60863783272028 atol=0.5
-            @test cond(a,2) ≈ 23.059634761613314 atol=0.5
-            @test cond(a,Inf) ≈ 45.12503933120795 atol=0.4
-            @test cond(a[:,1:5]) ≈ 5.719500544258695 atol=0.01
+            ainv = inv(a)
+            @test cond(a, 1)   == opnorm(a, 1)  *opnorm(ainv, 1)
+            @test cond(a, Inf) == opnorm(a, Inf)*opnorm(ainv, Inf)
+            @test cond(a[:, 1:5]) == (\)(extrema(svdvals(a[:, 1:5]))...)
             @test_throws ArgumentError cond(a,3)
         end
     end
     @testset "Singular matrices" for p in (1, 2, Inf)
         @test cond(zeros(Int, 2, 2), p) == Inf
-        @test cond(zeros(2, 2), p) == Inf
-        @test cond([0 0; 1 1], p) == Inf
-        @test cond([0. 0.; 1. 1.], p) == Inf
+        @test cond(zeros(2, 2), p)      == Inf
+        @test cond([0 0; 1 1], p)       == Inf
+        @test cond([0. 0.; 1. 1.], p)   == Inf
     end
     @testset "Issue #33547, condition number of 2x2 matrix" begin
-        M = [1.0 -2.0; -2.0 -1.5]
+        M = [1.0 -2.0
+            -2.0 -1.5]
         @test cond(M, 1) ≈ 2.227272727272727
     end
+    @testset "Condition numbers of a non-random matrix" begin
+        # To ensure that we detect any regressions in the underlying functions
+        Mars= [11  24   7  20   3
+                4  12  25   8  16
+               17   5  13  21   9
+               10  18   1  14  22
+               23   6  19   2  15]
+        @test cond(Mars, 1)   ≈ 7.1
+        @test cond(Mars, 2)   ≈ 6.181867355918493
+        @test cond(Mars, Inf) ≈ 7.1
+    end
 end
 
 areal = randn(n,n)/2
@@ -105,35 +117,6 @@ bimg  = randn(n,2)/2
         end
     end # for eltyb
 
-@testset "Test diagm for vectors" begin
-    @test diagm(zeros(50)) == diagm(0 => zeros(50))
-    @test diagm(ones(50)) == diagm(0 => ones(50))
-    v = randn(500)
-    @test diagm(v) == diagm(0 => v)
-    @test diagm(500, 501, v) == diagm(500, 501, 0 => v)
-end
-
-@testset "Non-square diagm" begin
-    x = [7, 8]
-    for m=1:4, n=2:4
-        if m < 2 || n < 3
-            @test_throws DimensionMismatch diagm(m,n, 0 => x,  1 => x)
-            @test_throws DimensionMismatch diagm(n,m, 0 => x,  -1 => x)
-        else
-            M = zeros(m,n)
-            M[1:2,1:3] = [7 7 0; 0 8 8]
-            @test diagm(m,n, 0 => x,  1 => x) == M
-            @test diagm(n,m, 0 => x,  -1 => x) == M'
-        end
-    end
-end
-
-@testset "Test pinv (rtol, atol)" begin
-    M = [1 0 0; 0 1 0; 0 0 0]
-    @test pinv(M,atol=1)== zeros(3,3)
-    @test pinv(M,rtol=0.5)== M
-end
-
     for (a, a2) in ((copy(ainit), copy(ainit2)), (view(ainit, 1:n, 1:n), view(ainit2, 1:n, 1:n)))
         @testset "Test pinv" begin
             pinva15 = pinv(a[:,1:n1])
@@ -208,9 +191,41 @@ end
         @test Matrix(factorize(A)) ≈ Matrix(factorize(Tridiagonal(e2,d,e)))
         A = diagm(0 => d, 1 => e, 2 => f)
         @test factorize(A) == UpperTriangular(A)
+
+        x = rand(eltya)
+        @test factorize(x) == x
     end
 end # for eltya
 
+@testset "Test diagm for vectors" begin
+    @test diagm(zeros(50)) == diagm(0 => zeros(50))
+    @test diagm(ones(50)) == diagm(0 => ones(50))
+    v = randn(500)
+    @test diagm(v) == diagm(0 => v)
+    @test diagm(500, 501, v) == diagm(500, 501, 0 => v)
+end
+
+@testset "Non-square diagm" begin
+    x = [7, 8]
+    for m=1:4, n=2:4
+        if m < 2 || n < 3
+            @test_throws DimensionMismatch diagm(m,n, 0 => x,  1 => x)
+            @test_throws DimensionMismatch diagm(n,m, 0 => x,  -1 => x)
+        else
+            M = zeros(m,n)
+            M[1:2,1:3] = [7 7 0; 0 8 8]
+            @test diagm(m,n, 0 => x,  1 => x) == M
+            @test diagm(n,m, 0 => x,  -1 => x) == M'
+        end
+    end
+end
+
+@testset "Test pinv (rtol, atol)" begin
+    M = [1 0 0; 0 1 0; 0 0 0]
+    @test pinv(M,atol=1)== zeros(3,3)
+    @test pinv(M,rtol=0.5)== M
+end
+
 @testset "test out of bounds triu/tril" begin
     local m, n = 5, 7
     ainit = rand(m, n)
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index d782fd358bad5..42355c2b20576 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -3,7 +3,7 @@
 module TestDiagonal
 
 using Test, LinearAlgebra, SparseArrays, Random
-using LinearAlgebra: mul!, mul!, rmul!, lmul!, ldiv!, rdiv!, BlasFloat, BlasComplex, SingularException
+using LinearAlgebra: BlasFloat, BlasComplex
 
 n=12 #Size of matrix problem to test
 Random.seed!(1)
@@ -578,6 +578,41 @@ let D1 = Diagonal(rand(5)), D2 = Diagonal(rand(5))
     @test LinearAlgebra.lmul!(adjoint(D1),copy(D2)) == adjoint(D1)*D2
 end
 
+@testset "multiplication of a Diagonal with a Matrix" begin
+    A = collect(reshape(1:8, 4, 2));
+    B = BigFloat.(A);
+    DL = Diagonal(collect(axes(A, 1)));
+    DR = Diagonal(Float16.(collect(axes(A, 2))));
+
+    @test DL * A == collect(DL) * A
+    @test A * DR == A * collect(DR)
+    @test DL * B == collect(DL) * B
+    @test B * DR == B * collect(DR)
+
+    A = reshape([ones(2,2), ones(2,2)*2, ones(2,2)*3, ones(2,2)*4], 2, 2)
+    Ac = collect(A)
+    D = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))])
+    Dc = collect(D)
+    @test A * D == Ac * Dc
+    @test D * A == Dc * Ac
+    @test D * D == Dc * Dc
+
+    AS = similar(A)
+    mul!(AS, A, D, true, false)
+    @test AS == A * D
+
+    D2 = similar(D)
+    mul!(D2, D, D)
+    @test D2 == D * D
+
+    D2[diagind(D2)] .= D[diagind(D)]
+    lmul!(D, D2)
+    @test D2 == D * D
+    D2[diagind(D2)] .= D[diagind(D)]
+    rmul!(D2, D)
+    @test D2 == D * D
+end
+
 @testset "multiplication of QR Q-factor and Diagonal (#16615 spot test)" begin
     D = Diagonal(randn(5))
     Q = qr(randn(5, 5)).Q
@@ -686,12 +721,35 @@ end
     xt = transpose(x)
     A = reshape([[1 2; 3 4], zeros(Int,2,2), zeros(Int, 2, 2), [5 6; 7 8]], 2, 2)
     D = Diagonal(A)
-    @test x'*D == x'*A == copy(x')*D == copy(x')*A
-    @test xt*D == xt*A == copy(xt)*D == copy(xt)*A
+    @test x'*D == x'*A == collect(x')*D == collect(x')*A
+    @test xt*D == xt*A == collect(xt)*D == collect(xt)*A
+    outadjxD = similar(x'*D); outtrxD = similar(xt*D);
+    mul!(outadjxD, x', D)
+    @test outadjxD == x'*D
+    mul!(outtrxD, xt, D)
+    @test outtrxD == xt*D
+
+    D1 = Diagonal([[1 2; 3 4]])
+    @test D1 * x' == D1 * collect(x') == collect(D1) * collect(x')
+    @test D1 * xt == D1 * collect(xt) == collect(D1) * collect(xt)
+    outD1adjx = similar(D1 * x'); outD1trx = similar(D1 * xt); # dedicated outputs for the Diagonal-on-the-left products
+    mul!(outD1adjx, D1, x')
+    @test outD1adjx == D1*x'
+    mul!(outD1trx, D1, xt)
+    @test outD1trx == D1*xt
+
     y = [x, x]
     yt = transpose(y)
     @test y'*D*y == (y'*D)*y == (y'*A)*y
     @test yt*D*y == (yt*D)*y == (yt*A)*y
+    outadjyD = similar(y'*D); outtryD = similar(yt*D);
+    outadjyD2 = similar(collect(y'*D)); outtryD2 = similar(collect(yt*D));
+    mul!(outadjyD, y', D)
+    mul!(outadjyD2, y', D)
+    @test outadjyD == outadjyD2 == y'*D
+    mul!(outtryD, yt, D)
+    mul!(outtryD2, yt, D)
+    @test outtryD == outtryD2 == yt*D
 end
 
 @testset "Multiplication of single element Diagonal (#36746, #40726)" begin
@@ -718,6 +776,59 @@ end
     end
 end
 
+@testset "(Sym)Tridiagonal division by Diagonal" begin
+    for K in (5, 1), elty in (Float64, ComplexF32), overlength in (1, 0)
+        S = SymTridiagonal(randn(elty, K), randn(elty, K-overlength))
+        T = Tridiagonal(randn(elty, K-1), randn(elty, K), randn(elty, K-1))
+        D = Diagonal(randn(elty, K))
+        D0 = Diagonal(zeros(elty, K))
+        @test (D \ S)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(S))
+        @test (D \ T)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(T))
+        @test (S / D)::Tridiagonal{elty} == Tridiagonal(Matrix(S) / Matrix(D))
+        @test (T / D)::Tridiagonal{elty} == Tridiagonal(Matrix(T) / Matrix(D))
+        @test_throws SingularException D0 \ S
+        @test_throws SingularException D0 \ T
+        @test_throws SingularException S / D0
+        @test_throws SingularException T / D0
+    end
+    # 0-length case
+    S = SymTridiagonal(Float64[], Float64[])
+    T = Tridiagonal(Float64[], Float64[], Float64[])
+    D = Diagonal(Float64[])
+    @test (D \ S)::Tridiagonal{Float64} == T
+    @test (D \ T)::Tridiagonal{Float64} == T
+    @test (S / D)::Tridiagonal{Float64} == T
+    @test (T / D)::Tridiagonal{Float64} == T
+    # matrix eltype case
+    K = 5
+    for elty in (Float64, ComplexF32), overlength in (1, 0)
+        S = SymTridiagonal([rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-overlength])
+        T = Tridiagonal([rand(elty, 2, 2) for _ in 1:K-1], [rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-1])
+        D = Diagonal(randn(elty, K))
+        SM = fill(zeros(elty, 2, 2), K, K)
+        TM = copy(SM)
+        SM[1,1] = S[1,1]; TM[1,1] = T[1,1]
+        for j in 2:K
+            SM[j,j-1] = S[j,j-1]; SM[j,j] = S[j,j]; SM[j-1,j] = S[j-1,j]
+            TM[j,j-1] = T[j,j-1]; TM[j,j] = T[j,j]; TM[j-1,j] = T[j-1,j]
+        end
+        for (M, Mm) in ((S, SM), (T, TM))
+            DS = D \ M
+            @test DS isa Tridiagonal
+            DM = D \ Mm
+            for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end
+        end
+    end
+    # eltype promotion case
+    S = SymTridiagonal(rand(-20:20, K), rand(-20:20, K-1))
+    T = Tridiagonal(rand(-20:20, K-1), rand(-20:20, K), rand(-20:20, K-1))
+    D = Diagonal(rand(1:20, K))
+    @test (D \ S)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(S))
+    @test (D \ T)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(T))
+    @test (S / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(S) / Matrix(D))
+    @test (T / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(T) / Matrix(D))
+end
+
 @testset "eigenvalue sorting" begin
     D = Diagonal([0.4, 0.2, -1.3])
     @test eigvals(D) == eigen(D).values == [0.4, 0.2, -1.3] # not sorted by default
@@ -826,4 +937,79 @@ end
     @test \(x, B) == /(B, x)
 end
 
+@testset "promotion" begin
+    for (v1, v2) in (([true], [1]), ([zeros(2,2)], [zeros(Int, 2,2)]))
+        T = promote_type(eltype(v1), eltype(v2))
+        V = promote_type(typeof(v1), typeof(v2))
+        d1 = Diagonal(v1)
+        d2 = Diagonal(v2)
+        v = [d1, d2]
+        @test (@inferred eltype(v)) == Diagonal{T, V}
+    end
+    # test for a type for which promote_type doesn't lead to a concrete eltype
+    struct MyArrayWrapper{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
+       a :: A
+    end
+    Base.size(M::MyArrayWrapper) = size(M.a)
+    Base.axes(M::MyArrayWrapper) = axes(M.a)
+    Base.length(M::MyArrayWrapper) = length(M.a)
+    Base.getindex(M::MyArrayWrapper, i::Int...) = M.a[i...]
+    Base.setindex!(M::MyArrayWrapper, v, i::Int...) = M.a[i...] = v
+    d1 = Diagonal(MyArrayWrapper(1:3))
+    d2 = Diagonal(MyArrayWrapper(1.0:3.0))
+    c = [d1, d2]
+    @test c[1] == d1
+    @test c[2] == d2
+end
+
+@testset "zero and one" begin
+    D1 = Diagonal(rand(3))
+    @test D1 + zero(D1) == D1
+    @test D1 * one(D1) == D1
+    @test D1 * oneunit(D1) == D1
+    @test oneunit(D1) isa typeof(D1)
+    D2 = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))])
+    @test D2 + zero(D2) == D2
+    @test D2 * one(D2) == D2
+    @test D2 * oneunit(D2) == D2
+    @test oneunit(D2) isa typeof(D2)
+    D3 = Diagonal([D2, D2]);
+    @test D3 + zero(D3) == D3
+    @test D3 * one(D3) == D3
+    @test D3 * oneunit(D3) == D3
+    @test oneunit(D3) isa typeof(D3)
+end
+
+@testset "AbstractTriangular" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular))
+    A = randn(4, 4)
+    TriA = Tri(A)
+    UTriA = UTri(A)
+    D = Diagonal(1.0:4.0)
+    DM = Matrix(D)
+    DMF = factorize(DM)
+    outTri = similar(TriA)
+    out = similar(A)
+    # 2 args
+    for fun in (*, rmul!, rdiv!, /)
+        @test fun(copy(TriA), D)::Tri == fun(Matrix(TriA), D)
+        @test fun(copy(UTriA), D)::Tri == fun(Matrix(UTriA), D)
+    end
+    for fun in (*, lmul!, ldiv!, \)
+        @test fun(D, copy(TriA))::Tri == fun(D, Matrix(TriA))
+        @test fun(D, copy(UTriA))::Tri == fun(D, Matrix(UTriA))
+    end
+    # 3 args
+    @test outTri === ldiv!(outTri, D, TriA)::Tri == ldiv!(out, D, Matrix(TriA))
+    @test outTri === ldiv!(outTri, D, UTriA)::Tri == ldiv!(out, D, Matrix(UTriA))
+    @test outTri === mul!(outTri, D, TriA)::Tri == mul!(out, D, Matrix(TriA))
+    @test outTri === mul!(outTri, D, UTriA)::Tri == mul!(out, D, Matrix(UTriA))
+    @test outTri === mul!(outTri, TriA, D)::Tri == mul!(out, Matrix(TriA), D)
+    @test outTri === mul!(outTri, UTriA, D)::Tri == mul!(out, Matrix(UTriA), D)
+    # 5 args
+    @test outTri === mul!(outTri, D, TriA, 2, 1)::Tri == mul!(out, D, Matrix(TriA), 2, 1)
+    @test outTri === mul!(outTri, D, UTriA, 2, 1)::Tri == mul!(out, D, Matrix(UTriA), 2, 1)
+    @test outTri === mul!(outTri, TriA, D, 2, 1)::Tri == mul!(out, Matrix(TriA), D, 2, 1)
+    @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1)
+end
+
 end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl
index 88a8048b52f31..4ee1845ecc385 100644
--- a/stdlib/LinearAlgebra/test/eigen.jl
+++ b/stdlib/LinearAlgebra/test/eigen.jl
@@ -163,6 +163,7 @@ end
 end
 
 @testset "eigen of an Adjoint" begin
+    Random.seed!(4)
     A = randn(3,3)
     @test eigvals(A') == eigvals(copy(A'))
     @test eigen(A')   == eigen(copy(A'))
diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl
index 8431126c91d58..170542360e4ff 100644
--- a/stdlib/LinearAlgebra/test/factorization.jl
+++ b/stdlib/LinearAlgebra/test/factorization.jl
@@ -42,6 +42,27 @@ using Test, LinearAlgebra
     @test hash(F) == hash(G)
 end
 
+@testset "size for factorizations - $f" for f in Any[
+    bunchkaufman,
+    cholesky,
+    x -> cholesky(x, Val(true)),
+    hessenberg,
+    lq,
+    lu,
+    qr,
+    x -> qr(x, ColumnNorm()),
+    svd,
+]
+    A = randn(3, 3)
+    A = A * A' # ensure A is pos. def. and symmetric
+    F = f(A)
+    tF = Transpose(F)
+    aF = Adjoint(F)
+    @test size(F) == size(A)
+    @test size(tF) == size(Transpose(A))
+    @test size(aF) == size(Adjoint(A))
+end
+
 @testset "equality of QRCompactWY" begin
     A = rand(100, 100)
     F, G = qr(A), qr(A)
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
index 489b96be56019..26534a2cdf0cd 100644
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ b/stdlib/LinearAlgebra/test/generic.jl
@@ -70,6 +70,11 @@ n = 5 # should be odd
         else
             @test logabsdet(A)[2] ≈ sign(det(A))
         end
+        # logabsdet for Number
+        x = A[1, 1] # getting a number of type elty
+        X = fill(x, 1, 1)
+        @test logabsdet(x)[1] ≈ logabsdet(X)[1]
+        @test logabsdet(x)[2] ≈ logabsdet(X)[2]
     end
 end
 
@@ -256,6 +261,18 @@ end
     @test_throws DimensionMismatch reflect!([x; x], y, c, s)
 end
 
+@testset "LinearAlgebra.reflectorApply!" begin
+    for T in (Float64, ComplexF64)
+        x = rand(T, 6)
+        τ = rand(T)
+        A = rand(T, 6)
+        B = LinearAlgebra.reflectorApply!(x, τ, copy(A))
+        C = LinearAlgebra.reflectorApply!(x, τ, reshape(copy(A), (length(A), 1)))
+        @test B[1] ≈ C[1] ≈ A[1] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))
+        @test B[2:end] ≈ C[2:end] ≈ A[2:end] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))*x[2:end]
+    end
+end
+
 @testset "LinearAlgebra.axp(b)y! for element type without commutative multiplication" begin
     α = [1 2; 3 4]
     β = [5 6; 7 8]
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index 65dc029060596..9b623273666c2 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -90,14 +90,12 @@ let n = 10
                 @testset "Multiplication/division" begin
                     for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
                              UpperTriangular(A), UnitUpperTriangular(A))
-                        @test H*x == Array(H)*x broken = eltype(H) <: Furlong && x isa Bidiagonal
-                        @test x*H == x*Array(H) broken = eltype(H) <: Furlong && x isa Bidiagonal
-                        @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal, UpperTriangular}
-                        @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal, UpperTriangular}
-                        @test H*x isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Bidiagonal
-                        @test x*H isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Bidiagonal
-                        @test H/x isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal}
-                        @test x\H isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal}
+                        @test (H*x)::UpperHessenberg == Array(H)*x broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test (x*H)::UpperHessenberg == x*Array(H) broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, UpperTriangular}
+                        @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, UpperTriangular}
+                        @test H/x isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test x\H isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Bidiagonal
                     end
                     x = Bidiagonal(d, dl, :L)
                     @test H*x == Array(H)*x
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
index 6453f0f521092..284b512d93a18 100644
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ b/stdlib/LinearAlgebra/test/lapack.jl
@@ -705,4 +705,10 @@ end
 # # https://github.com/JuliaLang/julia/pull/39845
 @test LinearAlgebra.LAPACK.liblapack == "libblastrampoline"
 
+# Issue #42762 https://github.com/JuliaLang/julia/issues/42762
+# Tests geqrf! and gerqf! with null column dimensions
+a = zeros(2,0), zeros(0)
+@test LinearAlgebra.LAPACK.geqrf!(a...) === a
+@test LinearAlgebra.LAPACK.gerqf!(a...) === a
+
 end # module TestLAPACK
diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl
index 0dffe7fa1738f..cc3f1be2d1627 100644
--- a/stdlib/LinearAlgebra/test/lu.jl
+++ b/stdlib/LinearAlgebra/test/lu.jl
@@ -175,7 +175,10 @@ dimg  = randn(n)/2
                         end
                     end
                     if eltya <: Complex
-                        @test norm((lud'\bb) - Array(d')\bb, 1) < ε*κd*n*2 # Two because the right hand side has two columns
+                        dummy_factor = 2.5
+                        # TODO: Remove dummy_factor, this test started failing when the RNG stream changed
+                        # so the factor was added.
+                        @test norm((lud'\bb) - Array(d')\bb, 1) < ε*κd*n*2*dummy_factor # Two because the right hand side has two columns
                     end
                 end
             end
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 1febdfe49fb3b..1017134f2f6d4 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -152,7 +152,7 @@ end
     for vf in (copy(vvf), view(vvf, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3))
         @test mul!(C, vf, transpose(vf)) == vf*vf'
         C .= C0 = rand(eltype(C), size(C))
-        @test mul!(C, vf, transpose(vf), 2, 3) == 2vf*vf' .+ 3C0
+        @test mul!(C, vf, transpose(vf), 2, 3) ≈ 2vf*vf' .+ 3C0
     end
 end
 
diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl
index d6085565e3c7f..9360d487b6406 100644
--- a/stdlib/LinearAlgebra/test/qr.jl
+++ b/stdlib/LinearAlgebra/test/qr.jl
@@ -415,4 +415,39 @@ end
     @test A.Q' * B ≈ A.Q
 end
 
+@testset "convert between eltypes" begin
+    a = rand(Float64, 10, 5)
+    qra = qr(a)
+    qrwy = LinearAlgebra.QRCompactWY{Float32}(qra.factors, qra.T)
+    @test Array(qrwy) ≈ Array(qr(Float32.(a)))
+    @test eltype(qrwy.factors) == eltype(qrwy.T) == Float32
+    qra = qr(a, ColumnNorm())
+    qrp = QRPivoted{Float32}(qra.factors, qra.τ, qra.jpvt)
+    @test Array(qrp) ≈ Array(qr(Float32.(a), ColumnNorm()))
+    @test eltype(qrp.factors) == eltype(qrp.τ) == Float32
+    a = rand(Float16, 10, 5)
+    qra = qr(a)
+    qrnonblas = QR{ComplexF16}(qra.factors, qra.τ)
+    @test Array(qrnonblas) ≈ Array(qr(ComplexF16.(a)))
+    @test eltype(qrnonblas.factors) == eltype(qrnonblas.τ) == ComplexF16
+end
+
+@testset "optimized getindex for an AbstractQ" begin
+    for T in [Float64, ComplexF64]
+        Q = qr(rand(T, 4, 4))
+        Q2 = Q.Q
+        M = Matrix(Q2)
+        for j in axes(M, 2)
+            @test Q2[:, j] == M[:, j]
+            for i in axes(M, 1)
+                @test Q2[i, :] == M[i, :]
+                @test Q2[i, j] == M[i, j]
+            end
+        end
+        @test Q2[:] == M[:]
+        @test Q2[:, :] == M[:, :]
+        @test Q2[:, :, :] == M[:, :, :]
+    end
+end
+
 end # module TestQR
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
index bf4c8dee58977..e0c5f87111b07 100644
--- a/stdlib/LinearAlgebra/test/special.jl
+++ b/stdlib/LinearAlgebra/test/special.jl
@@ -450,4 +450,27 @@ end
     @test A*Sym ≈ A*Matrix(Sym)
 end
 
+@testset "Ops on SymTridiagonal ev has the same length as dv" begin
+    x = rand(3)
+    y = rand(3)
+    z = rand(2)
+
+    S = SymTridiagonal(x, y)
+    T = Tridiagonal(z, x, z)
+    Bu = Bidiagonal(x, z, :U)
+    Bl = Bidiagonal(x, z, :L)
+
+    Ms = Matrix(S)
+    Mt = Matrix(T)
+    Mbu = Matrix(Bu)
+    Mbl = Matrix(Bl)
+
+    @test S + T ≈ Ms + Mt
+    @test T + S ≈ Mt + Ms
+    @test S + Bu ≈ Ms + Mbu
+    @test Bu + S ≈ Mbu + Ms
+    @test S + Bl ≈ Ms + Mbl
+    @test Bl + S ≈ Mbl + Ms
+end
+
 end # module TestSpecial
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
index f20b6fe2acc97..169dfb0071718 100644
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ b/stdlib/LinearAlgebra/test/symmetric.jl
@@ -60,6 +60,10 @@ end
                 @test Hermitian(Hermitian(aherm, :U), :U) === Hermitian(aherm, :U)
                 @test_throws ArgumentError Symmetric(Symmetric(asym, :U), :L)
                 @test_throws ArgumentError Hermitian(Hermitian(aherm, :U), :L)
+
+                @test_throws ArgumentError Symmetric(asym, :R)
+                @test_throws ArgumentError Hermitian(asym, :R)
+
                 # mixed cases with Hermitian/Symmetric
                 if eltya <: Real
                     @test Symmetric(Hermitian(aherm, :U))     === Symmetric(aherm, :U)
@@ -268,6 +272,7 @@ end
                         @test abs.(eigen(Symmetric(asym), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                         @test abs.(eigen(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                         @test eigvals(Symmetric(asym), 1:2) ≈ d[1:2]
+                        @test eigvals(Symmetric(asym), sortby= x -> -x) ≈ eigvals(eigen(Symmetric(asym), sortby = x -> -x))
                         @test eigvals(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2]
                         # eigen doesn't support Symmetric{Complex}
                         @test Matrix(eigen(asym)) ≈ asym
@@ -281,6 +286,7 @@ end
                     @test abs.(eigen(Hermitian(aherm), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                     @test abs.(eigen(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                     @test eigvals(Hermitian(aherm), 1:2) ≈ d[1:2]
+                    @test eigvals(Hermitian(aherm), sortby= x -> -x) ≈ eigvals(eigen(Hermitian(aherm), sortby = x -> -x))
                     @test eigvals(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2]
                     @test Matrix(eigen(aherm)) ≈ aherm
                     @test eigvecs(Hermitian(aherm)) ≈ eigvecs(aherm)
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
index 31e107ddc0e3c..eeb4c615c12ad 100644
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ b/stdlib/LinearAlgebra/test/tridiag.jl
@@ -164,6 +164,19 @@ end
         @test !isdiag(Tridiagonal(dl,d,zerosdu))
         @test !isdiag(Tridiagonal(zerosdl,d,du))
         @test !isdiag(Tridiagonal(dl,d,du))
+
+        # Test methods that could fail due to dv and ev having the same length
+        # see #41089
+
+        badev = zero(d)
+        badev[end] = 1
+        S = SymTridiagonal(d, badev)
+
+        @test istriu(S, -2)
+        @test istriu(S, 0)
+        @test !istriu(S, 2)
+
+        @test isdiag(S)
     end
 
     @testset "iszero and isone" begin
@@ -190,6 +203,12 @@ end
         @test isone(Sone)
         @test !iszero(Smix)
         @test !isone(Smix)
+
+        badev = zeros(elty, 3)
+        badev[end] = 1
+
+        @test isone(SymTridiagonal(ones(elty, 3), badev))
+        @test iszero(SymTridiagonal(zeros(elty, 3), badev))
     end
 
     @testset for mat_type in (Tridiagonal, SymTridiagonal)
@@ -461,7 +480,7 @@ end
     F = lu(Tridiagonal(sparse(1.0I, 3, 3)))
     @test F.L == Matrix(I, 3, 3)
     @test startswith(sprint(show, MIME("text/plain"), F),
-          "LinearAlgebra.LU{Float64, LinearAlgebra.Tridiagonal{Float64, SparseArrays.SparseVector")
+          "$(LinearAlgebra.LU){Float64, $(LinearAlgebra.Tridiagonal){Float64, SparseArrays.SparseVector")
 end
 
 @testset "Issue 29630" begin
@@ -676,4 +695,35 @@ end
     end
 end
 
+isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
+using .Main.SizedArrays
+@testset "non-number eltype" begin
+    @testset "sum for SymTridiagonal" begin
+        dv = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10]
+        ev = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10]
+        S = SymTridiagonal(dv, ev)
+        Sdense = Matrix(S)
+        @test Sdense == collect(S)
+        @test sum(S) == sum(Sdense)
+        @test sum(S, dims = 1) == sum(Sdense, dims = 1)
+        @test sum(S, dims = 2) == sum(Sdense, dims = 2)
+    end
+    @testset "issymmetric/ishermitian for Tridiagonal" begin
+        @test !issymmetric(Tridiagonal([[1 2;3 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]]))
+        @test !issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;3 4], [1 2;3 4]], [[1 2;3 4]]))
+        @test issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]]))
+
+        @test ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].-im]))
+        @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].+im]))
+        @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+im, [1 2;2 3].+0im], [[1 2;3 4].-im]))
+    end
+    @testset "== between Tridiagonal and SymTridiagonal" begin
+        dv = [SizedArray{(2,2)}([1 2;3 4]) for i in 1:4]
+        ev = [SizedArray{(2,2)}([3 4;1 2]) for i in 1:4]
+        S = SymTridiagonal(dv, ev)
+        Sdense = Matrix(S)
+        @test S == Tridiagonal(diag(Sdense, -1), diag(Sdense),  diag(Sdense, 1)) == S
+        @test S != Tridiagonal(diag(Sdense, 1), diag(Sdense),  diag(Sdense, 1)) != S # value inequality; `!==` (identity) would pass trivially
+    end
+end
 end # module TestTridiagonal
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
index edbb1789a366b..2080ace77df88 100644
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/test/uniformscaling.jl
@@ -47,8 +47,19 @@ end
         (2:3, 1:2),
         (2:-1:1, 1:2),
         (1:2:9, 5:2:13),
+        (1, [1,2,5]),
+        (1, [1,10,5,2]),
+        (10, [10]),
+        ([1], 1),
+        ([15,1,5,2], 6),
+        ([2], [2]),
+        ([2,9,8,2,1], [2,8,4,3,1]),
+        ([8,3,5,3], 2:9),
     ]
         @test I[a,b] == J[a,b]
+        ndims(a) == 1 && @test I[OffsetArray(a,-10),b] == J[OffsetArray(a,-10),b]
+        ndims(b) == 1 && @test I[a,OffsetArray(b,-9)] == J[a,OffsetArray(b,-9)]
+        ndims(a) == ndims(b) == 1 && @test I[OffsetArray(a,-7),OffsetArray(b,-8)] == J[OffsetArray(a,-7),OffsetArray(b,-8)]
     end
 end
 
@@ -133,7 +144,7 @@ end
 end
 
 @testset "arithmetic with Number" begin
-    α = randn()
+    α = rand()
     @test α + I == α + 1
     @test I + α == α + 1
     @test α - I == α - 1
diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md
index f5367d05b14a5..7a6fbbbdd2081 100644
--- a/stdlib/Logging/docs/src/index.md
+++ b/stdlib/Logging/docs/src/index.md
@@ -217,6 +217,9 @@ julia> foo()
 
 ```
 
+Use a comma separator to enable debug for multiple
+modules: `JULIA_DEBUG=loading,Main`.
+
 ## Examples
 
 ### Example: Writing log events to a file
diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/stdlib/Logging/src/ConsoleLogger.jl
index 04c56c6dbfaae..4e32b6b71f656 100644
--- a/stdlib/Logging/src/ConsoleLogger.jl
+++ b/stdlib/Logging/src/ConsoleLogger.jl
@@ -70,7 +70,7 @@ function default_metafmt(level::LogLevel, _module, group, id, file, line)
     prefix = string(level == Warn ? "Warning" : string(level), ':')
     suffix::String = ""
     Info <= level < Warn && return color, prefix, suffix
-    _module !== nothing && (suffix *= "$(_module)")
+    _module !== nothing && (suffix *= string(_module)::String)
     if file !== nothing
         _module !== nothing && (suffix *= " ")
         suffix *= Base.contractuser(file)::String
@@ -119,7 +119,7 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
     msglines = [(indent=0, msg=l) for l in split(chomp(string(message)::String), '\n')]
     stream = logger.stream
     if !isopen(stream)
-        stream = level < Warn ? stdout : stderr
+        stream = stderr
     end
     dsize = displaysize(stream)::Tuple{Int,Int}
     nkwargs = length(kwargs)::Int
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 38c8c45ac8975..826b3b0c6114e 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -44,23 +44,7 @@ STDLIBS = Artifacts Base64 CRC32c Dates DelimitedFiles Distributed FileWatching
           SharedArrays Sockets SparseArrays SuiteSparse Test TOML Unicode UUIDs \
           $(JLL_NAMES)
 
-STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse
-PKG_GIT_URL := git://github.com/JuliaLang/Pkg.jl.git
-PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
-STATISTICS_GIT_URL := git://github.com/JuliaLang/Statistics.jl.git
-STATISTICS_TAR_URL = https://api.github.com/repos/JuliaLang/Statistics.jl/tarball/$1
-LIBCURL_GIT_URL := git://github.com/JuliaWeb/LibCURL.jl.git
-LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1
-DOWNLOADS_GIT_URL := git://github.com/JuliaLang/Downloads.jl.git
-DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
-ARGTOOLS_GIT_URL := git://github.com/JuliaIO/ArgTools.jl.git
-ARGTOOLS_TAR_URL = https://api.github.com/repos/JuliaIO/ArgTools.jl/tarball/$1
-TAR_GIT_URL := git://github.com/JuliaIO/Tar.jl.git
-TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
-NETWORKOPTIONS_GIT_URL := git://github.com/JuliaLang/NetworkOptions.jl.git
-NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1
-SUITESPARSE_GIT_URL := git://github.com/JuliaLang/SuiteSparse.jl.git
-SUITESPARSE_TAR_URL = https://api.github.com/repos/JuliaLang/SuiteSparse.jl/tarball/$1
+STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse SHA
 
 $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z))))
 
@@ -73,7 +57,11 @@ getall get: $(addprefix get-, $(STDLIBS_EXT) $(JLL_NAMES))
 install: version-check $(addprefix install-, $(STDLIBS_EXT) $(JLL_NAMES)) $(STDLIBS_LINK_TARGETS)
 version-check: $(addprefix version-check-, $(STDLIBS_EXT))
 uninstall: $(addprefix uninstall-, $(STDLIBS_EXT))
-clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS)
+extstdlibclean:
+	for module in $(STDLIBS_EXT) ; do \
+		rm -rf $(JULIAHOME)/stdlib/$${module}-*; \
+	done
+clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS) extstdlibclean
 distclean: $(addprefix distclean-, $(STDLIBS_EXT)) clean
 checksumall: $(addprefix checksum-, $(STDLIBS_EXT))
 
diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl
index d2855f27a7add..fd5134481e113 100644
--- a/stdlib/Markdown/src/Common/inline.jl
+++ b/stdlib/Markdown/src/Common/inline.jl
@@ -146,13 +146,10 @@ function _is_link(s::AbstractString)
 end
 
 # non-normative regex from the HTML5 spec
-const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+const _email_regex = r"^mailto\:[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
 
 function _is_mailto(s::AbstractString)
-    length(s) < 6 && return false
-    # slicing strings is a bit risky, but this equality check is safe
-    lowercase(s[1:6]) == "mailto:" || return false
-    return occursin(_email_regex, s[6:end])
+    return occursin(_email_regex, s)
 end
 
 # –––––––––––
diff --git a/stdlib/Markdown/src/render/latex.jl b/stdlib/Markdown/src/render/latex.jl
index ee546be555fa4..d18a2e760ef3d 100644
--- a/stdlib/Markdown/src/render/latex.jl
+++ b/stdlib/Markdown/src/render/latex.jl
@@ -33,8 +33,8 @@ function latex(io::IO, header::Header{l}) where l
 end
 
 function latex(io::IO, code::Code)
+    occursin("\\end{verbatim}", code.code) && error("Cannot include \"\\end{verbatim}\" in a latex code block")
     wrapblock(io, "verbatim") do
-        # TODO latex escape
         println(io, code.code)
     end
 end
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index f90eefb85310e..dfe80430a00d6 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1230,3 +1230,33 @@ end
     @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line"
 end
 
+@testset "pullrequest #41552: a code block has \\end{verbatim}" begin
+    s1 = md"""
+         ```tex
+         \begin{document}
+         \end{document}
+         ```
+         """
+    s2 = md"""
+         ```tex
+         \begin{verbatim}
+         \end{verbatim}
+         ```
+         """
+    @test Markdown.latex(s1) == """
+                                \\begin{verbatim}
+                                \\begin{document}
+                                \\end{document}
+                                \\end{verbatim}
+                                """
+    @test_throws ErrorException Markdown.latex(s2)
+end
+
+@testset "issue #42139: autolink" begin
+    # ok
+    @test md"<mailto:foo@bar.com>" |> html == """<p><a href="mailto:foo@bar.com">mailto:foo@bar.com</a></p>\n"""
+    # not ok
+    @test md"<mailto foo@bar.com>" |> html == """<p>&lt;mailto foo@bar.com&gt;</p>\n"""
+    # see issue #42139
+    @test md"<一轮红日初升>" |> html == """<p>&lt;一轮红日初升&gt;</p>\n"""
+end
diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl
index 6919421b92277..629f53e8371ed 100644
--- a/stdlib/Mmap/src/Mmap.jl
+++ b/stdlib/Mmap/src/Mmap.jl
@@ -191,7 +191,11 @@ function mmap(io::IO,
     isopen(io) || throw(ArgumentError("$io must be open to mmap"))
     isbitstype(T)  || throw(ArgumentError("unable to mmap $T; must satisfy isbitstype(T) == true"))
 
-    len = prod(dims) * sizeof(T)
+    len = sizeof(T)
+    for l in dims
+        len, overflow = Base.Checked.mul_with_overflow(promote(len, l)...)
+        overflow && throw(ArgumentError("requested size prod($((sizeof(T), dims...))) too large, would overflow typeof(size(T)) == $(typeof(len))"))
+    end
     len >= 0 || throw(ArgumentError("requested size must be ≥ 0, got $len"))
     len == 0 && return Array{T}(undef, ntuple(x->0,Val(N)))
     len < typemax(Int) - PAGESIZE || throw(ArgumentError("requested size must be < $(typemax(Int)-PAGESIZE), got $len"))
diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl
index 51bf898e94b48..0b3cb0b9f1a42 100644
--- a/stdlib/Mmap/test/runtests.jl
+++ b/stdlib/Mmap/test/runtests.jl
@@ -133,6 +133,7 @@ c = mmap(s, Vector{UInt8}, (UInt16(11),))
 finalize(c); c=nothing; GC.gc()
 @test_throws ArgumentError mmap(s, Vector{UInt8}, (Int16(-11),))
 @test_throws ArgumentError mmap(s, Vector{UInt8}, (typemax(UInt),))
+@test_throws ArgumentError mmap(s, Matrix{UInt8}, (typemax(Int) - Mmap.PAGESIZE - 1, 2)) # overflow
 close(s)
 s = open(file, "r+")
 @test isreadonly(s) == false
diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version
index 0d398e16dafc1..a07211b9dcfdf 100644
--- a/stdlib/NetworkOptions.version
+++ b/stdlib/NetworkOptions.version
@@ -1,2 +1,4 @@
 NETWORKOPTIONS_BRANCH = master
-NETWORKOPTIONS_SHA1 = 6cea813ca841c400627b6cf4d4f4ec2599965ab6
+NETWORKOPTIONS_SHA1 = 01e6ec17aa4ef74b4a0ea19c193dacf8d2cfc353
+NETWORKOPTIONS_GIT_URL := https://github.com/JuliaLang/NetworkOptions.jl.git
+NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index a879e204dcb3f..3ab110db99410 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.13+6"
+version = "0.3.17+2"
 
 [deps]
 CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
index c69f3a262b298..f656621d957d6 100644
--- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
+++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
@@ -32,6 +32,11 @@ else
 end
 
 function __init__()
+    # make sure OpenBLAS does not set CPU affinity (#1070, #9639)
+    if !haskey(ENV, "OPENBLAS_MAIN_FREE")
+        ENV["OPENBLAS_MAIN_FREE"] = "1"
+    end
+
     global libopenblas_handle = dlopen(libopenblas)
     global libopenblas_path = dlpath(libopenblas_handle)
     global artifact_dir = dirname(Sys.BINDIR)
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 296410c62e300..37d827790ad03 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,2 +1,4 @@
 PKG_BRANCH = master
-PKG_SHA1 = e476cd0c61e19b645cc0e32bb30f8e44f60001f7
+PKG_SHA1 = aec716f4360bc82bd6a7db6d0bd5dd7458dccba5
+PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
+PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index 9226d2c7fa5c6..b7487db017d45 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -292,7 +292,8 @@ fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
     bs = base(T)
     arg2 = toint(arg)
     n = i = ndigits(arg2, base=bs, pad=1)
-    x, neg = arg2 < 0 ? (-arg2, true) : (arg2, false)
+    neg = arg2 < 0
+    x = arg2 isa Base.BitSigned ? unsigned(abs(arg2)) : abs(arg2)
     arglen = n + (neg || (plus | space)) +
         (T == Val{'o'} && hash ? 1 : 0) +
         (T == Val{'x'} && hash ? 2 : 0) + (T == Val{'X'} && hash ? 2 : 0)
@@ -424,27 +425,31 @@ const __BIG_FLOAT_MAX__ = 8192
     elseif T == Val{'f'} || T == Val{'F'}
         newpos = Ryu.writefixed(buf, pos, x, prec, plus, space, hash, UInt8('.'))
     elseif T == Val{'g'} || T == Val{'G'}
-        # C11-compliant general format
-        prec = prec == 0 ? 1 : prec
-        # format the value in scientific notation and parse the exponent part
-        exp = let p = Ryu.writeexp(buf, pos, x, prec)
-            b1, b2, b3, b4 = buf[p-4], buf[p-3], buf[p-2], buf[p-1]
-            Z = UInt8('0')
-            if b1 == UInt8('e')
-                # two-digit exponent
-                sign = b2 == UInt8('+') ? 1 : -1
-                exp = 10 * (b3 - Z) + (b4 - Z)
+        if isinf(x) || isnan(x)
+            newpos = Ryu.writeshortest(buf, pos, x, plus, space)
+        else
+            # C11-compliant general format
+            prec = prec == 0 ? 1 : prec
+            # format the value in scientific notation and parse the exponent part
+            exp = let p = Ryu.writeexp(buf, pos, x, prec)
+                b1, b2, b3, b4 = buf[p-4], buf[p-3], buf[p-2], buf[p-1]
+                Z = UInt8('0')
+                if b1 == UInt8('e')
+                    # two-digit exponent
+                    sign = b2 == UInt8('+') ? 1 : -1
+                    exp = 10 * (b3 - Z) + (b4 - Z)
+                else
+                    # three-digit exponent
+                    sign = b1 == UInt8('+') ? 1 : -1
+                    exp = 100 * (b2 - Z) + 10 * (b3 - Z) + (b4 - Z)
+                end
+                flipsign(exp, sign)
+            end
+            if -4 ≤ exp < prec
+                newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
             else
-                # three-digit exponent
-                sign = b1 == UInt8('+') ? 1 : -1
-                exp = 100 * (b2 - Z) + 10 * (b3 - Z) + (b4 - Z)
+                newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
             end
-            flipsign(exp, sign)
-        end
-        if -4 ≤ exp < prec
-            newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
-        else
-            newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
         end
     elseif T == Val{'a'} || T == Val{'A'}
         x, neg = x < 0 || x === -Base.zero(x) ? (-x, true) : (x, false)
diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index 30524d600cdf8..e80cbe9626823 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -94,6 +94,15 @@ end
     @test Printf.@sprintf("%g", 123456.7) == "123457"
     @test Printf.@sprintf("%g", 1234567.8) == "1.23457e+06"
 
+    # %g regression gh #41631
+    for (val, res) in ((Inf, "Inf"),
+                       (-Inf, "-Inf"),
+                       (NaN, "NaN"),
+                       (-NaN, "NaN"))
+        @test Printf.@sprintf("%g", val) == res
+        @test Printf.@sprintf("%G", val) == res
+    end
+
     # zeros
     @test Printf.@sprintf("%.15g", 0) == "0"
     @test Printf.@sprintf("%#.15g", 0) == "0.00000000000000"
@@ -753,6 +762,17 @@ end
     @test Printf.@sprintf("%20.0X",  UInt(3989525555)) == "            EDCB5433"
     @test Printf.@sprintf("%20.X",  UInt(0)) == "                   0"
 
+    # issue #41971
+    @test Printf.@sprintf("%4d", typemin(Int8)) == "-128"
+    @test Printf.@sprintf("%4d", typemax(Int8)) == " 127"
+    @test Printf.@sprintf("%6d", typemin(Int16)) == "-32768"
+    @test Printf.@sprintf("%6d", typemax(Int16)) == " 32767"
+    @test Printf.@sprintf("%11d", typemin(Int32)) == "-2147483648"
+    @test Printf.@sprintf("%11d", typemax(Int32)) == " 2147483647"
+    @test Printf.@sprintf("%20d", typemin(Int64)) == "-9223372036854775808"
+    @test Printf.@sprintf("%20d", typemax(Int64)) == " 9223372036854775807"
+    @test Printf.@sprintf("%40d", typemin(Int128)) == "-170141183460469231731687303715884105728"
+    @test Printf.@sprintf("%40d", typemax(Int128)) == " 170141183460469231731687303715884105727"
 end
 
 @testset "%n" begin
diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml
index 6aca0601439e3..1d13dad22233a 100644
--- a/stdlib/Profile/Project.toml
+++ b/stdlib/Profile/Project.toml
@@ -5,8 +5,9 @@ uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Serialization"]
+test = ["Logging", "Serialization", "Test"]
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index f297ad12f80a1..3409e79bdb128 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -7,6 +7,8 @@ module Profile
 
 import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
 
+const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+
 # deprecated functions: use `getdict` instead
 lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))
 
@@ -37,29 +39,45 @@ end
 ####
 
 """
-    init(; n::Integer, delay::Real))
+    init(; n::Integer, delay::Real)
+
+Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
+stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
+list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
+NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
+using keywords or in the order `(n, delay)`.
 
-Configure the `delay` between backtraces (measured in seconds), and the number `n` of
-instruction pointers that may be stored. Each instruction pointer corresponds to a single
-line of code; backtraces generally consist of a long list of instruction pointers. Current
-settings can be obtained by calling this function with no arguments, and each can be set
-independently using keywords or in the order `(n, delay)`.
+!!! compat "Julia 1.8"
+    As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled.
+    Previously this was `n` total.
 """
-function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing)
+function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true)
     n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ())
     delay_cur = ccall(:jl_profile_delay_nsec, UInt64, ())/10^9
     if n === nothing && delay === nothing
-        return Int(n_cur), delay_cur
+        nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread
+        return round(Int, n_cur / nthreads), delay_cur
     end
     nnew = (n === nothing) ? n_cur : n
     delaynew = (delay === nothing) ? delay_cur : delay
-    init(nnew, delaynew)
+    init(nnew, delaynew; limitwarn)
 end
 
-function init(n::Integer, delay::Real)
-    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), n, round(UInt64,10^9*delay))
+function init(n::Integer, delay::Real; limitwarn::Bool = true)
+    nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread
+    sample_size_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8
+    buffer_samples = n * nthreads
+    buffer_size_bytes = buffer_samples * sample_size_bytes
+    if buffer_size_bytes > 2^29 && Sys.WORD_SIZE == 32
+        buffer_size_bytes_per_thread = floor(Int, 2^29 / nthreads)
+        buffer_samples_per_thread = floor(Int, buffer_size_bytes_per_thread / sample_size_bytes)
+        buffer_samples = buffer_samples_per_thread * nthreads
+        buffer_size_bytes = buffer_samples * sample_size_bytes
+        limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples_per_thread per thread) given that this system is 32-bit"
+    end
+    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64,10^9*delay))
     if status == -1
-        error("could not allocate space for ", n, " instruction pointers")
+        error("could not allocate space for ", n, " instruction pointers per thread being profiled ($nthreads threads, $(Base.format_bytes(buffer_size_bytes)) total)")
     end
 end
 
@@ -69,9 +87,9 @@ end
 if Sys.iswindows() && Sys.WORD_SIZE == 32
     # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame),
     # so we don't want to slow the program down by quite that much
-    __init__() = init(1_000_000, 0.01)
+    __init__() = init(1_000_000, 0.01, limitwarn = false)
 else
-    __init__() = init(10_000_000, 0.001)
+    __init__() = init(10_000_000, 0.001, limitwarn = false)
 end
 
 """
@@ -104,6 +122,12 @@ struct ProfileFormat
     end
 end
 
+# offsets of the metadata in the data stream
+const META_OFFSET_SLEEPSTATE = 2
+const META_OFFSET_CPUCYCLECLOCK = 3
+const META_OFFSET_TASKID = 4
+const META_OFFSET_THREADID = 5
+
 """
     print([io::IO = stdout,] [data::Vector]; kwargs...)
 
@@ -126,6 +150,9 @@ The keyword arguments can be any combination of:
     line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
     incurred by each function by itself.
 
+ - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
+    `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
+
  - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
     A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
     where `n` is the number of samples on this line, and `N` is the number of samples for the callee.
@@ -135,9 +162,15 @@ The keyword arguments can be any combination of:
  - `recur` -- Controls the recursion handling in `:tree` format. `:off` (default) prints the tree as normal. `:flat` instead
     compresses any recursion (by ip), showing the approximate effect of converting any self-recursion into an iterator.
     `:flatc` does the same but also includes collapsing of C frames (may do odd things around `jl_apply`).
+
+ - `threads::Union{Int,AbstractVector{Int}}` -- Specify which threads to include snapshots from in the report. Note that
+    this does not control which threads samples are collected on.
+
+ - `tasks::Union{UInt,AbstractVector{UInt}}` -- Specify which tasks to include snapshots from in the report. Note that this
+    does not control which tasks samples are collected within.
 """
 function print(io::IO,
-        data::Vector{<:Unsigned} = fetch(),
+        data::Vector{<:Unsigned} = fetch(include_meta = true),
         lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)
         ;
         format = :tree,
@@ -147,32 +180,144 @@ function print(io::IO,
         mincount::Int = 0,
         noisefloor = 0,
         sortedby::Symbol = :filefuncline,
-        recur::Symbol = :off)
-    print(io, data, lidict, ProfileFormat(
-            C = C,
-            combine = combine,
-            maxdepth = maxdepth,
-            mincount = mincount,
-            noisefloor = noisefloor,
-            sortedby = sortedby,
-            recur = recur),
-        format)
-end
-
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol)
+        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
+        recur::Symbol = :off,
+        threads::Union{Int,AbstractVector{Int}} = 1:Threads.nthreads(),
+        tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt))
+
+    pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
+    if groupby == :none
+        print(io, data, lidict, pf, format, threads, tasks, false)
+    else
+        if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
+            error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
+        elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
+            @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
+        end
+        any_nosamples = false
+        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
+        println(io, "=========================================================")
+        if groupby == [:task, :thread]
+            for taskid in intersect(get_task_ids(data), tasks)
+                threadids = intersect(get_thread_ids(data, taskid), threads)
+                if length(threadids) == 0
+                    any_nosamples = true
+                else
+                    nl = length(threadids) > 1 ? "\n" : ""
+                    printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
+                    for threadid in threadids
+                        printstyled(io, " Thread $threadid\n"; bold=true, color=Base.info_color())
+                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples && (any_nosamples = true)
+                        println(io)
+                    end
+                end
+            end
+        elseif groupby == [:thread, :task]
+            for threadid in intersect(get_thread_ids(data), threads)
+                taskids = intersect(get_task_ids(data, threadid), tasks)
+                if length(taskids) == 0
+                    any_nosamples = true
+                else
+                    nl = length(taskids) > 1 ? "\n" : ""
+                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
+                    for taskid in taskids
+                        printstyled(io, " Task $(Base.repr(taskid))\n"; bold=true, color=Base.debug_color())
+                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples && (any_nosamples = true)
+                        println(io)
+                    end
+                end
+            end
+        elseif groupby == :task
+            threads = 1:typemax(Int)
+            for taskid in intersect(get_task_ids(data), tasks)
+                printstyled(io, "Task $(Base.repr(taskid))\n"; bold=true, color=Base.debug_color())
+                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
+                nosamples && (any_nosamples = true)
+                println(io)
+            end
+        elseif groupby == :thread
+            tasks = 1:typemax(UInt)
+            for threadid in intersect(get_thread_ids(data), threads)
+                printstyled(io, "Thread $threadid\n"; bold=true, color=Base.info_color())
+                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
+                nosamples && (any_nosamples = true)
+                println(io)
+            end
+        end
+        any_nosamples && warning_empty(summary = true)
+    end
+    return
+end
+
+function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
+                format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
+                is_subsection::Bool = false)
     cols::Int = Base.displaysize(io)[2]
     data = convert(Vector{UInt64}, data)
     fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
     if format === :tree
-        tree(io, data, lidict, cols, fmt)
+        nosamples = tree(io, data, lidict, cols, fmt, threads, tasks, is_subsection)
+        return nosamples
     elseif format === :flat
         fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
-        flat(io, data, lidict, cols, fmt)
+        nosamples = flat(io, data, lidict, cols, fmt, threads, tasks, is_subsection)
+        return nosamples
     else
         throw(ArgumentError("output format $(repr(format)) not recognized"))
     end
 end
 
+function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
+    taskids = UInt[]
+    for i in length(data):-1:1
+        if is_block_end(data, i)
+            if isnothing(threadid) || data[i - META_OFFSET_THREADID] == threadid
+                taskid = data[i - META_OFFSET_TASKID]
+                !in(taskid, taskids) && push!(taskids, taskid)
+            end
+        end
+    end
+    return taskids
+end
+
+function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
+    threadids = Int[]
+    for i in length(data):-1:1
+        if is_block_end(data, i)
+            if isnothing(taskid) || data[i - META_OFFSET_TASKID] == taskid
+                threadid = data[i - META_OFFSET_THREADID]
+                !in(threadid, threadids) && push!(threadids, threadid)
+            end
+        end
+    end
+    return sort(threadids)
+end
+
+function is_block_end(data, i)
+    i < nmeta + 1 && return false
+    # 32-bit Linux has been seen to have rogue NULL IPs, so we use two NULLs to
+    # indicate a block end, where the 2nd NULL is the actual end index.
+    # It is therefore possible (though very unlikely) to see:
+    # 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
+    # in which case the third NULL is an IP and must not be treated as a block end.
+    return data[i] == 0 && data[i - 1] == 0 && data[i - META_OFFSET_SLEEPSTATE] != 0
+end
+
+function has_meta(data)
+    for i in 6:length(data)
+        data[i] == 0 || continue            # first block end null
+        data[i - 1] == 0 || continue        # second block end null
+        data[i - META_OFFSET_SLEEPSTATE] in 1:2 || continue
+        data[i - META_OFFSET_CPUCYCLECLOCK] != 0 || continue
+        data[i - META_OFFSET_TASKID] != 0 || continue
+        data[i - META_OFFSET_THREADID] != 0 || continue
+        return true
+    end
+    return false
+end
+
 """
     print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
 
@@ -182,27 +327,31 @@ a dictionary `lidict` of line information.
 
 See `Profile.print([io], data)` for an explanation of the valid keyword arguments.
 """
-print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
+print(data::Vector{<:Unsigned} = fetch(include_meta = true), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
     print(stdout, data, lidict; kwargs...)
 
 """
-    retrieve() -> data, lidict
+    retrieve(; kwargs...) -> data, lidict
 
 "Exports" profiling results in a portable format, returning the set of all backtraces
 (`data`) and a dictionary that maps the (session-specific) instruction pointers in `data` to
 `LineInfo` values that store the file name, function name, and line number. This function
 allows you to save profiling results for future analysis.
 """
-function retrieve()
-    data = fetch()
+function retrieve(; kwargs...)
+    data = fetch(; kwargs...)
     return (data, getdict(data))
 end
 
 function getdict(data::Vector{UInt})
-    # Lookup is expensive, so do it only once per ip.
-    udata = unique(data)
     dict = LineInfoDict()
-    for ip in udata
+    return getdict!(dict, data)
+end
+
+function getdict!(dict::LineInfoDict, data::Vector{UInt})
+    for ip in data
+        # Lookup is expensive, so do it only once per ip.
+        haskey(dict, UInt64(ip)) && continue
         st = lookup(convert(Ptr{Cvoid}, ip))
         # To correct line numbers for moving code, put it in the form expected by
         # Base.update_stackframes_callback[]
@@ -369,14 +518,15 @@ error_codes = Dict(
 
 
 """
-    fetch() -> data
+    fetch(;include_meta = true) -> data
 
 Returns a copy of the buffer of profile backtraces. Note that the
 values in `data` have meaning only on this machine in the current session, because it
 depends on the exact memory addresses used in JIT-compiling. This function is primarily for
 internal use; [`retrieve`](@ref) may be a better choice for most users.
+By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata.
 """
-function fetch()
+function fetch(;include_meta = true)
     maxlen = maxlen_data()
     len = len_data()
     if is_buffer_full()
@@ -386,14 +536,60 @@ function fetch()
     end
     data = Vector{UInt}(undef, len)
     GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len)
-    return data
+    if include_meta || isempty(data)
+        return data
+    end
+    return strip_meta(data)
+end
+
+function strip_meta(data)
+    nblocks = count(Base.Fix1(is_block_end, data), eachindex(data))
+    data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
+    j = length(data_stripped)
+    i = length(data)
+    while i > 0 && j > 0
+        data_stripped[j] = data[i]
+        if is_block_end(data, i)
+            i -= (nmeta + 1) # metadata fields and the extra NULL IP
+        end
+        i -= 1
+        j -= 1
+    end
+    @assert i == j == 0 "metadata stripping failed"
+    return data_stripped
 end
 
+"""
+    Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta
+
+The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used
+for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal
+details of the metadata format.
+"""
+function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
+    threadid == 0 && error("Fake threadid cannot be 0")
+    taskid == 0 && error("Fake taskid cannot be 0")
+    !isempty(data) && has_meta(data) && error("input already has metadata")
+    cpu_clock_cycle = UInt64(99)
+    data_with_meta = similar(data, 0)
+    for i = 1:length(data)
+        val = data[i]
+        if iszero(val)
+            # (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+            push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0)
+        else
+            push!(data_with_meta, val)
+        end
+    end
+    return data_with_meta
+end
 
 ## Print as a flat list
 # Counts the number of times each line appears, at any nesting level and at the topmost level
 # Merging multiple equivalent entries and recursive calls
-function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool) where {T}
+function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool,
+                    threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
+    !isempty(data) && !has_meta(data) && error("Profile data is missing required metadata")
     lilist = StackFrame[]
     n = Int[]
     m = Int[]
@@ -401,44 +597,71 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
     recursive = Set{T}()
     first = true
     totalshots = 0
-    for ip in data
-        if ip == 0
+    startframe = length(data) # index of the most recently accepted block end (the scan runs back to front)
+    skip = false # true while walking the frames of a block rejected by the threads/tasks filter
+    nsleeping = 0 # accepted snapshots whose sleep-state metadata equals 1
+    for i in startframe:-1:1
+        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
+        ip = data[i]
+        if is_block_end(data, i)
+            # read metadata
+            thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK]
+            taskid = data[i - META_OFFSET_TASKID]
+            threadid = data[i - META_OFFSET_THREADID]
+            if !in(threadid, threads) || !in(taskid, tasks)
+                skip = true
+                continue
+            end
+            if thread_sleeping == 1
+                nsleeping += 1
+            end
+            skip = false
             totalshots += 1
             empty!(recursive)
             first = true
-            continue
-        end
-        frames = lidict[ip]
-        nframes = (frames isa Vector ? length(frames) : 1)
-        for i = 1:nframes
-            frame = (frames isa Vector ? frames[i] : frames)
-            !C && frame.from_c && continue
-            key = (T === UInt64 ? ip : frame)
-            idx = get!(lilist_idx, key, length(lilist) + 1)
-            if idx > length(lilist)
-                push!(recursive, key)
-                push!(lilist, frame)
-                push!(n, 1)
-                push!(m, 0)
-            elseif !(key in recursive)
-                push!(recursive, key)
-                n[idx] += 1
-            end
-            if first
-                m[idx] += 1
-                first = false
+            startframe = i
+        elseif !skip
+            frames = lidict[ip]
+            nframes = (frames isa Vector ? length(frames) : 1)
+            for j = 1:nframes
+                frame = (frames isa Vector ? frames[j] : frames)
+                !C && frame.from_c && continue
+                key = (T === UInt64 ? ip : frame)
+                idx = get!(lilist_idx, key, length(lilist) + 1)
+                if idx > length(lilist)
+                    push!(recursive, key)
+                    push!(lilist, frame)
+                    push!(n, 1)
+                    push!(m, 0)
+                elseif !(key in recursive)
+                    push!(recursive, key)
+                    n[idx] += 1
+                end
+                if first
+                    m[idx] += 1
+                    first = false
+                end
             end
         end
     end
     @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
-    return (lilist, n, m, totalshots)
+    return (lilist, n, m, totalshots, nsleeping)
 end
 
-function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat)
-    lilist, n, m, totalshots = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C)
+function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
+    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
+    util_perc = totalshots == 0 ? 0.0 : (1 - (nsleeping / totalshots)) * 100 # guard 0/0: round(Int, NaN) below would throw InexactError
     if isempty(lilist)
-        warning_empty()
-        return
+        if is_subsection
+            Base.print(io, "Total snapshots: ")
+            printstyled(io, "$(totalshots)", color=Base.warn_color())
+            Base.println(io, " (", round(Int, util_perc), "% utilization)")
+        else
+            warning_empty()
+        end
+        return true # true: there were no frames to list
     end
     if false # optional: drop the "non-interpretable" ones
         keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
@@ -448,8 +671,13 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
     end
     filenamemap = Dict{Symbol,String}()
     print_flat(io, lilist, n, m, cols, filenamemap, fmt)
-    Base.println(io, "Total snapshots: ", totalshots)
-    nothing
+    Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization")
+    if is_subsection
+        println(io, ")")
+    else
+        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
+    end
+    return false # false: the flat listing was printed
 end
 
 function print_flat(io::IO, lilist::Vector{StackFrame},
@@ -612,14 +840,33 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
 end
 
 # turn a list of backtraces into a tree (implicitly separated by NULL markers)
-function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol) where {T}
+function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol,
+                threads::Union{Int,AbstractVector{Int},Nothing}=nothing, tasks::Union{UInt,AbstractVector{UInt},Nothing}=nothing) where {T}
+    !isempty(all) && !has_meta(all) && error("Profile data is missing required metadata")
     parent = root
     tops = Vector{StackFrameTree{T}}()
     build = Vector{StackFrameTree{T}}()
     startframe = length(all)
+    skip = false # true while walking the frames of a block rejected by the threads/tasks filter
+    nsleeping = 0 # accepted snapshots whose sleep-state metadata equals 1; returned alongside root
     for i in startframe:-1:1
+        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
         ip = all[i]
-        if ip == 0
+        if is_block_end(all, i)
+            # read metadata
+            thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK]
+            taskid = all[i - META_OFFSET_TASKID]
+            threadid = all[i - META_OFFSET_THREADID]
+            if (threads !== nothing && !in(threadid, threads)) ||
+               (tasks !== nothing && !in(taskid, tasks))
+                skip = true
+                continue
+            end
+            if thread_sleeping == 1
+                nsleeping += 1
+            end
+            skip = false
             # sentinel value indicates the start of a new backtrace
             empty!(build)
             root.recur = 0
@@ -646,7 +893,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
             parent = root
             root.count += 1
             startframe = i
-        else
+        elseif !skip
             pushfirst!(build, parent)
             if recur === :flat || recur === :flatc
                 # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree
@@ -687,6 +934,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                 parent = this
                 continue
             end
+
             frames = lidict[ip]
             nframes = (frames isa Vector ? length(frames) : 1)
             this = parent
@@ -721,7 +969,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
         nothing
     end
     cleanup!(root)
-    return root
+    return root, nsleeping
 end
 
 function maxstats(root::StackFrameTree)
@@ -743,12 +991,14 @@ end
 
 # Print the stack frame tree starting at a particular root. Uses a worklist to
 # avoid stack overflows.
-function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat) where T
+function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
     maxes = maxstats(bt)
     filenamemap = Dict{Symbol,String}()
     worklist = [(bt, 0, 0, "")]
-    println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
-    println(io, "=========================================================")
+    if !is_subsection
+        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
+        println(io, "=========================================================")
+    end
     while !isempty(worklist)
         (bt, level, noisefloor, str) = popfirst!(worklist)
         isempty(str) || println(io, str)
@@ -782,21 +1032,35 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
             pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
         end
     end
+    return
 end
 
-function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat)
+function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
     if fmt.combine
-        root = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur)
+        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     else
-        root = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur)
+        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     end
+    util_perc = root.count == 0 ? 0.0 : (1 - (nsleeping / root.count)) * 100 # guard 0/0: round(Int, NaN) below would throw InexactError
     if isempty(root.down)
-        warning_empty()
-        return
+        if is_subsection
+            Base.print(io, "Total snapshots: ")
+            printstyled(io, "$(root.count)", color=Base.warn_color())
+            Base.println(io, " (", round(Int, util_perc), "% utilization)")
+        else
+            warning_empty()
+        end
+        return true # true: the tree has no children to print
     end
-    print_tree(io, root, cols, fmt)
-    Base.println(io, "Total snapshots: ", root.count)
-    nothing
+    print_tree(io, root, cols, fmt, is_subsection)
+    Base.print(io, "Total snapshots: ", root.count, " (", round(Int, util_perc), "% utilization")
+    if is_subsection
+        println(io, ")")
+    else
+        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
+    end
+    return false # false: the tree was printed
 end
 
 function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
@@ -860,9 +1124,19 @@ function liperm(lilist::Vector{StackFrame})
     return sortperm(lilist, lt = lt)
 end
 
-warning_empty() = @warn """
-            There were no samples collected. Run your program longer (perhaps by
-            running it multiple times), or adjust the delay between samples with
-            `Profile.init()`."""
+function warning_empty(;summary = false) # emits one of two "no samples" warnings, selected by `summary`
+    if summary # wording for grouped output, where only some groups may be empty
+        @warn """
+        There were no samples collected in one or more groups.
+        This may be due to idle threads, or you may need to run your
+        program longer (perhaps by running it multiple times),
+        or adjust the delay between samples with `Profile.init()`."""
+    else # default wording: nothing was collected at all
+        @warn """
+        There were no samples collected.
+        Run your program longer (perhaps by running it multiple times),
+        or adjust the delay between samples with `Profile.init()`."""
+    end
+end
 
 end # module
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index 76f8a3a1b8ca8..ac7c8baefe09e 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Profile, Serialization
+using Test, Profile, Serialization, Logging
+using Base.StackTraces: StackFrame
 
 Profile.clear()
 Profile.init()
@@ -8,7 +9,7 @@ Profile.init()
 let iobuf = IOBuffer()
     for fmt in (:tree, :flat)
         Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, format=fmt, C=true)
-        Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, [0x0000000000000001], Dict(0x0000000000000001 => [Base.StackTraces.UNKNOWN]), format=fmt, C=false)
+        Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, Profile.add_fake_meta([0x0000000000000001, 0x0000000000000000]), Dict(0x0000000000000001 => [Base.StackTraces.UNKNOWN]), format=fmt, C=false)
     end
 end
 
@@ -59,6 +60,38 @@ let iobuf = IOBuffer()
     truncate(iobuf, 0)
 end
 
+@testset "Profile.print() groupby options" begin
+    iobuf = IOBuffer()
+    with_logger(NullLogger()) do
+        @testset for format in [:flat, :tree]
+            @testset for threads in [1:Threads.nthreads(), 1, 1:1, 1:2, [1,2]]
+                @testset for groupby in [:none, :thread, :task, [:thread, :task], [:task, :thread]]
+                    Profile.print(iobuf; groupby, threads, format)
+                    @test !isempty(String(take!(iobuf)))
+                end
+            end
+        end
+    end
+end
+
+@testset "Profile.fetch() with and without meta" begin
+    data_without = Profile.fetch(include_meta = false)
+    data_with = Profile.fetch()
+    @test data_without[1] == data_with[1]
+    @test data_without[end] == data_with[end]
+    nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with))
+    @test length(data_without) == length(data_with) - nblocks * (Profile.nmeta + 1)
+
+    data_with_fake = Profile.add_fake_meta(data_without)
+    @test_throws "input already has metadata" Profile.add_fake_meta(data_with)
+    data_stripped = Profile.strip_meta(data_with_fake)
+    @test data_stripped == data_without
+    # ideally the test below would be a test for equality, but real sample ips can be nulls, and thus
+    # adding metadata back in can convert those ips to new block ends, and the length is then longer
+    @test length(data_with_fake) >= length(data_with)
+
+end
+
 Profile.clear()
 @test isempty(Profile.fetch())
 
@@ -84,15 +117,27 @@ end
 
 @testset "setting sample count and delay in init" begin
     n_, delay_ = Profile.init()
+    n_original = n_
+    nthreads = Sys.iswindows() ? 1 : Threads.nthreads()
+    sample_size_bytes = sizeof(Ptr)
     def_n = Sys.iswindows() && Sys.WORD_SIZE == 32 ? 1_000_000 : 10_000_000
-    @test n_ == def_n
+    if Sys.WORD_SIZE == 32 && (def_n * nthreads * sample_size_bytes) > 2^29
+        @test n_ * nthreads * sample_size_bytes <= 2^29
+    else
+        @test n_ == def_n
+    end
+
     def_delay = Sys.iswindows() && Sys.WORD_SIZE == 32 ? 0.01 : 0.001
     @test delay_ == def_delay
     Profile.init(n=1_000_001, delay=0.0005)
     n_, delay_ = Profile.init()
-    @test n_ == 1_000_001
+    if Sys.WORD_SIZE == 32 && (1_000_001 * nthreads * sample_size_bytes) > 2^29
+        @test n_ * nthreads * sample_size_bytes <= 2^29
+    else
+        @test n_ == 1_000_001
+    end
     @test delay_ == 0.0005
-    Profile.init(n=def_n, delay=def_delay)
+    Profile.init(n=n_original, delay=def_delay)
 end
 
 @testset "warning for buffer full" begin
@@ -141,3 +186,36 @@ let cmd = Base.julia_cmd()
     @test success(p)
     @test parse(Int, s) > 100
 end
+
+@testset "FlameGraphs" begin
+    # FlameGraphs makes use of some Profile's internals. Detect possible breakage by mimicking some of its tests.
+    # Breakage is acceptable since these internals are not part of the stable API, but it's better to know, and ideally
+    # should be paired with an issue or PR in FlameGraphs.
+    #
+    # This also improves the thoroughness of our overall Profile tests.
+    stackframe(func, file, line; C=false) = StackFrame(Symbol(func), Symbol(file), line, nothing, C, false, 0)
+
+    backtraces = UInt64[   4, 3, 2, 1,   # order: callees then caller
+                        0, 6, 5, 1,
+                        0, 8, 7,
+                        0, 4, 3, 2, 1,
+                        0]
+    backtraces = Profile.add_fake_meta(backtraces)
+    lidict = Dict{UInt64,StackFrame}(1=>stackframe(:f1, :file1, 1),
+                                     2=>stackframe(:f2, :file1, 5),
+                                     3=>stackframe(:f3, :file2, 1),
+                                     4=>stackframe(:f2, :file1, 15),
+                                     5=>stackframe(:f4, :file1, 20),
+                                     6=>stackframe(:f5, :file3, 1),
+                                     7=>stackframe(:f1, :file1, 2),
+                                     8=>stackframe(:f6, :file3, 10))
+    root = Profile.StackFrameTree{StackFrame}()
+    Profile.tree!(root, backtraces, lidict, #= C =# true, :off)
+    @test length(root.down) == 2
+    for k in keys(root.down)
+        @test k.file == :file1
+        @test k.line ∈ (1, 2)
+    end
+    node = root.down[stackframe(:f1, :file1, 2)]
+    @test only(node.down).first == lidict[8]
+end
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index 168d3e963b589..1d1feea6d5a09 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -135,6 +135,8 @@ REPL.stripmd
 Base.Docs.apropos
 ```
 
+Another feature of help mode is the ability to access extended docstrings. You can do this by typing something like `??Print` rather than `?Print`, which will display the `# Extended help` section from the source code's documentation.
+
 Help mode can be exited by pressing backspace at the beginning of the line.
 
 ### [Shell mode](@id man-shell-mode)
@@ -209,6 +211,10 @@ Just as `^R` is a reverse search, `^S` is a forward search, with the prompt ```(
  The two may be used in conjunction with each other to move through the previous or next matching
 results, respectively.
 
+All executed commands in the Julia REPL are logged into `~/.julia/logs/repl_history.jl` along with a timestamp of when each was executed
+and the current REPL mode you were in. Search mode queries this log file in order to find the commands which you previously ran.
+This can be disabled at startup by passing the `--history-file=no` flag to Julia.
+
 ## Key bindings
 
 The Julia REPL makes great use of key bindings. Several control-key bindings were already introduced
@@ -307,6 +313,27 @@ Users should refer to `LineEdit.jl` to discover the available actions on key inp
 In both the Julian and help modes of the REPL, one can enter the first few characters of a function
 or type and then press the tab key to get a list all matches:
 
+```julia-repl
+julia> x[TAB]
+julia> xor
+```
+
+In some cases it only completes part of the name, up to the next ambiguity:
+
+```julia-repl
+julia> mapf[TAB]
+julia> mapfold
+```
+
+If you hit tab again, then you get the list of things that might complete this:
+
+```julia-repl
+julia> mapfold[TAB]
+mapfoldl mapfoldr
+```
+
+Like other components of the REPL, the search is case-sensitive:
+
 ```julia-repl
 julia> stri[TAB]
 stride     strides     string      strip
@@ -365,6 +392,46 @@ shell> /[TAB]
 .dockerinit bin/         dev/         home/        lib64/       mnt/         proc/        run/         srv/         tmp/         var/
 ```
 
+Dictionary keys can also be tab completed:
+
+```julia-repl
+julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
+Dict{String,Int64} with 3 entries:
+  "qwer2" => 2
+  "asdf"  => 3
+  "qwer1" => 1
+
+julia> foo["q[TAB]
+
+"qwer1" "qwer2"
+julia> foo["qwer
+```
+
+Tab completion can also help completing fields:
+
+```julia-repl
+julia> x = 3 + 4im;
+
+julia> x.[TAB][TAB]
+im re
+
+julia> import UUIDs
+
+julia> UUIDs.uuid[TAB][TAB]
+uuid1        uuid4         uuid5        uuid_version
+```
+
+Fields for output from functions can also be completed:
+
+```julia-repl
+julia> split("","")[1].[TAB]
+lastindex  offset  string
+```
+
+The completion of fields for output from functions uses type inference, and it can only suggest
+fields if the function is type stable.
+
+
 Tab completion can help with investigation of the available methods matching the input arguments:
 
 ```julia-repl
@@ -392,38 +459,54 @@ The completion of the methods uses type inference and can therefore see if the a
 even if the arguments are output from functions. The function needs to be type stable for the
 completion to be able to remove non-matching methods.
 
-Tab completion can also help completing fields:
+If you wonder which methods can be used with particular argument types, use `?` as the function name.
+This shows an example of looking for functions in InteractiveUtils that accept a single string:
 
 ```julia-repl
-julia> import UUIDs
-
-julia> UUIDs.uuid[TAB]
-uuid1        uuid4         uuid_version
+julia> InteractiveUtils.?("somefile")[TAB]
+edit(path::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:197
+less(file::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:266
 ```
 
-Fields for output from functions can also be completed:
+This lists the methods in the `InteractiveUtils` module that can be called on a string.
+By default, this excludes methods where all arguments are typed as `Any`,
+but you can see those too by holding down SHIFT-TAB instead of TAB:
 
 ```julia-repl
-julia> split("","")[1].[TAB]
-lastindex  offset  string
+julia> InteractiveUtils.?("somefile")[SHIFT-TAB]
+apropos(string) in REPL at REPL/src/docview.jl:796
+clipboard(x) in InteractiveUtils at InteractiveUtils/src/clipboard.jl:64
+code_llvm(f) in InteractiveUtils at InteractiveUtils/src/codeview.jl:221
+code_native(f) in InteractiveUtils at InteractiveUtils/src/codeview.jl:243
+edit(path::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:197
+edit(f) in InteractiveUtils at InteractiveUtils/src/editless.jl:225
+eval(x) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:3
+include(x) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:3
+less(file::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:266
+less(f) in InteractiveUtils at InteractiveUtils/src/editless.jl:274
+report_bug(kind) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:391
+separate_kwargs(args...; kwargs...) in InteractiveUtils at InteractiveUtils/src/macros.jl:7
 ```
 
-The completion of fields for output from functions uses type inference, and it can only suggest
-fields if the function is type stable.
+You can also use ` ?("somefile")[TAB]` to look across all modules, but the method lists can be long.
 
-Dictionary keys can also be tab completed:
+By omitting the closing parenthesis, you can include functions that might require additional arguments:
 
 ```julia-repl
-julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
-Dict{String,Int64} with 3 entries:
-  "qwer2" => 2
-  "asdf"  => 3
-  "qwer1" => 1
-
-julia> foo["q[TAB]
-
-"qwer1" "qwer2"
-julia> foo["qwer
+julia> using Mmap
+
+help?> Mmap.?("file",[TAB]
+Mmap.Anonymous(name::String, readonly::Bool, create::Bool) in Mmap at Mmap/src/Mmap.jl:16
+mmap(file::AbstractString) in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}) where T<:Array in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}) where {T<:Array, N} in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}, offset::Integer; grow, shared) where {T<:Array, N} in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:Array in Mmap at Mmap/src/Mmap.jl:251
+mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:Array in Mmap at Mmap/src/Mmap.jl:251
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}) where {T<:BitArray, N} in Mmap at Mmap/src/Mmap.jl:316
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}, offset::Integer; grow, shared) where {T<:BitArray, N} in Mmap at Mmap/src/Mmap.jl:316
+mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
+mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
 ```
 
 ## Customizing Colors
@@ -586,7 +669,7 @@ v  [ ] blueberry
 
 can instead be rendered with Unicode selection and navigation characters with
 
-```julia
+```julia-repl
 julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode);
 
 julia> request(menu)
@@ -600,7 +683,7 @@ julia> request(menu)
 
 More fine-grained configuration is also possible:
 
-```julia
+```julia-repl
 julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode, checked="YEP!", unchecked="NOPE", cursor='⧐');
 
 julia> request(menu)
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index 9a6160c960fe3..5f5d81ead637e 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -106,6 +106,11 @@ mutable struct PromptState <: ModeState
     refresh_wait::Union{Timer,Nothing}
 end
 
+struct Modifiers
+    shift::Bool
+end
+Modifiers() = Modifiers(false)
+
 options(s::PromptState) =
     if isdefined(s.p, :repl) && isdefined(s.p.repl, :options)
         # we can't test isa(s.p.repl, LineEditREPL) as LineEditREPL is defined
@@ -321,7 +326,7 @@ function show_completions(s::PromptState, completions::Vector{String})
         for col = 0:num_cols
             idx = row + col*entries_per_col
             if idx <= length(completions)
-                cmove_col(terminal(s), (colmax+2)*col)
+                cmove_col(terminal(s), (colmax+2)*col+1)
                 print(terminal(s), completions[idx])
             end
         end
@@ -1907,6 +1912,10 @@ mode(s::PromptState) = s.p          # ::Prompt
 mode(s::SearchState) = @assert false
 mode(s::PrefixSearchState) = s.histprompt.parent_prompt   # ::Prompt
 
+setmodifiers!(s::MIState, m::Modifiers) = setmodifiers!(mode(s), m) # forward to whatever mode(s) returns
+setmodifiers!(p::Prompt, m::Modifiers) = setmodifiers!(p.complete, m) # forward to the prompt's completion provider
+setmodifiers!(c, m::Modifiers=Modifiers()) = nothing # fallback: targets without modifier support ignore the request instead of raising MethodError
+
 # Search Mode completions
 function complete_line(s::SearchState, repeats)
     completions, partial, should_complete = complete_line(s.histprompt.complete, s)
@@ -2174,6 +2183,11 @@ function edit_tab(s::MIState, jump_spaces::Bool=false, delete_trailing::Bool=jum
     return refresh_line(s)
 end
 
+function shift_tab_completion(s::MIState)
+    setmodifiers!(s, Modifiers(true))
+    return complete_line(s)
+end
+
 # return true iff the content of the buffer is modified
 # return false when only the position changed
 function edit_insert_tab(buf::IOBuffer, jump_spaces::Bool=false, delete_trailing::Bool=jump_spaces)
@@ -2209,6 +2223,8 @@ const default_keymap =
 AnyDict(
     # Tab
     '\t' => (s::MIState,o...)->edit_tab(s, true),
+    # Shift-tab
+    "\e[Z" => (s::MIState,o...)->shift_tab_completion(s),
     # Enter
     '\r' => (s::MIState,o...)->begin
         if on_enter(s) || (eof(buffer(s)) && s.key_repeats > 1)
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index a661ffa218e97..a450e2eb7c1bd 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -55,6 +55,7 @@ import ..LineEdit:
     history_last,
     history_search,
     accept_result,
+    setmodifiers!,
     terminal,
     MIState,
     PromptState,
@@ -167,6 +168,7 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend)
 end
 
 function check_for_missing_packages_and_run_hooks(ast)
+    isa(ast, Expr) || return
     mods = modules_to_be_loaded(ast)
     filter!(mod -> isnothing(Base.identify_package(String(mod))), mods) # keep missing modules
     if !isempty(mods)
@@ -176,15 +178,18 @@ function check_for_missing_packages_and_run_hooks(ast)
     end
 end
 
-function modules_to_be_loaded(ast, mods = Symbol[])
+function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+    ast.head == :quote && return mods # don't search if it's not going to be run during this eval
     if ast.head in [:using, :import]
         for arg in ast.args
-            if first(arg.args) isa Symbol # i.e. `Foo`
-                if first(arg.args) != :. # don't include local imports
-                    push!(mods, first(arg.args))
+            arg = arg::Expr
+            arg1 = first(arg.args)
+            if arg1 isa Symbol # i.e. `Foo`
+                if arg1 != :. # don't include local imports
+                    push!(mods, arg1)
                 end
             else # i.e. `Foo: bar`
-                push!(mods, first(first(arg.args).args))
+                push!(mods, first((arg1::Expr).args))
             end
         end
     end
@@ -192,9 +197,8 @@ function modules_to_be_loaded(ast, mods = Symbol[])
         arg isa Expr && modules_to_be_loaded(arg, mods)
     end
     filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
-    return mods
+    return unique(mods)
 end
-modules_to_be_loaded(::Nothing) = Symbol[] # comments are parsed as nothing
 
 """
     start_repl_backend(repl_channel::Channel, response_channel::Channel)
@@ -280,6 +284,8 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool,
         try
             Base.sigatomic_end()
             if iserr
+                val = Base.scrub_repl_backtrace(val)
+                Base.istrivialerror(val) || ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, :err, val)
                 Base.invokelatest(Base.display_error, errio, val)
             else
                 if val !== nothing && show_value
@@ -301,7 +307,9 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool,
                 println(errio) # an error during printing is likely to leave us mid-line
                 println(errio, "SYSTEM (REPL): showing an error caused an error")
                 try
-                    Base.invokelatest(Base.display_error, errio, current_exceptions())
+                    excs = Base.scrub_repl_backtrace(current_exceptions())
+                    ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, :err, excs)
+                    Base.invokelatest(Base.display_error, errio, excs)
                 catch e
                     # at this point, only print the name of the type as a Symbol to
                     # minimize the possibility of further errors.
@@ -470,16 +478,30 @@ LineEditREPL(t::TextTerminal, hascolor::Bool, envcolors::Bool=false) =
         false, false, false, envcolors
     )
 
-mutable struct REPLCompletionProvider <: CompletionProvider end
+mutable struct REPLCompletionProvider <: CompletionProvider
+    modifiers::LineEdit.Modifiers
+end
+REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
+setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = m
+
 beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1])
 
 function complete_line(c::REPLCompletionProvider, s::PromptState)
     partial = beforecursor(s.input_buffer)
     full = LineEdit.input_string(s)
     ret, range, should_complete = completions(full, lastindex(partial))
+    if !c.modifiers.shift
+        # Filter out methods where all arguments are `Any`
+        filter!(ret) do c
+            isa(c, REPLCompletions.MethodCompletion) || return true
+            sig = Base.unwrap_unionall(c.method.sig)::DataType
+            return !all(T -> T === Any || T === Vararg{Any}, sig.parameters[2:end])
+        end
+    end
+    c.modifiers = LineEdit.Modifiers()
     return unique!(map(completion_text, ret)), partial[range], should_complete
 end
 
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index 44b3e6a3a4158..fb54fa4b73d43 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -144,7 +144,7 @@ function complete_symbol(sym::String, ffunc, context_module::Module=Main)
             if isa(b, Module)
                 mod = b
                 lookup_module = true
-            elseif Base.isstructtype(typeof(b))
+            else
                 lookup_module = false
                 t = typeof(b)
             end
@@ -478,17 +478,59 @@ function get_type(sym, fn::Module)
     return found ? Core.Typeof(val) : Any, found
 end
 
+function get_type(T, found::Bool, default_any::Bool)
+    found && return T  # lookup succeeded: pass the resolved type through unchanged
+    return default_any ? Any : throw(ArgumentError("argument not found"))  # otherwise fall back to `Any`, or fail if that is not allowed
+end
+
 # Method completion on function call expression that look like :(max(1))
 function complete_methods(ex_org::Expr, context_module::Module=Main)
     func, found = get_value(ex_org.args[1], context_module)::Tuple{Any,Bool}
     !found && return Completion[]
 
-    funargs = ex_org.args[2:end]
-    # handle broadcasting, but only handle number of arguments instead of
-    # argument types
+    args_ex, kwargs_ex = complete_methods_args(ex_org.args[2:end], ex_org, context_module, true, true)
+
+    out = Completion[]
+    complete_methods!(out, func, args_ex, kwargs_ex)
+    return out
+end
+
+function complete_any_methods(ex_org::Expr, callee_module::Module, context_module::Module, moreargs::Bool)  # collect methods of any function in `callee_module` that match the arguments of `ex_org`
+    out = Completion[]
+    args_ex, kwargs_ex = try
+        complete_methods_args(ex_org.args[2:end], ex_org, context_module, false, false)  # default_any=false: throws when an argument's type cannot be resolved
+    catch
+        return out  # an unresolved argument yields no completions rather than an error
+    end
+
+    for name in names(callee_module; all=true)
+        if !Base.isdeprecated(callee_module, name) && isdefined(callee_module, name)
+            func = getfield(callee_module, name)
+            if !isa(func, Module)
+                complete_methods!(out, func, args_ex, kwargs_ex, moreargs)
+            elseif callee_module === Main::Module && isa(func, Module)
+                callee_module2 = func  # when searching `Main`, also look one level down into modules bound in it
+                for name in names(callee_module2)
+                    if isdefined(callee_module2, name)
+                        func = getfield(callee_module2, name)  # read the binding from the module being iterated, not from `callee_module`
+                        if !isa(func, Module)
+                            complete_methods!(out, func, args_ex, kwargs_ex, moreargs)
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    return out
+end
+
+function complete_methods_args(funargs::Vector{Any}, ex_org::Expr, context_module::Module, default_any::Bool, allow_broadcasting::Bool)
     args_ex = Any[]
     kwargs_ex = Pair{Symbol,Any}[]
-    if ex_org.head === :. && ex_org.args[2] isa Expr
+    if allow_broadcasting && ex_org.head === :. && ex_org.args[2] isa Expr
+        # handle broadcasting, but only handle number of arguments instead of
+        # argument types
         for _ in (ex_org.args[2]::Expr).args
             push!(args_ex, Any)
         end
@@ -497,18 +539,20 @@ function complete_methods(ex_org::Expr, context_module::Module=Main)
             if isexpr(ex, :parameters)
                 for x in ex.args
                     n, v = isexpr(x, :kw) ? (x.args...,) : (x, x)
-                    push!(kwargs_ex, n => first(get_type(v, context_module)))
+                    push!(kwargs_ex, n => get_type(get_type(v, context_module)..., default_any))
                 end
             elseif isexpr(ex, :kw)
                 n, v = (ex.args...,)
-                push!(kwargs_ex, n => first(get_type(v, context_module)))
+                push!(kwargs_ex, n => get_type(get_type(v, context_module)..., default_any))
             else
-                push!(args_ex, first(get_type(ex, context_module)))
+                push!(args_ex, get_type(get_type(ex, context_module)..., default_any))
             end
         end
     end
+    return args_ex, kwargs_ex
+end
 
-    out = Completion[]
+function complete_methods!(out::Vector{Completion}, @nospecialize(func), args_ex::Vector{Any}, kwargs_ex::Vector{Pair{Symbol,Any}}, moreargs::Bool=true)
     ml = methods(func)
     # Input types and number of arguments
     if isempty(kwargs_ex)
@@ -525,6 +569,9 @@ function complete_methods(ex_org::Expr, context_module::Module=Main)
         ml = methods(kwfunc)
         func = kwfunc
     end
+    if !moreargs
+        na = typemax(Int)
+    end
 
     for (method::Method, orig_method) in zip(ml, orig_ml)
         ms = method.sig
@@ -534,7 +581,6 @@ function complete_methods(ex_org::Expr, context_module::Module=Main)
             push!(out, MethodCompletion(func, t_in, method, orig_method))
         end
     end
-    return out
 end
 
 include("latex_symbols.jl")
@@ -652,6 +698,36 @@ function completions(string::String, pos::Int, context_module::Module=Main)
     partial = string[1:pos]
     inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false))
 
+    # ?(x, y)TAB lists methods you can call with these objects
+    # ?(x, y TAB lists methods that take these objects as the first two arguments
+    # MyModule.?(x, y)TAB restricts the search to names in MyModule
+    rexm = match(r"(\w+\.|)\?\((.*)$", partial)
+    if rexm !== nothing
+        # Get the module scope
+        if isempty(rexm.captures[1])
+            callee_module = context_module
+        else
+            modname = Symbol(rexm.captures[1][1:end-1])
+            if isdefined(context_module, modname)
+                callee_module = getfield(context_module, modname)
+                if !isa(callee_module, Module)
+                    callee_module = context_module
+                end
+            else
+                callee_module = context_module
+            end
+        end
+        moreargs = !endswith(rexm.captures[2], ')')
+        callstr = "_(" * rexm.captures[2]
+        if moreargs
+            callstr *= ')'
+        end
+        ex_org = Meta.parse(callstr, raise=false, depwarn=false)
+        if isa(ex_org, Expr)
+            return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs), (0:length(rexm.captures[1])+1) .+ rexm.offset, false
+        end
+    end
+
     # if completing a key in a Dict
     identifier, partial_key, loc = dict_identifier_key(partial, inc_tag, context_module)
     if identifier !== nothing
@@ -828,9 +904,15 @@ end
 function UndefVarError_hint(io::IO, ex::UndefVarError)
     var = ex.var
     if var === :or
-        print("\nsuggestion: Use `||` for short-circuiting boolean OR.")
+        print(io, "\nsuggestion: Use `||` for short-circuiting boolean OR.")
     elseif var === :and
-        print("\nsuggestion: Use `&&` for short-circuiting boolean AND.")
+        print(io, "\nsuggestion: Use `&&` for short-circuiting boolean AND.")
+    elseif var === :help
+        println(io)
+        # Show friendly help message when user types help or help() and help is undefined
+        show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help]))
+    elseif var === :quit
+        print(io, "\nsuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.")
     end
 end
 
diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
index ee5bd4d426795..f01df5c389324 100644
--- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
@@ -203,9 +203,9 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Unio
             lastoption = numoptions(m)
             c = readkey(term.in_stream)
 
-            if c == Int(ARROW_UP) || c == Int('k')
+            if c == Int(ARROW_UP)
                 cursor[] = move_up!(m, cursor[], lastoption)
-            elseif c == Int(ARROW_DOWN) || c == Int('j')
+            elseif c == Int(ARROW_DOWN)
                 cursor[] = move_down!(m, cursor[], lastoption)
             elseif c == Int(PAGE_UP)
                 cursor[] = page_up!(m, cursor[], lastoption)
@@ -217,7 +217,7 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Unio
             elseif c == Int(END_KEY)
                 cursor[] = lastoption
                 m.pageoffset = lastoption - m.pagesize
-            elseif c == 13 || c == Int(' ') # <enter> or <space>
+            elseif c == 13 # <enter>
                 # will break if pick returns true
                 pick(m, cursor[]) && break
             elseif c == UInt32('q')
diff --git a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
index bf686dec28d19..bcca3bd8d851e 100644
--- a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
@@ -8,7 +8,7 @@ A menu that allows a user to select a multiple options from a list.
 
 # Sample Output
 
-```julia
+```julia-repl
 julia> request(MultiSelectMenu(options))
 Select the fruits you like:
 [press: d=done, a=all, n=none]
diff --git a/stdlib/REPL/src/TerminalMenus/Pager.jl b/stdlib/REPL/src/TerminalMenus/Pager.jl
index af49c3aa63440..c823a5dedd1ba 100644
--- a/stdlib/REPL/src/TerminalMenus/Pager.jl
+++ b/stdlib/REPL/src/TerminalMenus/Pager.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 mutable struct Pager{C} <: _ConfiguredMenu{C}
     lines::Vector{String}
     pagesize::Int
diff --git a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
index 2060af2e14623..32a6373b719d7 100644
--- a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
@@ -8,7 +8,7 @@ A menu that allows a user to select a single option from a list.
 
 # Sample Output
 
-```julia
+```julia-repl
 julia> request(RadioMenu(options, pagesize=4))
 Choose your favorite fruit:
 ^  grape
@@ -31,7 +31,9 @@ end
 
 """
 
-    RadioMenu(options::Array{String,1}; pagesize::Int=10, kwargs...)
+    RadioMenu(options::Array{String,1}; pagesize::Int=10,
+                                        keybindings::Vector{Char}=Char[],
+                                        kwargs...)
 
 Create a RadioMenu object. Use `request(menu::RadioMenu)` to get user input.
 `request()` returns an `Int` which is the index of the option selected by the
@@ -41,8 +43,12 @@ user.
 
   - `options::Array{String, 1}`: Options to be displayed
   - `pagesize::Int=10`: The number of options to be displayed at one time, the menu will scroll if length(options) > pagesize
+  - `keybindings::Vector{Char}=Char[]`: Shortcuts to pick the corresponding entry from `options`
 
 Any additional keyword arguments will be passed to [`TerminalMenus.Config`](@ref).
+
+!!! compat "Julia 1.8"
+    The `keybindings` argument requires Julia 1.8 or later.
 """
 function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true, keybindings::Vector{Char}=Char[], kwargs...)
     length(options) < 1 && error("RadioMenu must have at least one option")
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index 2e5fc361d8cbc..a9ecd1e4bb1e4 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -402,10 +402,18 @@ function symbol_latex(s::String)
 
     return get(symbols_latex, s, "")
 end
-function repl_latex(io::IO, s::String)
-    # decompose NFC-normalized identifier to match tab-completion input
-    s = normalize(s, :NFD)
-    latex = symbol_latex(s)
+function repl_latex(io::IO, s0::String)
+    # This has rampant `Core.Box` problems (#15276). Use the tricks of
+    # https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured
+    # We're changing some of the values so the `let` trick isn't applicable.
+    s::String = s0
+    latex::String = symbol_latex(s)
+    if isempty(latex)
+        # Decompose NFC-normalized identifier to match tab-completion
+        # input if the first search came up empty.
+        s = normalize(s, :NFD)
+        latex = symbol_latex(s)
+    end
     if !isempty(latex)
         print(io, "\"")
         printstyled(io, s, color=:cyan)
@@ -416,7 +424,7 @@ function repl_latex(io::IO, s::String)
         print(io, "\"")
         printstyled(io, s, color=:cyan)
         print(io, "\" can be typed by ")
-        state = '\0'
+        state::Char = '\0'
         with_output_color(:cyan, io) do io
             for c in s
                 cstr = string(c)
@@ -792,6 +800,11 @@ stripmd(x::Markdown.Table) =
 Search available docstrings for entries containing `pattern`.
 
 When `pattern` is a string, case is ignored. Results are printed to `io`.
+
+`apropos` can be called from the help mode in the REPL by wrapping the query in double quotes:
+```
+help?> "pattern"
+```
 """
 apropos(string) = apropos(stdout, string)
 apropos(io::IO, string) = apropos(io, Regex("\\Q$string", "i"))
diff --git a/stdlib/REPL/test/TerminalMenus/radio_menu.jl b/stdlib/REPL/test/TerminalMenus/radio_menu.jl
index 28a19fa7d9ac0..696be1324a8e3 100644
--- a/stdlib/REPL/test/TerminalMenus/radio_menu.jl
+++ b/stdlib/REPL/test/TerminalMenus/radio_menu.jl
@@ -50,3 +50,5 @@ radio_menu = RadioMenu(["single option"], charset=:ascii)
 @test simulate_input(1, radio_menu, :up, :up, :down, :up, :enter)
 radio_menu = RadioMenu(string.(1:3), pagesize=1, charset=:ascii)
 @test simulate_input(3, radio_menu, :down, :down, :down, :down, :enter)
+radio_menu = RadioMenu(["apple", "banana", "cherry"]; keybindings=collect('a':'c'), charset=:ascii)
+@test simulate_input(2, radio_menu, 'b')
diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl
index ac577dfd9ab27..62a91cc0a1256 100644
--- a/stdlib/REPL/test/TerminalMenus/runtests.jl
+++ b/stdlib/REPL/test/TerminalMenus/runtests.jl
@@ -6,22 +6,10 @@ using Test
 
 function simulate_input(expected, menu::TerminalMenus.AbstractMenu, keys...;
                         kwargs...)
-    keydict = Dict(:up => "\e[A",
-                   :down => "\e[B",
-                   :enter => "\r")
-    vimdict = Dict(:up => "k",
-                   :down => "j",
-                   :enter => " ")
-    errs = []
-    got = _simulate_input(keydict, deepcopy(menu), keys...; kwargs...)
-    got == expected || push!(errs, :arrows => got)
-    got = _simulate_input(vimdict, menu, keys...; kwargs...)
-    got == expected || push!(errs, :vim => got)
-    isempty(errs) || return errs
-end
+    keydict =  Dict(:up => "\e[A",
+                    :down => "\e[B",
+                    :enter => "\r")
 
-function _simulate_input(keydict, menu::TerminalMenus.AbstractMenu, keys...;
-                         kwargs...)
     for key in keys
         if isa(key, Symbol)
             write(stdin.buffer, keydict[key])
@@ -30,7 +18,7 @@ function _simulate_input(keydict, menu::TerminalMenus.AbstractMenu, keys...;
         end
     end
 
-    request(menu; suppress_output=true, kwargs...)
+    request(menu; suppress_output=true, kwargs...) == expected
 end
 
 include("radio_menu.jl")
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 8c4ee75850fbf..c2e1d45dd7af5 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -885,13 +885,13 @@ end
 
 # Test containers in error messages are limited #18726
 let io = IOBuffer()
-    Base.display_error(io,
-        try
+    Base.display_error(io, Base.ExceptionStack(Any[(exception =
+        (try
             [][trues(6000)]
             @assert false
         catch e
             e
-        end, [])
+        end), backtrace = [])]))
     @test length(String(take!(io))) < 1500
 end
 
@@ -1332,6 +1332,8 @@ end
         @test mods == [:Foo, :Bar]
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo, Bar"))
         @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo.bar, Foo.baz"))
+        @test mods == [:Foo]
 
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo end"))
         @test mods == [:Foo]
@@ -1355,9 +1357,51 @@ end
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Core"))
         @test isempty(mods)
 
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("# comment"))
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line(":(using Foo)"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("ex = :(using Foo)"))
         @test isempty(mods)
+
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("Foo"))
         @test isempty(mods)
     end
 end
+
+# err should reprint error if deeper than top-level
+fake_repl() do stdin_write, stdout_read, repl
+    repltask = @async begin
+        REPL.run_repl(repl)
+    end
+    # initialize `err` to `nothing`
+    write(stdin_write, "global err = nothing\n")
+    readline(stdout_read)
+    readline(stdout_read) == "\e[0m"
+    readuntil(stdout_read, "julia> ", keep=true)
+    # generate top-level error
+    write(stdin_write, "foobar\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined"
+    @test readline(stdout_read) == ""
+    readuntil(stdout_read, "julia> ", keep=true)
+    # check that top-level error did not change `err`
+    write(stdin_write, "err\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0m"
+    readuntil(stdout_read, "julia> ", keep=true)
+    # generate deeper error
+    write(stdin_write, "foo() = foobar\n")
+    readline(stdout_read)
+    readuntil(stdout_read, "julia> ", keep=true)
+    write(stdin_write, "foo()\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined"
+    readuntil(stdout_read, "julia> ", keep=true)
+    # check that deeper error did set `err`
+    write(stdin_write, "err\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0m1-element ExceptionStack:"
+    @test readline(stdout_read) == "UndefVarError: foobar not defined"
+    @test readline(stdout_read) == "Stacktrace:"
+    write(stdin_write, '\x04')
+    Base.wait(repltask)
+end
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index 545e81a27968d..671afd6c30073 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -32,6 +32,10 @@ let ex = quote
             :()
         end
 
+        primitive type NonStruct 8 end
+        Base.propertynames(::NonStruct) = (:a, :b, :c)
+        x = reinterpret(NonStruct, 0x00)
+
         # Support non-Dict AbstractDicts, #19441
         mutable struct CustomDict{K, V} <: AbstractDict{K, V}
             mydict::Dict{K, V}
@@ -64,6 +68,8 @@ let ex = quote
         test6()=[a, a]
         test7() = rand(Bool) ? 1 : 1.0
         test8() = Any[1][1]
+        test9(x::Char) = pass
+        test9(x::Char, i::Int) = pass
         kwtest(; x=1, y=2, w...) = pass
         kwtest2(a; x=1, y=2, w...) = pass
 
@@ -516,6 +522,34 @@ for s in ("CompletionFoo.kwtest2(1; x=1,",
     @test occursin("a; x, y, w...", c[1])
 end
 
+#################################################################
+
+# method completion with `?` (arbitrary method with given argument types)
+let s = "CompletionFoo.?([1,2,3], 2.0)"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test(x::AbstractArray{T}, y) where T<:Real", str), c)
+    @test  any(str->occursin("test(args...)", str), c)
+    @test !any(str->occursin("test3(x::AbstractArray{Int", str), c)
+    @test !any(str->occursin("test4", str), c)
+end
+
+let s = "CompletionFoo.?('c')"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test9(x::Char)", str), c)
+    @test !any(str->occursin("test9(x::Char, i::Int", str), c)
+end
+
+let s = "CompletionFoo.?('c'"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test9(x::Char)", str), c)
+    @test  any(str->occursin("test9(x::Char, i::Int", str), c)
+end
+
+#################################################################
+
 # Test of inference based getfield completion
 let s = "(1+2im)."
     c,r = test_complete(s)
@@ -1115,6 +1149,11 @@ let s = "test_dict[\"ab"
     @test c == Any["\"abc\"", "\"abcd\""]
 end
 
+let s = "CompletionFoo.x."
+    c, r = test_complete(s)
+    @test "a" in c
+end
+
 # https://github.com/JuliaLang/julia/issues/27184
 let
     (test_complete("@noexist."); @test true)
diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
index 6958e618d3ea8..199dcab940c86 100644
--- a/stdlib/Random/Project.toml
+++ b/stdlib/Random/Project.toml
@@ -3,6 +3,7 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [deps]
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md
index f5508781ef27b..059cd8f600e7d 100644
--- a/stdlib/Random/docs/src/index.md
+++ b/stdlib/Random/docs/src/index.md
@@ -8,9 +8,12 @@ Random number generation in Julia uses the [Xoshiro256++](https://prng.di.unimi.
 by default, with per-`Task` state.
 Other RNG types can be plugged in by inheriting the `AbstractRNG` type; they can then be used to
 obtain multiple streams of random numbers.
-Besides the default `TaskLocalRNG` type, the `Random` package also provides `MersenneTwister`,
-`RandomDevice` (which exposes OS-provided entropy), and `Xoshiro` (for explicitly-managed
-Xoshiro256++ streams).
+
+The PRNGs (pseudorandom number generators) exported by the `Random` package are:
+* `TaskLocalRNG`: a token that represents use of the currently active Task-local stream, deterministically seeded from the parent task, or by `RandomDevice` (with system randomness) at program start
+* `Xoshiro`: generates a high-quality stream of random numbers with a small state vector and high performance using the Xoshiro256++ algorithm
+* `RandomDevice`: for OS-provided entropy. This may be used for cryptographically secure random numbers (CS(P)RNG).
+* `MersenneTwister`: an alternate high-quality PRNG which was the default in older versions of Julia, and is also quite fast, but requires much more space to store the state vector and generate a random sequence.
 
 Most functions related to random generation accept an optional `AbstractRNG` object as first argument.
 Some also accept dimension specifications `dims...` (which can also be given as a tuple) to generate
@@ -151,22 +154,22 @@ Scalar and array methods for `Die` now work as expected:
 
 ```jldoctest Die; setup = :(Random.seed!(1))
 julia> rand(Die)
-Die(6)
+Die(5)
 
 julia> rand(MersenneTwister(0), Die)
 Die(11)
 
 julia> rand(Die, 3)
 3-element Vector{Die}:
+ Die(9)
  Die(15)
- Die(19)
- Die(4)
+ Die(14)
 
 julia> a = Vector{Die}(undef, 3); rand!(a)
 3-element Vector{Die}:
+ Die(19)
+ Die(7)
  Die(17)
- Die(20)
- Die(15)
 ```
 
 #### A simple sampler without pre-computed data
@@ -183,9 +186,9 @@ julia> rand(Die(4))
 
 julia> rand(Die(4), 3)
 3-element Vector{Any}:
+ 2
+ 3
  3
- 4
- 1
 ```
 
 Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`.
diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl
index c483296fe3af1..13e5a8c778b16 100644
--- a/stdlib/Random/src/RNGs.jl
+++ b/stdlib/Random/src/RNGs.jl
@@ -13,6 +13,9 @@ if Sys.iswindows()
         rand!(rd, rd.buffer)
         @inbounds return rd.buffer[1] % sp[]
     end
+
+    show(io::IO, ::RandomDevice) = print(io, RandomDevice, "()")
+
 else # !windows
     struct RandomDevice <: AbstractRNG
         unlimited::Bool
@@ -23,16 +26,29 @@ else # !windows
     rand(rd::RandomDevice, sp::SamplerBoolBitInteger) = read(getfile(rd), sp[])
     rand(rd::RandomDevice, ::SamplerType{Bool}) = read(getfile(rd), UInt8) % Bool
 
+    mutable struct FileRef
+        @atomic file::Union{IOStream, Nothing}
+    end
+
+    const DEV_RANDOM  = FileRef(nothing)
+    const DEV_URANDOM = FileRef(nothing)
+
     function getfile(rd::RandomDevice)
-        devrandom = rd.unlimited ? DEV_URANDOM : DEV_RANDOM
-        # TODO: there is a data-race, this can leak up to nthreads() copies of the file descriptors,
-        # so use a "thread-once" utility once available
-        isassigned(devrandom) || (devrandom[] = open(rd.unlimited ? "/dev/urandom" : "/dev/random"))
-        devrandom[]
+        ref = rd.unlimited ? DEV_URANDOM : DEV_RANDOM
+        fd = ref.file  # the stored stream, or `nothing` if none has been stored yet
+        if fd === nothing
+            fd = open(rd.unlimited ? "/dev/urandom" : "/dev/random")
+            old, ok = @atomicreplace ref.file nothing => fd  # store `fd` only if the field still holds `nothing`
+            if !ok  # the field was already set: close our stream and use the stored one
+                close(fd)
+                fd = old::IOStream
+            end
+        end
+        return fd
     end
 
-    const DEV_RANDOM  = Ref{IOStream}()
-    const DEV_URANDOM = Ref{IOStream}()
+    show(io::IO, rd::RandomDevice) =
+        print(io, RandomDevice, rd.unlimited ? "()" : "(unlimited=false)")
 
 end # os-test
 
@@ -376,6 +392,7 @@ copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src)
 copy(::_GLOBAL_RNG) = copy(default_rng())
 
 GLOBAL_SEED = 0
+set_global_seed!(seed) = global GLOBAL_SEED = seed
 
 function seed!(::_GLOBAL_RNG, seed=rand(RandomDevice(), UInt64, 4))
     global GLOBAL_SEED = seed
@@ -384,7 +401,7 @@ end
 
 seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng)  # to resolve ambiguity
 
-seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64},NTuple{4,UInt64}}=nothing) =
+seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64}}=nothing) =
     seed!(GLOBAL_RNG, seed)
 
 rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng())
@@ -411,6 +428,10 @@ for T in BitInteger_types
 end
 
 function __init__()
+    @static if !Sys.iswindows()
+        @atomic DEV_RANDOM.file = nothing
+        @atomic DEV_URANDOM.file = nothing
+    end
     seed!(GLOBAL_RNG)
 end
 
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index 45aa6442eed7e..432fab1638dda 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -13,6 +13,7 @@ include("DSFMT.jl")
 using .DSFMT
 using Base.GMP.MPZ
 using Base.GMP: Limb
+import SHA
 
 using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing
 
@@ -351,7 +352,7 @@ julia> rand(Float64, (2, 3))
     The complexity of `rand(rng, s::Union{AbstractDict,AbstractSet})`
     is linear in the length of `s`, unless an optimized method with
     constant complexity is available, which is the case for `Dict`,
-    `Set` and `BitSet`. For more than a few calls, use `rand(rng,
+    `Set` and dense `BitSet`s. For more than a few calls, use `rand(rng,
     collect(s))` instead, or either `rand(rng, Dict(s))` or `rand(rng,
     Set(s))` as appropriate.
 """
diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl
index 40da3c5ff1722..a1cb42665329a 100644
--- a/stdlib/Random/src/Xoshiro.jl
+++ b/stdlib/Random/src/Xoshiro.jl
@@ -4,7 +4,8 @@
 # Lots of implementation is shared with TaskLocalRNG
 
 """
-    Xoshiro
+    Xoshiro(seed)
+    Xoshiro()
 
 Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and
 Sebastiano Vigna in "Scrambled Linear Pseudorandom Number Generators",
@@ -54,7 +55,7 @@ rng_native_52(::Xoshiro) = UInt64
 @inline function rand(rng::Xoshiro, ::SamplerType{UInt64})
     s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3
     tmp = s0 + s3
-    res = tmp << 23 | tmp >> 41
+    res = ((tmp << 23) | (tmp >> 41)) + s0
     t = s1 << 17
     s2 = xor(s2, s0)
     s3 = xor(s3, s1)
@@ -103,7 +104,7 @@ end
     task = current_task()
     s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
     tmp = s0 + s3
-    res = tmp << 23 | tmp >> 41
+    res = ((tmp << 23) | (tmp >> 41)) + s0
     t = s1 << 17
     s2 = xor(s2, s0)
     s3 = xor(s3, s1)
@@ -117,65 +118,21 @@ end
 
 # Shared implementation between Xoshiro and TaskLocalRNG -- seeding
 
-function seed!(x::Union{TaskLocalRNG,Xoshiro})
+function seed!(rng::Union{TaskLocalRNG,Xoshiro})
     # as we get good randomness from RandomDevice, we can skip hashing
-    parent = RandomDevice()
-    # Constants have nothing up their sleeve, see task.c
-    # 0x02011ce34bce797f == hash(UInt(1))|0x01
-    # 0x5a94851fb48a6e05 == hash(UInt(2))|0x01
-    # 0x3688cf5d48899fa7 == hash(UInt(3))|0x01
-    # 0x867b4bb4c42e5661 == hash(UInt(4))|0x01
-    setstate!(x,
-              0x02011ce34bce797f * rand(parent, UInt64),
-              0x5a94851fb48a6e05 * rand(parent, UInt64),
-              0x3688cf5d48899fa7 * rand(parent, UInt64),
-              0x867b4bb4c42e5661 * rand(parent, UInt64))
+    rd = RandomDevice()
+    setstate!(rng, rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64))
 end
 
-function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::NTuple{4,UInt64})
-    # TODO: Consider a less ad-hoc construction
-    # We can afford burning a handful of cycles here, and we don't want any
-    # surprises with respect to bad seeds / bad interactions.
-
-    s0 = s  = Base.hash_64_64(seed[1])
-    s1 = s += Base.hash_64_64(seed[2])
-    s2 = s += Base.hash_64_64(seed[3])
-    s3 = s += Base.hash_64_64(seed[4])
-
+function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::Union{Vector{UInt32}, Vector{UInt64}})
+    c = SHA.SHA2_256_CTX()
+    SHA.update!(c, reinterpret(UInt8, seed))
+    s0, s1, s2, s3 = reinterpret(UInt64, SHA.digest!(c))
     setstate!(rng, s0, s1, s2, s3)
-
-    rand(rng, UInt64)
-    rand(rng, UInt64)
-    rand(rng, UInt64)
-    rand(rng, UInt64)
-    rng
 end
 
-function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::UInt128)
-    seed0 = seed % UInt64
-    seed1 = (seed>>>64) % UInt64
-    seed!(rng, (seed0, seed1, zero(UInt64), zero(UInt64)))
-end
-seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, UInt128(seed))
-
-function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::AbstractVector{UInt64})
-    if length(seed) > 4
-        throw(ArgumentError("seed should have no more than 256 bits"))
-    end
-    seed0 = length(seed)>0 ? seed[1] : UInt64(0)
-    seed1 = length(seed)>1 ? seed[2] : UInt64(0)
-    seed2 = length(seed)>2 ? seed[3] : UInt64(0)
-    seed3 = length(seed)>3 ? seed[4] : UInt64(0)
-    seed!(rng, (seed0, seed1, seed2, seed3))
-end
+seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, make_seed(seed))
 
-function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::AbstractVector{UInt32})
-    if iseven(length(seed))
-        seed!(rng, reinterpret(UInt64, seed))
-    else
-        seed!(rng, UInt64[reinterpret(UInt64, @view(seed[begin:end-1])); seed[end] % UInt64])
-    end
-end
 
 @inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128})
     first = rand(rng, UInt64)
diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl
index e115533bb6fef..9fb03f9572688 100644
--- a/stdlib/Random/src/XoshiroSimd.jl
+++ b/stdlib/Random/src/XoshiroSimd.jl
@@ -158,7 +158,7 @@ end
 
     i = 0
     while i+8 <= len
-        res = _rotl23(_plus(s0,s3))
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
         unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), f(res, T))
         t = _shl17(s1)
         s2 = _xor(s2, s0)
@@ -170,7 +170,7 @@ end
         i += 8
     end
     if i < len
-        res = _rotl23(_plus(s0,s3))
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
         t = _shl17(s1)
         s2 = _xor(s2, s0)
         s3 = _xor(s3, s1)
@@ -200,7 +200,7 @@ end
 
     i = 0
     while i+8 <= len
-        res = _rotl23(_plus(s0,s3))
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
         shift = 0
         while i+8 <= len && shift < 8
             resLoc = _and(_lshr(res, shift), 0x0101010101010101)
@@ -219,7 +219,7 @@ end
     end
     if i < len
         # we may overgenerate some bytes here, if len mod 64 <= 56 and len mod 8 != 0
-        res = _rotl23(_plus(s0,s3))
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
         resLoc = _and(res, 0x0101010101010101)
         ref = Ref(resLoc)
         ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i)
@@ -245,7 +245,7 @@ end
 
     i = 0
     while i + 8*N <= len
-        res = _rotl23(_plus(s0,s3))
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
         t = _shl17(s1)
         s2 = _xor(s2, s0)
         s3 = _xor(s3, s1)
@@ -264,7 +264,7 @@ end
     msk = ntuple(i->VecElement(0x0101010101010101), Val(N))
     i = 0
     while i + 64*N <= len
-        res = _rotl23(_plus(s0,s3))
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
         t = _shl17(s1)
         s2 = _xor(s2, s0)
         s3 = _xor(s3, s1)
diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl
index 674c1d3bfe571..0d6e06c444a09 100644
--- a/stdlib/Random/src/misc.jl
+++ b/stdlib/Random/src/misc.jl
@@ -53,13 +53,13 @@ number generator, see [Random Numbers](@ref).
 # Examples
 ```jldoctest
 julia> Random.seed!(3); randstring()
-"vZmAMp3z"
+"Lxz5hUwn"
 
 julia> randstring(MersenneTwister(3), 'a':'z', 6)
 "ocucay"
 
 julia> randstring("ACGT")
-"CAAACACC"
+"TGCTCCTC"
 ```
 
 !!! note
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
index 1995a9efbc471..c8be4c95cdaf2 100644
--- a/stdlib/Random/test/runtests.jl
+++ b/stdlib/Random/test/runtests.jl
@@ -688,7 +688,7 @@ end
 @testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
     seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...]
     if RNG == Xoshiro
-        push!(seeds, rand(UInt64, rand(1:4)), Tuple(rand(UInt64, 4)))
+        push!(seeds, rand(UInt64, rand(1:4)))
     end
     for seed=seeds
         m = RNG(seed)
@@ -699,7 +699,7 @@ end
 end
 
 @testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
-    seeds = Any[0, rand(UInt128), rand(UInt64, 4), Tuple(rand(UInt64, 4))]
+    seeds = Any[0, rand(UInt128), rand(UInt64, 4)]
 
     for seed=seeds
         Random.seed!(seed)
@@ -885,28 +885,37 @@ end
 end
 
 @testset "show" begin
-    m = MersenneTwister(123)
-    @test string(m) == "MersenneTwister(123)"
-    Random.jump!(m, 2*big(10)^20)
-    @test string(m) == "MersenneTwister(123, (200000000000000000000, 0))"
-    @test m == MersenneTwister(123, (200000000000000000000, 0))
-    rand(m)
-    @test string(m) == "MersenneTwister(123, (200000000000000000000, 1002, 0, 1))"
-
-    @test m == MersenneTwister(123, (200000000000000000000, 1002, 0, 1))
-    rand(m, Int64)
-    @test string(m) == "MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))"
-    @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))
-
-    m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)
-    @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)"
-    rand(m, Int64)
-    @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))"
-    @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))
-
-    m = MersenneTwister(0); rand(m, Int64); rand(m)
-    @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))"
-    @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))
+    @testset "MersenneTwister" begin
+        m = MersenneTwister(123)
+        @test string(m) == "MersenneTwister(123)"
+        Random.jump!(m, 2*big(10)^20)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 0))"
+        @test m == MersenneTwister(123, (200000000000000000000, 0))
+        rand(m)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 1002, 0, 1))"
+
+        @test m == MersenneTwister(123, (200000000000000000000, 1002, 0, 1))
+        rand(m, Int64)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))"
+        @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))
+
+        m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)
+        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)"
+        rand(m, Int64)
+        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))"
+        @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))
+
+        m = MersenneTwister(0); rand(m, Int64); rand(m)
+        @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))"
+        @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))
+    end
+
+    @testset "RandomDevice" begin
+        @test string(RandomDevice()) == "$RandomDevice()"
+        if !Sys.iswindows()
+            @test string(RandomDevice(unlimited=false)) == "$RandomDevice(unlimited=false)"
+        end
+    end
 end
 
 @testset "rand[!] for BigInt/BigFloat" begin
diff --git a/stdlib/SHA.version b/stdlib/SHA.version
new file mode 100644
index 0000000000000..0cac01d958702
--- /dev/null
+++ b/stdlib/SHA.version
@@ -0,0 +1,4 @@
+SHA_BRANCH = master
+SHA_SHA1 = d30dbf6f75b30b84c678208bb93438bd75a5f3ef
+SHA_GIT_URL := https://github.com/JuliaCrypto/SHA.jl.git
+SHA_TAR_URL = https://api.github.com/repos/JuliaCrypto/SHA.jl/tarball/$1
diff --git a/stdlib/SHA/LICENSE.md b/stdlib/SHA/LICENSE.md
deleted file mode 100644
index eec075ce6f2ff..0000000000000
--- a/stdlib/SHA/LICENSE.md
+++ /dev/null
@@ -1,58 +0,0 @@
-The SHA.jl package is licensed under the MIT "Expat" License:
-
-> Copyright (c) 2014: Elliot Saba.
->
-> Permission is hereby granted, free of charge, to any person obtaining
-> a copy of this software and associated documentation files (the
-> "Software"), to deal in the Software without restriction, including
-> without limitation the rights to use, copy, modify, merge, publish,
-> distribute, sublicense, and/or sell copies of the Software, and to
-> permit persons to whom the Software is furnished to do so, subject to
-> the following conditions:
->
-> The above copyright notice and this permission notice shall be
-> included in all copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-This package was inspired by the SHA2 [source code from Minix](https://github.com/minix3/minix/blob/b6cbf7203b080219de306404f8022a65b7884f33/common/lib/libc/hash/sha2/sha2.c), itself released under the BSD license:
-
-> sha2.c
->
-> Version 1.0.0beta1
->
-> Written by Aaron D. Gifford <me@aarongifford.com>
->
-> Copyright 2000 Aaron D. Gifford.  All rights reserved.
->
-> Redistribution and use in source and binary forms, with or without
-> modification, are permitted provided that the following conditions
-> are met:
->
-> 1. Redistributions of source code must retain the above copyright
->    notice, this list of conditions and the following disclaimer.
->
-> 2. Redistributions in binary form must reproduce the above copyright
->    notice, this list of conditions and the following disclaimer in the
->    documentation and/or other materials provided with the distribution.
->
-> 3. Neither the name of the copyright holder nor the names of contributors
->    may be used to endorse or promote products derived from this software
->    without specific prior written permission.
->
-> THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
-> ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-> IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-> ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
-> FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-> DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-> OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-> HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-> LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-> OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
diff --git a/stdlib/SHA/Project.toml b/stdlib/SHA/Project.toml
deleted file mode 100644
index 7fe8ff0d5c192..0000000000000
--- a/stdlib/SHA/Project.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-name = "SHA"
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
diff --git a/stdlib/SHA/docs/src/index.md b/stdlib/SHA/docs/src/index.md
deleted file mode 100644
index 30c88e5cd8757..0000000000000
--- a/stdlib/SHA/docs/src/index.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# SHA
-
-
-Usage is very straightforward:
-```julia
-julia> using SHA
-
-julia> bytes2hex(sha256("test"))
-"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
-```
-
-Each exported function (at the time of this writing, SHA-1, SHA-2 224, 256, 384 and 512, and SHA-3 224, 256, 384 and 512 functions are implemented) takes in either an `AbstractVector{UInt8}`, an `AbstractString` or an `IO` object.  This makes it trivial to checksum a file:
-
-```julia
-shell> cat /tmp/test.txt
-test
-julia> using SHA
-
-julia> open("/tmp/test.txt") do f
-           sha2_256(f)
-       end
-32-element Array{UInt8,1}:
- 0x9f
- 0x86
- 0xd0
- 0x81
- 0x88
- 0x4c
- 0x7d
- 0x65
-    ⋮
- 0x5d
- 0x6c
- 0x15
- 0xb0
- 0xf0
- 0x0a
- 0x08
-```
-
-Due to the colloquial usage of `sha256` to refer to `sha2_256`, convenience functions are provided, mapping `shaxxx()` function calls to `sha2_xxx()`.  For SHA-3, no such colloquialisms exist and the user must use the full `sha3_xxx()` names.
-
-`shaxxx()` takes `AbstractString` and array-like objects (`NTuple` and `Array`) with elements of type `UInt8`.
-
-To create a hash from multiple items the `SHAX_XXX_CTX()` types can be used to create a stateful hash object that
-is updated with `update!` and finalized with `digest!`
-
-```julia
-julia> ctx = SHA2_256_CTX()
-SHA2 256-bit hash state
-
-julia> update!(ctx, b"some data")
-0x0000000000000009
-
-julia> update!(ctx, b"some more data")
-0x0000000000000017
-
-julia> digest!(ctx)
-32-element Vector{UInt8}:
- 0xbe
- 0xcf
- 0x23
- 0xda
- 0xaf
- 0x02
-    ⋮
- 0x25
- 0x52
- 0x19
- 0xa0
- 0x8b
- 0xc5
-```
-
-Note that, at the time of this writing, the SHA3 code is not optimized, and as such is roughly an order of magnitude slower than SHA2.
diff --git a/stdlib/SHA/src/SHA.jl b/stdlib/SHA/src/SHA.jl
deleted file mode 100644
index ac93fdf13f442..0000000000000
--- a/stdlib/SHA/src/SHA.jl
+++ /dev/null
@@ -1,137 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    SHA
-
-The SHA module provides hashing functionality for SHA1, SHA2 and SHA3 algorithms.
-
-They are implemented as both pure functions for hashing single pieces of data,
-or a stateful context which can be updated with the `update!` function and
-finalized with `digest!`.
-
-```julia-repl
-julia> sha1(b"some data")
-20-element Vector{UInt8}:
- 0xba
- 0xf3
-    ⋮
- 0xe3
- 0x56
-
-
-julia> ctx = SHA1_CTX()
-SHA1 hash state
-
-julia> update!(ctx, b"some data")
-0x0000000000000009
-
-julia> digest!(ctx)
-20-element Vector{UInt8}:
- 0xba
- 0xf3
-    ⋮
- 0xe3
- 0x56
-"""
-module SHA
-
-# Export convenience functions, context types, update!() and digest!() functions
-export sha1, SHA1_CTX, update!, digest!
-export sha224, sha256, sha384, sha512
-export sha2_224, sha2_256, sha2_384, sha2_512
-export sha3_224, sha3_256, sha3_384, sha3_512
-export SHA224_CTX, SHA256_CTX, SHA384_CTX, SHA512_CTX
-export SHA2_224_CTX, SHA2_256_CTX, SHA2_384_CTX, SHA2_512_CTX
-export SHA3_224_CTX, SHA3_256_CTX, SHA3_384_CTX, SHA3_512_CTX
-export HMAC_CTX, hmac_sha1
-export hmac_sha224, hmac_sha256, hmac_sha384, hmac_sha512
-export hmac_sha2_224, hmac_sha2_256, hmac_sha2_384, hmac_sha2_512
-export hmac_sha3_224, hmac_sha3_256, hmac_sha3_384, hmac_sha3_512
-
-# data to be hashed:
-const AbstractBytes = Union{AbstractVector{UInt8},NTuple{N,UInt8} where N}
-
-include("constants.jl")
-include("types.jl")
-include("base_functions.jl")
-include("sha1.jl")
-include("sha2.jl")
-include("sha3.jl")
-include("common.jl")
-include("hmac.jl")
-
-# Create data types and convenience functions for each hash implemented
-for (f, ctx) in [(:sha1, :SHA1_CTX),
-                 (:sha224, :SHA224_CTX),
-                 (:sha256, :SHA256_CTX),
-                 (:sha384, :SHA384_CTX),
-                 (:sha512, :SHA512_CTX),
-                 (:sha2_224, :SHA2_224_CTX),
-                 (:sha2_256, :SHA2_256_CTX),
-                 (:sha2_384, :SHA2_384_CTX),
-                 (:sha2_512, :SHA2_512_CTX),
-                 (:sha3_224, :SHA3_224_CTX),
-                 (:sha3_256, :SHA3_256_CTX),
-                 (:sha3_384, :SHA3_384_CTX),
-                 (:sha3_512, :SHA3_512_CTX),]
-    g = Symbol(:hmac_, f)
-
-    @eval begin
-        # Our basic function is to process arrays of bytes
-        """
-            $($f)(data)
-
-        Hash data using the $($f) algorithm and return the resulting digest.
-        See also [`$($ctx)`](@ref).
-        """
-        function $f(data::AbstractBytes)
-            ctx = $ctx()
-            update!(ctx, data)
-            return digest!(ctx)
-        end
-
-        """
-            $($g)(key, data)
-
-        Hash data using the $($f) algorithm using the passed key
-        See also [`HMAC_CTX`](@ref).
-        """
-        function $g(key::Vector{UInt8}, data::AbstractBytes)
-            ctx = HMAC_CTX($ctx(), key)
-            update!(ctx, data)
-            return digest!(ctx)
-        end
-
-        # AbstractStrings are a pretty handy thing to be able to crunch through
-        $f(str::AbstractString) = $f(String(str)) # always crunch UTF-8 repr
-        $f(str::String) = $f(codeunits(str))
-        $g(key::Vector{UInt8}, str::AbstractString) = $g(key, String(str))
-        $g(key::Vector{UInt8}, str::String) = $g(key, codeunits(str))
-
-        """
-            $($f)(io::IO)
-
-        Hash data from io using $($f) algorithm from io.
-        """
-        function $f(io::IO, chunk_size=4*1024)
-            ctx = $ctx()
-            buff = Vector{UInt8}(undef, chunk_size)
-            while !eof(io)
-                num_read = readbytes!(io, buff)
-                update!(ctx, buff, num_read)
-            end
-            return digest!(ctx)
-        end
-        function $g(key::Vector{UInt8}, io::IO, chunk_size=4*1024)
-            ctx = HMAC_CTX($ctx(), key)
-            buff = Vector{UInt8}(undef, chunk_size)
-            while !eof(io)
-                num_read = readbytes!(io, buff)
-                update!(ctx, buff, num_read)
-            end
-            return digest!(ctx)
-        end
-    end
-end
-
-end #module SHA
diff --git a/stdlib/SHA/src/base_functions.jl b/stdlib/SHA/src/base_functions.jl
deleted file mode 100644
index 0b6216fdbdf18..0000000000000
--- a/stdlib/SHA/src/base_functions.jl
+++ /dev/null
@@ -1,42 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# THE SIX LOGICAL FUNCTIONS
-#
-# Bit shifting and rotation (used by the six SHA-XYZ logical functions:
-#
-#   NOTE:  The naming of R and S appears backwards here (R is a SHIFT and
-#   S is a ROTATION) because the SHA2-256/384/512 description document
-#   (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this
-#   same "backwards" definition.
-
-# 32-bit Rotate-right (equivalent to S32 in SHA-256) and rotate-left
-rrot(b,x,width) = ((x >> b) | (x << (width - b)))
-lrot(b,x,width) = ((x << b) | (x >> (width - b)))
-
-# Shift-right (used in SHA-256, SHA-384, and SHA-512):
-R(b,x)   = (x >> b)
-# 32-bit Rotate-right (used in SHA-256):
-S32(b,x) = rrot(b,x,32)
-# 64-bit Rotate-right (used in SHA-384 and SHA-512):
-S64(b,x) = rrot(b,x,64)
-# 64-bit Rotate-left (used in SHA3)
-L64(b,x) = lrot(b,x,64)
-
-# Two of six logical functions used in SHA-256, SHA-384, and SHA-512:
-Ch(x,y,z)  = ((x & y) ⊻ (~x & z))
-Maj(x,y,z) = ((x & y) ⊻ (x & z) ⊻ (y & z))
-
-# Four of six logical functions used in SHA-256:
-Sigma0_256(x) = (S32(2,  UInt32(x)) ⊻ S32(13, UInt32(x)) ⊻ S32(22, UInt32(x)))
-Sigma1_256(x) = (S32(6,  UInt32(x)) ⊻ S32(11, UInt32(x)) ⊻ S32(25, UInt32(x)))
-sigma0_256(x) = (S32(7,  UInt32(x)) ⊻ S32(18, UInt32(x)) ⊻ R(3 ,   UInt32(x)))
-sigma1_256(x) = (S32(17, UInt32(x)) ⊻ S32(19, UInt32(x)) ⊻ R(10,   UInt32(x)))
-
-# Four of six logical functions used in SHA-384 and SHA-512:
-Sigma0_512(x) = (S64(28, UInt64(x)) ⊻ S64(34, UInt64(x)) ⊻ S64(39, UInt64(x)))
-Sigma1_512(x) = (S64(14, UInt64(x)) ⊻ S64(18, UInt64(x)) ⊻ S64(41, UInt64(x)))
-sigma0_512(x) = (S64( 1, UInt64(x)) ⊻ S64( 8, UInt64(x)) ⊻ R( 7,   UInt64(x)))
-sigma1_512(x) = (S64(19, UInt64(x)) ⊻ S64(61, UInt64(x)) ⊻ R( 6,   UInt64(x)))
-
-# Let's be able to bswap arrays of these types as well
-bswap!(x::Vector{<:Integer}) = map!(bswap, x, x)
diff --git a/stdlib/SHA/src/common.jl b/stdlib/SHA/src/common.jl
deleted file mode 100644
index 5500a372f5fa2..0000000000000
--- a/stdlib/SHA/src/common.jl
+++ /dev/null
@@ -1,116 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Common update and digest functions which work across SHA1 and SHA2
-
-# update! takes in variable-length data, buffering it into blocklen()-sized pieces,
-# calling transform!() when necessary to update the internal hash state.
-"""
-    update!(context, data[, datalen])
-
-Update the SHA context with the bytes in data. See also [`digest!`](@ref) for
-finalizing the hash.
-
-# Examples
-```julia-repl
-julia> ctx = SHA1_CTX()
-SHA1 hash state
-
-julia> update!(ctx, b"data to to be hashed")
-```
-"""
-function update!(context::T, data::U, datalen=length(data)) where {T<:SHA_CTX, U<:AbstractBytes}
-    # We need to do all our arithmetic in the proper bitwidth
-    UIntXXX = typeof(context.bytecount)
-
-    # Process as many complete blocks as possible
-    0 ≤ datalen ≤ length(data) || throw(BoundsError(data, firstindex(data)+datalen-1))
-    len = convert(UIntXXX, datalen)
-    data_idx = convert(UIntXXX, firstindex(data)-1)
-    usedspace = context.bytecount % blocklen(T)
-    while len - data_idx + usedspace >= blocklen(T)
-        # Fill up as much of the buffer as we can with the data given us
-        copyto!(context.buffer, usedspace + 1, data, data_idx + 1, blocklen(T) - usedspace)
-
-        transform!(context)
-        context.bytecount += blocklen(T) - usedspace
-        data_idx += blocklen(T) - usedspace
-        usedspace = convert(UIntXXX, 0)
-    end
-
-    # There is less than a complete block left, but we need to save the leftovers into context.buffer:
-    if len > data_idx
-        copyto!(context.buffer, usedspace + 1, data, data_idx + 1, len - data_idx)
-        context.bytecount += len - data_idx
-    end
-end
-
-# Pad the remainder leaving space for the bitcount
-function pad_remainder!(context::T) where T<:SHA_CTX
-    usedspace = context.bytecount % blocklen(T)
-    # If we have anything in the buffer still, pad and transform that data
-    if usedspace > 0
-        # Begin padding with a 1 bit:
-        context.buffer[usedspace+1] = 0x80
-        usedspace += 1
-
-        # If we have room for the bitcount, then pad up to the short blocklen
-        if usedspace <= short_blocklen(T)
-            for i = 1:(short_blocklen(T) - usedspace)
-                context.buffer[usedspace + i] = 0x0
-            end
-        else
-            # Otherwise, pad out this entire block, transform it, then pad up to short blocklen
-            for i = 1:(blocklen(T) - usedspace)
-                context.buffer[usedspace + i] = 0x0
-            end
-            transform!(context)
-            for i = 1:short_blocklen(T)
-                context.buffer[i] = 0x0
-            end
-        end
-    else
-        # If we don't have anything in the buffer, pad an entire shortbuffer
-        context.buffer[1] = 0x80
-        for i = 2:short_blocklen(T)
-            context.buffer[i] = 0x0
-        end
-    end
-end
-
-
-# Clear out any saved data in the buffer, append total bitlength, and return our precious hash!
-# Note: SHA3_CTX has a more specialised method
-"""
-    digest!(context)
-
-Finalize the SHA context and return the hash as array of bytes (Array{Uint8, 1}).
-
-# Examples
-```julia-repl
-julia> ctx = SHA1_CTX()
-SHA1 hash state
-
-julia> update!(ctx, b"data to to be hashed")
-
-julia> digest!(ctx)
-20-element Array{UInt8,1}:
- 0x83
- 0xe4
- ⋮
- 0x89
- 0xf5
-```
-"""
-function digest!(context::T) where T<:SHA_CTX
-    pad_remainder!(context)
-    # Store the length of the input data (in bits) at the end of the padding
-    bitcount_idx = div(short_blocklen(T), sizeof(context.bytecount)) + 1
-    pbuf = Ptr{typeof(context.bytecount)}(pointer(context.buffer))
-    unsafe_store!(pbuf, bswap(context.bytecount * 8), bitcount_idx)
-
-    # Final transform:
-    transform!(context)
-
-    # Return the digest
-    return reinterpret(UInt8, bswap!(context.state))[1:digestlen(T)]
-end
diff --git a/stdlib/SHA/src/constants.jl b/stdlib/SHA/src/constants.jl
deleted file mode 100644
index 3c5fde92d3863..0000000000000
--- a/stdlib/SHA/src/constants.jl
+++ /dev/null
@@ -1,131 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# SHA initial hash values and constants
-
-# Hash constant words K for SHA1
-const K1 = UInt32[
-    0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
-]
-
-# Initial hash value H for SHA1
-const SHA1_initial_hash_value = UInt32[
-    0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
-]
-
-
-
-# Hash constant words K for SHA-256:
-const K256 = UInt32[
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-]
-
-# Initial hash value H for SHA-224:
-const SHA2_224_initial_hash_value = UInt32[
-    0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
-    0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
-]
-
-
-const SHA2_256_initial_hash_value = UInt32[
-    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-]
-
-# Hash constant words K for SHA-384 and SHA-512:
-const K512 = UInt64[
-    0x428a2f98d728ae22, 0x7137449123ef65cd,
-    0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
-    0x3956c25bf348b538, 0x59f111f1b605d019,
-    0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
-    0xd807aa98a3030242, 0x12835b0145706fbe,
-    0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
-    0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
-    0x9bdc06a725c71235, 0xc19bf174cf692694,
-    0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
-    0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
-    0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
-    0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
-    0x983e5152ee66dfab, 0xa831c66d2db43210,
-    0xb00327c898fb213f, 0xbf597fc7beef0ee4,
-    0xc6e00bf33da88fc2, 0xd5a79147930aa725,
-    0x06ca6351e003826f, 0x142929670a0e6e70,
-    0x27b70a8546d22ffc, 0x2e1b21385c26c926,
-    0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
-    0x650a73548baf63de, 0x766a0abb3c77b2a8,
-    0x81c2c92e47edaee6, 0x92722c851482353b,
-    0xa2bfe8a14cf10364, 0xa81a664bbc423001,
-    0xc24b8b70d0f89791, 0xc76c51a30654be30,
-    0xd192e819d6ef5218, 0xd69906245565a910,
-    0xf40e35855771202a, 0x106aa07032bbd1b8,
-    0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
-    0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
-    0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
-    0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
-    0x748f82ee5defb2fc, 0x78a5636f43172f60,
-    0x84c87814a1f0ab72, 0x8cc702081a6439ec,
-    0x90befffa23631e28, 0xa4506cebde82bde9,
-    0xbef9a3f7b2c67915, 0xc67178f2e372532b,
-    0xca273eceea26619c, 0xd186b8c721c0c207,
-    0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
-    0x06f067aa72176fba, 0x0a637dc5a2c898a6,
-    0x113f9804bef90dae, 0x1b710b35131c471b,
-    0x28db77f523047d84, 0x32caab7b40c72493,
-    0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
-    0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
-    0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-]
-
-# Initial hash value H for SHA-384
-const SHA2_384_initial_hash_value = UInt64[
-    0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
-    0x9159015a3070dd17, 0x152fecd8f70e5939,
-    0x67332667ffc00b31, 0x8eb44a8768581511,
-    0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4
-]
-
-# Initial hash value H for SHA-512
-const SHA2_512_initial_hash_value = UInt64[
-    0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
-    0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
-    0x510e527fade682d1, 0x9b05688c2b3e6c1f,
-    0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
-]
-
-# Round constants for SHA3 rounds
-const SHA3_ROUND_CONSTS = UInt64[
-    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-    0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-    0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-    0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-    0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-    0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
-]
-
-# Rotation constants for SHA3 rounds
-const SHA3_ROTC = UInt64[
-    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
-    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
-]
-
-# Permutation indices for SHA3 rounds (+1'ed so as to work with julia's 1-based indexing)
-const SHA3_PILN = Int[
-    11, 8,  12, 18, 19, 4, 6,  17, 9,  22, 25, 5,
-    16, 24, 20, 14, 13, 3, 21, 15, 23, 10,  7,  2
-]
diff --git a/stdlib/SHA/src/hmac.jl b/stdlib/SHA/src/hmac.jl
deleted file mode 100644
index 1ba9b95c6109d..0000000000000
--- a/stdlib/SHA/src/hmac.jl
+++ /dev/null
@@ -1,35 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-struct HMAC_CTX{CTX<:SHA_CTX}
-    context::CTX
-    outer::Vector{UInt8}
-
-    function HMAC_CTX(ctx::CTX, key::Vector{UInt8}, blocksize::Integer=blocklen(CTX)) where CTX
-        if length(key) > blocksize
-            _ctx = CTX()
-            update!(_ctx, key)
-            key = digest!(_ctx)
-        end
-
-        pad = blocksize - length(key)
-
-        if pad > 0
-            key = [key; fill(0x00, pad)]
-        end
-
-        update!(ctx, key .⊻ 0x36)
-        new{CTX}(ctx, key .⊻ 0x5c)
-    end
-end
-
-function update!(ctx::HMAC_CTX, data, datalen=length(data))
-    update!(ctx.context, data, datalen)
-end
-
-function digest!(ctx::HMAC_CTX{CTX}) where CTX
-    digest = digest!(ctx.context)
-    _ctx = CTX()
-    update!(_ctx, ctx.outer)
-    update!(_ctx, digest)
-    digest!(_ctx)
-end
diff --git a/stdlib/SHA/src/sha1.jl b/stdlib/SHA/src/sha1.jl
deleted file mode 100644
index 71fd55e20fc1e..0000000000000
--- a/stdlib/SHA/src/sha1.jl
+++ /dev/null
@@ -1,95 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Nonlinear functions, in order to encourage inlining, these sadly are not an array of lambdas
-function Round0(b,c,d)
-    return UInt32((b & c) | (~b & d))
-end
-
-function Round1And3(b,c,d)
-    return UInt32(b ⊻ c ⊻ d)
-end
-
-function Round2(b,c,d)
-    return UInt32((b & c) | (b & d) | (c & d))
-end
-
-function transform!(context::SHA1_CTX)
-    # Buffer is 16 elements long, we expand to 80
-    pbuf = buffer_pointer(context)
-    for i in 1:16
-        context.W[i] = bswap(unsafe_load(pbuf, i))
-    end
-
-    # First round of expansions
-    for i in 17:32
-        @inbounds begin
-            context.W[i] = lrot(1, context.W[i-3] ⊻ context.W[i-8] ⊻ context.W[i-14] ⊻ context.W[i-16], 32)
-        end
-    end
-
-    # Second round of expansions (possibly 4-way SIMD-able)
-    for i in 33:80
-        @inbounds begin
-            context.W[i] = lrot(2, context.W[i-6] ⊻ context.W[i-16] ⊻ context.W[i-28] ⊻ context.W[i-32], 32)
-        end
-    end
-
-    # Initialize registers with the previous intermediate values (our state)
-    a = context.state[1]
-    b = context.state[2]
-    c = context.state[3]
-    d = context.state[4]
-    e = context.state[5]
-
-    # Run our rounds, manually separated into the four rounds, unfortunately using an array of lambdas
-    # really kills performance and causes a huge number of allocations, so we make it easy on the compiler
-    for i = 1:20
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round0(b,c,d) + e + context.W[i] + K1[1])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    for i = 21:40
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round1And3(b,c,d) + e + context.W[i] + K1[2])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    for i = 41:60
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round2(b,c,d) + e + context.W[i] + K1[3])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    for i = 61:80
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round1And3(b,c,d) + e + context.W[i] + K1[4])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    context.state[1] += a
-    context.state[2] += b
-    context.state[3] += c
-    context.state[4] += d
-    context.state[5] += e
-end
diff --git a/stdlib/SHA/src/sha2.jl b/stdlib/SHA/src/sha2.jl
deleted file mode 100644
index 5cc4363786e39..0000000000000
--- a/stdlib/SHA/src/sha2.jl
+++ /dev/null
@@ -1,136 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-function transform!(context::T) where {T<:Union{SHA2_224_CTX,SHA2_256_CTX}}
-    pbuf = buffer_pointer(context)
-    # Initialize registers with the previous intermediate values (our state)
-    a = context.state[1]
-    b = context.state[2]
-    c = context.state[3]
-    d = context.state[4]
-    e = context.state[5]
-    f = context.state[6]
-    g = context.state[7]
-    h = context.state[8]
-
-    # Run initial rounds
-    for j = 1:16
-        @inbounds begin
-            # We bitswap every input byte
-            v = bswap(unsafe_load(pbuf, j))
-            unsafe_store!(pbuf, v, j)
-
-            # Apply the SHA-256 compression function to update a..h
-            T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + v
-            T2 = Sigma0_256(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = UInt32(d + T1)
-            d = c
-            c = b
-            b = a
-            a = UInt32(T1 + T2)
-        end
-    end
-
-    for j = 17:64
-        @inbounds begin
-            # Implicit message block expansion:
-            s0 = unsafe_load(pbuf, mod1(j + 1, 16))
-            s0 = sigma0_256(s0)
-            s1 = unsafe_load(pbuf, mod1(j + 14, 16))
-            s1 = sigma1_256(s1)
-
-            # Apply the SHA-256 compression function to update a..h
-            v = unsafe_load(pbuf, mod1(j, 16)) + s1 + unsafe_load(pbuf, mod1(j + 9, 16)) + s0
-            unsafe_store!(pbuf, v, mod1(j, 16))
-            T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + v
-            T2 = Sigma0_256(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = UInt32(d + T1)
-            d = c
-            c = b
-            b = a
-            a = UInt32(T1 + T2)
-        end
-    end
-
-    # Compute the current intermediate hash value
-    context.state[1] += a
-    context.state[2] += b
-    context.state[3] += c
-    context.state[4] += d
-    context.state[5] += e
-    context.state[6] += f
-    context.state[7] += g
-    context.state[8] += h
-end
-
-
-function transform!(context::Union{SHA2_384_CTX,SHA2_512_CTX})
-    pbuf = buffer_pointer(context)
-    # Initialize registers with the prev. intermediate value
-    a = context.state[1]
-    b = context.state[2]
-    c = context.state[3]
-    d = context.state[4]
-    e = context.state[5]
-    f = context.state[6]
-    g = context.state[7]
-    h = context.state[8]
-
-    for j = 1:16
-        @inbounds begin
-            v = bswap(unsafe_load(pbuf, j))
-            unsafe_store!(pbuf, v, j)
-
-            # Apply the SHA-512 compression function to update a..h
-            T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + v
-            T2 = Sigma0_512(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = d + T1
-            d = c
-            c = b
-            b = a
-            a = T1 + T2
-        end
-    end
-
-    for j = 17:80
-        @inbounds begin
-            # Implicit message block expansion:
-            s0 = unsafe_load(pbuf, mod1(j + 1, 16))
-            s0 = sigma0_512(s0)
-            s1 = unsafe_load(pbuf, mod1(j + 14, 16))
-            s1 = sigma1_512(s1)
-
-            # Apply the SHA-512 compression function to update a..h
-            v = unsafe_load(pbuf, mod1(j, 16)) + s1 + unsafe_load(pbuf, mod1(j + 9, 16)) + s0
-            unsafe_store!(pbuf, v, mod1(j, 16))
-            T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + v
-            T2 = Sigma0_512(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = d + T1
-            d = c
-            c = b
-            b = a
-            a = T1 + T2
-        end
-    end
-
-    # Compute the current intermediate hash value
-    context.state[1] += a
-    context.state[2] += b
-    context.state[3] += c
-    context.state[4] += d
-    context.state[5] += e
-    context.state[6] += f
-    context.state[7] += g
-    context.state[8] += h
-end
diff --git a/stdlib/SHA/src/sha3.jl b/stdlib/SHA/src/sha3.jl
deleted file mode 100644
index 6f94495630742..0000000000000
--- a/stdlib/SHA/src/sha3.jl
+++ /dev/null
@@ -1,83 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-function transform!(context::T) where {T<:SHA3_CTX}
-    # First, update state with buffer
-    pbuf = Ptr{eltype(context.state)}(pointer(context.buffer))
-    for idx in 1:div(blocklen(T),8)
-        context.state[idx] = context.state[idx] ⊻ unsafe_load(pbuf, idx)
-    end
-    bc = context.bc
-    state = context.state
-
-    # We always assume 24 rounds
-    @inbounds for round in 0:23
-        # Theta function
-        for i in 1:5
-            bc[i] = state[i] ⊻ state[i + 5] ⊻ state[i + 10] ⊻ state[i + 15] ⊻ state[i + 20]
-        end
-
-        for i in 0:4
-            temp = bc[rem(i + 4, 5) + 1] ⊻ L64(1, bc[rem(i + 1, 5) + 1])
-            j = 0
-            while j <= 20
-                state[Int(i + j + 1)] = state[i + j + 1] ⊻ temp
-                j += 5
-            end
-        end
-
-        # Rho Pi
-        temp = state[2]
-        for i in 1:24
-            j = SHA3_PILN[i]
-            bc[1] = state[j]
-            state[j] = L64(SHA3_ROTC[i], temp)
-            temp = bc[1]
-        end
-
-        # Chi
-        j = 0
-        while j <= 20
-            for i in 1:5
-                bc[i] = state[i + j]
-            end
-            for i in 0:4
-                state[j + i + 1] = state[j + i + 1] ⊻ (~bc[rem(i + 1, 5) + 1] & bc[rem(i + 2, 5) + 1])
-            end
-            j += 5
-        end
-
-        # Iota
-        state[1] = state[1] ⊻ SHA3_ROUND_CONSTS[round+1]
-    end
-
-    return context.state
-end
-
-
-
-# Finalize data in the buffer, append total bitlength, and return our precious hash!
-function digest!(context::T) where {T<:SHA3_CTX}
-    usedspace = context.bytecount % blocklen(T)
-    # If we have anything in the buffer still, pad and transform that data
-    if usedspace < blocklen(T) - 1
-        # Begin padding with a 0x06
-        context.buffer[usedspace+1] = 0x06
-        # Fill with zeros up until the last byte
-        context.buffer[usedspace+2:end-1] .= 0x00
-        # Finish it off with a 0x80
-        context.buffer[end] = 0x80
-    else
-        # Otherwise, we have to add on a whole new buffer just for the zeros and 0x80
-        context.buffer[end] = 0x06
-        transform!(context)
-
-        context.buffer[1:end-1] = 0x0
-        context.buffer[end] = 0x80
-    end
-
-    # Final transform:
-    transform!(context)
-
-    # Return the digest
-    return reinterpret(UInt8, context.state)[1:digestlen(T)]
-end
diff --git a/stdlib/SHA/src/types.jl b/stdlib/SHA/src/types.jl
deleted file mode 100644
index 3534be4fafc96..0000000000000
--- a/stdlib/SHA/src/types.jl
+++ /dev/null
@@ -1,230 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Type hierarchy to aid in splitting up of SHA2 algorithms
-# as SHA224/256 are similar, and SHA-384/512 are similar
-abstract type SHA_CTX end
-abstract type SHA2_CTX <: SHA_CTX end
-abstract type SHA3_CTX <: SHA_CTX end
-import Base: copy
-
-# We derive SHA1_CTX straight from SHA_CTX since it doesn't have a
-# family of types like SHA2 or SHA3 do
-mutable struct SHA1_CTX <: SHA_CTX
-    state::Array{UInt32,1}
-    bytecount::UInt64
-    buffer::Array{UInt8,1}
-    W::Array{UInt32,1}
-end
-
-# SHA2 224/256/384/512-bit Context Structures
-mutable struct SHA2_224_CTX <: SHA2_CTX
-    state::Array{UInt32,1}
-    bytecount::UInt64
-    buffer::Array{UInt8,1}
-end
-
-mutable struct SHA2_256_CTX <: SHA2_CTX
-    state::Array{UInt32,1}
-    bytecount::UInt64
-    buffer::Array{UInt8,1}
-end
-
-mutable struct SHA2_384_CTX <: SHA2_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-end
-
-mutable struct SHA2_512_CTX <: SHA2_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-end
-
-function Base.getproperty(ctx::SHA2_CTX, fieldname::Symbol)
-    if fieldname === :state
-        return getfield(ctx, :state)::Union{Vector{UInt32},Vector{UInt64}}
-    elseif fieldname === :bytecount
-        return getfield(ctx, :bytecount)::Union{UInt64,UInt128}
-    elseif fieldname === :buffer
-        return getfield(ctx, :buffer)::Vector{UInt8}
-    elseif fieldname === :W
-        return getfield(ctx, :W)::Vector{UInt32}
-    else
-        error("SHA2_CTX has no field ", fieldname)
-    end
-end
-
-
-# Typealias common nicknames for SHA2 family of functions
-const SHA224_CTX = SHA2_224_CTX
-const SHA256_CTX = SHA2_256_CTX
-const SHA384_CTX = SHA2_384_CTX
-const SHA512_CTX = SHA2_512_CTX
-
-
-# SHA3 224/256/384/512-bit context structures
-mutable struct SHA3_224_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-mutable struct SHA3_256_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-mutable struct SHA3_384_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-mutable struct SHA3_512_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-
-function Base.getproperty(ctx::SHA3_CTX, fieldname::Symbol)
-    if fieldname === :state
-        return getfield(ctx, :state)::Vector{UInt64}
-    elseif fieldname === :bytecount
-        return getfield(ctx, :bytecount)::UInt128
-    elseif fieldname === :buffer
-        return getfield(ctx, :buffer)::Vector{UInt8}
-    elseif fieldname === :bc
-        return getfield(ctx, :bc)::Vector{UInt64}
-    else
-        error("type ", typeof(ctx), " has no field ", fieldname)
-    end
-end
-
-# Define constants via functions so as not to bloat context objects.  Yay dispatch!
-
-# Digest lengths for SHA1, SHA2 and SHA3.  This is easy to figure out from the typename
-digestlen(::Type{SHA1_CTX}) = 20
-digestlen(::Type{SHA2_224_CTX}) = 28
-digestlen(::Type{SHA3_224_CTX}) = 28
-digestlen(::Type{SHA2_256_CTX}) = 32
-digestlen(::Type{SHA3_256_CTX}) = 32
-digestlen(::Type{SHA2_384_CTX}) = 48
-digestlen(::Type{SHA3_384_CTX}) = 48
-digestlen(::Type{SHA2_512_CTX}) = 64
-digestlen(::Type{SHA3_512_CTX}) = 64
-
-# SHA1 and SHA2 have differing element types for the internal state objects
-state_type(::Type{SHA1_CTX}) = UInt32
-state_type(::Type{SHA2_224_CTX}) = UInt32
-state_type(::Type{SHA2_256_CTX}) = UInt32
-state_type(::Type{SHA2_384_CTX}) = UInt64
-state_type(::Type{SHA2_512_CTX}) = UInt64
-state_type(::Type{SHA3_CTX}) = UInt64
-
-# blocklen is the number of bytes of data processed by the transform!() function at once
-blocklen(::Type{SHA1_CTX}) = UInt64(64)
-blocklen(::Type{SHA2_224_CTX}) = UInt64(64)
-blocklen(::Type{SHA2_256_CTX}) = UInt64(64)
-blocklen(::Type{SHA2_384_CTX}) = UInt64(128)
-blocklen(::Type{SHA2_512_CTX}) = UInt64(128)
-
-blocklen(::Type{SHA3_224_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_224_CTX))
-blocklen(::Type{SHA3_256_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_256_CTX))
-blocklen(::Type{SHA3_384_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_384_CTX))
-blocklen(::Type{SHA3_512_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_512_CTX))
-
-
-# short_blocklen is the size of a block minus the width of bytecount
-short_blocklen(::Type{T}) where {T<:SHA_CTX} = blocklen(T) - 2*sizeof(state_type(T))
-
-# Once the "blocklen" methods are defined, we can define our outer constructors for SHA types:
-
-"""
-    SHA2_224_CTX()
-
-Construct an empty SHA2_224 context.
-"""
-SHA2_224_CTX() = SHA2_224_CTX(copy(SHA2_224_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_224_CTX)))
-"""
-    SHA2_256_CTX()
-
-Construct an empty SHA2_256 context.
-"""
-SHA2_256_CTX() = SHA2_256_CTX(copy(SHA2_256_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_256_CTX)))
-"""
-    SHA2_384()
-
-Construct an empty SHA2_384 context.
-"""
-SHA2_384_CTX() = SHA2_384_CTX(copy(SHA2_384_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_384_CTX)))
-"""
-    SHA2_512_CTX()
-
-Construct an empty SHA2_512 context.
-"""
-SHA2_512_CTX() = SHA2_512_CTX(copy(SHA2_512_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_512_CTX)))
-
-"""
-    SHA3_224_CTX()
-
-Construct an empty SHA3_224 context.
-"""
-SHA3_224_CTX() = SHA3_224_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_224_CTX)), Vector{UInt64}(undef, 5))
-"""
-    SHA3_256_CTX()
-
-Construct an empty SHA3_256 context.
-"""
-SHA3_256_CTX() = SHA3_256_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_256_CTX)), Vector{UInt64}(undef, 5))
-"""
-    SHA3_384_CTX()
-
-Construct an empty SHA3_384 context.
-"""
-SHA3_384_CTX() = SHA3_384_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_384_CTX)), Vector{UInt64}(undef, 5))
-"""
-    SHA3_512_CTX()
-
-Construct an empty SHA3_512 context.
-"""
-SHA3_512_CTX() = SHA3_512_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_512_CTX)), Vector{UInt64}(undef, 5))
-
-# Nickname'd outer constructor methods for SHA2
-const SHA224_CTX = SHA2_224_CTX
-const SHA256_CTX = SHA2_256_CTX
-const SHA384_CTX = SHA2_384_CTX
-const SHA512_CTX = SHA2_512_CTX
-
-# SHA1 is special; he needs extra workspace
-"""
-    SHA1_CTX()
-
-Construct an empty SHA1 context.
-"""
-SHA1_CTX() = SHA1_CTX(copy(SHA1_initial_hash_value), 0, zeros(UInt8, blocklen(SHA1_CTX)), Vector{UInt32}(undef, 80))
-
-
-# Copy functions
-copy(ctx::T) where {T<:SHA1_CTX} = T(copy(ctx.state), ctx.bytecount, copy(ctx.buffer), copy(ctx.W))
-copy(ctx::T) where {T<:SHA2_CTX} = T(copy(ctx.state), ctx.bytecount, copy(ctx.buffer))
-copy(ctx::T) where {T<:SHA3_CTX} = T(copy(ctx.state), ctx.bytecount, copy(ctx.buffer), Vector{UInt64}(undef, 5))
-
-
-# Make printing these types a little friendlier
-import Base.show
-show(io::IO, ::SHA1_CTX) = print(io, "SHA1 hash state")
-show(io::IO, ::SHA2_224_CTX) = print(io, "SHA2 224-bit hash state")
-show(io::IO, ::SHA2_256_CTX) = print(io, "SHA2 256-bit hash state")
-show(io::IO, ::SHA2_384_CTX) = print(io, "SHA2 384-bit hash state")
-show(io::IO, ::SHA2_512_CTX) = print(io, "SHA2 512-bit hash state")
-show(io::IO, ::SHA3_224_CTX) = print(io, "SHA3 224-bit hash state")
-show(io::IO, ::SHA3_256_CTX) = print(io, "SHA3 256-bit hash state")
-show(io::IO, ::SHA3_384_CTX) = print(io, "SHA3 384-bit hash state")
-show(io::IO, ::SHA3_512_CTX) = print(io, "SHA3 512-bit hash state")
-
-
-# use our types to define a method to get a pointer to the state buffer
-buffer_pointer(ctx::T) where {T<:SHA_CTX} = Ptr{state_type(T)}(pointer(ctx.buffer))
diff --git a/stdlib/SHA/test/perf.jl b/stdlib/SHA/test/perf.jl
deleted file mode 100644
index 08dbbe9b1cb96..0000000000000
--- a/stdlib/SHA/test/perf.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SHA
-
-if isempty(ARGS)
-    error("need file to test sha perf")
-elseif !isfile(ARGS[1])
-    error("file $(ARGS[1]) does not exist")
-end
-
-
-function do_tests(filepath)
-    # test performance
-    print("read:    ")
-    @time begin
-        fh = open(filepath, "r")
-        bytes = read(fh)
-    end
-    GC.gc()
-
-    print("SHA-1:   ")
-    sha1(bytes)
-    GC.gc()
-    @time sha1(bytes)
-
-    print("SHA2-256: ")
-    sha256(bytes)
-    GC.gc()
-    @time sha256(bytes)
-
-    print("SHA2-512: ")
-    sha512(bytes)
-    GC.gc()
-    @time sha512(bytes)
-
-    print("SHA3-256: ")
-    sha3_256(bytes)
-    GC.gc()
-    @time sha3_256(bytes)
-
-    print("SHA3-512: ")
-    sha3_512(bytes)
-    GC.gc()
-    @time sha3_512(bytes)
-end
-
-do_tests(ARGS[1])
diff --git a/stdlib/SHA/test/runtests.jl b/stdlib/SHA/test/runtests.jl
deleted file mode 100644
index 49ab05e47b739..0000000000000
--- a/stdlib/SHA/test/runtests.jl
+++ /dev/null
@@ -1,304 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SHA
-using Test
-
-const VERBOSE = false
-
-# Define some data we will run our tests on
-lorem = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
-so_many_as_array = repeat([0x61], 1000000)
-so_many_as_tuple = ntuple((i) -> 0x61, 1000000)
-tempdir = mktempdir()
-file = joinpath(tempdir, ".sha")
-fIO = open(file, "w")
-write(fIO, '\0')
-close(fIO)
-data = Any["", "test", lorem, file, so_many_as_array, so_many_as_tuple]
-
-# Descriptions of the data, the SHA functions we'll run on the data, etc...
-data_desc = ["the empty string", "the string \"test\"", "lorem ipsum",
-             "0 file", "one million a's Array", "one million a's Tuple"]
-sha_types = Dict(sha1 => SHA.SHA1_CTX,
-            sha2_224 => SHA.SHA2_224_CTX, sha2_256 => SHA.SHA2_256_CTX, sha2_384 => SHA.SHA2_384_CTX, sha2_512 => SHA.SHA2_512_CTX,
-            sha3_224 => SHA.SHA3_224_CTX, sha3_256 => SHA.SHA3_256_CTX, sha3_384 => SHA.SHA3_384_CTX, sha3_512 => SHA.SHA3_512_CTX)
-sha_funcs = [sha1,
-             sha2_224, sha2_256, sha2_384, sha2_512,
-             sha3_224, sha3_256, sha3_384, sha3_512]
-ctxs = [SHA1_CTX,
-        SHA2_224_CTX, SHA2_256_CTX, SHA2_384_CTX, SHA2_512_CTX,
-        SHA3_224_CTX, SHA3_256_CTX, SHA3_384_CTX, SHA3_512_CTX]
-shws = ["SHA1 hash state",
-        "SHA2 224-bit hash state", "SHA2 256-bit hash state", "SHA2 384-bit hash state", "SHA2 512-bit hash state",
-        "SHA3 224-bit hash state", "SHA3 256-bit hash state", "SHA3 384-bit hash state", "SHA3 512-bit hash state"]
-
-answers = Dict(
-sha1 => [
-"da39a3ee5e6b4b0d3255bfef95601890afd80709",
-"a94a8fe5ccb19ba61c4c0873d391e987982fbbd3",
-"19afa2a4a37462c7b940a6c4c61363d49c3a35f4",
-"5ba93c9db0cff93f52b521d7420e43f6eda2784f",
-"34aa973cd4c4daa4f61eeb2bdbad27316534016f",
-"34aa973cd4c4daa4f61eeb2bdbad27316534016f"
-],
-sha2_224 => [
-"d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f",
-"90a3ed9e32b2aaf4c61c410eb925426119e1a9dc53d4286ade99a809",
-"6a0644abcf1e2cecbec2814443dab5f24b7ad8ebb66c75667ab67959",
-"fff9292b4201617bdc4d3053fce02734166a683d7d858a7f5f59b073",
-"20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67",
-"20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67"
-],
-sha2_256 => [
-"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
-"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
-"2c7c3d5f244f1a40069a32224215e0cf9b42485c99d80f357d76f006359c7a18",
-"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d",
-"cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0",
-"cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"
-],
-sha2_384 => [
-"38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b",
-"768412320f7b0aa5812fce428dc4706b3cae50e02a64caa16a782249bfe8efc4b7ef1ccb126255d196047dfedf17a0a9",
-"63980fd0425cd2c3d8a400ee0f2671ef135db03b947ec1af21b6e28f19c16ca272036469541f4d8e336ac6d1da50580f",
-"bec021b4f368e3069134e012c2b4307083d3a9bdd206e24e5f0d86e13d6636655933ec2b413465966817a9c208a11717",
-"9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b07b8b3dc38ecc4ebae97ddd87f3d8985",
-"9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b07b8b3dc38ecc4ebae97ddd87f3d8985"
-],
-sha2_512 => [
-"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
-"ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff",
-"f41d92bc9fc1157a0d1387e67f3d0893b70f7039d3d46d8115b5079d45ad601159398c79c281681e2da09bf7d9f8c23b41d1a0a3c5b528a7f2735933a4353194",
-"b8244d028981d693af7b456af8efa4cad63d282e19ff14942c246e50d9351d22704a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee",
-"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973ebde0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b",
-"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973ebde0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b"
-],
-sha3_224 => [
-"6b4e03423667dbb73b6e15454f0eb1abd4597f9a1b078e3f5b5a6bc7",
-"3797bf0afbbfca4a7bbba7602a2b552746876517a7f9b7ce2db0ae7b",
-"ea5395370949ad8c7d2ca3e7c045ef3306fe3a3f4740de452ef87a28",
-"bdd5167212d2dc69665f5a8875ab87f23d5ce7849132f56371a19096",
-"d69335b93325192e516a912e6d19a15cb51c6ed5c15243e7a7fd653c",
-"d69335b93325192e516a912e6d19a15cb51c6ed5c15243e7a7fd653c"
-],
-sha3_256 => [
-"a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a",
-"36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80",
-"8c8142d2ca964ab307ace567ddd5764f17ebb76eb8ff25543ab54c14fe2ab139",
-"5d53469f20fef4f8eab52b88044ede69c77a6a68a60728609fc4a65ff531e7d0",
-"5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1",
-"5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1",
-],
-sha3_384 => [
-"0c63a75b845e4f7d01107d852e4c2485c51a50aaaa94fc61995e71bbee983a2ac3713831264adb47fb6bd1e058d5f004",
-"e516dabb23b6e30026863543282780a3ae0dccf05551cf0295178d7ff0f1b41eecb9db3ff219007c4e097260d58621bd",
-"eb9fbba3eb916a4efe384b3125f5d03ceb9c5c1b94431ac30fa86c54408b92701ca5d2628cd7113aa5541177ec3ccd1d",
-"127677f8b66725bbcb7c3eae9698351ca41e0eb6d66c784bd28dcdb3b5fb12d0c8e840342db03ad1ae180b92e3504933",
-"eee9e24d78c1855337983451df97c8ad9eedf256c6334f8e948d252d5e0e76847aa0774ddb90a842190d2c558b4b8340",
-"eee9e24d78c1855337983451df97c8ad9eedf256c6334f8e948d252d5e0e76847aa0774ddb90a842190d2c558b4b8340",
-],
-sha3_512 => [
-"a69f73cca23a9ac5c8b567dc185a756e97c982164fe25859e0d1dcc1475c80a615b2123af1f5f94c11e3e9402c3ac558f500199d95b6d3e301758586281dcd26",
-"9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14",
-"3a4318353396a12dfd20442cfce1d8ad4d7e732e85cc56b01b4cf9057a41c8827c0a03c70812e76ace68d776759225c213b4f581aac0dba5dd43b785b1a33fe5",
-"7127aab211f82a18d06cf7578ff49d5089017944139aa60d8bee057811a15fb55a53887600a3eceba004de51105139f32506fe5b53e1913bfa6b32e716fe97da",
-"3c3a876da14034ab60627c077bb98f7e120a2a5370212dffb3385a18d4f38859ed311d0a9d5141ce9cc5c66ee689b266a8aa18ace8282a0e0db596c90b0a7b87",
-"3c3a876da14034ab60627c077bb98f7e120a2a5370212dffb3385a18d4f38859ed311d0a9d5141ce9cc5c66ee689b266a8aa18ace8282a0e0db596c90b0a7b87",
-]
-)
-
-function describe_hash(T::Type{S}) where {S <: SHA.SHA_CTX}
-    if T <: SHA.SHA1_CTX return "SHA1" end
-    if T <: SHA.SHA2_CTX return "SHA2-$(SHA.digestlen(T)*8)" end
-    if T <: SHA.SHA3_CTX return "SHA3-$(SHA.digestlen(T)*8)" end
-end
-
-VERBOSE && println("Loaded hash types: $(join(sort([describe_hash(t[2]) for t in sha_types]), ", ", " and "))")
-
-# First, test processing the data in one go
-nerrors = 0
-for idx in 1:length(data)
-    global nerrors
-
-    desc = data_desc[idx]
-    VERBOSE && print("Testing on $desc$(join(["." for z in 1:(34-length(desc))]))")
-    nerrors_old = nerrors
-    for sha_idx in 1:length(sha_funcs)
-        sha_func = sha_funcs[sha_idx]
-
-        if idx == 4
-            open(data[idx]) do f
-                hash = bytes2hex(sha_func(f))
-            end
-        else
-            hash = bytes2hex(sha_func(data[idx]))
-        end
-
-        if hash != answers[sha_func][idx]
-            print("\n")
-            @warn(
-            """
-            For $(describe_hash(sha_types[sha_func])) expected:
-                $(answers[sha_func][idx])
-            Calculated:
-                $(hash)
-            """)
-            nerrors += 1
-        else
-            VERBOSE && print(".")
-        end
-    end
-    VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-end
-
-# Do another test on the "so many a's" data where we chunk up the data into
-# two chunks, (sized appropriately to AVOID overflow from one update to another)
-# in order to test multiple update!() calls
-VERBOSE && print("Testing on one million a's (chunked properly)")
-nerrors_old = nerrors
-for sha_idx in 1:length(sha_funcs)
-    global nerrors
-
-    ctx = sha_types[sha_funcs[sha_idx]]()
-    SHA.update!(ctx, so_many_as_array[1:2*SHA.blocklen(typeof(ctx))])
-    SHA.update!(ctx, so_many_as_array[2*SHA.blocklen(typeof(ctx))+1:end])
-    hash = bytes2hex(SHA.digest!(ctx))
-    if hash != answers[sha_funcs[sha_idx]][end]
-        print("\n")
-        @warn(
-        """
-        For $(describe_hash(sha_types[sha_funcs[sha_idx]])) expected:
-            $(answers[sha_funcs[sha_idx]][end-1])
-        Calculated:
-            $(hash)
-        """)
-        nerrors += 1
-    else
-        VERBOSE && print(".")
-    end
-end
-VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-# Do another test on the "so many a's" data where we chunk up the data into
-# three chunks, (sized appropriately to CAUSE overflow from one update to another)
-# in order to test multiple update!() calls as well as the overflow codepaths
-VERBOSE && print("Testing on one million a's (chunked clumsily)")
-nerrors_old = nerrors
-for sha_idx in 1:length(sha_funcs)
-    global nerrors
-    ctx = sha_types[sha_funcs[sha_idx]]()
-
-    # Get indices awkwardly placed for the blocklength of this hash type
-    idx0 = round(Int, 0.3*SHA.blocklen(typeof(ctx)))
-    idx1 = round(Int, 1.7*SHA.blocklen(typeof(ctx)))
-    idx2 = round(Int, 2.6*SHA.blocklen(typeof(ctx)))
-
-    # Feed data in according to our dastardly blocking scheme
-    SHA.update!(ctx, so_many_as_array[0      + 1:1*idx0])
-    SHA.update!(ctx, so_many_as_array[1*idx0 + 1:2*idx0])
-    SHA.update!(ctx, so_many_as_array[2*idx0 + 1:3*idx0])
-    SHA.update!(ctx, so_many_as_array[3*idx0 + 1:4*idx0])
-    SHA.update!(ctx, so_many_as_array[4*idx0 + 1:idx1])
-    SHA.update!(ctx, so_many_as_array[idx1 + 1:idx2])
-    SHA.update!(ctx, so_many_as_array[idx2 + 1:end])
-
-    # Ensure the hash is the appropriate one
-    hash = bytes2hex(SHA.digest!(ctx))
-    if hash != answers[sha_funcs[sha_idx]][end]
-        print("\n")
-        @warn(
-        """
-        For $(describe_hash(sha_types[sha_funcs[sha_idx]])) expected:
-            $(answers[sha_funcs[sha_idx]][end-1])
-        Calculated:
-            $(hash)
-        """)
-        nerrors += 1
-    else
-        VERBOSE && print(".")
-    end
-end
-VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-# test hmac correctness using the examples on [wiki](https://en.wikipedia.org/wiki/Hash-based_message_authentication_code#Examples)
-VERBOSE && print("Testing on the hmac functions")
-nerrors_old = nerrors
-for (key, msg, fun, hash) in (
-    (b"", b"", hmac_sha1, "fbdb1d1b18aa6c08324b7d64b71fb76370690e1d"),
-    (b"", b"", hmac_sha256, "b613679a0814d9ec772f95d778c35fc5ff1697c493715653c6c712144292c5ad"),
-    (b"key", b"The quick brown fox jumps over the lazy dog", hmac_sha1, "de7c9b85b8b78aa6bc8a7a36f70a90701c9db4d9"),
-    (b"key", b"The quick brown fox jumps over the lazy dog", hmac_sha256, "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8"),
-)
-    global nerrors
-    digest1 = bytes2hex(fun(Vector(key), Vector(msg)))
-    digest2 = bytes2hex(fun(Vector(key), IOBuffer(Vector(msg))))
-    if digest1 != hash || digest2 != hash
-        print("\n")
-        @warn(
-        """
-        For $fun($(String(key)), $(String(msg))) expected:
-            $hash
-        Calculated:
-            $digest1
-            $digest2
-        """)
-        nerrors += 1
-    else
-        VERBOSE && print(".")
-    end
-end
-VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-replstr(x) = sprint((io, x) -> show(IOContext(io, :limit => true), MIME("text/plain"), x), x)
-
-for idx in 1:length(ctxs)
-    global nerrors
-    # Part #1: copy
-    VERBOSE && print("Testing copy function @ $(ctxs[idx]) ...")
-    try
-        copy(ctxs[idx]())
-    catch
-        print("\n")
-        @warn("Some weird copy error happened with $(ctxs[idx])")
-        nerrors += 1
-    end
-    VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-    # Part #2: show
-    VERBOSE && print("Testing show function @ $(ctxs[idx]) ...")
-    if replstr(ctxs[idx]()) != shws[idx]
-        print("\n")
-        @warn("Some weird show error happened with $(ctxs[idx])")
-        nerrors += 1
-    end
-    VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-end
-
-# test error if eltype of input is not UInt8
-for f in sha_funcs
-    global nerrors
-    local data = UInt32[0x23467, 0x324775]
-    try
-        f(data)
-    catch ex
-        if ex isa MethodError &&
-            ex.f === f &&
-            ex.args === (data,)
-            continue
-        end
-        rethrow()
-    end
-    @warn("Non-UInt8 Arrays should fail")
-    nerrors += 1
-end
-
-# Clean up the I/O mess
-rm(file)
-rm(tempdir)
-
-if nerrors == 0
-    VERBOSE && println("ALL OK")
-else
-    println("Failed with $nerrors failures")
-end
-@test nerrors == 0
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index 592db96565c7a..2889072bbdc8b 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -79,7 +79,7 @@ const TAGS = Any[
 
 @assert length(TAGS) == 255
 
-const ser_version = 15 # do not make changes without bumping the version #!
+const ser_version = 16 # do not make changes without bumping the version #!
 
 format_version(::AbstractSerializer) = ser_version
 format_version(s::Serializer) = s.version
@@ -418,7 +418,7 @@ function serialize(s::AbstractSerializer, meth::Method)
     serialize(s, meth.nargs)
     serialize(s, meth.isva)
     serialize(s, meth.is_for_opaque_closure)
-    serialize(s, meth.aggressive_constprop)
+    serialize(s, meth.constprop)
     if isdefined(meth, :source)
         serialize(s, Base._uncompressed_ast(meth, meth.source))
     else
@@ -749,14 +749,25 @@ end
     serialize(stream::IO, value)
 
 Write an arbitrary value to a stream in an opaque format, such that it can be read back by
-[`deserialize`](@ref). The read-back value will be as identical as possible to the original.
-In general, this process will not work if the reading and writing are done by different
-versions of Julia, or an instance of Julia with a different system image. `Ptr` values are
-serialized as all-zero bit patterns (`NULL`).
+[`deserialize`](@ref). The read-back value will be as identical as possible to the original,
+but note that `Ptr` values are serialized as all-zero bit patterns (`NULL`).
 
 An 8-byte identifying header is written to the stream first. To avoid writing the header,
 construct a `Serializer` and use it as the first argument to `serialize` instead.
 See also [`Serialization.writeheader`](@ref).
+
+The data format can change in minor (1.x) Julia releases, but files written by prior 1.x
+versions will remain readable. The main exception to this is when the definition of a
+type in an external package changes. If that occurs, it may be necessary to specify
+an explicit compatible version of the affected package in your environment.
+Renaming functions, even private functions, inside packages can also put existing files
+out of sync. Anonymous functions require special care: because their names are automatically
+generated, minor code changes can cause them to be renamed.
+Serializing anonymous functions should be avoided in files intended for long-term storage.
+
+In some cases, the word size (32- or 64-bit) of the reading and writing machines must match.
+In rarer cases the OS or architecture must also match, for example when using packages
+that contain platform-dependent code.
 """
 function serialize(s::IO, x)
     ss = Serializer(s)
@@ -781,8 +792,8 @@ serialize(filename::AbstractString, x) = open(io->serialize(io, x), filename, "w
 
 Read a value written by [`serialize`](@ref). `deserialize` assumes the binary data read from
 `stream` is correct and has been serialized by a compatible implementation of [`serialize`](@ref).
-It has been designed with simplicity and performance as a goal and does not validate
-the data read. Malformed data can result in process termination. The caller has to ensure
+`deserialize` is designed for simplicity and performance, and so does not validate
+the data read. Malformed data can result in process termination. The caller must ensure
 the integrity and correctness of data read from `stream`.
 """
 deserialize(s::IO) = deserialize(Serializer(s))
@@ -1014,12 +1025,12 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
     nargs = deserialize(s)::Int32
     isva = deserialize(s)::Bool
     is_for_opaque_closure = false
-    aggressive_constprop = false
+    constprop = 0x00
     template_or_is_opaque = deserialize(s)
     if isa(template_or_is_opaque, Bool)
         is_for_opaque_closure = template_or_is_opaque
         if format_version(s) >= 14
-            aggressive_constprop = deserialize(s)::Bool
+            constprop = deserialize(s)::UInt8
         end
         template = deserialize(s)
     else
@@ -1039,7 +1050,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         meth.nargs = nargs
         meth.isva = isva
         meth.is_for_opaque_closure = is_for_opaque_closure
-        meth.aggressive_constprop = aggressive_constprop
+        meth.constprop = constprop
         if template !== nothing
             # TODO: compress template
             meth.source = template::CodeInfo
@@ -1135,7 +1146,13 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
         ci.ssavaluetypes = deserialize(s)
         ci.linetable = deserialize(s)
     end
-    ci.ssaflags = deserialize(s)
+    ssaflags = deserialize(s)
+    if length(ssaflags) ≠ length(code)
+        # make sure the length of `ssaflags` matches that of `code`
+        # so that the latest inference doesn't throw on IRs serialized from old versions
+        ssaflags = UInt8[0x00 for _ in 1:length(code)]
+    end
+    ci.ssaflags = ssaflags
     if pre_12
         ci.slotflags = deserialize(s)
     else
@@ -1163,7 +1180,7 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     ci.propagate_inbounds = deserialize(s)
     ci.pure = deserialize(s)
     if format_version(s) >= 14
-        ci.aggressive_constprop = deserialize(s)::Bool
+        ci.constprop = deserialize(s)::UInt8
     end
     return ci
 end
diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl
index 85f1eb4fff150..1348a68dca957 100644
--- a/stdlib/SharedArrays/src/SharedArrays.jl
+++ b/stdlib/SharedArrays/src/SharedArrays.jl
@@ -507,9 +507,9 @@ end
 Array(S::SharedArray) = S.s
 
 # pass through getindex and setindex! - unlike DArrays, these always work on the complete array
-getindex(S::SharedArray, i::Real) = getindex(S.s, i)
+Base.@propagate_inbounds getindex(S::SharedArray, i::Real) = getindex(S.s, i)
 
-setindex!(S::SharedArray, x, i::Real) = setindex!(S.s, x, i)
+Base.@propagate_inbounds setindex!(S::SharedArray, x, i::Real) = setindex!(S.s, x, i)
 
 function fill!(S::SharedArray, v)
     vT = convert(eltype(S), v)
diff --git a/stdlib/Sockets/src/PipeServer.jl b/stdlib/Sockets/src/PipeServer.jl
index bc203f5f31436..4a8965c8f0462 100644
--- a/stdlib/Sockets/src/PipeServer.jl
+++ b/stdlib/Sockets/src/PipeServer.jl
@@ -70,6 +70,9 @@ end
     listen(path::AbstractString) -> PipeServer
 
 Create and listen on a named pipe / UNIX domain socket.
+
+!!! note
+    Path length on Unix is limited to somewhere between 92 and 108 bytes (cf. `man unix`).
 """
 function listen(path::AbstractString)
     sock = PipeServer()
@@ -93,5 +96,8 @@ end
     connect(path::AbstractString) -> PipeEndpoint
 
 Connect to the named pipe / UNIX domain socket at `path`.
+
+!!! note
+    Path length on Unix is limited to somewhere between 92 and 108 bytes (cf. `man unix`).
 """
 connect(path::AbstractString) = connect(PipeEndpoint(), path)
diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl
index 6952aa9bd8a0f..fb46b9255e6f0 100644
--- a/stdlib/Sockets/src/Sockets.jl
+++ b/stdlib/Sockets/src/Sockets.jl
@@ -139,9 +139,6 @@ function TCPServer(; delay=true)
     return tcp
 end
 
-isreadable(io::TCPSocket) = isopen(io) || bytesavailable(io) > 0
-iswritable(io::TCPSocket) = isopen(io) && io.status != StatusClosing
-
 """
     accept(server[, client])
 
@@ -578,11 +575,11 @@ Enables or disables Nagle's algorithm on a given TCP server or socket.
 """
 function nagle(sock::Union{TCPServer, TCPSocket}, enable::Bool)
     # disable or enable Nagle's algorithm on all OSes
-    Sockets.iolock_begin()
-    Sockets.check_open(sock)
+    iolock_begin()
+    check_open(sock)
     err = ccall(:uv_tcp_nodelay, Cint, (Ptr{Cvoid}, Cint), sock.handle, Cint(!enable))
     # TODO: check err
-    Sockets.iolock_end()
+    iolock_end()
     return err
 end
 
@@ -592,15 +589,15 @@ end
 On Linux systems, the TCP_QUICKACK is disabled or enabled on `socket`.
 """
 function quickack(sock::Union{TCPServer, TCPSocket}, enable::Bool)
-    Sockets.iolock_begin()
-    Sockets.check_open(sock)
+    iolock_begin()
+    check_open(sock)
     @static if Sys.islinux()
         # tcp_quickack is a linux only option
         if ccall(:jl_tcp_quickack, Cint, (Ptr{Cvoid}, Cint), sock.handle, Cint(enable)) < 0
             @warn "Networking unoptimized ( Error enabling TCP_QUICKACK : $(Libc.strerror(Libc.errno())) )" maxlog=1
         end
     end
-    Sockets.iolock_end()
+    iolock_end()
     nothing
 end
 
diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index b00eeeee2d068..90a281050d150 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -196,6 +196,31 @@ end
 
 
 @testset "getnameinfo on some unroutable IP addresses (RFC 5737)" begin
+    try
+        getnameinfo(ip"192.0.2.1")
+        getnameinfo(ip"198.51.100.1")
+        getnameinfo(ip"203.0.113.1")
+        getnameinfo(ip"0.1.1.1")
+        getnameinfo(ip"::ffff:0.1.1.1")
+        getnameinfo(ip"::ffff:192.0.2.1")
+        getnameinfo(ip"2001:db8::1")
+    catch
+        # NOTE: Default Ubuntu installations contain a faulty DNS configuration
+        # that returns `EAI_AGAIN` instead of `EAI_NONAME`.  To fix this, try
+        # installing `libnss-resolve`, which installs the `systemd-resolve`
+        # backend for NSS, which should fix it.
+        #
+        # If you are running tests inside Docker, you'll need to install
+        # `libnss-resolve` both outside Docker (i.e. on the host machine) and
+        # inside the Docker container.
+        if Sys.islinux()
+            error_msg = string(
+                "`getnameinfo` failed on an unroutable IP address. ",
+                "If your DNS setup seems to be working, try installing libnss-resolve",
+            )
+            @error(error_msg)
+        end
+    end
     @test getnameinfo(ip"192.0.2.1") == "192.0.2.1"
     @test getnameinfo(ip"198.51.100.1") == "198.51.100.1"
     @test getnameinfo(ip"203.0.113.1") == "203.0.113.1"
@@ -526,17 +551,42 @@ end
         r = @async close(s)
         @test_throws Base._UVError("connect", Base.UV_ECANCELED) Sockets.wait_connected(s)
         fetch(r)
+        close(srv)
     end
 end
 
 @testset "iswritable" begin
     let addr = Sockets.InetAddr(ip"127.0.0.1", 4445)
         srv = listen(addr)
-        s = Sockets.TCPSocket()
-        Sockets.connect!(s, addr)
-        @test iswritable(s)
-        close(s)
-        @test !iswritable(s)
+        let s = Sockets.TCPSocket()
+            Sockets.connect!(s, addr)
+            @test iswritable(s) broken=Sys.iswindows()
+            close(s)
+            @test !iswritable(s)
+        end
+        let s = Sockets.connect(addr)
+            @test iswritable(s)
+            closewrite(s)
+            @test !iswritable(s)
+            close(s)
+        end
+        close(srv)
+        srv = listen(addr)
+        let s = Sockets.connect(addr)
+            let c = accept(srv)
+                Base.errormonitor(@async try; write(c, c); finally; close(c); end)
+            end
+            @test iswritable(s)
+            write(s, "hello world\n")
+            closewrite(s)
+            @test !iswritable(s)
+            @test isreadable(s)
+            @test read(s, String) == "hello world\n"
+            @test !isreadable(s)
+            @test !isopen(s)
+            close(s)
+        end
+        close(srv)
     end
 end
 
diff --git a/stdlib/SparseArrays/src/SparseArrays.jl b/stdlib/SparseArrays/src/SparseArrays.jl
index e3fcd1ef955c9..727abddbb62e8 100644
--- a/stdlib/SparseArrays/src/SparseArrays.jl
+++ b/stdlib/SparseArrays/src/SparseArrays.jl
@@ -6,28 +6,21 @@ Support for sparse arrays. Provides `AbstractSparseArray` and subtypes.
 module SparseArrays
 
 using Base: ReshapedArray, promote_op, setindex_shape_check, to_shape, tail,
-    require_one_based_indexing
+    require_one_based_indexing, promote_eltype
 using Base.Sort: Forward
 using LinearAlgebra
+using LinearAlgebra: AdjOrTrans, matprod
 
 import Base: +, -, *, \, /, &, |, xor, ==, zero
 import LinearAlgebra: mul!, ldiv!, rdiv!, cholesky, adjoint!, diag, eigen, dot,
     issymmetric, istril, istriu, lu, tr, transpose!, tril!, triu!, isbanded,
-    cond, diagm, factorize, ishermitian, norm, opnorm, lmul!, rmul!, tril, triu, matprod
+    cond, diagm, factorize, ishermitian, norm, opnorm, lmul!, rmul!, tril, triu
 
-import Base: acos, acosd, acot, acotd, acsch, asech, asin, asind, asinh,
-    atan, atand, atanh, broadcast!, conj!, cos, cosc, cosd, cosh, cospi, cot,
-    cotd, coth, count, csc, cscd, csch,
-    exp10, exp2, findprev, findnext, floor, hash, argmin, inv,
-    log10, log2, sec, secd, sech, show,
-    sin, sinc, sind, sinh, sinpi, dropdims, sum, summary, tan,
-    tand, tanh, trunc, abs, abs2,
-    broadcast, ceil, complex, conj, convert, copy, copyto!, adjoint,
-    exp, expm1, findall, findmax, findmin, float, getindex,
-    vcat, hcat, hvcat, cat, imag, argmax, kron, kron!, length, log, log1p, max, min,
-    maximum, minimum, one, promote_eltype, real, reshape, rot180,
-    rotl90, rotr90, round, setindex!, similar, size, transpose,
-    vec, permute!, map, map!, Array, diff, circshift!, circshift
+import Base: adjoint, argmin, argmax, Array, broadcast, circshift!, complex, Complex,
+    conj, conj!, convert, copy, copy!, copyto!, count, diff, findall, findmax, findmin,
+    float, getindex, imag, inv, kron, kron!, length, map, maximum, minimum, permute!, real,
+    rot180, rotl90, rotr90, setindex!, show, similar, size, sum, transpose,
+    vcat, hcat, hvcat, cat, vec
 
 using Random: default_rng, AbstractRNG, randsubseq, randsubseq!
 
diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl
index a5941da764883..70cf7a2b9ba89 100644
--- a/stdlib/SparseArrays/src/higherorderfns.jl
+++ b/stdlib/SparseArrays/src/higherorderfns.jl
@@ -840,13 +840,19 @@ function _outer(trans::Tf, x, y) where Tf
     @inbounds colptrC[1] = 1
     @inbounds for jj = 1:nnzy
         yval = nzvalsy[jj]
-        iszero(yval) && continue
+        if iszero(yval)
+            nnzC -= nnzx
+            continue
+        end
         col = rowvaly[jj]
         yval = trans(yval)
 
         for ii = 1:nnzx
             xval = nzvalsx[ii]
-            iszero(xval) && continue
+            if iszero(xval)
+                nnzC -= 1
+                continue
+            end
             idx += 1
             colptrC[col+1] += 1
             rowvalC[idx] = rowvalx[ii]
@@ -854,6 +860,8 @@ function _outer(trans::Tf, x, y) where Tf
         end
     end
     cumsum!(colptrC, colptrC)
+    resize!(rowvalC, nnzC)
+    resize!(nzvalsC, nnzC)
 
     return SparseMatrixCSC(nx, ny, colptrC, rowvalC, nzvalsC)
 end
diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl
index cf0b2a75f1282..357d468b42e3e 100644
--- a/stdlib/SparseArrays/src/linalg.jl
+++ b/stdlib/SparseArrays/src/linalg.jl
@@ -1363,7 +1363,15 @@ end
     end
     return C
 end
-
+@inline function kron!(C::SparseMatrixCSC, A::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}, B::AbstractSparseMatrixCSC)
+    return kron!(C, copy(A), B)
+end
+@inline function kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, B::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC})
+    return kron!(C, A, copy(B))
+end
+@inline function kron!(C::SparseMatrixCSC, A::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC})
+    return kron!(C, copy(A), copy(B))
+end
 @inline function kron!(z::SparseVector, x::SparseVector, y::SparseVector)
     nnzx = nnz(x); nnzy = nnz(y);
     nzind = nonzeroinds(z)
@@ -1391,6 +1399,11 @@ function kron(A::AbstractSparseMatrixCSC{T1,S1}, B::AbstractSparseMatrixCSC{T2,S
     sizehint!(C, nnz(A)*nnz(B))
     return @inbounds kron!(C, A, B)
 end
+kron(A::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}, B::AbstractSparseMatrixCSC) = kron(copy(A), B)
+kron(A::AbstractSparseMatrixCSC, B::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}) = kron(A, copy(B))
+function kron(A::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC})
+    return kron(copy(A), copy(B))
+end
 
 # sparse vector ⊗ sparse vector
 function kron(x::SparseVector{T1,S1}, y::SparseVector{T2,S2}) where {T1,S1,T2,S2}
@@ -1407,21 +1420,29 @@ Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, x
 Base.@propagate_inbounds kron!(C::SparseMatrixCSC, x::SparseVector, A::AbstractSparseMatrixCSC) = kron!(C, SparseMatrixCSC(x), A)
 
 kron(A::AbstractSparseMatrixCSC, x::SparseVector) = kron(A, SparseMatrixCSC(x))
+kron(A::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}, x::SparseVector) =
+    kron(copy(A), x)
 kron(x::SparseVector, A::AbstractSparseMatrixCSC) = kron(SparseMatrixCSC(x), A)
+kron(x::SparseVector, A::AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}) =
+    kron(x, copy(A))
 
 # sparse vec/mat ⊗ vec/mat and vice versa
 Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::Union{SparseVector,AbstractSparseMatrixCSC}, B::VecOrMat) = kron!(C, A, sparse(B))
 Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::VecOrMat, B::Union{SparseVector,AbstractSparseMatrixCSC}) = kron!(C, sparse(A), B)
 
-kron(A::Union{SparseVector,AbstractSparseMatrixCSC}, B::VecOrMat) = kron(A, sparse(B))
-kron(A::VecOrMat, B::Union{SparseVector,AbstractSparseMatrixCSC}) = kron(sparse(A), B)
+kron(A::Union{SparseVector,AbstractSparseMatrixCSC,AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}}, B::VecOrMat) =
+    kron(A, sparse(B))
+kron(A::VecOrMat, B::Union{SparseVector,AbstractSparseMatrixCSC,AdjOrTrans{<:Any,<:AbstractSparseMatrixCSC}}) =
+    kron(sparse(A), B)
 
 # sparse vec/mat ⊗ Diagonal and vice versa
 Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::Diagonal{T}, B::Union{SparseVector{S}, AbstractSparseMatrixCSC{S}}) where {T<:Number, S<:Number} = kron!(C, sparse(A), B)
 Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::Union{SparseVector{T}, AbstractSparseMatrixCSC{T}}, B::Diagonal{S}) where {T<:Number, S<:Number} = kron!(C, A, sparse(B))
 
-kron(A::Diagonal{T}, B::Union{SparseVector{S}, AbstractSparseMatrixCSC{S}}) where {T<:Number, S<:Number} = kron(sparse(A), B)
-kron(A::Union{SparseVector{T}, AbstractSparseMatrixCSC{T}}, B::Diagonal{S}) where {T<:Number, S<:Number} = kron(A, sparse(B))
+kron(A::Diagonal{T}, B::Union{SparseVector{S}, AbstractSparseMatrixCSC{S}, AdjOrTrans{S,<:AbstractSparseMatrixCSC}}) where {T<:Number, S<:Number} =
+    kron(sparse(A), B)
+kron(A::Union{SparseVector{T}, AbstractSparseMatrixCSC{T}, AdjOrTrans{T,<:AbstractSparseMatrixCSC}}, B::Diagonal{S}) where {T<:Number, S<:Number} =
+    kron(A, sparse(B))
 
 # sparse outer product
 kron!(C::SparseMatrixCSC, A::SparseVectorUnion, B::AdjOrTransSparseVectorUnion) = broadcast!(*, C, A, B)
diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index af5eb4e4ab726..74c215fbcc710 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -563,6 +563,8 @@ SparseMatrixCSC(M::Matrix) = sparse(M)
 SparseMatrixCSC(T::Tridiagonal{Tv}) where Tv = SparseMatrixCSC{Tv,Int}(T)
 function SparseMatrixCSC{Tv,Ti}(T::Tridiagonal) where {Tv,Ti}
     m = length(T.d)
+    m == 0 && return SparseMatrixCSC{Tv,Ti}(0, 0, ones(Ti, 1), Ti[], Tv[])
+    m == 1 && return SparseMatrixCSC{Tv,Ti}(1, 1, Ti[1, 2], Ti[1], Tv[T.d[1]])
 
     colptr = Vector{Ti}(undef, m+1)
     colptr[1] = 1
@@ -593,6 +595,8 @@ end
 SparseMatrixCSC(T::SymTridiagonal{Tv}) where Tv = SparseMatrixCSC{Tv,Int}(T)
 function SparseMatrixCSC{Tv,Ti}(T::SymTridiagonal) where {Tv,Ti}
     m = length(T.dv)
+    m == 0 && return SparseMatrixCSC{Tv,Ti}(0, 0, ones(Ti, 1), Ti[], Tv[])
+    m == 1 && return SparseMatrixCSC{Tv,Ti}(1, 1, Ti[1, 2], Ti[1], Tv[T.dv[1]])
 
     colptr = Vector{Ti}(undef, m+1)
     colptr[1] = 1
@@ -623,7 +627,7 @@ end
 SparseMatrixCSC(B::Bidiagonal{Tv}) where Tv = SparseMatrixCSC{Tv,Int}(B)
 function SparseMatrixCSC{Tv,Ti}(B::Bidiagonal) where {Tv,Ti}
     m = length(B.dv)
-    m == 0 && return SparseMatrixCSC{Tv,Ti}(zeros(Tv, 0, 0))
+    m == 0 && return SparseMatrixCSC{Tv,Ti}(0, 0, ones(Ti, 1), Ti[], Tv[])
 
     colptr = Vector{Ti}(undef, m+1)
     colptr[1] = 1
@@ -652,8 +656,33 @@ end
 SparseMatrixCSC(D::Diagonal{Tv}) where Tv = SparseMatrixCSC{Tv,Int}(D)
 function SparseMatrixCSC{Tv,Ti}(D::Diagonal) where {Tv,Ti}
     m = length(D.diag)
-    return SparseMatrixCSC(m, m, Vector(1:(m+1)), Vector(1:m), Vector{Tv}(D.diag))
+    # Empty input: build the 0×0 result directly instead of via a dense temporary.
+    m == 0 && return SparseMatrixCSC{Tv,Ti}(0, 0, ones(Ti, 1), Ti[], Tv[])
+
+    nz = count(!iszero, D.diag)
+    nz_counter = 1
+
+    rowval = Vector{Ti}(undef, nz)
+    nzval =  Vector{Tv}(undef, nz)
+
+    nz == 0 && return SparseMatrixCSC{Tv,Ti}(m, m, ones(Ti, m+1), rowval, nzval)
+
+    colptr = Vector{Ti}(undef, m+1)
+
+    @inbounds for i=1:m
+        # Column `i` starts at the next free slot whether or not it stores an entry.
+        colptr[i] = nz_counter
+        if !iszero(D.diag[i])
+            rowval[nz_counter] = i
+            nzval[nz_counter]  = D.diag[i]
+            nz_counter += 1
+        end
+    end
+    colptr[end] = nz_counter
+
+    return SparseMatrixCSC{Tv,Ti}(m, m, colptr, rowval, nzval)
 end
+
 SparseMatrixCSC(M::AbstractMatrix{Tv}) where {Tv} = SparseMatrixCSC{Tv,Int}(M)
 SparseMatrixCSC{Tv}(M::AbstractMatrix{Tv}) where {Tv} = SparseMatrixCSC{Tv,Int}(M)
 function SparseMatrixCSC{Tv,Ti}(M::AbstractMatrix) where {Tv,Ti}
@@ -1642,13 +1671,13 @@ argument specifies a random number generator, see [Random Numbers](@ref).
 ```jldoctest; setup = :(using Random; Random.seed!(1234))
 julia> sprand(Bool, 2, 2, 0.5)
 2×2 SparseMatrixCSC{Bool, Int64} with 2 stored entries:
- ⋅  ⋅
  1  1
+ ⋅  ⋅
 
 julia> sprand(Float64, 3, 0.75)
 3-element SparseVector{Float64, Int64} with 2 stored entries:
-  [1]  =  0.523355
-  [2]  =  0.0890391
+  [1]  =  0.795547
+  [2]  =  0.49425
 ```
 """
 function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn::Function, ::Type{T}=eltype(rfn(r, 1))) where T
@@ -1690,8 +1719,8 @@ argument specifies a random number generator, see [Random Numbers](@ref).
 ```jldoctest; setup = :(using Random; Random.seed!(0))
 julia> sprandn(2, 2, 0.75)
 2×2 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
- -1.92631  -0.858041
-   ⋅        0.0213808
+ -1.20577     ⋅
+  0.311817  -0.234641
 ```
 """
 sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) =
@@ -2168,8 +2197,10 @@ end
 _isless_fm(a, b)    =  b == b && ( a != a || isless(a, b) )
 _isgreater_fm(a, b) =  b == b && ( a != a || isless(b, a) )
 
-findmin(A::AbstractSparseMatrixCSC{Tv,Ti}, region) where {Tv,Ti} = _findr(_isless_fm, A, region, Tv)
-findmax(A::AbstractSparseMatrixCSC{Tv,Ti}, region) where {Tv,Ti} = _findr(_isgreater_fm, A, region, Tv)
+findmin(A::AbstractSparseMatrixCSC{Tv}, region::Union{Integer,Tuple{Integer},NTuple{2,Integer}}) where {Tv} =
+    _findr(_isless_fm, A, region, Tv)
+findmax(A::AbstractSparseMatrixCSC{Tv}, region::Union{Integer,Tuple{Integer},NTuple{2,Integer}}) where {Tv} =
+    _findr(_isgreater_fm, A, region, Tv)
 findmin(A::AbstractSparseMatrixCSC) = (r=findmin(A,(1,2)); (r[1][1], r[2][1]))
 findmax(A::AbstractSparseMatrixCSC) = (r=findmax(A,(1,2)); (r[1][1], r[2][1]))
 
@@ -2185,7 +2216,7 @@ end
 getindex(A::AbstractSparseMatrixCSC, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2])
 
 function getindex(A::AbstractSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T
-    if !(1 <= i0 <= size(A, 1) && 1 <= i1 <= size(A, 2)); throw(BoundsError()); end
+    @boundscheck checkbounds(A, i0, i1)
     r1 = Int(getcolptr(A)[i1])
     r2 = Int(getcolptr(A)[i1+1]-1)
     (r1 > r2) && return zero(T)
@@ -3836,3 +3867,91 @@ end
 
 circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, (r,)::Base.DimsInteger{1}) = circshift!(O, X, (r,0))
 circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, r::Real) = circshift!(O, X, (Integer(r),0))
+
+## swaprows! / swapcols!
+macro swap(a, b)
+    esc(:(($a, $b) = ($b, $a)))
+end
+
+# Swap columns `i` and `j` of `A` in place by exchanging their stored-entry ranges in
+# `rowvals(A)` and `nonzeros(A)`, then shifting the column pointers that lie between them.
+function Base.swapcols!(A::AbstractSparseMatrixCSC, i, j)
+    i == j && return
+
+    # For simplicity, let i denote the smaller of the two columns
+    j < i && @swap(i, j)
+
+    colptr = getcolptr(A)
+    irow = colptr[i]:(colptr[i+1]-1)
+    jrow = colptr[j]:(colptr[j+1]-1)
+
+    function rangeexchange!(arr, irow, jrow)
+        if length(irow) == length(jrow)
+            for (a, b) in zip(irow, jrow)
+                @inbounds @swap(arr[a], arr[b])
+            end
+            return
+        end
+        # This is similar to the triple-reverse trick for circshift!, except that we have
+        # three ranges here, so it ends up being 4 reverse calls (but still 2 overall
+        # reversals for the memory range). Like circshift!, there's also a cycle chasing
+        # algorithm with optimal memory complexity, but the performance tradeoffs against
+        # this implementation are non-trivial, so let's just do this simple thing for now.
+        # See https://github.com/JuliaLang/julia/pull/42676 for discussion of
+        # circshift!-like algorithms.
+        reverse!(@view arr[irow])
+        reverse!(@view arr[jrow])
+        reverse!(@view arr[(last(irow)+1):(first(jrow)-1)])
+        reverse!(@view arr[first(irow):last(jrow)])
+    end
+    rangeexchange!(rowvals(A), irow, jrow)
+    rangeexchange!(nonzeros(A), irow, jrow)
+
+    if length(irow) != length(jrow)
+        # Columns i+1:j now start at offsets shifted by the difference in entry counts
+        @inbounds colptr[i+1:j] .+= length(jrow) - length(irow)
+    end
+    return nothing
+end
+
+# Swap rows `i` and `j` of `A` in place, one column at a time, via `rowvals(A)`/`nonzeros(A)`.
+function Base.swaprows!(A::AbstractSparseMatrixCSC, i, j)
+    # For simplicity, let i denote the smaller of the two rows
+    j < i && @swap(i, j)
+
+    rows = rowvals(A)
+    vals = nonzeros(A)
+    for col = 1:size(A, 2)
+        rr = nzrange(A, col)
+        iidx = searchsortedfirst(@view(rows[rr]), i)
+        has_i = iidx <= length(rr) && rows[rr[iidx]] == i
+
+        jrange = has_i ? (rr[iidx]:last(rr)) : rr # absolute indices into `rows`/`vals`
+        jidx = searchsortedlast(@view(rows[jrange]), j)
+        has_j = jidx != 0 && rows[jrange[jidx]] == j
+
+        if !has_j && !has_i
+            # Has neither row - nothing to do
+            continue
+        elseif has_i && has_j
+            # This column had both i and j rows - swap them
+            @swap(vals[rr[iidx]], vals[jrange[jidx]])
+        elseif has_i
+            # Update the rowval, then rotate nonzeros and remaining rowvals into place
+            rows[rr[iidx]] = j
+            jidx == 0 && continue
+            rotate_range = rr[iidx]:jrange[jidx]
+            circshift!(@view(vals[rotate_range]), -1)
+            circshift!(@view(rows[rotate_range]), -1)
+        else
+            # Same as i, but in the opposite direction
+            @assert has_j
+            rows[jrange[jidx]] = i
+            iidx > length(rr) && continue
+            rotate_range = rr[iidx]:jrange[jidx]
+            circshift!(@view(vals[rotate_range]), 1)
+            circshift!(@view(rows[rotate_range]), 1)
+        end
+    end
+    return nothing
+end
diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl
index 55ad738a7eb77..c8f4eaaada8e0 100644
--- a/stdlib/SparseArrays/src/sparsevector.jl
+++ b/stdlib/SparseArrays/src/sparsevector.jl
@@ -1372,7 +1372,7 @@ end
 
 ### Reduction
 
-function _sum(f, x::AbstractSparseVector)
+function sum(f, x::AbstractSparseVector)
     n = length(x)
     n > 0 || return sum(f, nonzeros(x)) # return zero() of proper type
     m = nnz(x)
@@ -1381,11 +1381,9 @@ function _sum(f, x::AbstractSparseVector)
      Base.add_sum((n - m) * f(zero(eltype(x))), sum(f, nonzeros(x))))
 end
 
-sum(f::Union{Function, Type}, x::AbstractSparseVector) = _sum(f, x) # resolve ambiguity
-sum(f, x::AbstractSparseVector) = _sum(f, x)
 sum(x::AbstractSparseVector) = sum(nonzeros(x))
 
-function _maximum(f, x::AbstractSparseVector)
+function maximum(f, x::AbstractSparseVector)
     n = length(x)
     if n == 0
         if f === abs || f === abs2
@@ -1400,11 +1398,9 @@ function _maximum(f, x::AbstractSparseVector)
      max(f(zero(eltype(x))), maximum(f, nonzeros(x))))
 end
 
-maximum(f::Union{Function, Type}, x::AbstractSparseVector) = _maximum(f, x) # resolve ambiguity
-maximum(f, x::AbstractSparseVector) = _maximum(f, x)
 maximum(x::AbstractSparseVector) = maximum(identity, x)
 
-function _minimum(f, x::AbstractSparseVector)
+function minimum(f, x::AbstractSparseVector)
     n = length(x)
     n > 0 || throw(ArgumentError("minimum over an empty array is not allowed."))
     m = nnz(x)
@@ -1413,10 +1409,29 @@ function _minimum(f, x::AbstractSparseVector)
      min(f(zero(eltype(x))), minimum(f, nonzeros(x))))
 end
 
-minimum(f::Union{Function, Type}, x::AbstractSparseVector) = _minimum(f, x) # resolve ambiguity
-minimum(f, x::AbstractSparseVector) = _minimum(f, x)
 minimum(x::AbstractSparseVector) = minimum(identity, x)
 
+for (fun, comp, word) in ((:findmin, :(<), "minimum"), (:findmax, :(>), "maximum"))
+    @eval function $fun(f, x::AbstractSparseVector{T}) where {T}
+        n = length(x)
+        n > 0 || throw(ArgumentError($word * " over empty array is not allowed"))
+        nzvals = nonzeros(x)
+        m = length(nzvals)
+        m == 0 && return f(zero(T)), firstindex(x)
+        val, index = $fun(f, nzvals)
+        m == n && return val, index
+        nzinds = nonzeroinds(x)
+        zeroval = f(zero(T))
+        $comp(val, zeroval) && return val, nzinds[index]
+        # find the first zero, stored or implicit, while avoiding findfirst(iszero, x);
+        # if indices 1:m are all stored, the first implicit zero is m + 1 (m < n here)
+        sindex = findfirst(iszero, nzvals) # first stored zero (position in nzvals), if any
+        zindex = something(findfirst(i -> i < nzinds[i], eachindex(nzinds)), m + 1)
+        index = isnothing(sindex) ? zindex : min(nzinds[sindex], zindex)
+        return zeroval, index
+    end
+end
+
 norm(x::SparseVectorUnion, p::Real=2) = norm(nonzeros(x), p)
 
 ### linalg.jl
@@ -2085,18 +2100,6 @@ function fill!(A::Union{SparseVector, AbstractSparseMatrixCSC}, x)
     return A
 end
 
-
-
-# in-place swaps (dense) blocks start:split and split+1:fin in col
-function _swap!(col::AbstractVector, start::Integer, fin::Integer, split::Integer)
-    split == fin && return
-    reverse!(col, start, split)
-    reverse!(col, split + 1, fin)
-    reverse!(col, start, fin)
-    return
-end
-
-
 # in-place shifts a sparse subvector by r. Used also by sparsematrix.jl
 function subvector_shifter!(R::AbstractVector, V::AbstractVector, start::Integer, fin::Integer, m::Integer, r::Integer)
     split = fin
@@ -2110,16 +2113,14 @@ function subvector_shifter!(R::AbstractVector, V::AbstractVector, start::Integer
         end
     end
     # ...but rowval should be sorted within columns
-    _swap!(R, start, fin, split)
-    _swap!(V, start, fin, split)
+    circshift!(@view(R[start:fin]), split-start+1)
+    circshift!(@view(V[start:fin]), split-start+1)
 end
 
-
 function circshift!(O::SparseVector, X::SparseVector, (r,)::Base.DimsInteger{1})
     copy!(O, X)
     subvector_shifter!(nonzeroinds(O), nonzeros(O), 1, length(nonzeroinds(O)), length(O), mod(r, length(X)))
     return O
 end
 
-
 circshift!(O::SparseVector, X::SparseVector, r::Real,) = circshift!(O, X, (Integer(r),))
diff --git a/stdlib/SparseArrays/test/higherorderfns.jl b/stdlib/SparseArrays/test/higherorderfns.jl
index af8d15e57375e..8da605cf6c0c0 100644
--- a/stdlib/SparseArrays/test/higherorderfns.jl
+++ b/stdlib/SparseArrays/test/higherorderfns.jl
@@ -626,7 +626,8 @@ end
     @test 2 .* ((1:5) .+ A) == 2:2:10
     @test 2 .* (A .+ (1:5)) == 2:2:10
 
-    @test Diagonal(spzeros(5)) \ view(rand(10), 1:5) == [Inf,Inf,Inf,Inf,Inf]
+    # lu(zeros(5,5)) throws SingularException, see #42343
+    @test_throws SingularException Diagonal(spzeros(5)) \ view(rand(10), 1:5)
 end
 
 @testset "Issue #27836" begin
@@ -726,4 +727,13 @@ end
     @test extrema(x; dims=[]) == extrema(y; dims=[])
 end
 
+@testset "issue #42670 - error in sparsevec outer product" begin
+    A = spzeros(Int, 4)
+    B = copy(A)
+    C = sparsevec([0 0 1 1 0 0])'
+    A[2] = 1
+    A[2] = 0
+    @test A * C == B * C == spzeros(Int, 4, 6)
+end
+
 end # module
diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl
index 221997a8f3384..4a9a6477af9f9 100644
--- a/stdlib/SparseArrays/test/sparse.jl
+++ b/stdlib/SparseArrays/test/sparse.jl
@@ -454,29 +454,35 @@ end
         c_di = Diagonal(rand(m)); c = sparse(c_di); c_d = Array(c_di)
         d_di = Diagonal(rand(n)); d = sparse(d_di); d_d = Array(d_di)
         # mat ⊗ mat
-        @test Array(kron(a, b)) == kron(a_d, b_d)
-        @test Array(kron(a_d, b)) == kron(a_d, b_d)
-        @test Array(kron(a, b_d)) == kron(a_d, b_d)
-        @test issparse(kron(c, d_di))
-        @test Array(kron(c, d_di)) == kron(c_d, d_d)
-        @test issparse(kron(c_di, d))
-        @test Array(kron(c_di, d)) == kron(c_d, d_d)
-        @test issparse(kron(c_di, y))
-        @test Array(kron(c_di, y)) == kron(c_di, y_d)
-        @test issparse(kron(x, d_di))
-        @test Array(kron(x, d_di)) == kron(x_d, d_di)
+        for t in (identity, adjoint, transpose)
+            @test Array(kron(t(a), b)::SparseMatrixCSC) == kron(t(a_d), b_d)
+            @test Array(kron(a, t(b))::SparseMatrixCSC) == kron(a_d, t(b_d))
+            @test Array(kron(t(a), t(b))::SparseMatrixCSC) == kron(t(a_d), t(b_d))
+            @test Array(kron(a_d, t(b))::SparseMatrixCSC) == kron(a_d, t(b_d))
+            @test Array(kron(t(a), b_d)::SparseMatrixCSC) == kron(t(a_d), b_d)
+            @test issparse(kron(c, d_di))
+            @test Array(kron(c, d_di)) == kron(c_d, d_d)
+            @test issparse(kron(c_di, d))
+            @test Array(kron(c_di, d)) == kron(c_d, d_d)
+            @test issparse(kron(c_di, y))
+            @test Array(kron(c_di, y)) == kron(c_di, y_d)
+            @test issparse(kron(x, d_di))
+            @test Array(kron(x, d_di)) == kron(x_d, d_di)
+        end
         # vec ⊗ vec
         @test Vector(kron(x, y)) == kron(x_d, y_d)
         @test Vector(kron(x_d, y)) == kron(x_d, y_d)
         @test Vector(kron(x, y_d)) == kron(x_d, y_d)
-        # mat ⊗ vec
-        @test Array(kron(a, y)) == kron(a_d, y_d)
-        @test Array(kron(a_d, y)) == kron(a_d, y_d)
-        @test Array(kron(a, y_d)) == kron(a_d, y_d)
-        # vec ⊗ mat
-        @test Array(kron(x, b)) == kron(x_d, b_d)
-        @test Array(kron(x_d, b)) == kron(x_d, b_d)
-        @test Array(kron(x, b_d)) == kron(x_d, b_d)
+        for t in (identity, adjoint, transpose)
+            # mat ⊗ vec
+            @test Array(kron(t(a), y)::SparseMatrixCSC) == kron(t(a_d), y_d)
+            @test Array(kron(t(a_d), y)) == kron(t(a_d), y_d)
+            @test Array(kron(t(a), y_d)::SparseMatrixCSC) == kron(t(a_d), y_d)
+            # vec ⊗ mat
+            @test Array(kron(x, t(b))::SparseMatrixCSC) == kron(x_d, t(b_d))
+            @test Array(kron(x_d, t(b))::SparseMatrixCSC) == kron(x_d, t(b_d))
+            @test Array(kron(x, t(b_d))) == kron(x_d, t(b_d))
+        end
         # vec ⊗ vec'
         @test issparse(kron(v, y'))
         @test issparse(kron(x, y'))
@@ -786,6 +792,8 @@ end
     end
 
     @testset "empty cases" begin
+        errchecker(str) = occursin("reducing over an empty collection is not allowed", str) ||
+                          occursin("collection slices must be non-empty", str)  # true if `str` contains either message
         @test sum(sparse(Int[])) === 0
         @test prod(sparse(Int[])) === 1
         @test_throws ArgumentError minimum(sparse(Int[]))
@@ -798,9 +806,9 @@ end
             @test isequal(f(spzeros(0, 1), dims=3), f(Matrix{Int}(I, 0, 1), dims=3))
         end
         for f in (minimum, maximum, findmin, findmax)
-            @test_throws ArgumentError f(spzeros(0, 1), dims=1)
+            @test_throws errchecker f(spzeros(0, 1), dims=1)
             @test isequal(f(spzeros(0, 1), dims=2), f(Matrix{Int}(I, 0, 1), dims=2))
-            @test_throws ArgumentError f(spzeros(0, 1), dims=(1, 2))
+            @test_throws errchecker f(spzeros(0, 1), dims=(1, 2))
             @test isequal(f(spzeros(0, 1), dims=3), f(Matrix{Int}(I, 0, 1), dims=3))
         end
     end
@@ -1689,6 +1697,41 @@ end
     S2 = SparseMatrixCSC(D)
     @test Array(D) == Array(S) == Array(S2)
     @test S == S2
+
+    # An issue discovered in #42574 where
+    # SparseMatrixCSC{Tv, Ti}(::Diagonal) ignored Ti
+    D = Diagonal(rand(3))
+    S = SparseMatrixCSC{Float64, Int8}(D)
+    @test S isa SparseMatrixCSC{Float64, Int8}
+end
+
+@testset "Sparse construction with empty/1x1 structured matrices" begin
+    empty = spzeros(0, 0)  # reference value: 0x0 sparse matrix
+
+    @test sparse(Diagonal(zeros(0, 0))) == empty
+    @test sparse(Bidiagonal(zeros(0, 0), :U)) == empty
+    @test sparse(Bidiagonal(zeros(0, 0), :L)) == empty
+    @test sparse(SymTridiagonal(zeros(0, 0))) == empty
+    @test sparse(Tridiagonal(zeros(0, 0))) == empty
+
+    one_by_one = rand(1,1)  # random dense 1x1 matrix wrapped by each structured type below
+    sp_one_by_one = sparse(one_by_one)  # reference value for the 1x1 conversions
+
+    @test sparse(Diagonal(one_by_one)) == sp_one_by_one
+    @test sparse(Bidiagonal(one_by_one, :U)) == sp_one_by_one
+    @test sparse(Bidiagonal(one_by_one, :L)) == sp_one_by_one
+    @test sparse(Tridiagonal(one_by_one)) == sp_one_by_one
+
+    s = SymTridiagonal(rand(1), rand(0))  # 1x1 case: one diagonal entry, empty off-diagonal
+    @test sparse(s) == s
+end
+
+@testset "avoid allocation for zeros in diagonal" begin
+    x = [1, 0, 0, 5, 0]  # diagonal with exactly two nonzero entries
+    d = Diagonal(x)
+    s = sparse(d)
+    @test s == d
+    @test nnz(s) == 2  # only the two nonzero diagonal entries may be stored
 end
 
 @testset "error conditions for reshape, and dropdims" begin
@@ -1739,10 +1782,10 @@ end
 end
 
 @testset "droptol" begin
-    local A = guardseed(1234321) do
+    A = guardseed(1234321) do  # fixed seed: the expected colptr below depends on this sprand draw
         triu(sprand(10, 10, 0.2))
     end
-    @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 1, 1, 1, 3, 3, 5, 6, 8, 11, 12]
+    @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 1, 1, 1, 2, 2, 2, 4, 4, 5, 5]
     @test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[]))
 end
 
@@ -2074,7 +2117,7 @@ end
 end
 
 @testset "sparse matrix opnormestinv" begin
-    Random.seed!(1234)
+    Random.seed!(1235)
     Ac = sprandn(20,20,.5) + im* sprandn(20,20,.5)
     Aci = ceil.(Int64, 100*sprand(20,20,.5)) + im*ceil.(Int64, sprand(20,20,.5))
     Ar = sprandn(20,20,.5)
@@ -3259,4 +3302,26 @@ end
     @test eval(Meta.parse(repr(m))) == m
 end
 
+using Base: swaprows!, swapcols!
+@testset "swaprows!, swapcols!" begin
+    S = sparse(
+        [ 0   0  0  0  0   0
+          0  -1  1  1  0   0
+          0   0  0  1  1   0
+          0   0  1  1  1  -1])
+
+    for (f!, i, j) in
+            ((swaprows!, 1, 2), # Test swapping rows where one row is fully sparse
+             (swaprows!, 2, 3), # Test swapping rows of unequal length
+             (swaprows!, 2, 4), # Test swapping non-adjacent rows
+             (swapcols!, 1, 2), # Test swapping columns where one column is fully sparse
+             (swapcols!, 2, 3), # Test swapping columns of unequal length
+             (swapcols!, 2, 4)) # Test swapping non-adjacent columns
+        Scopy = copy(S)
+        Sdense = Array(S)
+        f!(Scopy, i, j); f!(Sdense, i, j)  # apply the same swap to the sparse and the dense copy
+        @test Scopy == Sdense
+    end
+end
+
 end # module
diff --git a/stdlib/SparseArrays/test/sparsevector.jl b/stdlib/SparseArrays/test/sparsevector.jl
index 2dc9738111a87..f6e7be312b08c 100644
--- a/stdlib/SparseArrays/test/sparsevector.jl
+++ b/stdlib/SparseArrays/test/sparsevector.jl
@@ -816,10 +816,14 @@ end
     @test norm(x, Inf) == 3.5
 end
 
-@testset "maximum, minimum" begin
+@testset "maximum, minimum, findmax, findmin" begin
     let x = spv_x1
         @test maximum(x) == 3.5
+        @test findmax(x) == findmax(Vector(x)) == (3.5, 6)
+        @test findmax(x -> -x, x) == findmax(-x) == (0.75, 5)
         @test minimum(x) == -0.75
+        @test findmin(x) == findmin(Vector(x)) == (-0.75, 5)
+        @test findmin(x -> -x, x) == findmin(-x) == (-3.5, 6)
         @test maximum(abs, x) == 3.5
         @test minimum(abs, x) == 0.0
         @test @inferred(minimum(t -> true, x)) === true
@@ -832,21 +836,51 @@ end
 
     let x = abs.(spv_x1)
         @test maximum(x) == 3.5
+        @test findmax(x) == findmax(Vector(x)) == (3.5, 6)
+        @test findmax(abs2, x) == findmax(abs2.(x)) == findmax(Vector(abs2.(x)))
         @test minimum(x) == 0.0
+        @test findmin(x) == findmin(Vector(x)) == (0.0, 1)
+        @test findmin(abs2, x) == findmin(abs2.(x)) == findmin(Vector(abs2.(x)))
     end
 
     let x = -abs.(spv_x1)
         @test maximum(x) == 0.0
+        @test findmax(x) == findmax(Vector(x)) == (0.0, 1)
         @test minimum(x) == -3.5
+        @test findmin(x) == findmin(Vector(x)) == (-3.5, 6)
     end
 
     let x = SparseVector(3, [1, 2, 3], [-4.5, 2.5, 3.5])
         @test maximum(x) == 3.5
+        @test findmax(x) == findmax(Vector(x)) == (3.5, 3)
         @test minimum(x) == -4.5
+        @test findmin(x) == findmin(Vector(x)) == (-4.5, 1)
         @test maximum(abs, x) == 4.5
         @test minimum(abs, x) == 2.5
     end
 
+    let x = SparseVector(3, [1, 2, 3], [4.5, 0.0, 3.5])
+        @test minimum(x) == 0.0
+        @test findmin(x) == findmin(Vector(x)) == (0.0, 2)
+    end
+
+    let x = SparseVector(3, [1, 2, 3], [-4.5, 0.0, -3.5])
+        @test maximum(x) == 0.0
+        @test findmax(x) == findmax(Vector(x)) == (0.0, 2)
+    end
+
+    for i in (2, 3)
+        let x = SparseVector(4, [1, i, 4], [4.5, 0.0, 3.5])
+            @test minimum(x) == 0.0
+            @test findmin(x) == findmin(Vector(x)) == (0.0, 2)
+        end
+
+        let x = SparseVector(4, [1, i, 4], [-4.5, 0.0, -3.5])
+            @test maximum(x) == 0.0
+            @test findmax(x) == findmax(Vector(x)) == (0.0, 2)
+        end
+    end
+
     let x = spzeros(Float64, 8)
         @test maximum(x) == 0.0
         @test minimum(x) == 0.0
@@ -861,6 +895,8 @@ end
     let x = spzeros(Float64, 0)
         @test_throws ArgumentError minimum(t -> true, x)
         @test_throws ArgumentError maximum(t -> true, x)
+        @test_throws ArgumentError findmin(x)
+        @test_throws ArgumentError findmax(x)
     end
 end
 
diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version
index 84cdf8630e8fc..172f88f1351fc 100644
--- a/stdlib/Statistics.version
+++ b/stdlib/Statistics.version
@@ -1,2 +1,4 @@
 STATISTICS_BRANCH = master
-STATISTICS_SHA1 = 54f9b0d999813aa9fab039f632df222ffd2a96a8
+STATISTICS_SHA1 = 5256d570d0a554780ed80949c79116f47eac6382
+STATISTICS_GIT_URL := https://github.com/JuliaLang/Statistics.jl.git
+STATISTICS_TAR_URL = https://api.github.com/repos/JuliaLang/Statistics.jl/tarball/$1
diff --git a/stdlib/SuiteSparse.version b/stdlib/SuiteSparse.version
index b9f2f184893cd..c4e296a56d94c 100644
--- a/stdlib/SuiteSparse.version
+++ b/stdlib/SuiteSparse.version
@@ -1,2 +1,4 @@
 SUITESPARSE_BRANCH = master
-SUITESPARSE_SHA1 = 76856153eef26c008f13520ffa12288e214fe02c
+SUITESPARSE_SHA1 = e4df734c3e0b54cd2275adbd923b5afaf0f7e3d0
+SUITESPARSE_GIT_URL := https://github.com/JuliaLang/SuiteSparse.jl.git
+SUITESPARSE_TAR_URL = https://api.github.com/repos/JuliaLang/SuiteSparse.jl/tarball/$1
diff --git a/stdlib/Tar.version b/stdlib/Tar.version
index b129d22665e12..b9d8f06dc4a19 100644
--- a/stdlib/Tar.version
+++ b/stdlib/Tar.version
@@ -1,2 +1,4 @@
 TAR_BRANCH = master
-TAR_SHA1 = ffb3dd5e697eb6690fce9cceb67edb82134f8337
+TAR_SHA1 = 6a946029685639b69ce5a7cc4c4a6c0e6c6b2697
+TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git
+TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
diff --git a/stdlib/Test/Project.toml b/stdlib/Test/Project.toml
index 9602ce461617b..ee1ae15fd7154 100644
--- a/stdlib/Test/Project.toml
+++ b/stdlib/Test/Project.toml
@@ -6,3 +6,9 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[extras]
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[targets]
+test = ["Distributed"]
diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md
index 98fdf45706bf7..77989abbe19e1 100644
--- a/stdlib/Test/docs/src/index.md
+++ b/stdlib/Test/docs/src/index.md
@@ -19,7 +19,8 @@ Base.runtests
 The `Test` module provides simple *unit testing* functionality. Unit testing is a way to
 see if your code is correct by checking that the results are what you expect. It can be helpful
 to ensure your code still works after you make changes, and can be used when developing as a way
-of specifying the behaviors your code should have when complete.
+of specifying the behaviors your code should have when complete. You may also want to look at the
+documentation for [adding tests to your Julia Package](https://pkgdocs.julialang.org/dev/creating-packages/#Adding-tests-to-the-package).
 
 Simple unit testing can be performed with the `@test` and `@test_throws` macros:
 
@@ -112,19 +113,19 @@ Test.TestSetException
 
 We can put our tests for the `foo(x)` function in a test set:
 
-```jldoctest testfoo
+```jldoctest testfoo; filter = r"[0-9\.]+s"
 julia> @testset "Foo Tests" begin
            @test foo("a")   == 1
            @test foo("ab")  == 4
            @test foo("abc") == 9
        end;
-Test Summary: | Pass  Total
-Foo Tests     |    3      3
+Test Summary: | Pass  Total  Time
+Foo Tests     |    3      3  0.0s
 ```
 
 Test sets can also be nested:
 
-```jldoctest testfoo
+```jldoctest testfoo; filter = r"[0-9\.]+s"
 julia> @testset "Foo Tests" begin
            @testset "Animals" begin
                @test foo("cat") == 9
@@ -135,14 +136,28 @@ julia> @testset "Foo Tests" begin
                @test foo(fill(1.0, i)) == i^2
            end
        end;
-Test Summary: | Pass  Total
-Foo Tests     |    8      8
+Test Summary: | Pass  Total  Time
+Foo Tests     |    8      8  0.0s
+```
+
+Test sets can also wrap a function call:
+
+```jldoctest testfoo; filter = r"[0-9\.]+s"
+julia> f(x) = @test isone(x)
+f (generic function with 1 method)
+
+julia> @testset f(1);
+Test Summary: | Pass  Total  Time
+f             |    1      1  0.0s
 ```
 
+This can be used to allow for factorization of test sets, making it easier to run individual
+test sets by running the associated functions instead.
+Note that in the case of functions, the test set will be given the name of the called function.
 In the event that a nested test set has no failures, as happened here, it will be hidden in the
 summary, unless the `verbose=true` option is passed:
 
-```jldoctest testfoo
+```jldoctest testfoo; filter = r"[0-9\.]+s"
 julia> @testset verbose = true "Foo Tests" begin
            @testset "Animals" begin
                @test foo("cat") == 9
@@ -153,17 +168,17 @@ julia> @testset verbose = true "Foo Tests" begin
                @test foo(fill(1.0, i)) == i^2
            end
        end;
-Test Summary: | Pass  Total
-Foo Tests     |    8      8
-  Animals     |    2      2
-  Arrays 1    |    2      2
-  Arrays 2    |    2      2
-  Arrays 3    |    2      2
+Test Summary: | Pass  Total  Time
+Foo Tests     |    8      8  0.0s
+  Animals     |    2      2  0.0s
+  Arrays 1    |    2      2  0.0s
+  Arrays 2    |    2      2  0.0s
+  Arrays 3    |    2      2  0.0s
 ```
 
 If we do have a test failure, only the details for the failed test sets will be shown:
 
-```julia-repl
+```julia-repl; filter = r"[0-9\.]+s"
 julia> @testset "Foo Tests" begin
            @testset "Animals" begin
                @testset "Felines" begin
@@ -183,10 +198,10 @@ Arrays: Test Failed
   Expression: foo(fill(1.0, 4)) == 15
    Evaluated: 16 == 15
 [...]
-Test Summary: | Pass  Fail  Total
-Foo Tests     |    3     1      4
-  Animals     |    2            2
-  Arrays      |    1     1      2
+Test Summary: | Pass  Fail  Total  Time
+Foo Tests     |    3     1      4  0.0s
+  Animals     |    2            2  0.0s
+  Arrays      |    1     1      2  0.0s
 ERROR: Some tests did not pass: 3 passed, 1 failed, 0 errored, 0 broken.
 ```
 
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index 461c48026944e..d5cfeff6b9ea9 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -86,8 +86,9 @@ struct Pass <: Result
     data
     value
     source::Union{Nothing,LineNumberNode}
-    function Pass(test_type::Symbol, orig_expr, data, thrown, source=nothing)
-        return new(test_type, orig_expr, data, thrown isa String ? "String" : thrown, source)
+    message_only::Bool
+    function Pass(test_type::Symbol, orig_expr, data, thrown, source=nothing, message_only=false)
+        return new(test_type, orig_expr, data, thrown, source, message_only)
     end
 end
 
@@ -98,7 +99,11 @@ function Base.show(io::IO, t::Pass)
     end
     if t.test_type === :test_throws
         # The correct type of exception was thrown
-        print(io, "\n      Thrown: ", t.value isa String ? t.value : typeof(t.value))
+        if t.message_only
+            print(io, "\n     Message: ", t.value)
+        else
+            print(io, "\n      Thrown: ", typeof(t.value))
+        end
     elseif t.test_type === :test && t.data !== nothing
         # The test was an expression, so display the term-by-term
         # evaluated version as well
@@ -118,12 +123,14 @@ struct Fail <: Result
     data::Union{Nothing, String}
     value::String
     source::LineNumberNode
-    function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode)
+    message_only::Bool
+    function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode, message_only::Bool=false)
         return new(test_type,
             string(orig_expr),
             data === nothing ? nothing : string(data),
             string(isa(data, Type) ? typeof(value) : value),
-            source)
+            source,
+            message_only)
     end
 end
 
@@ -132,18 +139,24 @@ function Base.show(io::IO, t::Fail)
     print(io, " at ")
     printstyled(io, something(t.source.file, :none), ":", t.source.line, "\n"; bold=true, color=:default)
     print(io, "  Expression: ", t.orig_expr)
+    value, data = t.value, t.data
     if t.test_type === :test_throws_wrong
         # An exception was thrown, but it was of the wrong type
-        print(io, "\n    Expected: ", t.data)
-        print(io, "\n      Thrown: ", t.value)
+        if t.message_only
+            print(io, "\n    Expected: ", data)
+            print(io, "\n     Message: ", value)
+        else
+            print(io, "\n    Expected: ", data)
+            print(io, "\n      Thrown: ", value)
+        end
     elseif t.test_type === :test_throws_nothing
         # An exception was expected, but no exception was thrown
-        print(io, "\n    Expected: ", t.data)
+        print(io, "\n    Expected: ", data)
         print(io, "\n  No exception thrown")
-    elseif t.test_type === :test && t.data !== nothing
+    elseif t.test_type === :test && data !== nothing
         # The test was an expression, so display the term-by-term
         # evaluated version as well
-        print(io, "\n   Evaluated: ", t.data)
+        print(io, "\n   Evaluated: ", data)
     end
 end
 
@@ -167,13 +180,32 @@ struct Error <: Result
             bt = scrub_exc_stack(bt)
         end
         if test_type === :test_error || test_type === :nontest_error
-            bt_str = sprint(Base.show_exception_stack, bt; context=stdout)
+            bt_str = try # try the latest world for this, since we might have eval'd new code for show
+                    Base.invokelatest(sprint, Base.show_exception_stack, bt; context=stdout)
+                catch ex
+                    "#=ERROR showing exception stack=# " *
+                        try
+                            sprint(Base.showerror, ex, catch_backtrace(); context=stdout)
+                        catch
+                            "of type " * string(typeof(ex))
+                        end
+                end
         else
             bt_str = ""
         end
+        value = try # try the latest world for this, since we might have eval'd new code for show
+                Base.invokelatest(sprint, show, value, context = :limit => true)
+            catch ex
+                "#=ERROR showing error of type " * string(typeof(value)) * "=# " *
+                    try
+                        sprint(Base.showerror, ex, catch_backtrace(); context=stdout)
+                    catch
+                        "of type " * string(typeof(ex))
+                    end
+            end
         return new(test_type,
             string(orig_expr),
-            sprint(show, value, context = :limit => true),
+            value,
             bt_str,
             source)
     end
@@ -238,6 +270,7 @@ function Serialization.serialize(s::Serialization.AbstractSerializer, t::Pass)
     Serialization.serialize(s, t.data === nothing ? nothing : string(t.data))
     Serialization.serialize(s, string(t.value))
     Serialization.serialize(s, t.source === nothing ? nothing : t.source)
+    Serialization.serialize(s, t.message_only)
     nothing
 end
 
@@ -347,9 +380,10 @@ end
     @test ex broken=true
     @test ex skip=true
 
-Tests that the expression `ex` evaluates to `true`.
-Returns a `Pass` `Result` if it does, a `Fail` `Result` if it is
+Test that the expression `ex` evaluates to `true`.
+If executed inside a `@testset`, return a `Pass` `Result` if it does, a `Fail` `Result` if it is
 `false`, and an `Error` `Result` if it could not be evaluated.
+If executed outside a `@testset`, throw an exception instead of returning `Fail` or `Error`.
 
 # Examples
 ```jldoctest
@@ -656,6 +690,8 @@ end
 
 Tests that the expression `expr` throws `exception`.
 The exception may specify either a type,
+a string, regular expression, or list of strings occurring in the displayed error message,
+a matching function,
 or a value (which will be tested for equality by comparing fields).
 Note that `@test_throws` does not support a trailing keyword form.
 
@@ -670,7 +706,18 @@ julia> @test_throws DimensionMismatch [1, 2, 3] + [1, 2]
 Test Passed
   Expression: [1, 2, 3] + [1, 2]
       Thrown: DimensionMismatch
+
+julia> @test_throws "Try sqrt(Complex" sqrt(-1)
+Test Passed
+  Expression: sqrt(-1)
+     Message: "DomainError with -1.0:\\nsqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x))."
 ```
+
+In the final example, instead of matching a single string it could alternatively have been performed with:
+
+- `["Try", "Complex"]` (a list of strings)
+- `r"Try sqrt\\([Cc]omplex"` (a regular expression)
+- `str -> occursin("complex", str)` (a matching function)
 """
 macro test_throws(extype, ex)
     orig_ex = Expr(:inert, ex)
@@ -696,6 +743,7 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
     if isa(result, Threw)
         # Check that the right type of exception was thrown
         success = false
+        message_only = false
         exc = result.exception
         # NB: Throwing LoadError from macroexpands is deprecated, but in order to limit
         # the breakage in package tests we add extra logic here.
@@ -706,12 +754,12 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
         if isa(extype, Type)
             success =
                 if from_macroexpand && extype == LoadError && exc isa Exception
-                    Base.depwarn("macroexpand no longer throw a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
+                    Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
                     true
                 else
                     isa(exc, extype)
                 end
-        else
+        elseif isa(extype, Exception) || !isa(exc, Exception)
             if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc)
                 extype = extype.error # deprecated
             end
@@ -724,11 +772,21 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
                     end
                 end
             end
+        else
+            message_only = true
+            exc = sprint(showerror, exc)
+            success = contains_warn(exc, extype)
+            exc = repr(exc)
+            if isa(extype, AbstractString)
+                extype = repr(extype)
+            elseif isa(extype, Function)
+                extype = "< match function >"
+            end
         end
         if success
-            testres = Pass(:test_throws, orig_expr, extype, exc, result.source)
+            testres = Pass(:test_throws, orig_expr, extype, exc, result.source, message_only)
         else
-            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source)
+            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source, message_only)
         end
     else
         testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, result.source)
@@ -920,8 +978,11 @@ mutable struct DefaultTestSet <: AbstractTestSet
     n_passed::Int
     anynonpass::Bool
     verbose::Bool
+    showtiming::Bool
+    time_start::Float64
+    time_end::Union{Float64,Nothing}
 end
-DefaultTestSet(desc::AbstractString; verbose::Bool = false) = DefaultTestSet(String(desc)::String, [], 0, false, verbose)
+DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true) = DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing)
 
 # For a broken result, simply store the result
 record(ts::DefaultTestSet, t::Broken) = (push!(ts.results, t); t)
@@ -932,7 +993,7 @@ record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
 # but do not terminate. Print a backtrace.
 function record(ts::DefaultTestSet, t::Union{Fail, Error})
     if TESTSET_PRINT_ENABLE[]
-        printstyled(ts.description, ": ", color=:white)
+        print(ts.description, ": ")
         # don't print for interrupted tests
         if !(t isa Error) || t.test_type !== :test_interrupted
             print(t)
@@ -968,7 +1029,7 @@ end
 function print_test_results(ts::DefaultTestSet, depth_pad=0)
     # Calculate the overall number for each type so each of
     # the test result types are aligned
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = get_test_counts(ts)
+    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
     total_pass   = passes + c_passes
     total_fail   = fails  + c_fails
     total_error  = errors + c_errors
@@ -986,12 +1047,13 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0)
     error_width  = dig_error  > 0 ? max(length("Error"),  dig_error)  : 0
     broken_width = dig_broken > 0 ? max(length("Broken"), dig_broken) : 0
     total_width  = dig_total  > 0 ? max(length("Total"),  dig_total)  : 0
+    duration_width = max(length("Time"), length(duration))
     # Calculate the alignment of the test result counts by
     # recursively walking the tree of test sets
     align = max(get_alignment(ts, 0), length("Test Summary:"))
     # Print the outer test set header once
     pad = total == 0 ? "" : " "
-    printstyled(rpad("Test Summary:", align, " "), " |", pad; bold=true, color=:white)
+    printstyled(rpad("Test Summary:", align, " "), " |", pad; bold=true)
     if pass_width > 0
         printstyled(lpad("Pass", pass_width, " "), "  "; bold=true, color=:green)
     end
@@ -1005,11 +1067,14 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0)
         printstyled(lpad("Broken", broken_width, " "), "  "; bold=true, color=Base.warn_color())
     end
     if total_width > 0
-        printstyled(lpad("Total", total_width, " "); bold=true, color=Base.info_color())
+        printstyled(lpad("Total", total_width, " "), "  "; bold=true, color=Base.info_color())
+    end
+    if ts.showtiming
+        printstyled(lpad("Time", duration_width, " "); bold=true)
     end
     println()
     # Recursively print a summary at every level
-    print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width)
+    print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming)
 end
 
 
@@ -1018,6 +1083,7 @@ const TESTSET_PRINT_ENABLE = Ref(true)
 # Called at the end of a @testset, behaviour depends on whether
 # this is a child of another testset, or the "root" testset
 function finish(ts::DefaultTestSet)
+    ts.time_end = time()
     # If we are a nested test set, do not print a full summary
     # now - let the parent test set do the printing
     if get_testset_depth() != 0
@@ -1026,7 +1092,7 @@ function finish(ts::DefaultTestSet)
         record(parent_ts, ts)
         return ts
     end
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = get_test_counts(ts)
+    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
     total_pass   = passes + c_passes
     total_fail   = fails  + c_fails
     total_error  = errors + c_errors
@@ -1056,8 +1122,8 @@ end
 function get_alignment(ts::DefaultTestSet, depth::Int)
     # The minimum width at this depth is
     ts_width = 2*depth + length(ts.description)
-    # If all passing, no need to look at children
-    !ts.anynonpass && return ts_width
+    # If not verbose and all passing, no need to look at children
+    !ts.verbose && !ts.anynonpass && return ts_width
     # Return the maximum of this width and the minimum width
     # for all children (if they exist)
     isempty(ts.results) && return ts_width
@@ -1090,7 +1156,7 @@ function get_test_counts(ts::DefaultTestSet)
         isa(t, Error)  && (errors += 1)
         isa(t, Broken) && (broken += 1)
         if isa(t, DefaultTestSet)
-            np, nf, ne, nb, ncp, ncf, nce , ncb = get_test_counts(t)
+            np, nf, ne, nb, ncp, ncf, nce , ncb, duration = get_test_counts(t)
             c_passes += np + ncp
             c_fails  += nf + ncf
             c_errors += ne + nce
@@ -1098,16 +1164,28 @@ function get_test_counts(ts::DefaultTestSet)
         end
     end
     ts.anynonpass = (fails + errors + c_fails + c_errors > 0)
-    return passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken
+    duration = if isnothing(ts.time_end)
+        ""
+    else
+        dur_s = ts.time_end - ts.time_start
+        if dur_s < 60
+            string(round(dur_s, digits = 1), "s")
+        else
+            m, s = divrem(dur_s, 60)
+            s = lpad(string(round(s, digits = 1)), 4, "0")
+            string(round(Int, m), "m", s, "s")
+        end
+    end
+    return passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration
 end
 
 # Recursive function that prints out the results at each level of
 # the tree of test sets
 function print_counts(ts::DefaultTestSet, depth, align,
-                      pass_width, fail_width, error_width, broken_width, total_width)
+                      pass_width, fail_width, error_width, broken_width, total_width, duration_width, showtiming)
     # Count results by each type at this level, and recursively
     # through any child test sets
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = get_test_counts(ts)
+    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
     subtotal = passes + fails + errors + broken + c_passes + c_fails + c_errors + c_broken
     # Print test set header, with an alignment that ensures all
     # the test results appear above each other
@@ -1146,9 +1224,13 @@ function print_counts(ts::DefaultTestSet, depth, align,
     end
 
     if np == 0 && nf == 0 && ne == 0 && nb == 0
-        printstyled("No tests", color=Base.info_color())
+        printstyled(lpad("None", total_width, " "), "  ", color=Base.info_color())
     else
-        printstyled(lpad(string(subtotal), total_width, " "), color=Base.info_color())
+        printstyled(lpad(string(subtotal), total_width, " "), "  ", color=Base.info_color())
+    end
+
+    if showtiming
+        printstyled(lpad(string(duration), duration_width, " "))
     end
     println()
 
@@ -1158,7 +1240,7 @@ function print_counts(ts::DefaultTestSet, depth, align,
         for t in ts.results
             if isa(t, DefaultTestSet)
                 print_counts(t, depth + 1, align,
-                    pass_width, fail_width, error_width, broken_width, total_width)
+                    pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming)
             end
         end
     end
@@ -1186,6 +1268,7 @@ end
     @testset [CustomTestSet] [option=val  ...] ["description"] begin ... end
     @testset [CustomTestSet] [option=val  ...] ["description \$v"] for v in (...) ... end
     @testset [CustomTestSet] [option=val  ...] ["description \$v, \$w"] for v in (...), w in (...) ... end
+    @testset [CustomTestSet] [option=val  ...] ["description"] foo()
 
 Starts a new test set, or multiple test sets if a `for` loop is provided.
 
@@ -1197,11 +1280,15 @@ along with a summary of the test results.
 Any custom testset type (subtype of `AbstractTestSet`) can be given and it will
 also be used for any nested `@testset` invocations. The given options are only
 applied to the test set where they are given. The default test set type
-accepts the `verbose` boolean option: if `true`, the result summary of the
-nested testsets is shown even when they all pass (the default is `false`).
+accepts two boolean options:
+- `verbose`: if `true`, the result summary of the nested testsets is shown even
+  when they all pass (the default is `false`).
+- `showtiming`: if `true`, the duration of each displayed testset is shown
+  (the default is `true`).
 
 The description string accepts interpolation from the loop indices.
 If no description is provided, one is constructed based on the variables.
+If a function call is provided, its name will be used. Explicit description strings override this behavior.
 
 By default the `@testset` macro will return the testset object itself, though
 this behavior can be customized in other testset types. If a `for` loop is used
@@ -1218,7 +1305,7 @@ re-arrangements of `@testset`s regardless of their side-effect on the
 global RNG state.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"trigonometric identities |    4      4  [0-9\\.]+s"
 julia> @testset "trigonometric identities" begin
            θ = 2/3*π
            @test sin(-θ) ≈ -sin(θ)
@@ -1226,8 +1313,8 @@ julia> @testset "trigonometric identities" begin
            @test sin(2θ) ≈ 2*sin(θ)*cos(θ)
            @test cos(2θ) ≈ cos(θ)^2 - sin(θ)^2
        end;
-Test Summary:            | Pass  Total
-trigonometric identities |    4      4
+Test Summary:            | Pass  Total  Time
+trigonometric identities |    4      4  0.2s
 ```
 """
 macro testset(args...)
@@ -1236,24 +1323,29 @@ macro testset(args...)
     tests = args[end]
 
     # Determine if a single block or for-loop style
-    if !isa(tests,Expr) || (tests.head !== :for && tests.head !== :block)
-        error("Expected begin/end block or for loop as argument to @testset")
+    if !isa(tests,Expr) || (tests.head !== :for && tests.head !== :block && tests.head !== :call)
+        # only `f(args...)` calls, `begin`/`end` blocks and `for` loops can be wrapped in a testset
+        error("Expected function call, begin/end block or for loop as argument to @testset")
     end
 
     if tests.head === :for
         return testset_forloop(args, tests, __source__)
     else
-        return testset_beginend(args, tests, __source__)
+        return testset_beginend_call(args, tests, __source__)
     end
 end
 
 """
-Generate the code for a `@testset` with a `begin`/`end` argument
+Generate the code for a `@testset` with a function call or `begin`/`end` argument
 """
-function testset_beginend(args, tests, source)
+function testset_beginend_call(args, tests, source)
     desc, testsettype, options = parse_testset_args(args[1:end-1])
     if desc === nothing
-        desc = "test set"
+        if tests.head === :call
+            desc = string(tests.args[1]) # use the function name as test name
+        else
+            desc = "test set"
+        end
     end
     # If we're at the top level we'll default to DefaultTestSet. Otherwise
     # default to the type of the parent testset
@@ -1275,6 +1367,7 @@ function testset_beginend(args, tests, source)
         # by wrapping the body in a function
         local RNG = default_rng()
         local oldrng = copy(RNG)
+        local oldseed = Random.GLOBAL_SEED
         try
             # RNG is re-seeded with its own seed to ease reproduce a failed test
             Random.seed!(Random.GLOBAL_SEED)
@@ -1288,6 +1381,7 @@ function testset_beginend(args, tests, source)
             record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
         finally
             copy!(RNG, oldrng)
+            Random.set_global_seed!(oldseed)
             pop_testset()
             ret = finish(ts)
         end
@@ -1368,6 +1462,7 @@ function testset_forloop(args, testloop, source)
         local ts
         local RNG = default_rng()
         local oldrng = copy(RNG)
+        local oldseed = Random.GLOBAL_SEED
         Random.seed!(Random.GLOBAL_SEED)
         local tmprng = copy(RNG)
         try
@@ -1381,6 +1476,7 @@ function testset_forloop(args, testloop, source)
                 push!(arr, finish(ts))
             end
             copy!(RNG, oldrng)
+            Random.set_global_seed!(oldseed)
         end
         arr
     end
@@ -1541,30 +1637,32 @@ function _inferred(ex, mod, allow = :(Union{}))
         ex = Expr(:call, GlobalRef(Test, :_materialize_broadcasted),
             farg, ex.args[2:end]...)
     end
-    Base.remove_linenums!(quote
-        let
-            allow = $(esc(allow))
-            allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
-            $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
-                # Has keywords
-                args = gensym()
-                kwargs = gensym()
-                quote
-                    $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
-                    inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
-                end
-            else
-                # No keywords
-                quote
-                    args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
-                    result = $(esc(ex.args[1]))(args...)
-                    inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
-                end
-            end)
-            @assert length(inftypes) == 1
-            rettype = result isa Type ? Type{result} : typeof(result)
-            rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
-            result
+    Base.remove_linenums!(let ex = ex;
+        quote
+            let
+                allow = $(esc(allow))
+                allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
+                $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
+                    # Has keywords
+                    args = gensym()
+                    kwargs = gensym()
+                    quote
+                        $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
+                        inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
+                    end
+                else
+                    # No keywords
+                    quote
+                        args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
+                        result = $(esc(ex.args[1]))(args...)
+                        inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
+                    end
+                end)
+                @assert length(inftypes) == 1
+                rettype = result isa Type ? Type{result} : typeof(result)
+                rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
+                result
+            end
         end
     end)
 end
diff --git a/stdlib/Test/test/nothrow_testset.jl b/stdlib/Test/test/nothrow_testset.jl
index 73811b9702346..5b5c775960a9a 100644
--- a/stdlib/Test/test/nothrow_testset.jl
+++ b/stdlib/Test/test/nothrow_testset.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 mutable struct NoThrowTestSet <: Test.AbstractTestSet
     results::Vector
     NoThrowTestSet(desc) = new([])
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index 3c56ad8feba51..b2125f07e02cf 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -96,6 +96,16 @@ end
                    "Thrown: ErrorException")
     @test endswith(sprint(show, @test_throws ErrorException("test") error("test")),
                    "Thrown: ErrorException")
+    @test endswith(sprint(show, @test_throws "a test" error("a test")),
+                   "Message: \"a test\"")
+    @test occursin("Message: \"DomainError",
+                   sprint(show, @test_throws r"sqrt\([Cc]omplex" sqrt(-1)))
+    @test endswith(sprint(show, @test_throws str->occursin("a t", str) error("a test")),
+                   "Message: \"a test\"")
+    @test endswith(sprint(show, @test_throws ["BoundsError", "access", "1-element", "at index [2]"] [1][2]),
+                   "Message: \"BoundsError: attempt to access 1-element Vector{$Int} at index [2]\"")
+    @test_throws "\"" throw("\"")
+    @test_throws Returns(false) throw(Returns(false))
 end
 # Test printing of Fail results
 include("nothrow_testset.jl")
@@ -148,6 +158,11 @@ let fails = @testset NoThrowTestSet begin
         @test contains(str1, str2)
         # 22 - Fail - Type Comparison
         @test typeof(1) <: typeof("julia")
+        # 23 - 26 - Fail - wrong message
+        @test_throws "A test" error("a test")
+        @test_throws r"sqrt\([Cc]omplx" sqrt(-1)
+        @test_throws str->occursin("a T", str) error("a test")
+        @test_throws ["BoundsError", "acess", "1-element", "at index [2]"] [1][2]
     end
     for fail in fails
         @test fail isa Test.Fail
@@ -262,13 +277,40 @@ let fails = @testset NoThrowTestSet begin
         @test occursin("Expression: typeof(1) <: typeof(\"julia\")", str)
         @test occursin("Evaluated: $(typeof(1)) <: $(typeof("julia"))", str)
     end
+
+    let str = sprint(show, fails[23])
+        @test occursin("Expected: \"A test\"", str)
+        @test occursin("Message: \"a test\"", str)
+    end
+
+    let str = sprint(show, fails[24])
+        @test occursin("Expected: r\"sqrt\\([Cc]omplx\"", str)
+        @test occursin(r"Message: .*Try sqrt\(Complex", str)
+    end
+
+    let str = sprint(show, fails[25])
+        @test occursin("Expected: < match function >", str)
+        @test occursin("Message: \"a test\"", str)
+    end
+
+    let str = sprint(show, fails[26])
+        @test occursin("Expected: [\"BoundsError\", \"acess\", \"1-element\", \"at index [2]\"]", str)
+        @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str)
+    end
+
 end
 
+struct BadError <: Exception end
+Base.show(io::IO, ::BadError) = throw("I am a bad error")
 let errors = @testset NoThrowTestSet begin
         # 1 - Error - unexpected pass
         @test_broken true
         # 2 - Error - converting a call into a comparison
         @test ==(1, 1:2...)
+        # 3 - Error - objects with broken show
+        @test throw(BadError())
+        @test BadError()
+        throw(BadError())
     end
 
     for err in errors
@@ -284,6 +326,18 @@ let errors = @testset NoThrowTestSet begin
         @test occursin("Expression: ==(1, 1:2...)", str)
         @test occursin("MethodError: no method matching ==(::$Int, ::$Int, ::$Int)", str)
     end
+
+    let str = sprint(show, errors[3])
+        @test occursin("Expression: throw(BadError())\n  #=ERROR showing exception stack=# \"I am a bad error\"\n  Stacktrace:\n", str)
+    end
+
+    let str = sprint(show, errors[4])
+        @test occursin("Expression: BadError()\n       Value: #=ERROR showing error of type $BadError=# \"I am a bad error\"\nStacktrace:\n", str)
+    end
+
+    let str = sprint(show, errors[5])
+        @test occursin("Got exception outside of a @test\n  #=ERROR showing exception stack=# \"I am a bad error\"\n  Stacktrace:\n", str)
+    end
 end
 
 let retval_tests = @testset NoThrowTestSet begin
@@ -706,13 +760,13 @@ let msg = read(pipeline(ignorestatus(`$(Base.julia_cmd()) --startup-file=no --co
                 @test foo(fill(1., 4)) == 15
             end
         end'`), stderr=devnull), String)
-    @test occursin("""
-        Test Summary: | Pass  Fail  Total
-        Foo Tests     |    2     2      4
-          Animals     |    1     1      2
-            Felines   |    1            1
-            Canines   |          1      1
-          Arrays      |    1     1      2
+    @test occursin(r"""
+        Test Summary: \| Pass  Fail  Total +Time
+        Foo Tests     \|    2     2      4  \s*\d*\.\ds
+          Animals     \|    1     1      2  \s*\d*\.\ds
+            Felines   \|    1            1  \s*\d*\.\ds
+            Canines   \|          1      1  \s*\d*\.\ds
+          Arrays      \|    1     1      2  \s*\d*\.\ds
         """, msg)
 end
 
@@ -869,6 +923,29 @@ end
     Random.seed!(seed)
     @test a == rand()
     @test b == rand()
+
+    # Even when seed!() is called within a testset A, subsequent testsets
+    # should start with the same "global RNG state" as what A started with,
+    # such that the test `refvalue == rand(Int)` below succeeds.
+    # Currently, this means that Random.GLOBAL_SEED has to be restored,
+    # in addition to the state of Random.default_rng().
+    GLOBAL_SEED_orig = Random.GLOBAL_SEED
+    local refvalue
+    @testset "GLOBAL_SEED is also preserved (setup)" begin
+        @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+        refvalue = rand(Int)
+        Random.seed!()
+        @test GLOBAL_SEED_orig != Random.GLOBAL_SEED
+    end
+    @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+    @testset "GLOBAL_SEED is also preserved (forloop)" for _=1:3
+        @test refvalue == rand(Int)
+        Random.seed!()
+    end
+    @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+    @testset "GLOBAL_SEED is also preserved (beginend)" begin
+        @test refvalue == rand(Int)
+    end
 end
 
 @testset "InterruptExceptions #21043" begin
@@ -1001,18 +1078,18 @@ let ex = :(something_complex + [1, 2, 3])
 end
 
 @testset "verbose option" begin
-    expected = """
-    Test Summary: | Pass  Total
-    Parent        |    9      9
-      Child 1     |    3      3
-        Child 1.1 |    1      1
-        Child 1.2 |    1      1
-        Child 1.3 |    1      1
-      Child 2     |    3      3
-      Child 3     |    3      3
-        Child 3.1 |    1      1
-        Child 3.2 |    1      1
-        Child 3.3 |    1      1
+    expected = r"""
+    Test Summary:             \| Pass  Total +Time
+    Parent                    \|    9      9  \s*\d*\.\ds
+      Child 1                 \|    3      3  \s*\d*\.\ds
+        Child 1.1 \(long name\) \|    1      1  \s*\d*\.\ds
+        Child 1.2             \|    1      1  \s*\d*\.\ds
+        Child 1.3             \|    1      1  \s*\d*\.\ds
+      Child 2                 \|    3      3  \s*\d*\.\ds
+      Child 3                 \|    3      3  \s*\d*\.\ds
+        Child 3.1             \|    1      1  \s*\d*\.\ds
+        Child 3.2             \|    1      1  \s*\d*\.\ds
+        Child 3.3             \|    1      1  \s*\d*\.\ds
     """
 
     mktemp() do f, _
@@ -1022,7 +1099,7 @@ end
 
         @testset "Parent" verbose = true begin
             @testset "Child 1" verbose = true begin
-                @testset "Child 1.1" begin
+                @testset "Child 1.1 (long name)" begin
                     @test 1 == 1
                 end
 
@@ -1202,4 +1279,27 @@ Test.finish(ts::PassInformationTestSet) = ts
     @test ts.results[2].data == ErrorException
     @test ts.results[2].value == ErrorException("Msg")
     @test ts.results[2].source == LineNumberNode(test_throws_line_number, @__FILE__)
-end
\ No newline at end of file
+end
+
+let
+    f(x) = @test isone(x)
+    function h(x)
+        @testset f(x)
+        @testset "success" begin @test true end
+        @testset for i in 1:3
+            @test !iszero(i)
+        end
+    end
+    tret = @testset h(1)
+    tdesc = @testset "description" h(1)
+    @testset "Function calls" begin
+        @test tret.description == "h"
+        @test tdesc.description == "description"
+        @test length(tret.results) == 5
+        @test tret.results[1].description == "f"
+        @test tret.results[2].description == "success"
+        for i in 1:3
+            @test tret.results[2+i].description == "i = $i"
+        end
+    end
+end
diff --git a/stdlib/Unicode/docs/src/index.md b/stdlib/Unicode/docs/src/index.md
index aba9d80c3e8b5..2771c8a9f01cc 100644
--- a/stdlib/Unicode/docs/src/index.md
+++ b/stdlib/Unicode/docs/src/index.md
@@ -1,7 +1,9 @@
 # Unicode
 
 ```@docs
+Unicode.julia_chartransform
 Unicode.isassigned
+Unicode.isequal_normalized
 Unicode.normalize
 Unicode.graphemes
 ```
diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl
index 7ac3a9f9b1d4b..e31f7ee1e27f2 100644
--- a/stdlib/Unicode/src/Unicode.jl
+++ b/stdlib/Unicode/src/Unicode.jl
@@ -2,7 +2,51 @@
 
 module Unicode
 
-export graphemes
+export graphemes, isequal_normalized
+
+"""
+    Unicode.julia_chartransform(c::Union{Char,Integer})
+
+Map the Unicode character (`Char`) or codepoint (`Integer`) `c` to the corresponding
+"equivalent" character or codepoint, respectively, according to the custom equivalence
+used within the Julia parser (in addition to NFC normalization).
+
+For example, `'µ'` (U+00B5 micro) is treated as equivalent to `'μ'` (U+03BC mu) by
+Julia's parser, so `julia_chartransform` performs this transformation while leaving
+other characters unchanged:
+```jldoctest
+julia> Unicode.julia_chartransform('\u00B5')
+'μ': Unicode U+03BC (category Ll: Letter, lowercase)
+
+julia> Unicode.julia_chartransform('x')
+'x': ASCII/Unicode U+0078 (category Ll: Letter, lowercase)
+```
+
+`julia_chartransform` is mainly useful for passing to the [`Unicode.normalize`](@ref)
+function in order to mimic the normalization used by the Julia parser:
+```jldoctest
+julia> s = "\u00B5o\u0308"
+"µö"
+
+julia> s2 = Unicode.normalize(s, compose=true, stable=true, chartransform=Unicode.julia_chartransform)
+"μö"
+
+julia> collect(s2)
+2-element Vector{Char}:
+ 'μ': Unicode U+03BC (category Ll: Letter, lowercase)
+ 'ö': Unicode U+00F6 (category Ll: Letter, lowercase)
+
+julia> s2 == string(Meta.parse(s))
+true
+```
+
+!!! compat "Julia 1.8"
+    This function was introduced in Julia 1.8.
+"""
+function julia_chartransform end
+julia_chartransform(codepoint::UInt32) = get(Base.Unicode._julia_charmap, codepoint, codepoint)
+julia_chartransform(codepoint::Integer) = julia_chartransform(UInt32(codepoint))
+julia_chartransform(char::Char) = Char(julia_chartransform(UInt32(char)))
 
 """
     Unicode.normalize(s::AbstractString; keywords...)
@@ -42,6 +86,13 @@ options (which all default to `false` except for `compose`) are specified:
 * `rejectna=true`: throw an error if unassigned code points are found
 * `stable=true`: enforce Unicode versioning stability (never introduce characters missing from earlier Unicode versions)
 
+You can also use the `chartransform` keyword (which defaults to `identity`) to pass an arbitrary
+*function* mapping `Integer` codepoints to codepoints, which is called on each
+character in `s` as it is processed, in order to perform arbitrary additional normalizations.
+For example, by passing `chartransform=Unicode.julia_chartransform`, you can apply a few Julia-specific
+character normalizations that are performed by Julia when parsing identifiers (in addition to
+NFC normalization: `compose=true, stable=true`).
+
 For example, NFKC corresponds to the options `compose=true, compat=true, stable=true`.
 
 # Examples
@@ -58,6 +109,9 @@ julia> Unicode.normalize("JuLiA", casefold=true)
 julia> Unicode.normalize("JúLiA", stripmark=true)
 "JuLiA"
 ```
+
+!!! compat "Julia 1.8"
+    The `chartransform` keyword argument requires Julia 1.8.
 """
 function normalize end
 normalize(s::AbstractString, nf::Symbol) = Base.Unicode.normalize(s, nf)
@@ -89,4 +143,78 @@ letter combined with an accent mark is a single grapheme.)
 """
 graphemes(s::AbstractString) = Base.Unicode.GraphemeIterator{typeof(s)}(s)
 
+using Base.Unicode: utf8proc_error, UTF8PROC_DECOMPOSE, UTF8PROC_CASEFOLD, UTF8PROC_STRIPMARK
+
+function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, options::Integer)
+    ret = @ccall utf8proc_decompose_char(codepoint::UInt32, dest::Ptr{UInt32}, length(dest)::Int, options::Cint, C_NULL::Ptr{Cint})::Int
+    ret < 0 && utf8proc_error(ret)
+    return ret
+end
+
+"""
+    isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false, chartransform=identity)
+
+Return whether `s1` and `s2` are canonically equivalent Unicode strings.   If `casefold=true`,
+ignores case (performs Unicode case-folding); if `stripmark=true`, strips diacritical marks
+and other combining characters.
+
+As with [`Unicode.normalize`](@ref), you can also pass an arbitrary
+function via the `chartransform` keyword (mapping `Integer` codepoints to codepoints)
+to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref).
+
+# Examples
+
+For example, the string `"noël"` can be constructed in two canonically equivalent ways
+in Unicode, depending on whether `"ë"` is formed from a single codepoint U+00EB or
+from the ASCII character `'e'` followed by the U+0308 combining-diaeresis character.
+
+```jldoctest
+julia> s1 = "no\u00EBl"
+"noël"
+
+julia> s2 = "noe\u0308l"
+"noël"
+
+julia> s1 == s2
+false
+
+julia> isequal_normalized(s1, s2)
+true
+
+julia> isequal_normalized(s1, "noel", stripmark=true)
+true
+
+julia> isequal_normalized(s1, "NOËL", casefold=true)
+true
+```
+"""
+function isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity)
+    function decompose_next_char!(c, state, d, options, s)
+        n = _decompose_char!(c, d, options)
+        if n > length(d) # may be possible in future Unicode versions?
+            n = _decompose_char!(c, resize!(d, n), options)
+        end
+        return 1, n, iterate(s, state)
+    end
+    options = UTF8PROC_DECOMPOSE
+    casefold && (options |= UTF8PROC_CASEFOLD)
+    stripmark && (options |= UTF8PROC_STRIPMARK)
+    i1,i2 = iterate(s1),iterate(s2)
+    d1,d2 = Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4) # codepoint buffers
+    n1 = n2 = 0 # lengths of codepoint buffers
+    j1 = j2 = 1 # indices in d1, d2
+    while true
+        # refill empty buffers; loop because a char may decompose to 0 codepoints (e.g. a stripped mark)
+        while j1 > n1 && i1 !== nothing
+            j1, n1, i1 = decompose_next_char!(chartransform(UInt32(i1[1])), i1[2], d1, options, s1)
+        end
+        while j2 > n2 && i2 !== nothing
+            j2, n2, i2 = decompose_next_char!(chartransform(UInt32(i2[1])), i2[2], d2, options, s2)
+        end
+        (j1 > n1 || j2 > n2) && return j1 > n1 && j2 > n2 # equal only if both ran out together
+        d1[j1] == d2[j2] || return false
+        j1 += 1; j2 += 1
+    end
+end
+
 end
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
index 6888fa2d9ba40..a4faac2bd3ba9 100644
--- a/stdlib/Unicode/test/runtests.jl
+++ b/stdlib/Unicode/test/runtests.jl
@@ -2,7 +2,7 @@
 
 using Test
 using Unicode
-using Unicode: normalize, isassigned
+using Unicode: normalize, isassigned, julia_chartransform
 
 @testset "string normalization" begin
     # normalize (Unicode normalization etc.):
@@ -25,6 +25,11 @@ using Unicode: normalize, isassigned
     @test normalize("\t\r", stripcc=true) == "  "
     @test normalize("\t\r", stripcc=true, newline2ls=true) == " \u2028"
     @test normalize("\u0072\u0307\u0323", :NFC) == "\u1E5B\u0307" #26917
+
+    # julia_chartransform identifier normalization
+    @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212", chartransform=julia_chartransform) ==
+        "julia\u03B5\u03BC\u22C5\u22C5\u002D"
+    @test julia_chartransform('\u00B5') === '\u03BC'
 end
 
 @testset "unicode sa#15" begin
@@ -417,3 +422,16 @@ end
     @test prod(["*" for i in 1:3]) == "***"
     @test prod(["*" for i in 1:0]) == ""
 end
+
+@testset "Unicode equivalence" begin
+    @test isequal_normalized("no\u00EBl", "noe\u0308l")
+    @test !isequal_normalized("no\u00EBl", "noe\u0308l ")
+    @test isequal_normalized("", "")
+    @test !isequal_normalized("", " ")
+    @test !isequal_normalized("no\u00EBl", "NOËL")
+    @test isequal_normalized("no\u00EBl", "NOËL", casefold=true)
+    @test !isequal_normalized("no\u00EBl", "noel")
+    @test isequal_normalized("no\u00EBl", "noel", stripmark=true)
+    @test isequal_normalized("no\u00EBl", "NOEL", stripmark=true, casefold=true)
+    @test isequal_normalized("\u00B5\u0302m", "\u03BC\u0302m", chartransform=julia_chartransform)
+end
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
index 76818b73e9375..fc28a9eb7d99c 100644
--- a/stdlib/libLLVM_jll/Project.toml
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "libLLVM_jll"
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "12.0.0+1"
+version = "12.0.1+4"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/test/Makefile b/test/Makefile
index 3d16f88a741b8..24e137a5b1492 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -7,7 +7,7 @@ STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR)
 # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE
 
 TESTGROUPS = unicode strings compiler
-TESTS = all stdlib $(TESTGROUPS) \
+TESTS = all default stdlib $(TESTGROUPS) \
 		$(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \
 		$(filter-out runtests testdefs, \
 			$(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \
@@ -19,7 +19,7 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC
 
 GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)"
 
-default: all
+default:
 
 $(TESTS):
 	@cd $(SRCDIR) && \
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index c3537aac4e887..7a8b9cd8b7d5f 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -514,6 +514,9 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T
     @test convert(Matrix, Y) == Y
     @test convert(Matrix, view(Y, 1:2, 1:2)) == Y
     @test_throws MethodError convert(Matrix, X)
+
+    # convert(::Type{Union{}}, A::AbstractMatrix)
+    @test_throws MethodError convert(Union{}, X)
 end
 
 mutable struct TestThrowNoGetindex{T} <: AbstractVector{T} end
@@ -1433,7 +1436,8 @@ using Base: typed_hvncat
             v1 == v2 == 1 && continue
             for v3 ∈ ((), (1,), ([1],), (1, [1]), ([1], 1), ([1], [1]))
                 @test_throws ArgumentError hvncat((v1, v2), true, v3...)
-                @test_throws ArgumentError hvncat(((v1,), (v2,)), true, v3...)
+                @test_throws str->(occursin("`shape` argument must consist of positive integers", str) ||
+                                   occursin("reducing over an empty collection is not allowed", str)) hvncat(((v1,), (v2,)), true, v3...)
             end
         end
     end
@@ -1570,3 +1574,6 @@ end
     r = Base.IdentityUnitRange(3:4)
     @test reshape(r, :) === reshape(r, (:,)) === r
 end
+
+@test haskey([1, 2, 3], 1)
+@test !haskey([1, 2, 3], 4)
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index 265d97776c053..b329082022206 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -66,7 +66,7 @@ end
 ## Other ways of accessing functions
 # Test that non-ambiguous cases work
 let io = IOBuffer()
-    @test @test_logs precompile(ambig, (Int, Int))
+    @test precompile(ambig, (Int, Int))
     cf = @eval @cfunction(ambig, Int, (Int, Int))
     @test ccall(cf, Int, (Int, Int), 1, 2) == 4
     @test length(code_lowered(ambig, (Int, Int))) == 1
@@ -75,7 +75,7 @@ end
 
 # Test that ambiguous cases fail appropriately
 let io = IOBuffer()
-    @test @test_logs (:warn,) precompile(ambig, (UInt8, Int))
+    @test !precompile(ambig, (UInt8, Int))
     cf = @eval @cfunction(ambig, Int, (UInt8, Int))  # test for a crash (doesn't throw an error)
     @test_throws(MethodError(ambig, (UInt8(1), Int(2)), get_world_counter()),
                  ccall(cf, Int, (UInt8, Int), 1, 2))
@@ -386,4 +386,12 @@ end
 @test_throws MethodError B12814{3, Float64}([1, 2, 3]) # ambiguous
 @test B12814{3,Float64}((1, 2, 3)).x === (1.0, 2.0, 3.0)
 
+# issue #43040
+module M43040
+   struct C end
+   stripType(::Type{C}) where {T} = C # where {T} is intentionally incorrect
+end
+
+@test isempty(detect_ambiguities(M43040; recursive=true))
+
 nothing
diff --git a/test/arrayops.jl b/test/arrayops.jl
index f2f79f4212524..3e20a92a9d990 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -567,6 +567,7 @@ end
     @test findlast(!iszero, a) == 8
     @test findlast(a.==0) == 5
     @test findlast(a.==5) == nothing
+    @test findlast(false) == nothing # test non-AbstractArray findlast
     @test findlast(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findlast(isodd, [2,4,6,3,9,2,0]) == 5
     @test findlast(isodd, [2,4,6,2,0]) == nothing
@@ -593,6 +594,10 @@ end
         @test findprev(b, T(1)) isa keytype(b)
         @test findprev(b, T(2)) isa keytype(b)
     end
+
+    @testset "issue 43078" begin
+        @test_throws TypeError findall([1])
+    end
 end
 @testset "find with Matrix" begin
     A = [1 2 0; 3 4 0]
@@ -748,6 +753,12 @@ end
     @test res === dst == [5 6 4; 2 3 1]
     res = circshift!(dst, src, (3.0, 2.0))
     @test res === dst == [5 6 4; 2 3 1]
+
+    # https://github.com/JuliaLang/julia/issues/41402
+    src = Float64[]
+    @test circshift(src, 1) == src
+    src = zeros(Bool, (4,0))
+    @test circshift(src, 1) == src
 end
 
 @testset "circcopy" begin
@@ -784,6 +795,10 @@ let A, B, C, D
 
     # With hash collisions
     @test map(x -> x.x, unique(map(HashCollision, B), dims=1)) == C
+
+    # With NaNs:
+    E = [1 NaN 3; 1 NaN 3; 1 NaN 3];
+    @test isequal(unique(E, dims=1), [1  NaN  3])
 end
 
 @testset "large matrices transpose" begin
@@ -1106,6 +1121,11 @@ end
     @test isequal(intersect([1,2,3], Float64[]), Float64[])
     @test isequal(intersect(Int64[], [1,2,3]), Int64[])
     @test isequal(intersect(Int64[]), Int64[])
+    @test isequal(intersect([1, 3], 1:typemax(Int)), [1, 3])
+    @test isequal(intersect(1:typemax(Int), [1, 3]), [1, 3])
+    @test isequal(intersect([1, 2, 3], 2:0.1:5), [2., 3.])
+    @test isequal(intersect([1.0, 2.0, 3.0], 2:5), [2., 3.])
+
     @test isequal(setdiff([1,2,3,4], [2,5,4]), [1,3])
     @test isequal(setdiff([1,2,3,4], [7,8,9]), [1,2,3,4])
     @test isequal(setdiff([1,2,3,4], Int64[]), Int64[1,2,3,4])
@@ -1446,6 +1466,26 @@ end
     @test isempty(eoa)
 end
 
+@testset "logical keepat!" begin
+    # Vector
+    a = Vector(1:10)
+    keepat!(a, [falses(5); trues(5)])
+    @test a == 6:10
+    @test_throws BoundsError keepat!(a, trues(1))
+    @test_throws BoundsError keepat!(a, trues(11))
+
+    # BitVector
+    ba = rand(10) .> 0.5
+    @test isa(ba, BitArray)
+    keepat!(ba, ba)
+    @test all(ba)
+
+    # empty array
+    ea = []
+    keepat!(ea, Bool[])
+    @test isempty(ea)
+end
+
 @testset "deleteat!" begin
     for idx in Any[1, 2, 5, 9, 10, 1:0, 2:1, 1:1, 2:2, 1:2, 2:4, 9:8, 10:9, 9:9, 10:10,
                    8:9, 9:10, 6:9, 7:10]
@@ -1545,6 +1585,12 @@ end
     @test reverse!(Any[]) == Any[]
 end
 
+@testset "reverseind" begin
+    @test reverseind([1, 2, 3], 2) == 2
+    @test reverseind([1, 2, 3], 0) == 4
+    @test reverseind([1, 2, 3], 3) == 1
+end
+
 @testset "reverse dim" begin
     @test isequal(reverse([2,3,1], dims=1), [1,3,2])
     @test_throws ArgumentError reverse([2,3,1], dims=2)
diff --git a/test/atomics.jl b/test/atomics.jl
index e4202b5ce1aea..c53471ed0da26 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -4,6 +4,8 @@ using Test, Base.Threads
 using Core: ConcurrencyViolationError
 import Base: copy
 
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+
 mutable struct ARefxy{T}
     @atomic x::T
     y::T
@@ -86,17 +88,18 @@ Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int,
 
 @noinline function _test_field_operators(r)
     r = r[]
+    TT = fieldtype(typeof(r), :x)
     T = typeof(getfield(r, :x))
     @test getfield(r, :x, :sequentially_consistent) === T(123_10)
     @test setfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_1)
     @test getfield(r, :x, :sequentially_consistent) === T(123_1)
-    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), false)
-    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), true)
+    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), false))
+    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), true))
     @test getfield(r, :x, :sequentially_consistent) === T(123_30)
-    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === (T(123_30), false)
+    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_30), false))
     @test getfield(r, :x, :sequentially_consistent) === T(123_30)
-    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_30), T(123_31))
-    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_31), T(123_32))
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_30), T(123_31))
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_31), T(123_32))
     @test getfield(r, :x, :sequentially_consistent) === T(123_32)
     @test swapfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_32)
     @test getfield(r, :x, :sequentially_consistent) === T(123_1)
@@ -120,6 +123,7 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
 @noinline function _test_field_orderings(r, x, y)
     @nospecialize x y
     r = r[]
+    TT = fieldtype(typeof(r), :x)
 
     @test getfield(r, :x) === x
     @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :u)
@@ -184,7 +188,7 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test getfield(r, :y) === x
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :y, y, :u)
-    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :y, y, :unordered)
     @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :monotonic)
     @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :acquire)
     @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :release)
@@ -193,16 +197,16 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test swapfield!(r, :y, y, :not_atomic) === x
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :y, swap, y, :u)
-    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :y, swap, y, :unordered)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :monotonic)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :release)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire_release)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :sequentially_consistent)
-    @test modifyfield!(r, :y, swap, x, :not_atomic) === (y, x)
+    @test modifyfield!(r, :y, swap, x, :not_atomic) === Pair{TT,TT}(y, x)
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :u, :not_atomic)
-    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :unordered, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :monotonic, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :acquire, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :release, :not_atomic)
@@ -215,16 +219,16 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :acquire_release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :sequentially_consistent)
-    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === (x, true)
-    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === (y, x === y)
-    @test replacefield!(r, :y, y, y, :not_atomic) === (y, true)
-    @test replacefield!(r, :y, y, y) === (y, true)
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((x, true))
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((y, x === y))
+    @test replacefield!(r, :y, y, y, :not_atomic) === ReplaceType{TT}((y, true))
+    @test replacefield!(r, :y, y, y) === ReplaceType{TT}((y, true))
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :u)
     @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x, :not_atomic)
     @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x)
-    @test swapfield!(r, :x, x, :unordered) === y
-    @test swapfield!(r, :x, x, :monotonic) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :unordered)  # rejected before any swap, so the next swap still returns `y`
+    @test swapfield!(r, :x, x, :monotonic) === y
     @test swapfield!(r, :x, x, :acquire) === x
     @test swapfield!(r, :x, x, :release) === x
     @test swapfield!(r, :x, x, :acquire_release) === x
@@ -233,17 +237,17 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :u)
     @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x, :not_atomic)
     @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x)
-    @test modifyfield!(r, :x, swap, x, :unordered) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :monotonic) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :acquire) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :release) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :acquire_release) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :sequentially_consistent) === (x, x)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :unordered)
+    @test modifyfield!(r, :x, swap, x, :monotonic) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire_release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :sequentially_consistent) === Pair{TT,TT}(x, x)
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :u, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, x, x)
     @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, y, x, :not_atomic, :not_atomic)
-    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :unordered, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :monotonic, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :acquire, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :release, :not_atomic)
@@ -256,9 +260,9 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :acquire_release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :sequentially_consistent)
-    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === (x, true)
-    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === (y, x === y)
-    @test replacefield!(r, :x, y, x, :sequentially_consistent) === (y, true)
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true))
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y))
+    @test replacefield!(r, :x, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true))
     nothing
 end
 @noinline function test_field_orderings(r, x, y)
@@ -288,21 +292,21 @@ end
 Base.convert(T::Type{<:UndefComplex}, S) = T()
 @noinline function _test_field_undef(r)
     r = r[]
-    T = fieldtype(typeof(r), :x)
-    x = convert(T, 12345_10)
+    TT = fieldtype(typeof(r), :x)
+    x = convert(TT, 12345_10)
     @test_throws UndefRefError getfield(r, :x)
     @test_throws UndefRefError getfield(r, :x, :sequentially_consistent)
     @test_throws UndefRefError modifyfield!(r, :x, add, 1, :sequentially_consistent)
-    @test_throws (T === Any ? UndefRefError : TypeError) replacefield!(r, :x, 1, 1.0, :sequentially_consistent)
+    @test_throws (TT === Any ? UndefRefError : TypeError) replacefield!(r, :x, 1, 1.0, :sequentially_consistent)
     @test_throws UndefRefError replacefield!(r, :x, 1, x, :sequentially_consistent)
     @test_throws UndefRefError getfield(r, :x, :sequentially_consistent)
     @test_throws UndefRefError swapfield!(r, :x, x, :sequentially_consistent)
     @test getfield(r, :x, :sequentially_consistent) === x === getfield(r, :x)
     nothing
 end
-@noinline function test_field_undef(T)
-    _test_field_undef(Ref(T()))
-    _test_field_undef(Ref{Any}(T()))
+@noinline function test_field_undef(TT)
+    _test_field_undef(Ref(TT()))
+    _test_field_undef(Ref{Any}(TT()))
     nothing
 end
 test_field_undef(ARefxy{BigInt})
@@ -339,10 +343,10 @@ let a = ARefxy(1, -1)
     @test 12 === @atomic :monotonic a.x *= 3
 
     @test 12 === @atomic a.x
-    @test (12, 13) === @atomic a.x + 1
-    @test (13, 15) === @atomic :monotonic a.x + 2
-    @test (15, 19) === @atomic a.x max 19
-    @test (19, 20) === @atomic :monotonic a.x max 20
+    @test (12 => 13) === @atomic a.x + 1
+    @test (13 => 15) === @atomic :monotonic a.x + 2
+    @test (15 => 19) === @atomic a.x max 19
+    @test (19 => 20) === @atomic :monotonic a.x max 20
     @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + 1
     @test_throws ConcurrencyViolationError @atomic :not_atomic a.x max 30
 
@@ -352,17 +356,17 @@ let a = ARefxy(1, -1)
     @test_throws ConcurrencyViolationError @atomicswap :not_atomic a.x = 1
 
     @test 2 === @atomic a.x
-    @test (2, true) === @atomicreplace a.x 2 => 1
-    @test (1, false) === @atomicreplace :monotonic a.x 2 => 1
-    @test (1, false) === @atomicreplace :monotonic :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((2, true)) === @atomicreplace a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic :monotonic a.x 2 => 1
     @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x 1 => 2
     @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x 1 => 2
 
     @test 1 === @atomic a.x
     xchg = 1 => 2
-    @test (1, true) === @atomicreplace a.x xchg
-    @test (2, false) === @atomicreplace :monotonic a.x xchg
-    @test (2, false) === @atomicreplace :acquire_release :monotonic a.x xchg
+    @test ReplaceType{Int}((1, true)) === @atomicreplace a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :monotonic a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :acquire_release :monotonic a.x xchg
     @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x xchg
     @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
 end
diff --git a/test/backtrace.jl b/test/backtrace.jl
index 8917c8698c9c4..3aebfec410f34 100644
--- a/test/backtrace.jl
+++ b/test/backtrace.jl
@@ -258,3 +258,88 @@ let code = """
     @test occursin("InterpreterIP in top-level CodeInfo for Main.A", bt_str)
 end
 
+"""
+    _reformat_sp(bt_data...) -> sp::Vector{Ptr{Cvoid}}
+
+Convert the output `bt_data` of `jl_backtrace_from_here` with `returnsp` flag set to a
+vector of valid stack pointers `sp`; i.e., `sp` is a subset of `bt_data[3]`.
+
+See also `Base._reformat_bt`.
+"""
+function _reformat_sp(
+    bt_raw::Array{Ptr{Cvoid},1},
+    bt2::Array{Any,1},
+    sp_raw::Array{Ptr{Cvoid},1},
+)
+    bt = Base._reformat_bt(bt_raw, bt2)
+    sp = empty!(similar(sp_raw))  # start from an empty vector with `sp_raw`'s element type
+    i = j = 0  # cursors into `bt` and `bt_raw`; both only ever move forward
+    while true
+        # Advance `i` such that `bt[i] isa Ptr{Cvoid}` (native pointer).
+        local ip
+        while true
+            if i == lastindex(bt)  # `bt` exhausted: nothing left to match
+                return sp
+            end
+            i += 1
+            x = bt[i]
+            if x isa Ptr{Cvoid}
+                ip = x
+                break
+            end
+        end
+        # Advance `j` such that `bt_raw[j] == bt[i]` to find a valid stack pointer.
+        while true
+            if j == lastindex(bt_raw)  # `bt_raw` exhausted without matching `ip`
+                return sp
+            end
+            j += 1
+            if bt_raw[j] == ip
+                push!(sp, sp_raw[j])  # keep the `sp_raw` entry at the same index as the matched `bt_raw[j]`
+                break
+            end
+        end
+    end
+end
+
+"""
+    withframeaddress(f)
+
+Call function `f` with an address `ptr::Ptr{Cvoid}` of an independent frame
+immediately outer to `f`.
+"""
+withframeaddress
+@eval @noinline function withframeaddress(f)
+    sp = Core.Intrinsics.llvmcall(  # the `$` before the IR string makes `@eval` evaluate it (including `$(Sys.WORD_SIZE)`) before the method is defined
+        ($"""
+        declare i8* @llvm.frameaddress(i32)
+        define private i$(Sys.WORD_SIZE) @frameaddr() {
+            %1 = call i8* @llvm.frameaddress(i32 0)
+            %2 = ptrtoint i8* %1 to i$(Sys.WORD_SIZE)
+            ret i$(Sys.WORD_SIZE) %2
+        }""", "frameaddr"),
+        UInt,  # return type of the IR function
+        Tuple{},  # the IR function takes no arguments
+    )
+    @noinline f(Ptr{Cvoid}(sp))  # pass the captured address on as a pointer; the call to `f` is marked `@noinline`
+end
+
+function sandwiched_backtrace()
+    local ptr1, ptr2, bt  # assigned from inside the closures below
+    withframeaddress() do p1
+        ptr1 = p1
+        bt = ccall(:jl_backtrace_from_here, Ref{Base.SimpleVector}, (Cint, Cint), true, 0)  # presumably (returnsp = true, skip = 0) — TODO confirm against the C signature
+        withframeaddress() do p2  # nested call: the backtrace above is captured between the two frame addresses
+            ptr2 = p2
+        end
+    end
+    return ptr1, ptr2, bt
+end
+
+@testset "stack pointers" begin
+    ptr1, ptr2, bt_data = sandwiched_backtrace()
+    sp = _reformat_sp(bt_data...)  # `bt_data` splats into (bt_raw, bt2, sp_raw)
+    @test ptr2 < sp[2]
+    @test sp[1] < ptr1
+    @test all(diff(Int128.(UInt.(sp))) .> 0)  # addresses strictly increasing; widened to Int128 so the differences are signed
+end
diff --git a/test/bitarray.jl b/test/bitarray.jl
index b565252664876..75a6389815336 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -1711,3 +1711,60 @@ end
     @test typeof([a a ;;; a a]) <: BitArray
     @test typeof([a a ;;; [a a]]) <: BitArray
 end
+
+@testset "deleteat! additional tests" begin
+    for v in ([1, 2, 3], [true, true, true], trues(3))
+        @test_throws BoundsError deleteat!(v, true:true)  # 1-element Bool range against a 3-element collection
+    end
+
+    for v in ([1], [true], trues(1))
+        @test length(deleteat!(v, false:false)) == 1  # `false` selects nothing, so the element survives
+        @test isempty(deleteat!(v, true:true))  # `true` selects the only element
+    end
+
+    x = trues(3)
+    x[3] = false
+    @test deleteat!(x, [UInt8(2)]) == [true, false]  # non-Int integer index; `x` has 2 elements afterwards
+    @test_throws ArgumentError deleteat!(x, Any[true])
+    @test_throws ArgumentError deleteat!(x, Any[1, true])
+    @test_throws ArgumentError deleteat!(x, Any[2, 1])
+    @test_throws BoundsError deleteat!(x, Any[4])
+    @test_throws BoundsError deleteat!(x, Any[2, 4])
+
+    function test_equivalence(n::Int)  # the same deletion expressed five ways must give equal results
+        x1 = rand(Bool, n)
+        x2 = BitVector(x1)
+        inds1 = rand(Bool, n)
+        inds2 = BitVector(inds1)
+        return deleteat!(copy(x1), findall(inds1)) ==
+               deleteat!(copy(x1), inds1) ==
+               deleteat!(copy(x2), inds1) ==
+               deleteat!(copy(x1), inds2) ==
+               deleteat!(copy(x2), inds2)
+    end
+
+    Random.seed!(1234)  # fixed seed keeps the randomized check below reproducible
+    for n in 1:20, _ in 1:100
+        @test test_equivalence(n)
+    end
+end
+
+@testset "fill! for BitArray with contiguous view (#42795)" begin
+    # change values in range `rangein`, `rangeout` should stay unchanged; the `which` checks pin the `fill!` method selected for each view
+    for (rangein, rangeout) in ((1:5, 6:10), (5:10, 1:4))
+        bitvector = trues(10)
+        bitarray  = trues(10, 10)
+        viewvector = view(bitvector, rangein)
+        viewarray  = view(bitarray, rangein, rangein)
+        @test which(fill!, (typeof(viewvector), Bool)).sig == Tuple{typeof(fill!), SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}}}, Any}
+        @test which(fill!, (typeof(viewarray), Bool)).sig == Tuple{typeof(fill!), SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}, Vararg{Union{Int,AbstractUnitRange{Int}}}}}, Any}
+        fill!(viewvector, false)
+        fill!(viewarray, false)
+        @test all(bitvector[rangein] .== false)
+        @test all(bitvector[rangeout] .== true)
+        @test all(bitarray[rangein, rangein] .== false)  # only the block covered by the view is cleared
+        @test all(bitarray[rangeout, rangeout] .== true)
+        @test all(bitarray[rangeout, rangein] .== true)  # mixed blocks lie outside the view and must be untouched
+        @test all(bitarray[rangein, rangeout] .== true)
+    end
+end
\ No newline at end of file
diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl
index e1a7029334a3d..735cd88c13758 100644
--- a/test/boundscheck_exec.jl
+++ b/test/boundscheck_exec.jl
@@ -259,4 +259,17 @@ if bc_opt == bc_default || bc_opt == bc_off
     @test !occursin("arrayref(true", typed_40281)
 end
 
+@testset "pass inbounds meta to getindex on CartesianIndices (#42115)" begin
+    @inline getindex_42115(r, i, j) = @inbounds getindex(r, i, j)  # wrapper that calls `getindex` under `@inbounds`
+
+    R = CartesianIndices((5, 5))
+    if bc_opt == bc_on  # in this mode the out-of-range accesses must still throw
+        @test_throws BoundsError getindex_42115(R, -1, -1)
+        @test_throws BoundsError getindex_42115(R, 1, -1)
+    else  # otherwise no error is expected and the requested indices come back unchanged
+        @test getindex_42115(R, -1, -1) == CartesianIndex(-1, -1)
+        @test getindex_42115(R, 1, -1) == CartesianIndex(1, -1)
+    end
+end
+
 end
diff --git a/test/broadcast.jl b/test/broadcast.jl
index 66c215aee9293..b3899ddb063df 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -991,10 +991,6 @@ end
     @test Core.Compiler.return_type(broadcast, Tuple{typeof(+), Vector{Int},
                                                      Vector{Union{Float64, Missing}}}) ==
         Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
-    @test isequal([1, 2] + [3.0, missing], [4.0, missing])
-    @test Core.Compiler.return_type(+, Tuple{Vector{Int},
-                                             Vector{Union{Float64, Missing}}}) ==
-        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
     @test Core.Compiler.return_type(+, Tuple{Vector{Int},
                                              Vector{Union{Float64, Missing}}}) ==
         Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
@@ -1015,6 +1011,8 @@ end
     @test typeof.([iszero, iszero]) == [typeof(iszero), typeof(iszero)]
     @test isequal(identity.(Vector{<:Union{Int, Missing}}[[1, 2],[missing, 1]]),
                   [[1, 2],[missing, 1]])
+    @test broadcast(i -> ((x=i, y=(i==1 ? 1 : "a")), 3), 1:4) isa
+        Vector{Tuple{NamedTuple{(:x, :y)}, Int}}
 end
 
 @testset "Issue #28382: eltype inconsistent with getindex" begin
@@ -1056,3 +1054,19 @@ end
     @test Broadcast.BroadcastFunction(+)(2:3, 2:3) == 4:2:6
     @test Broadcast.BroadcastFunction(+)(2:3, 2:3) isa AbstractRange
 end
+
+@testset "#42063" begin
+    buf = IOBuffer()
+    @test println.(buf, [1,2,3]) == [nothing, nothing, nothing]  # one `nothing` result per broadcast element
+    @test String(take!(buf)) == "1\n2\n3\n"  # each element was printed exactly once, in order
+end
+
+@testset "Memory allocation inconsistency in broadcasting #41565" begin
+    function test(y)
+        y .= 0 .- y ./ (y.^2) # in-place broadcast that used to trigger an extra allocation
+        return y
+    end
+    arr = rand(1000)
+    @allocated test(arr)  # result discarded — presumably a warm-up so compilation is not measured
+    @test (@allocated test(arr)) == 0
+end
diff --git a/test/cartesian.jl b/test/cartesian.jl
index 8d2651b6f425f..b3cb8315decad 100644
--- a/test/cartesian.jl
+++ b/test/cartesian.jl
@@ -147,6 +147,14 @@ module TestOffsetArray
 end
 
 @testset "CartesianIndices getindex" begin
+    @testset "0D array" begin
+        a = zeros()  # zero-dimensional array
+        c = CartesianIndices(a)
+        @test a[c] == a
+        @test c[c] === c  # indexing `c` with itself must return `c` unchanged
+        @test c[] == CartesianIndex()  # indexing with no indices yields the empty CartesianIndex
+    end
+
     @testset "AbstractUnitRange" begin
         for oinds in [(2, ), (2, 3), (2, 3, 4)]
             A = rand(1:10, oinds)
@@ -159,6 +167,34 @@ end
             @test all(i->A[i]==A[R[i]], R)
             @test all(i->A[i]==A[R[i]], collect(R))
             @test all(i->i in R, collect(R))
+
+            # Indexing a CartesianIndices with another CartesianIndices having the same ndims
+            # forwards the indexing to the component ranges and retains the wrapper
+            @test R[R] === R
+
+            R_array = collect(R)
+
+            all_onetoone = ntuple(x -> 1:1, Val(ndims(R)))
+            R2 = R[all_onetoone...]
+            @test R2 isa CartesianIndices{ndims(R)}
+
+            all_one = ntuple(x -> 1, Val(ndims(R)))
+            @test R2[all_one...] == R_array[all_one...]
+
+            @test R2 == R_array[all_onetoone...]
+
+            R3 = R[ntuple(x -> Colon(), Val(ndims(R)))...]
+            @test R3 === R
+
+            # test a mix of Colons and ranges
+            # up to two leading axes are colons, while the rest are UnitRanges
+            indstrailing = (1:1 for _ in min(ndims(R), 2)+1:ndims(R))
+            R4 = R[(Colon() for _ in 1:min(ndims(R), 2))..., indstrailing...]
+            @test R4 isa CartesianIndices{ndims(R)}
+            indsleading = CartesianIndices(axes(A)[1:min(ndims(A), 2)])
+            for I in indsleading
+                @test R4[I, indstrailing...] == R_array[I, indstrailing...]
+            end
         end
     end
 
@@ -173,6 +209,75 @@ end
 
             # TODO: A[SR] == A[Linearindices(SR)] should hold for StepRange CartesianIndices
             @test_broken A[SR] == A[LinearIndices(SR)]
+
+            # Create a CartesianIndices with StepRange indices to test indexing into it
+            R = CartesianIndices(oinds)
+            R_array = collect(R)
+
+            all_onetoone = ntuple(x -> 1:1, Val(ndims(R)))
+            R2 = R[all_onetoone...]
+            @test R2 isa CartesianIndices{ndims(R)}
+
+            all_one = ntuple(x -> 1, Val(ndims(R)))
+            @test R2[all_one...] == R_array[all_one...]
+            @test R2 == R_array[all_onetoone...]
+
+            R3 = R[ntuple(x -> Colon(), Val(ndims(R)))...]
+            @test R3 === R
+
+            # test a mix of Colons and ranges
+            # up to two leading axes are colons, while the rest are UnitRanges
+            indstrailing = (1:1 for _ in min(ndims(R), 2)+1:ndims(R))
+            R4 = R[(Colon() for _ in 1:min(ndims(R), 2))..., indstrailing...]
+            @test R4 isa CartesianIndices{ndims(R)}
+            indsleading = CartesianIndices(axes(R)[1:min(ndims(R), 2)])
+            for I in indsleading
+                @test R4[I, indstrailing...] == R_array[I, indstrailing...]
+            end
+        end
+
+        # CartesianIndices whose indices have a unit step may be their own axes
+        for oinds in [(1:1:4, ), (1:1:4, 1:1:5), (1:1:4, 1:1:5, 1:1:3)]
+            R = CartesianIndices(oinds)
+            @test R[R] === R
+            # test a mix of UnitRanges and StepRanges
+            R = CartesianIndices((oinds..., 1:3))
+            @test R[R] === R
+            R = CartesianIndices((1:3, oinds...))
+            @test R[R] === R
+        end
+    end
+
+    @testset "logical indexing of CartesianIndices with ranges" begin
+        c = CartesianIndices((1:0, 1:2))
+        c2 = c[true:false, 1:2]  # empty Bool range against an empty first axis
+        @test c2 == c
+
+        for (inds, r) in Any[(1:2, false:true), (1:2, false:true:true),
+            (1:2:3, false:true), (1:2:3, false:true:true)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test c2[1, :] == c[2, :]  # mask (false, true) keeps only the second row
+        end
+
+        for (inds, r) in Any[(1:1, true:true), (1:1, true:true:true),
+            (1:1:1, true:true), (1:1:1, true:true:true)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test c2[1, :] == c[1, :]  # mask (true,) keeps the only row
+        end
+
+        for (inds, r) in Any[(1:1, false:false), (1:1, false:true:false),
+            (1:1:1, false:false), (1:1:1, false:true:false)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test size(c2, 1) == 0  # mask (false,) drops the only row
         end
     end
 end
diff --git a/test/ccall.jl b/test/ccall.jl
index 02d005108459e..8f047ece65be2 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -982,6 +982,26 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
     end
 end
 
+
+#issue 40164
+@testset "llvm parameter attributes on cfunction closures" begin
+    struct Struct40164
+        x::Cdouble
+        y::Cdouble
+        z::Cdouble
+    end
+
+    function test_40164()
+        ret = Struct40164[]
+        f = x::Struct40164 -> (push!(ret, x); nothing)  # closure over `ret`: records every struct it receives
+        f_c = @cfunction($f, Cvoid, (Struct40164,))  # `$f` requests a closure cfunction
+        ccall(f_c.ptr, Ptr{Cvoid}, (Struct40164,), Struct40164(0, 1, 2))  # NOTE(review): return type is Ptr{Cvoid} here but Cvoid in the @cfunction — confirm this is intentional
+        ret
+    end
+
+    @test test_40164() == [Struct40164(0, 1, 2)]  # the struct must arrive in the callback with all three fields intact
+end
+
 else
 
 @test_broken "cfunction: no support for closures on this platform"
@@ -1783,6 +1803,11 @@ ccall_with_undefined_lib() = ccall((:time, xx_nOt_DeFiNeD_xx), Cint, (Ptr{Cvoid}
     @test b16 == b
 end
 
+@testset "transcode String to String" begin
+    a = "Julia strings and things"
+    @test transcode(String, a) === a  # String-to-String transcoding must hand back an identical string
+end
+
 # issue 33413
 @testset "cglobal lowering" begin
     # crash in cglobal33413_ptrinline[_notype]() specifically requires the library pointer be
diff --git a/test/channels.jl b/test/channels.jl
index 0611b387e6f88..1a989747c3863 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -2,6 +2,7 @@
 
 using Random
 using Base: Experimental
+using Base: n_avail
 
 @testset "single-threaded Condition usage" begin
     a = Condition()
@@ -578,3 +579,43 @@ let c = Channel(3)
     close(c)
     @test repr(MIME("text/plain"), c) == "Channel{Any}(3) (closed)"
 end
+
+# PR #41833: data races in Channel
+@testset "n_avail(::Channel)" begin
+    # Buffered: n_avail() = buffer length + number of waiting tasks
+    let c = Channel(2)
+        @test n_avail(c) == 0;   put!(c, 0)
+        @test n_avail(c) == 1;   put!(c, 0)
+        @test n_avail(c) == 2;   t1 = @task put!(c, 0); yield(t1)
+        @test n_avail(c) == 3;   t2 = @task put!(c, 0); yield(t2)
+        @test n_avail(c) == 4
+        # Test n_avail(c) after interrupting a task waiting on the channel
+                                t3 = @task put!(c, 0)
+                                yield(t3)
+        @test n_avail(c) == 5
+                                @async Base.throwto(t3, ErrorException("Exit put!"))
+                                try wait(t3) catch end  # t3 ends with the injected error; ignore it here
+        @test n_avail(c) == 4    # the interrupted put! no longer counts
+                                close(c)
+                                try wait(t1) catch end
+                                try wait(t2) catch end
+        @test n_avail(c) == 2    # Already-buffered items remain
+    end
+    # Unbuffered: n_avail() = number of waiting tasks
+    let c = Channel()
+        @test n_avail(c) == 0;   t1 = @task put!(c, 0); yield(t1)
+        @test n_avail(c) == 1;   t2 = @task put!(c, 0); yield(t2)
+        @test n_avail(c) == 2
+        # Test n_avail(c) after interrupting a task waiting on the channel
+                                t3 = @task put!(c, 0)
+                                yield(t3)
+        @test n_avail(c) == 3
+                                @async Base.throwto(t3, ErrorException("Exit put!"))
+                                try wait(t3) catch end  # t3 ends with the injected error; ignore it here
+        @test n_avail(c) == 2    # the interrupted put! no longer counts
+                                close(c)
+                                try wait(t1) catch end
+                                try wait(t2) catch end
+        @test n_avail(c) == 0    # no buffer, so nothing remains once the waiting tasks have finished
+    end
+end
diff --git a/test/char.jl b/test/char.jl
index 279adb628ed17..615c31cfe44e5 100644
--- a/test/char.jl
+++ b/test/char.jl
@@ -2,6 +2,7 @@
 
 @testset "basic properties" begin
 
+    @test typemax(Char) == reinterpret(Char, typemax(UInt32))
     @test typemin(Char) == Char(0)
     @test ndims(Char) == 0
     @test getindex('a', 1) == 'a'
@@ -248,6 +249,7 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c)
 
 @testset "abstractchar" begin
     @test AbstractChar('x') === AbstractChar(UInt32('x')) === 'x'
+    @test convert(AbstractChar, 2.0) == Char(2)
 
     @test isascii(ASCIIChar('x'))
     @test ASCIIChar('x') < 'y'
@@ -255,6 +257,9 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c)
     @test ASCIIChar('x')^3 == "xxx"
     @test repr(ASCIIChar('x')) == "'x'"
     @test string(ASCIIChar('x')) == "x"
+    @test length(ASCIIChar('x')) == 1
+    @test !isempty(ASCIIChar('x'))
+    @test eltype(ASCIIChar) == ASCIIChar
     @test_throws MethodError write(IOBuffer(), ASCIIChar('x'))
     @test_throws MethodError read(IOBuffer('x'), ASCIIChar)
 end
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 21f313fdbbb34..e00aedffdd42e 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -28,12 +28,11 @@ const TESTNAMES = [
         "boundscheck", "error", "ambiguous", "cartesian", "osutils",
         "channels", "iostream", "secretbuffer", "specificity",
         "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
-        "smallarrayshrink", "opaque_closure", "filesystem", "download"
+        "smallarrayshrink", "opaque_closure", "filesystem", "download",
 ]
 
 """
-
-`tests, net_on, exit_on_error, seed = choosetests(choices)` selects a set of tests to be
+`(; tests, net_on, exit_on_error, seed) = choosetests(choices)` selects a set of tests to be
 run. `choices` should be a vector of test names; if empty or set to
 `["all"]`, all tests are selected.
 
@@ -41,7 +40,7 @@ This function also supports "test collections": specifically, "linalg"
  refers to collections of tests in the correspondingly-named
 directories.
 
-Upon return:
+The function returns a named tuple with the following elements:
   - `tests` is a vector of fully-expanded test names,
   - `net_on` is true if networking is available (required for some tests),
   - `exit_on_error` is true if an error in one test should cancel
@@ -49,50 +48,93 @@ Upon return:
   - `seed` is a seed which will be used to initialize the global RNG for each
     test to be run.
 
-Three options can be passed to `choosetests` by including a special token
+Several options can be passed to `choosetests` by including a special token
 in the `choices` argument:
    - "--skip", which makes all tests coming after be skipped,
    - "--exit-on-error" which sets the value of `exit_on_error`,
    - "--seed=SEED", which sets the value of `seed` to `SEED`
      (parsed as an `UInt128`); `seed` is otherwise initialized randomly.
      This option can be used to reproduce failed tests.
+   - "--help", which prints a help message and then skips all tests.
+   - "--help-list", which prints the options computed without running them.
 """
 function choosetests(choices = [])
     tests = []
-    skip_tests = []
+    skip_tests = Set()
     exit_on_error = false
     use_revise = false
     seed = rand(RandomDevice(), UInt128)
+    ci_option_passed = false
+    dryrun = false
 
     for (i, t) in enumerate(choices)
         if t == "--skip"
-            skip_tests = choices[i + 1:end]
+            union!(skip_tests, choices[i + 1:end])
             break
         elseif t == "--exit-on-error"
             exit_on_error = true
         elseif t == "--revise"
             use_revise = true
         elseif startswith(t, "--seed=")
-            seed = parse(UInt128, t[8:end])
+            seed = parse(UInt128, t[(length("--seed=") + 1):end])
+        elseif t == "--ci"
+            ci_option_passed = true
+        elseif t == "--help-list"
+            dryrun = true
+        elseif t == "--help"
+            println("""
+                USAGE: ./julia runtests.jl [options] [tests]
+                OPTIONS:
+                  --exit-on-error      : stop tests immediately when a test group fails
+                  --help               : prints this help message
+                  --help-list          : prints the options computed without running them
+                  --revise             : load Revise
+                  --seed=<SEED>        : set the initial seed for all testgroups (parsed as a UInt128)
+                  --skip <NAMES>...    : skip test or collection tagged with <NAMES>
+                TESTS:
+                  Can be special tokens, such as "all", "unicode", "stdlib", the names of stdlib \
+                  modules, or the names of any file in the TESTNAMES array (defaults to "all").
+
+                  Or prefix a name with `-` (such as `-core`) to skip a particular test.
+                """)
+            return (; tests = [],
+                      net_on = false,
+                      exit_on_error = false,
+                      use_revise = false,
+                      seed = UInt128(0))
+        elseif startswith(t, "--")
+            error("unknown option: $t")
+        elseif startswith(t, "-")
+            push!(skip_tests, t[2:end])
         else
             push!(tests, t)
         end
     end
 
-    if tests == ["all"] || isempty(tests)
-        tests = TESTNAMES
+    unhandled = copy(skip_tests)
+
+    requested_all     = "all"     in tests
+    requested_default = "default" in tests
+    if isempty(tests) || requested_all || requested_default
+        append!(tests, TESTNAMES)
     end
+    filter!(x -> x != "all",     tests)
+    filter!(x -> x != "default", tests)
 
     function filtertests!(tests, name, files=[name])
        flt = x -> (x != name && !(x in files))
        if name in skip_tests
            filter!(flt, tests)
+           pop!(unhandled, name)
        elseif name in tests
            filter!(flt, tests)
            prepend!(tests, files)
        end
     end
 
+    explicit_pkg            = "Pkg"            in tests
+    explicit_libgit2_online = "LibGit2/online" in tests
+
     filtertests!(tests, "unicode", ["unicode/utf8"])
     filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util",
                    "strings/io", "strings/types"])
@@ -108,22 +150,40 @@ function choosetests(choices = [])
     if startswith(string(Sys.ARCH), "arm")
         # Remove profile from default tests on ARM since it currently segfaults
         # Allow explicitly adding it for testing
-        @warn "Skipping Profile tests"
+        @warn "Skipping Profile tests because the architecture is ARM"
         filter!(x -> (x != "Profile"), tests)
     end
 
-    net_required_for = ["download", "Sockets", "LibGit2", "LibCURL", "Downloads",
-                        "Artifacts", "LazyArtifacts"]
+    net_required_for = [
+        "Artifacts",
+        "Downloads",
+        "LazyArtifacts",
+        "LibCURL",
+        "LibGit2",
+        "Sockets",
+        "download",
+    ]
     net_on = true
-    try
-        ipa = getipaddr()
-    catch
-        @warn "Networking unavailable: Skipping tests [" * join(net_required_for, ", ") * "]"
-        net_on = false
-    end
-
-    if !net_on
-        filter!(!in(net_required_for), tests)
+    JULIA_TEST_NETWORKING_AVAILABLE = get(ENV, "JULIA_TEST_NETWORKING_AVAILABLE", "") |>
+                                      strip |>
+                                      lowercase |>
+                                      s -> tryparse(Bool, s) |>
+                                      x -> x === true
+    # If the `JULIA_TEST_NETWORKING_AVAILABLE` environment variable is set to `true`, we
+    # always set `net_on` to `true`.
+    # Otherwise, we set `net_on` to true if and only if networking is actually available.
+    if !JULIA_TEST_NETWORKING_AVAILABLE
+        try
+            ipa = getipaddr()
+        catch
+            if ci_option_passed
+                @error("Networking unavailable, but `--ci` was passed")
+                rethrow()
+            end
+            net_on = false
+            @warn "Networking unavailable: Skipping tests [" * join(net_required_for, ", ") * "]"
+            filter!(!in(net_required_for), tests)
+        end
     end
 
     if ccall(:jl_running_on_valgrind,Cint,()) != 0 && "rounding" in tests
@@ -131,10 +191,9 @@ function choosetests(choices = [])
         filter!(x -> x != "rounding", tests)
     end
 
+    filter!(!in(tests), unhandled)
     filter!(!in(skip_tests), tests)
 
-    explicit_pkg3    =  "Pkg/pkg"       in tests
-    explicit_libgit2 =  "LibGit2/online" in tests
     new_tests = String[]
     for test in tests
         if test in STDLIBS
@@ -150,11 +209,31 @@ function choosetests(choices = [])
     end
     filter!(x -> (x != "stdlib" && !(x in STDLIBS)) , tests)
     append!(tests, new_tests)
-    explicit_pkg3    || filter!(x -> x != "Pkg/pkg",       tests)
-    explicit_libgit2 || filter!(x -> x != "LibGit2/online", tests)
+
+    requested_all || explicit_pkg            || filter!(x -> x != "Pkg",            tests)
+    requested_all || explicit_libgit2_online || filter!(x -> x != "LibGit2/online", tests)
 
     # Filter out tests from the test groups in the stdlibs
+    filter!(!in(tests), unhandled)
     filter!(!in(skip_tests), tests)
 
-    tests, net_on, exit_on_error, use_revise, seed
+    if !isempty(unhandled)
+        @warn "Not skipping tests: $(join(unhandled, ", "))"
+    end
+
+    if dryrun
+        print("Tests enabled to run:")
+        foreach(t -> print("\n  ", t), tests)
+        if !isempty(skip_tests)
+            print("\n\nTests skipped:")
+            foreach(t -> print("\n  ", t), skip_tests)
+        end
+        print("\n")
+        exit_on_error && (print("\nwith option "); printstyled("exit_on_error", bold=true))
+        use_revise && (print("\nwith option "); printstyled("use_revise", bold=true); print(" (Revise.jl)"))
+        print("\n\n")
+        empty!(tests)
+    end
+
+    return (; tests, net_on, exit_on_error, use_revise, seed)
 end
diff --git a/test/clangsa/GCPushPop.cpp b/test/clangsa/GCPushPop.cpp
index a992630291bb5..f8dcfdafa5aa9 100644
--- a/test/clangsa/GCPushPop.cpp
+++ b/test/clangsa/GCPushPop.cpp
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker -x c++ %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s
 
 #include "julia.h"
 
diff --git a/test/clangsa/ImplicitAtomicsTest.c b/test/clangsa/ImplicitAtomicsTest.c
new file mode 100644
index 0000000000000..2ad1e0b5f1016
--- /dev/null
+++ b/test/clangsa/ImplicitAtomicsTest.c
@@ -0,0 +1,168 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// RUN-TODO: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomics2Plugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s
+// RUN-TODO: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomics2Plugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s
+// RUN: clang --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libImplicitAtomicsPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.ImplicitAtomics --analyzer-no-default-checks -x c -std=c11 %s -v
+// RUN: clang --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libImplicitAtomicsPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -Xclang -analyzer-checker=core,julia.ImplicitAtomics --analyzer-no-default-checks -x c++ -std=c++11 %s -v
+
+#include "julia_atomics.h"
+
+_Atomic(int) x, *px;
+struct Atomic_xy_t {
+    _Atomic(int) x;
+    _Atomic(int) *px;
+    int y;
+} y, *py;
+_Atomic(int) z[2];
+
+
+// jwn: add tests for casts, and *py = y;
+
+void hiddenAtomics(void) {
+    // CHECK-NOT: [[@LINE+1]]
+    px = &x;
+    // CHECK-NOT: [[@LINE+1]]
+    py = &y;
+    // CHECK-NOT: [[@LINE+1]]
+    y.px = &y.x;
+    // CHECK: [[@LINE+1]]:7: warning: Implicit Atomic seq_cst synchronization
+    ++x; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:7: warning: Implicit Atomic seq_cst synchronization
+    --x; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x++; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x--; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x += 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x -= 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+2]]:5:
+    // CHECK-C: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x *= 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x = // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:9: warning: Implicit Atomic seq_cst synchronization
+        x; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+#endif
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x = 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x + 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+    // CHECK: [[@LINE+1]]:8: warning: Implicit Atomic seq_cst synchronization
+    ++*px; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:8: warning: Implicit Atomic seq_cst synchronization
+    --*px; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-NOT: [[@LINE+1]]
+    px++;
+    // CHECK-NOT: [[@LINE+1]]
+    px--;
+    // CHECK: [[@LINE+1]]:10: warning: Implicit Atomic seq_cst synchronization
+    1 + *px++; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:10: warning: Implicit Atomic seq_cst synchronization
+    1 + *px--; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:7: warning: Implicit Atomic seq_cst synchronization
+    (*px)++; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:7: warning: Implicit Atomic seq_cst synchronization
+    (*px)--; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px += 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px -= 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+2]]
+    // CHECK-C: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px *= 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px = // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:9: warning: Implicit Atomic seq_cst synchronization
+        x; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x = // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:10: warning: Implicit Atomic seq_cst synchronization
+        *px; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+#endif
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px = 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px + 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+    // CHECK-NOT: [[@LINE+1]]
+    *(int*)&x = 3;
+    // CHECK-NOT: [[@LINE+1]]
+    *(int*)px = 3;
+
+    // CHECK-NOT: [[@LINE+1]]
+    y.y = 2;
+    // CHECK-NOT: [[@LINE+1]]
+    py->y = 2;
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+1]]
+    *py = // TODO
+        y; // TODO
+    y = // TODO
+       *py; // TODO
+#endif
+    // CHECK: [[@LINE+1]]:22: warning: Implicit Atomic seq_cst synchronization
+    *(_Atomic(int)*)&y.y = 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:22: warning: Implicit Atomic seq_cst synchronization
+    *(_Atomic(int)*)&py->y = 2; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    y.x = 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *y.px = 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+2]]
+    // CHECK-C: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x = // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:13: warning: Implicit Atomic seq_cst synchronization
+        py->x; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    x = // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-C: [[@LINE+1]]:10: warning: Implicit Atomic seq_cst synchronization
+        *py->px; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+#endif
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    py->x = 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *py->px = 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+    // CHECK: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    z[1] = 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *z = 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *z += 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+#ifdef __cplusplus // check initialization / finalization
+    // CHECK-NOT: [[@LINE+1]]
+    _Atomic(int) lx{2};
+    // CHECK-CXX: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    lx = 3; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-CXX: [[@LINE+1]]:5: warning: Implicit Atomic seq_cst synchronization
+    lx += 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+
+    // CHECK-NOT: [[@LINE+1]]
+    struct large_type { int x[16]; };
+    // CHECK-NOT: [[@LINE+1]]
+    auto *ly = new std::atomic<struct large_type>();
+    // CHECK-CXX: [[@LINE+1]]:6: warning: Implicit Atomic seq_cst synchronization
+    *ly =    // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-NOT: [[@LINE+1]]
+        ly->load();
+    // CHECK-CXX: [[@LINE+1]]:28: warning: Implicit Atomic seq_cst synchronization
+    struct large_type a = *ly; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    // CHECK-NOT: [[@LINE+1]]
+    delete ly;
+
+#if 0 // enable for C++2a
+    std::atomic_ref<int> lz(*(int*)px);
+    lz = 3; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+    lz += 1; // expected-warning{{Implicit Atomic seq_cst synchronization}} expected-note{{Implicit Atomic seq_cst synchronization}}
+#endif
+#endif
+}
diff --git a/test/clangsa/Makefile b/test/clangsa/Makefile
index 850f9ea76985a..3bebd45c9a5a6 100644
--- a/test/clangsa/Makefile
+++ b/test/clangsa/Makefile
@@ -3,11 +3,11 @@ JULIAHOME := $(abspath $(SRCDIR)/../..)
 BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 
-check: $(SRCDIR)
+check: .
 
 TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.c) $(wildcard $(SRCDIR)/*.cpp))
 
-$(SRCDIR) $(TESTS):
+. $(TESTS):
 	@$(MAKE) -C $(BUILDDIR)/../../src $(build_includedir)/julia/julia_version.h
 	@$(MAKE) -C $(BUILDDIR)/../../src clangsa
 	PATH=$(build_bindir):$(build_depsbindir):$$PATH \
@@ -17,6 +17,6 @@ $(SRCDIR) $(TESTS):
 	CPPFLAGS_FLAGS="${CPPFLAGS_FLAGS}" \
 	CFLAGS_FLAGS="${CFLAGS_FLAGS}" \
 	CXXFLAGS_FLAGS="${CXXFLAGS_FLAGS}" \
-	$(build_depsbindir)/lit/lit.py -v $@
+	$(build_depsbindir)/lit/lit.py -v $(addprefix $(SRCDIR)/,$@)
 
-.PHONY: $(TESTS) $(SRCDIR) check all
+.PHONY: $(TESTS) check all .
diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c
index 78dcc195d59ce..f0b32c54bc7b8 100644
--- a/test/clangsa/MissingRoots.c
+++ b/test/clangsa/MissingRoots.c
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
 
 #include "julia.h"
 #include "julia_internal.h"
diff --git a/test/clangsa/lit.cfg.py b/test/clangsa/lit.cfg.py
index 5790eab812e9c..bb48f0b891acf 100644
--- a/test/clangsa/lit.cfg.py
+++ b/test/clangsa/lit.cfg.py
@@ -14,8 +14,6 @@
     platform.system() == 'Windows' else '.so'))
 config.substitutions.append(("%julia_home", os.path.join(os.path.dirname(__file__), "../..")))
 
-path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH']))
-config.environment['PATH'] = path
 config.environment['HOME'] = "/tmp"
 config.environment['CLANGSA_FLAGS'] = os.environ.get('CLANGSA_FLAGS', "")
 config.environment['CLANGSA_CXXFLAGS'] = os.environ.get('CLANGSA_CXXFLAGS', "")
diff --git a/test/client.jl b/test/client.jl
index f917e45fb412d..195743b1d6208 100644
--- a/test/client.jl
+++ b/test/client.jl
@@ -43,3 +43,12 @@ end
     err_str = String(take!(errio))
     @test occursin(nested_error_pattern, err_str)
 end
+
+@testset "display_error(io, er, bt) works" begin
+    errio = IOBuffer()
+    Base.display_error(errio, ErrorException, [])
+    err_str = String(take!(errio))
+    @test occursin(r"""
+        ERROR: ErrorException
+        """s, err_str)
+end
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index c5e82b681a3c4..6e06f12ef4fe0 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -93,6 +93,26 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test v[2] == "1"
         @test isempty(v[3])
     end
+
+    let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options", "HOME" => homedir()))
+        @test v[1]
+        @test contains(v[2], r"print-options + = 1")
+        @test contains(v[2], r"combiner-store-merge-dependence-limit + = 4")
+        @test contains(v[2], r"enable-tail-merge + = 2")
+        @test isempty(v[3])
+    end
+    let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -combiner-store-merge-dependence-limit=6", "HOME" => homedir()))
+        @test v[1]
+        @test contains(v[2], r"print-options + = 1")
+        @test contains(v[2], r"combiner-store-merge-dependence-limit + = 6")
+        @test contains(v[2], r"enable-tail-merge + = 1")
+        @test isempty(v[3])
+    end
+    let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir()))
+        @test !v[1]
+        @test isempty(v[2])
+        @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!"
+    end
 end
 
 let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
@@ -119,7 +139,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # handling of @projectname in --project and JULIA_PROJECT
     let expanded = abspath(Base.load_path_expand("@foo"))
         @test expanded == readchomp(`$exename --project='@foo' -e 'println(Base.active_project())'`)
-        @test expanded == readchomp(setenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@foo", "HOME" => homedir()))
+        @test expanded == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@foo", "HOME" => homedir()))
     end
 
     # --quiet, --banner
@@ -599,7 +619,7 @@ end
 
 
 # test error handling code paths of running --sysimage
-let exename = Base.julia_cmd()
+let exename = `$(Base.julia_cmd().exec[1]) -t 1`
     sysname = unsafe_string(Base.JLOptions().image_file)
     for nonexist_image in (
             joinpath(@__DIR__, "nonexistent"),
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index cc6095b867d41..3fcb9f37c5b47 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -4,6 +4,7 @@
 
 using Random
 using InteractiveUtils
+using Libdl
 
 const opt_level = Base.JLOptions().opt_level
 const coverage = (Base.JLOptions().code_coverage > 0) || (Base.JLOptions().malloc_log > 0)
@@ -350,7 +351,7 @@ struct Const{T<:Array}
 end
 
 @eval Base.getindex(A::Const, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)
-@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =  (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =  (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
 
 function foo31018!(a, b)
     @aliasscope for i in eachindex(a, b)
@@ -594,6 +595,23 @@ f41438(y) = y[].x
 @test f41438(Ref{A41438}(A41438(C_NULL))) === C_NULL
 @test f41438(Ref{B41438}(B41438(C_NULL))) === C_NULL
 
+const S41438 = Pair{Any, Ptr{T}} where T
+g41438() = Array{S41438,1}(undef,1)[1].first
+get_llvm(g41438, ()); # cause allocation of layout
+@test S41438.body.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438.body)
+@test S41438{Int}.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438{Int})
+
+
+# issue #43303
+struct A43303{T}
+    x::Pair{Ptr{T},Ptr{T}}
+end
+@test A43303.body.layout != C_NULL
+@test isbitstype(A43303{Int})
+@test A43303.body.types[1].layout != C_NULL
+
 # issue #41157
 f41157(a, b) = a[1] = b[1]
 @test_throws BoundsError f41157(Tuple{Int}[], Tuple{Union{}}[])
@@ -629,3 +647,47 @@ t41096 = Term41096{:t}(Modulate41096(:t, false))
 U41096 = Term41096{:U}(Modulate41096(:U, false))
 
 @test !newexpand41096((t=t41096, μ=μ41096, U=U41096), :U)
+
+# test that we can start julia with libjulia-codegen removed; PR #41936
+mktempdir() do pfx
+    cp(dirname(Sys.BINDIR), pfx; force=true)
+    libpath = relpath(dirname(dlpath("libjulia-codegen")), dirname(Sys.BINDIR))
+    libs_deleted = 0
+    for f in filter(f -> startswith(f, "libjulia-codegen"), readdir(joinpath(pfx, libpath)))
+        rm(joinpath(pfx, libpath, f); force=true, recursive=true) # readdir yields bare names: delete inside the copy, not relative to cwd
+        libs_deleted += 1
+    end
+    @test libs_deleted > 0
+    @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'println("no codegen!")'`) == "no codegen!"
+end
+
+# issue #42645
+mutable struct A42645{T}
+    x::Bool
+    function A42645(a::Vector{T}) where T
+        r = new{T}()
+        r.x = false
+        return r
+    end
+end
+mutable struct B42645{T}
+  y::A42645{T}
+end
+x42645 = 1
+function f42645()
+  res = B42645(A42645([x42645]))
+  res.y = A42645([x42645])
+  res.y.x = true
+  res
+end
+@test ((f42645()::B42645).y::A42645{Int}).x
+
+# issue #43123
+@noinline cmp43123(a::Some, b::Some) = something(a) === something(b)
+@noinline cmp43123(a, b) = a[] === b[]
+@test cmp43123(Some{Function}(+), Some{Union{typeof(+), typeof(-)}}(+))
+@test !cmp43123(Some{Function}(+), Some{Union{typeof(+), typeof(-)}}(-))
+@test cmp43123(Ref{Function}(+), Ref{Union{typeof(+), typeof(-)}}(+))
+@test !cmp43123(Ref{Function}(+), Ref{Union{typeof(+), typeof(-)}}(-))
+@test cmp43123(Function[+], Union{typeof(+), typeof(-)}[+])
+@test !cmp43123(Function[+], Union{typeof(+), typeof(-)}[-])
diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl
index 5d97f4f6542b7..10fff8d90ff24 100644
--- a/test/compiler/contextual.jl
+++ b/test/compiler/contextual.jl
@@ -46,6 +46,7 @@ module MiniCassette
         # Insert one SSAValue for every argument statement
         prepend!(code, [Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
         prepend!(ci.codelocs, [0 for i = 1:nargs])
+        prepend!(ci.ssaflags, [0x00 for i = 1:nargs])
         ci.ssavaluetypes += nargs
         function map_slot_number(slot)
             if slot == 1
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 008e6ff0d6997..5803a5aee80d1 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -44,6 +44,45 @@ let t = Tuple{Ref{T},T,T} where T, c = Tuple{Ref, T, T} where T # #36407
     @test t <: Core.Compiler.limit_type_size(t, c, Union{}, 1, 100)
 end
 
+# obtain Vararg with 2 undefined fields
+let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tuple{Tuple{Vararg{Any, N}}} where N)[2][1]
+    @test Core.Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple
+end
+
+# issue #42835
+@test !Core.Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Int}}}, Type{Type{Int}}, Core.svec(Type{Type{Int}}), 1, 1, 1)
+
+@test  Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{ComplexF32}}}, Type{Type{ComplexF32}}, Core.svec(Type{ComplexF32}), 1, 1, 1)
+
+# n.b. Type{Type{Union{}}} === Type{Core.TypeofBottom}
+@test !Core.Compiler.type_more_complex(Type{Union{}}, Any, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{Type{Union{}}}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Type{Type{Union{}}}, Core.svec(Type{Type{Union{}}}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Type{Union{}}}}}, Type{Type{Type{Union{}}}}, Core.svec(Type{Type{Type{Union{}}}}), 1, 1, 1)
+
+@test !Core.Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 0, 1, 1)
+@test_broken Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+
+
 let # 40336
     t = Type{Type{Int}}
     c = Type{Int}
@@ -236,6 +275,15 @@ barTuple2() = fooTuple{tuple(:y)}()
           Dict{Int64,Tuple{UnitRange{Int64},UnitRange{Int64}}},
           Core.Compiler.Const(:vals)) == Array{Tuple{UnitRange{Int64},UnitRange{Int64}},1}
 
+# assert robustness of `getfield_tfunc`
+struct GetfieldRobustness
+    field::String
+end
+@test Base.return_types((GetfieldRobustness,String,)) do obj, s
+    t = (10, s) # to form `PartialStruct`
+    getfield(obj, t)
+end |> only === Union{}
+
 # issue #12476
 function f12476(a)
     (k, v) = a
@@ -1105,6 +1153,11 @@ end
     @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:y)) === Const(true)
     @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:z)) === Const(false)
 end
+struct UnionIsdefinedA; x; end
+struct UnionIsdefinedB; x; end
+@test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:x)) === Const(true)
+@test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:y)) === Const(false)
+@test isdefined_tfunc(Union{UnionIsdefinedA,Nothing}, Const(:x)) === Bool
 
 @noinline map3_22347(f, t::Tuple{}) = ()
 @noinline map3_22347(f, t::Tuple) = (f(t[1]), map3_22347(f, Base.tail(t))...)
@@ -1524,7 +1577,6 @@ let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}),
     @test opt.src.ssavaluetypes isa Vector{Any}
     @test !opt.src.inferred
     @test opt.mod === Base
-    @test opt.nargs == 3
 end
 
 # approximate static parameters due to unions
@@ -1789,6 +1841,16 @@ end
         0
     end == Any[Int]
 
+    # slot as SSA
+    isaT(x, T) = isa(x, T)
+    @test Base.return_types((Any,Int)) do a, b
+        c = a
+        if isaT(c, typeof(b))
+            return c # c::Int
+        end
+        return 0
+    end |> only === Int
+
     # with Base functions
     @test Base.return_types((Any,)) do a
         Base.Fix2(isa, Int)(a) && return a # a::Int
@@ -1806,6 +1868,18 @@ end
         Meta.isexpr(x, :call) && return x # x::Expr
         return nothing
     end == Any[Union{Nothing,Expr}]
+
+    # handle the edge case: a method returning an `InterConditional` object as a plain value must not narrow `x`
+    let ts = @eval Module() begin
+            edgecase(_) = $(Core.Compiler.InterConditional(2, Int, Any))
+            # create cache
+            Base.return_types(edgecase, (Any,))
+            Base.return_types((Any,)) do x
+                edgecase(x) ? x : nothing # ::Any
+            end
+        end
+        @test ts == Any[Any]
+    end
 end
 
 @testset "branching on conditional object" begin
@@ -1823,16 +1897,39 @@ end
         return c, d # ::Tuple{Int,Int}
     end == Any[Tuple{Int,Int}]
 
-    # shouldn't use the old constraint when the subject of condition has changed
+    # should invalidate old constraint when the subject of condition has changed
     @test Base.return_types((Union{Nothing,Int},)) do a
-        b = a === nothing
-        c = b ? 0 : a # c::Int
+        cond = a === nothing
+        r1 = cond ? 0 : a # r1::Int
         a = 0
-        d = b ? a : 1 # d::Int, not d::Union{Nothing,Int}
-        return c, d # ::Tuple{Int,Int}
+        r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
+        return r1, r2 # ::Tuple{Int,Int}
     end == Any[Tuple{Int,Int}]
 end
 
+# https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
+# `PartialStruct` shouldn't wrap `Conditional`
+let M = Module()
+    @eval M begin
+        struct BePartialStruct
+            val::Int
+            cond # untyped field; the test stores the result of `a === nothing` here
+        end
+    end
+
+    rt = @eval M begin
+        Base.return_types((Union{Nothing,Int},)) do a
+            cond = a === nothing
+            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
+            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+            a = $(gensym(:anyvar))::Any
+            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
+        end |> only
+    end
+    @test rt == Tuple{Union{Nothing,Int},Any}
+end
+
 @testset "conditional constraint propagation from non-`Conditional` object" begin
     @test Base.return_types((Bool,)) do b
         if b
@@ -1851,6 +1948,35 @@ end
     end == Any[Union{Bool,Nothing}]
 end
 
+@testset "`from_interprocedural!`: translate inter-procedural information" begin
+    # TODO come up with a test case to check the functionality of `collect_limitations!`
+    # one heavy test case would be to use https://github.com/aviatesk/JET.jl and
+    # check `julia /path/to/JET/jet /path/to/JET/src/JET.jl` doesn't result in errors
+    # because of nested `LimitedAccuracy`es
+
+    # `InterConditional` handling: `abstract_invoke`
+    ispositive(a) = isa(a, Int) && a > 0
+    @test Base.return_types((Any,)) do a
+        if Base.@invoke ispositive(a::Any)
+            return a
+        end
+        return 0
+    end |> only == Int
+    # the `fargs = nothing` edge case
+    @test Base.return_types((Any,)) do a
+        Core.Compiler.return_type(invoke, Tuple{typeof(ispositive), Type{Tuple{Any}}, Any})
+    end |> only == Type{Bool}
+
+    # `InterConditional` handling: `abstract_call_opaque_closure`
+    @test Base.return_types((Any,)) do a
+        f = Base.Experimental.@opaque a -> isa(a, Int) && a > 0
+        if f(a)
+            return a
+        end
+        return 0
+    end |> only === Int
+end
+
 function f25579(g)
     h = g[]
     t = (h === nothing)
@@ -1952,6 +2078,67 @@ function _g_ifelse_isa_()
 end
 @test Base.return_types(_g_ifelse_isa_, ()) == [Int]
 
+@testset "Conditional forwarding" begin
+    # forward `Conditional` if it conveys a constraint on any other argument
+    ifelselike(cnd, x, y) = cnd ? x : y
+
+    @test Base.return_types((Any,Int,)) do x, y
+        ifelselike(isa(x, Int), x, y)
+    end |> only == Int
+
+    # should work nicely with union-split
+    @test Base.return_types((Union{Int,Nothing},)) do x
+        ifelselike(isa(x, Int), x, 0)
+    end |> only == Int
+
+    @test Base.return_types((Any,Int)) do x, y
+        ifelselike(!isa(x, Int), y, x)
+    end |> only == Int
+
+    @test Base.return_types((Any,Int)) do x, y
+        a = ifelselike(x === 0, x, 0) # ::Const(0)
+        if a == 0
+            return y
+        else
+            return nothing # dead branch
+        end
+    end |> only == Int
+
+    # pick up the first if there are multiple constrained arguments
+    @test Base.return_types((Any,)) do x
+        ifelselike(isa(x, Int), x, x)
+    end |> only == Any
+
+    # just propagate multiple constraints
+    ifelselike2(cnd1, cnd2, x, y, z) = cnd1 ? x : cnd2 ? y : z
+    @test Base.return_types((Any,Any)) do x, y
+        ifelselike2(isa(x, Int), isa(y, Int), x, y, 0)
+    end |> only == Int
+
+    # work with `invoke`
+    @test Base.return_types((Any,Any)) do x, y
+        Base.@invoke ifelselike(isa(x, Int), x, y::Int)
+    end |> only == Int
+
+    # don't be confused with vararg method
+    vacond(cnd, va...) = cnd ? va : 0
+    @test Base.return_types((Any,)) do x
+        # at runtime we will see `va::Tuple{Tuple{Int,Int}, Tuple{Int,Int}}`
+        vacond(isa(x, Tuple{Int,Int}), x, x)
+    end |> only == Union{Int,Tuple{Any,Any}}
+
+    # demonstrate extra constraint propagation for Base.ifelse
+    @test Base.return_types((Any,Int,)) do x, y
+        ifelse(isa(x, Int), x, y)
+    end |> only == Int
+
+    # slot as SSA
+    @test Base.return_types((Any,Vector{Any})) do x, y
+        z = x
+        ifelselike(isa(z, Int), z, length(y))
+    end |> only === Int
+end
+
 # Equivalence of Const(T.instance) and T for singleton types
 @test Const(nothing) ⊑ Nothing && Nothing ⊑ Const(nothing)
 
@@ -2210,12 +2397,10 @@ code28279 = code_lowered(f28279, (Bool,))[1].code
 oldcode28279 = deepcopy(code28279)
 ssachangemap = fill(0, length(code28279))
 labelchangemap = fill(0, length(code28279))
-worklist = Int[]
 let i
     for i in 1:length(code28279)
         stmt = code28279[i]
         if isa(stmt, GotoIfNot)
-            push!(worklist, i)
             ssachangemap[i] = 1
             if i < length(code28279)
                 labelchangemap[i + 1] = 1
@@ -2672,7 +2857,7 @@ const DenseIdx = Union{IntRange,Integer}
     foo_26724((result..., length(r)), I...)
 @test @inferred(foo_26724((), 1:4, 1:5, 1:6)) === (4, 5, 6)
 
-# Non uniformity in expresions with PartialTypeVar
+# Non uniformity in expressions with PartialTypeVar
 @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar)
 let N = TypeVar(:N)
     @test Core.Compiler.apply_type_nothrow([Core.Compiler.Const(NTuple),
@@ -2864,6 +3049,21 @@ function symcmp36230(vec)
 end
 @test Base.return_types(symcmp36230, (Vector{Any},)) == Any[Bool]
 
+function foo42190(r::Union{Nothing,Int}, n::Int)
+    while r !== nothing && r < n
+        return r # `r::Int`
+    end
+    return n
+end
+@test Base.return_types(foo42190, (Union{Nothing, Int}, Int)) == Any[Int]
+function bar42190(r::Union{Nothing,Int}, n::Int)
+    while r === nothing || r < n
+        return n
+    end
+    return r # `r::Int`
+end
+@test Base.return_types(bar42190, (Union{Nothing, Int}, Int)) == Any[Int]
+
 # Issue #36531, double varargs in abstract_iteration
 f36531(args...) = tuple((args...)...)
 @test @inferred(f36531(1,2,3)) == (1,2,3)
@@ -2875,9 +3075,24 @@ partial_return_2(x) = Val{partial_return_1(x)[2]}
 
 @test Base.return_types(partial_return_2, (Int,)) == Any[Type{Val{1}}]
 
-# Precision of abstract_iteration
+# Soundness and precision of abstract_iteration
+f41839() = (1:100...,)
+@test NTuple{100,Int} <: only(Base.return_types(f41839, ())) <: Tuple{Vararg{Int}}
 f_splat(x) = (x...,)
 @test Base.return_types(f_splat, (Pair{Int,Int},)) == Any[Tuple{Int, Int}]
+@test Base.return_types(f_splat, (UnitRange{Int},)) == Any[Tuple{Vararg{Int}}]
+struct Itr41839_1 end # empty or infinite
+Base.iterate(::Itr41839_1) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_1, ::Nothing) = (nothing, nothing)
+@test Base.return_types(f_splat, (Itr41839_1,)) == Any[Tuple{}]
+struct Itr41839_2 end # empty or failing
+Base.iterate(::Itr41839_2) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_2, ::Nothing) = error()
+@test Base.return_types(f_splat, (Itr41839_2,)) == Any[Tuple{}]
+struct Itr41839_3 end
+Base.iterate(::Itr41839_3 ) = rand(Bool) ? nothing : (nothing, 1)
+Base.iterate(::Itr41839_3 , i) = i < 16 ? (i, i + 1) : nothing
+@test only(Base.return_types(f_splat, (Itr41839_3,))) <: Tuple{Vararg{Union{Nothing, Int}}}
 
 # issue #32699
 f32699(a) = (id = a[1],).id
@@ -3008,14 +3223,14 @@ end
 # Some very limited testing of timing the type inference (#37749).
 @testset "Core.Compiler.Timings" begin
     # Functions that call each other
-    @eval module M
+    @eval module M1
         i(x) = x+5
         i2(x) = x+2
         h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
         g(y::Integer, x) = h(Any[y]) + Int(x)
     end
     timing1 = time_inference() do
-        @eval M.g(2, 3.0)
+        @eval M1.g(2, 3.0)
     end
     @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
     # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
@@ -3027,11 +3242,11 @@ end
     @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
     # Test that inference has cached some of the Method Instances
     timing2 = time_inference() do
-        @eval M.g(2, 3.0)
+        @eval M1.g(2, 3.0)
     end
     @test length(flatten_times(timing2)) < length(flatten_times(timing1))
     # Printing of InferenceFrameInfo for mi.def isa Module
-    @eval module M
+    @eval module M2
         i(x) = x+5
         i2(x) = x+2
         h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
@@ -3041,7 +3256,7 @@ end
     timingmod = time_inference() do
         @eval @testset "Outer" begin
             @testset "Inner" begin
-                for i = 1:2 M.g(2, 3.0) end
+                for i = 1:2 M2.g(2, 3.0) end
             end
         end
     end
@@ -3128,9 +3343,9 @@ g38888() = S38888(Base.inferencebarrier(3), nothing)
 f_inf_error_bottom(x::Vector) = isempty(x) ? error(x[1]) : x
 @test Core.Compiler.return_type(f_inf_error_bottom, Tuple{Vector{Any}}) == Vector{Any}
 
-# @aggressive_constprop
+# @constprop :aggressive
 @noinline g_nonaggressive(y, x) = Val{x}()
-@noinline @Base.aggressive_constprop g_aggressive(y, x) = Val{x}()
+@noinline Base.@constprop :aggressive g_aggressive(y, x) = Val{x}()
 
 f_nonaggressive(x) = g_nonaggressive(x, 1)
 f_aggressive(x) = g_aggressive(x, 1)
@@ -3140,6 +3355,12 @@ f_aggressive(x) = g_aggressive(x, 1)
 @test Base.return_types(f_nonaggressive, Tuple{Int})[1] == Val
 @test Base.return_types(f_aggressive, Tuple{Int})[1] == Val{1}
 
+# @constprop :none -- the constant argument `true` must not be propagated into `g_noaggressive`
+@noinline Base.@constprop :none g_noaggressive(flag::Bool) = flag ? 1 : 1.0
+ftrue_noaggressive() = g_noaggressive(true)
+@test only(Base.return_types(ftrue_noaggressive, Tuple{})) == Union{Int,Float64} # both branches remain in the inferred return type
+
+
 function splat_lotta_unions()
     a = Union{Tuple{Int},Tuple{String,Vararg{Int}},Tuple{Int,Vararg{Int}}}[(2,)][1]
     b = Union{Int8,Int16,Int32,Int64,Int128}[1][1]
@@ -3340,11 +3561,10 @@ let
     ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes]
     sv = Core.Compiler.OptimizationState(mi, Core.Compiler.OptimizationParams(),
         Core.Compiler.NativeInterpreter())
-    ir = Core.Compiler.convert_to_ircode(ci, Core.Compiler.copy_exprargs(ci.code),
-        false, 2, sv)
-    ir = Core.Compiler.slot2reg(ir, ci, 2, sv)
+    ir = Core.Compiler.convert_to_ircode(ci, sv)
+    ir = Core.Compiler.slot2reg(ir, ci, sv)
     ir = Core.Compiler.compact!(ir)
-    Core.Compiler.replace_code_newstyle!(ci, ir, 3)
+    Core.Compiler.replace_code_newstyle!(ci, ir, 4)
     ci.ssavaluetypes = length(ci.code)
     @test any(x->isa(x, Core.PhiNode), ci.code)
     oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, false, Any, Any,
@@ -3395,3 +3615,224 @@ end
         x.x
     end) == Any[Int]
 end
+
+@testset "fieldtype for unions" begin # e.g. issue #40177
+    f40177(::Type{T}) where {T} = fieldtype(T, 1)
+    for T in [
+        Union{Tuple{Val}, Tuple{Tuple}},
+        Union{Base.RefValue{T}, Type{Int32}} where T<:Real,
+        Union{Tuple{Vararg{Symbol}}, Tuple{Float64, Vararg{Float32}}},
+    ]
+        @test @inferred(f40177(T)) == fieldtype(T, 1)
+    end
+end
+
+# issue #41908
+f41908(x::Complex{T}) where {String<:T<:String} = 1
+g41908() = f41908(Any[1][1])
+@test only(Base.return_types(g41908, ())) <: Int
+
+# issue #42022
+let x = Tuple{Int,Any}[
+        #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1))
+        #= 2=# (0, Expr(:enter, 18))
+        #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0))
+        #= 4=# (2, Expr(:enter, 12))
+        #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3'))
+        #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9))
+        #= 7=# (4, Expr(:leave, 2))
+        #= 8=# (0, Core.ReturnNode(1))
+        #= 9=# (4, Expr(:call, GlobalRef(Main, :throw)))
+        #=10=# (4, Expr(:leave, 1))
+        #=11=# (2, Core.GotoNode(16))
+        #=12=# (4, Expr(:leave, 1))
+        #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception)))
+        #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow)))
+        #=15=# (2, Expr(:pop_exception, Core.SSAValue(4)))
+        #=16=# (2, Expr(:leave, 1))
+        #=17=# (0, Core.GotoNode(22))
+        #=18=# (2, Expr(:leave, 1))
+        #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception)))
+        #=20=# (0, nothing)
+        #=21=# (0, Expr(:pop_exception, Core.SSAValue(2)))
+        #=22=# (0, Core.ReturnNode(Core.SlotNumber(3)))
+    ]
+    handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet())
+    @test handler_at == first.(x)
+end
+
+@test only(Base.return_types((Bool,)) do y
+        x = 1
+        try
+            x = 2.0
+            try
+                x = '3'
+                y ? (return 1) : throw()
+            catch ex1
+                rethrow()
+            end
+        catch ex2
+            nothing
+        end
+        return x
+    end) === Union{Int, Float64, Char}
+
+# issue #42097
+struct Foo42097{F} end
+Foo42097(f::F, args) where {F} = Foo42097{F}()
+Foo42097(A) = Foo42097(Base.inferencebarrier(+), Base.inferencebarrier(1)...)
+foo42097() = Foo42097([1]...)
+@test foo42097() isa Foo42097{typeof(+)}
+
+# eliminate unbound `TypeVar`s on `argtypes` construction
+let
+    a0(a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, va...) = nothing
+    method = only(methods(a0))
+    unbound = TypeVar(:Unbound, Integer)
+    specTypes = Tuple{typeof(a0),
+               # TypeVar
+        #=01=# Bound,                  # => Integer
+        #=02=# unbound,                # => Integer (invalid `TypeVar` widened beforehand)
+               # DataType
+        #=03=# Type{Bound},            # => Type{Bound} where Bound<:Integer
+        #=04=# Type{unbound},          # => Type
+        #=05=# Vector{Bound},          # => Vector{Bound} where Bound<:Integer
+        #=06=# Vector{unbound},        # => Any
+               # UnionAll
+        #=07=# Type{<:Bound},          # => Type{<:Bound} where Bound<:Integer
+        #=08=# Type{<:unbound},        # => Any
+               # Union
+        #=09=# Union{Nothing,Bound},   # => Union{Nothing,Bound} where Bound<:Integer
+        #=10=# Union{Nothing,unbound}, # => Any
+               # Vararg
+        #=va=# Bound, unbound,         # => Tuple{Integer,Integer} (invalid `TypeVar` widened beforehand)
+        } where Bound<:Integer
+    argtypes = Core.Compiler.most_general_argtypes(method, specTypes, true)
+    popfirst!(argtypes)
+    @test argtypes[1] == Integer
+    @test argtypes[2] == Integer
+    @test argtypes[3] == Type{Bound} where Bound<:Integer
+    @test argtypes[4] == Type
+    @test argtypes[5] == Vector{Bound} where Bound<:Integer
+    @test argtypes[6] == Any
+    @test argtypes[7] == Type{<:Bound} where Bound<:Integer
+    @test argtypes[8] == Any
+    @test argtypes[9] == Union{Nothing,Bound} where Bound<:Integer
+    @test argtypes[10] == Any
+    @test argtypes[11] == Tuple{Integer,Integer}
+end
+
+# make sure not to call `widenconst` on `TypeofVararg` objects
+@testset "unhandled Vararg" begin
+    struct UnhandledVarargCond
+        val::Bool
+    end
+    function Base.:+(a::UnhandledVarargCond, xs...)
+        if a.val
+            return nothing
+        else
+            s = 0
+            for x in xs
+                s += x
+            end
+            return s
+        end
+    end
+    @test Base.return_types((Vector{Int},)) do xs
+        +(UnhandledVarargCond(false), xs...)
+    end |> only === Int
+
+    @test (Base.return_types((Vector{Any},)) do xs
+        Core.kwfunc(xs...)
+    end; true)
+
+    @test Base.return_types((Vector{Vector{Int}},)) do xs
+        Tuple(xs...)
+    end |> only === Tuple{Vararg{Int}}
+end
+
+# issue #42646
+@test only(Base.return_types(getindex, (Array{undef}, Int))) >: Union{} # check that it does not throw
+
+# form PartialStruct for extra type information propagation
+struct FieldTypeRefinement{S,T}
+    s::S
+    t::T
+end
+@test Base.return_types((Int,)) do s
+    o = FieldTypeRefinement{Any,Int}(s, s)
+    o.s
+end |> only == Int
+@test Base.return_types((Int,)) do s
+    o = FieldTypeRefinement{Int,Any}(s, s)
+    o.t
+end |> only == Int
+@test Base.return_types((Int,)) do s
+    o = FieldTypeRefinement{Any,Any}(s, s)
+    o.s, o.t
+end |> only == Tuple{Int,Int}
+@test Base.return_types((Int,)) do a
+    s1 = Some{Any}(a)
+    s2 = Some{Any}(s1)
+    s2.value.value
+end |> only == Int
+
+# issue #42986
+@testset "narrow down `Union` using `isdefined` checks" begin
+    # basic functionality
+    @test Base.return_types((Union{Nothing,Core.CodeInstance},)) do x
+        if isdefined(x, :inferred)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Core.CodeInstance
+
+    @test Base.return_types((Union{Nothing,Core.CodeInstance},)) do x
+        if isdefined(x, :not_exist)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Union{}
+
+    # even when isdefined is malformed, we can filter out types with no fields
+    @test Base.return_types((Union{Nothing, Core.CodeInstance},)) do x
+        if isdefined(x, 5)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Core.CodeInstance
+
+    struct UnionNarrowingByIsdefinedA; x; end
+    struct UnionNarrowingByIsdefinedB; x; end
+    struct UnionNarrowingByIsdefinedC; x; end
+
+    # > 2 types in the union
+    @test  Base.return_types((Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB, UnionNarrowingByIsdefinedC},)) do x
+        if isdefined(x, :x)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB, UnionNarrowingByIsdefinedC}
+
+    # > 2 types in the union and some aren't defined
+    @test  Base.return_types((Union{UnionNarrowingByIsdefinedA, Core.CodeInstance, UnionNarrowingByIsdefinedC},)) do x
+        if isdefined(x, :x)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedC}
+
+    # should respect `Const` information still
+    @test Base.return_types((Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB},)) do x
+        if isdefined(x, :x)
+            return x
+        else
+            return nothing # dead branch
+        end
+    end |> only === Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB}
+end
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 00797304ce5c0..83780ca8b1ac5 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -172,8 +172,7 @@ function f_ifelse(x)
     b = ifelse(a, true, false)
     return b ? x + 1 : x
 end
-# 2 for now because the compiler leaves a GotoNode around
-@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
+@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
 
 # Test that inlining of _apply_iterate properly hits the inference cache
 @noinline cprop_inline_foo1() = (1, 1)
@@ -381,19 +380,29 @@ using Base.Experimental: @opaque
 f_oc_getfield(x) = (@opaque ()->x)()
 @test fully_eliminated(f_oc_getfield, Tuple{Int})
 
-# check if `x` is a statically-resolved call of a function whose name is `sym`
-isinvoke(@nospecialize(x), sym::Symbol) = isinvoke(x, mi->mi.def.name===sym)
-function isinvoke(@nospecialize(x), pred)
-    if Meta.isexpr(x, :invoke)
-        return pred(x.args[1]::Core.MethodInstance)
+import Core.Compiler: argextype, singleton_type
+const EMPTY_SPTYPES = Any[]
+
+code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::Core.CodeInfo
+get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
+
+# check if `x` is a dynamic call of a given function
+iscall(y) = @nospecialize(x) -> iscall(y, x)
+function iscall((src, f)::Tuple{Core.CodeInfo,Function}, @nospecialize(x))
+    return iscall(x) do @nospecialize x
+        singleton_type(argextype(x, src, EMPTY_SPTYPES)) === f
     end
-    return false
 end
-code_typed1(args...; kwargs...) = (first∘first)(code_typed(args...; kwargs...))::Core.CodeInfo
+iscall(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1])
+
+# check if `x` is a statically-resolved call of a function whose name is `sym`
+isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)
+isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x)
+isinvoke(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :invoke) && pred(x.args[1]::Core.MethodInstance)
 
 @testset "@inline/@noinline annotation before definition" begin
-    m = Module()
-    @eval m begin
+    M = Module()
+    @eval M begin
         @inline function _def_inline(x)
             # this call won't be resolved and thus will prevent inlining to happen if we don't
             # annotate `@inline` at the top of this function body
@@ -404,42 +413,34 @@ code_typed1(args...; kwargs...) = (first∘first)(code_typed(args...; kwargs...)
         def_noinline(x) = _def_noinline(x)
 
         # test that they don't conflict with other "before-definition" macros
-        @inline Base.@aggressive_constprop function _def_inline_noconflict(x)
+        @inline Base.@constprop :aggressive function _def_inline_noconflict(x)
             # this call won't be resolved and thus will prevent inlining to happen if we don't
             # annotate `@inline` at the top of this function body
             return unresolved_call(x)
         end
         def_inline_noconflict(x) = _def_inline_noconflict(x)
-        @noinline Base.@aggressive_constprop _def_noinline_noconflict(x) = x # obviously will be inlined otherwise
+        @noinline Base.@constprop :aggressive _def_noinline_noconflict(x) = x # obviously will be inlined otherwise
         def_noinline_noconflict(x) = _def_noinline_noconflict(x)
     end
 
-    let ci = code_typed1(m.def_inline, (Int,))
-        @test all(ci.code) do x
-            !isinvoke(x, :_def_inline)
-        end
+    let code = get_code(M.def_inline, (Int,))
+        @test all(!isinvoke(:_def_inline), code)
     end
-    let ci = code_typed1(m.def_noinline, (Int,))
-        @test any(ci.code) do x
-            isinvoke(x, :_def_noinline)
-        end
+    let code = get_code(M.def_noinline, (Int,))
+        @test any(isinvoke(:_def_noinline), code)
     end
     # test that they don't conflict with other "before-definition" macros
-    let ci = code_typed1(m.def_inline_noconflict, (Int,))
-        @test all(ci.code) do x
-            !isinvoke(x, :_def_inline_noconflict)
-        end
+    let code = get_code(M.def_inline_noconflict, (Int,))
+        @test all(!isinvoke(:_def_inline_noconflict), code)
     end
-    let ci = code_typed1(m.def_noinline_noconflict, (Int,))
-        @test any(ci.code) do x
-            isinvoke(x, :_def_noinline_noconflict)
-        end
+    let code = get_code(M.def_noinline_noconflict, (Int,))
+        @test any(isinvoke(:_def_noinline_noconflict), code)
     end
 end
 
 @testset "@inline/@noinline annotation within a function body" begin
-    m = Module()
-    @eval m begin
+    M = Module()
+    @eval M begin
         function _body_inline(x)
             @inline
             # this call won't be resolved and thus will prevent inlining to happen if we don't
@@ -471,33 +472,391 @@ end
         end
     end
 
-    let ci = code_typed1(m.body_inline, (Int,))
-        @test all(ci.code) do x
-            !isinvoke(x, :_body_inline)
-        end
+    let code = get_code(M.body_inline, (Int,))
+        @test all(!isinvoke(:_body_inline), code)
     end
-    let ci = code_typed1(m.body_noinline, (Int,))
-        @test any(ci.code) do x
-            isinvoke(x, :_body_noinline)
-        end
+    let code = get_code(M.body_noinline, (Int,))
+        @test any(isinvoke(:_body_noinline), code)
     end
     # test annotations for `do` blocks
-    let ci = code_typed1(m.do_inline, (Int,))
+    let code = get_code(M.do_inline, (Int,))
         # what we test here is that both `simple_caller` and the anonymous function that the
         # `do` block creates should inlined away, and as a result there is only the unresolved call
-        @test all(ci.code) do x
-            !isinvoke(x, :simple_caller) &&
-            !isinvoke(x, mi->startswith(string(mi.def.name), '#'))
+        @test all(code) do @nospecialize x
+            !isinvoke(:simple_caller, x) &&
+            !isinvoke(x) do mi
+                startswith(string(mi.def.name), '#')
+            end
         end
     end
-    let ci = code_typed1(m.do_noinline, (Int,))
+    let code = get_code(M.do_noinline, (Int,))
         # the anonymous function that the `do` block created shouldn't be inlined here
-        @test any(ci.code) do x
-            isinvoke(x, mi->startswith(string(mi.def.name), '#'))
+        @test any(code) do @nospecialize x
+            isinvoke(x) do mi
+                startswith(string(mi.def.name), '#')
+            end
+        end
+    end
+end
+
+@testset "callsite @inline/@noinline annotations" begin
+    M = Module()
+    @eval M begin
+        # this global variable prevents inference from folding everything into a constant, and/or the optimizer from inlining the call that accesses it
+        g = 0
+
+        @noinline noinlined_explicit(x) = x
+        force_inline_explicit(x)        = @inline noinlined_explicit(x)
+        force_inline_block_explicit(x)  = @inline noinlined_explicit(x) + noinlined_explicit(x)
+        noinlined_implicit(x)          = g
+        force_inline_implicit(x)       = @inline noinlined_implicit(x)
+        force_inline_block_implicit(x) = @inline noinlined_implicit(x) + noinlined_implicit(x)
+
+        @inline inlined_explicit(x)      = x
+        force_noinline_explicit(x)       = @noinline inlined_explicit(x)
+        force_noinline_block_explicit(x) = @noinline inlined_explicit(x) + inlined_explicit(x)
+        inlined_implicit(x)              = x
+        force_noinline_implicit(x)       = @noinline inlined_implicit(x)
+        force_noinline_block_implicit(x) = @noinline inlined_implicit(x) + inlined_implicit(x)
+
+        # test callsite annotations for constant-prop'ed calls
+
+        @noinline Base.@constprop :aggressive noinlined_constprop_explicit(a) = a+g
+        force_inline_constprop_explicit() = @inline noinlined_constprop_explicit(0)
+        Base.@constprop :aggressive noinlined_constprop_implicit(a) = a+g
+        force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0)
+
+        @inline Base.@constprop :aggressive inlined_constprop_explicit(a) = a+g
+        force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0)
+        @inline Base.@constprop :aggressive inlined_constprop_implicit(a) = a+g
+        force_noinline_constprop_implicit() = @noinline inlined_constprop_implicit(0)
+
+        @noinline notinlined(a) = a
+        function nested(a0, b0)
+            @noinline begin
+                a = @inline notinlined(a0) # this call should be inlined
+                b = notinlined(b0) # this call should NOT be inlined
+                return a, b
+            end
+        end
+    end
+
+    let code = get_code(M.force_inline_explicit, (Int,))
+        @test all(!isinvoke(:noinlined_explicit), code)
+    end
+    let code = get_code(M.force_inline_block_explicit, (Int,))
+        @test all(code) do @nospecialize x
+            !isinvoke(:noinlined_explicit, x) &&
+            !isinvoke(:(+), x)
+        end
+    end
+    let code = get_code(M.force_inline_implicit, (Int,))
+        @test all(!isinvoke(:noinlined_implicit), code)
+    end
+    let code = get_code(M.force_inline_block_implicit, (Int,))
+        @test all(!isinvoke(:noinlined_implicit), code) # check the callee this function actually calls
+    end
+
+    let code = get_code(M.force_noinline_explicit, (Int,))
+        @test any(isinvoke(:inlined_explicit), code)
+    end
+    let code = get_code(M.force_noinline_block_explicit, (Int,))
+        @test count(isinvoke(:inlined_explicit), code) == 2
+    end
+    let code = get_code(M.force_noinline_implicit, (Int,))
+        @test any(isinvoke(:inlined_implicit), code)
+    end
+    let code = get_code(M.force_noinline_block_implicit, (Int,))
+        @test count(isinvoke(:inlined_implicit), code) == 2
+    end
+
+    let code = get_code(M.force_inline_constprop_explicit)
+        @test all(!isinvoke(:noinlined_constprop_explicit), code)
+    end
+    let code = get_code(M.force_inline_constprop_implicit)
+        @test all(!isinvoke(:noinlined_constprop_implicit), code)
+    end
+
+    let code = get_code(M.force_noinline_constprop_explicit)
+        @test any(isinvoke(:inlined_constprop_explicit), code)
+    end
+    let code = get_code(M.force_noinline_constprop_implicit)
+        @test any(isinvoke(:inlined_constprop_implicit), code)
+    end
+
+    let code = get_code(M.nested, (Int,Int))
+        @test count(isinvoke(:notinlined), code) == 1
+    end
+end
+
+# force constant-prop' for `setproperty!`
+# https://github.com/JuliaLang/julia/pull/41882
+let code = @eval Module() begin
+        # if we don't force constant-prop', `T = fieldtype(Foo, ::Symbol)` will be union-split to
+        # `Union{Type{Any},Type{Int}}` and it will make `convert(T, nothing)` too costly
+        # and it leads to inlining failure
+        mutable struct Foo
+            val
+            _::Int
         end
+
+        function setter(xs)
+            for x in xs
+                x.val = nothing
+            end
+        end
+
+        $get_code(setter, (Vector{Foo},))
     end
+
+    @test !any(isinvoke(:setproperty!), code)
 end
 
 # Issue #41299 - inlining deletes error check in :>
 g41299(f::Tf, args::Vararg{Any,N}) where {Tf,N} = f(args...)
 @test_throws TypeError g41299(>:, 1, 2)
+
+# https://github.com/JuliaLang/julia/issues/42078
+# idempotency of callsite inlining
+function getcache(mi::Core.MethodInstance)
+    cache = Core.Compiler.code_cache(Core.Compiler.NativeInterpreter())
+    # look up the cache entry for `mi`; `get` is given `nothing` as the default,
+    # so the result can be returned as-is without re-checking it
+    return Core.Compiler.get(cache, mi, nothing)
+end
+@noinline f42078(a) = sum(sincos(a))
+let
+    ninlined = let
+        code = get_code((Int,)) do a
+            @inline f42078(a)
+        end
+        @test all(!isinvoke(:f42078), code)
+        length(code)
+    end
+
+    let # codegen will discard the source because it's not supposed to be inlined in general context
+        a = 42
+        f42078(a)
+    end
+    let # make sure to discard the inferred source
+        specs = collect(only(methods(f42078)).specializations)
+        mi = specs[findfirst(!isnothing, specs)]::Core.MethodInstance
+        codeinf = getcache(mi)::Core.CodeInstance
+        codeinf.inferred = nothing
+    end
+
+    let # inference should re-infer `f42078(::Int)` and we should get the same code
+        code = get_code((Int,)) do a
+            @inline f42078(a)
+        end
+        @test all(!isinvoke(:f42078), code)
+        @test ninlined == length(code)
+    end
+end
+
+begin
+    # more idempotency of callsite inlining
+    # -----------------------------------
+    # this test case requires forced constant propagation for callsite inlined function call,
+    # particularly, in the following example, the inliner will look up `+ₚ(::Point, ::Const(Point(2.25, 4.75)))`
+    # and the callsite inlining needs the corresponding constant result to exist in the local cache
+
+    struct Point
+        x::Float64
+        y::Float64
+    end
+    @noinline a::Point +ₚ b::Point = Point(a.x + b.x, a.y + b.y)
+
+    function compute(n)
+        a = Point(1.5, 2.5)
+        b = Point(2.25, 4.75)
+        for i in 0:(n-1)
+            a = @inline (a +ₚ b) +ₚ b
+        end
+        return a.x, a.y
+    end
+    let src = code_typed1(compute, (Int,))
+        @test count(isinvoke(:+ₚ), src.code) == 0 # successful inlining
+    end
+
+    function compute(n)
+        a = Point(1.5, 2.5)
+        b = Point(2.25, 4.75)
+        for i in 0:(n-1)
+            a = (a +ₚ b) +ₚ b
+        end
+        return a.x, a.y
+    end
+    let src = code_typed1(compute, (Int,))
+        @test count(isinvoke(:+ₚ), src.code) == 2 # no inlining
+    end
+
+    compute(42) # this execution should discard the cache of `+ₚ` since it's declared as `@noinline`
+
+    function compute(n)
+        a = Point(1.5, 2.5)
+        b = Point(2.25, 4.75)
+        for i in 0:(n-1)
+            @inline a = (a +ₚ b) +ₚ b
+        end
+        return a.x, a.y
+    end
+    let src = code_typed1(compute, (Int,))
+        @test count(isinvoke(:+ₚ), src.code) == 0 # no `invoke` of `+ₚ` remains, i.e. it is still inlined after `compute(42)` ran
+    end
+end
+
+# https://github.com/JuliaLang/julia/issues/42246
+@test mktempdir() do dir
+    cd(dir) do
+        code = quote
+            issue42246() = @noinline IOBuffer("a")
+            let
+                ci, rt = only(code_typed(issue42246))
+                if any(ci.code) do stmt
+                       Meta.isexpr(stmt, :invoke) &&
+                       stmt.args[1].def.name === nameof(IOBuffer)
+                   end
+                    exit(0)
+                else
+                    exit(1)
+               end
+            end
+        end |> string
+        cmd = `$(Base.julia_cmd()) --code-coverage=tmp.info -e $code`
+        success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr))
+    end
+end
+
+# Issue #42264 - crash on certain union splits
+let f(x) = (x...,)
+    # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating new `iterate` calls
+    # in inlining. For this particular case, we're relying on `iterate(::CartesianIndex)` throwing an error, such
+    # that the original apply call is not union-split, but the inserted `iterate` call is.
+    @test code_typed(f, Tuple{Union{Int64, CartesianIndex{1}, CartesianIndex{3}}})[1][2] == Tuple{Int64}
+end
+
+# https://github.com/JuliaLang/julia/issues/42754
+# inline union-split constant-prop'ed results
+mutable struct X42754
+    # NOTE in order to confuse `fieldtype_tfunc`, we need to have at least two fields with different types
+    a::Union{Nothing, Int}
+    b::Symbol
+end
+let src = code_typed1((X42754, Union{Nothing,Int})) do x, a
+        # this `setproperty` call would be union-split and constant-prop will happen for
+        # each signature: inlining would fail if we don't use constant-prop'ed source
+        # since the approximate inlining cost of `convert(fieldtype(X, sym), a)` would
+        # end up very high if we don't propagate `sym::Const(:a)`
+        x.a = a
+        x
+    end
+    @test all(src.code) do @nospecialize x
+        !(isinvoke(:setproperty!, x) || iscall((src, setproperty!), x))
+    end
+end
+
+import Base: @constprop
+
+# test union-split callsite with successful and unsuccessful constant-prop' results
+@constprop :aggressive @inline f42840(xs, a::Int) = xs[a]             # should be successful, and inlined
+@constprop :none @noinline f42840(xs::AbstractVector, a::Int) = xs[a] # should be unsuccessful, but still statically resolved
+let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs
+             f42840(xs, 2)
+         end |> only |> first
+    # `(xs::Tuple{Int,Int,Int})[a::Const(2)]` => `getfield(xs, 2)`
+    @test count(iscall((src, getfield)), src.code) == 1
+    @test count(isinvoke(:f42840), src.code) == 1
+end
+# a bit weird, but should handle this kind of case as well
+@constprop :aggressive @noinline g42840(xs, a::Int) = xs[a]         # should be successful, but only statically resolved
+@constprop :none @inline g42840(xs::AbstractVector, a::Int) = xs[a] # should be unsuccessful, still inlined
+let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs
+        g42840(xs, 2)
+    end |> only |> first
+    # `(xs::Vector{Int})[a::Const(2)]` => `Base.arrayref(true, xs, 2)`
+    @test count(iscall((src, Base.arrayref)), src.code) == 1
+    @test count(isinvoke(:g42840), src.code) == 1
+end
+
+# test single, non-dispatchtuple callsite inlining
+
+@constprop :none @inline test_single_nondispatchtuple(@nospecialize(t)) =
+    isa(t, DataType) && t.name === Type.body.name
+let
+    src = code_typed1((Any,)) do x
+        test_single_nondispatchtuple(x)
+    end
+    @test all(src.code) do @nospecialize x
+        !(isinvoke(:test_single_nondispatchtuple, x) || iscall((src, test_single_nondispatchtuple), x))
+    end
+end
+
+@constprop :aggressive @inline test_single_nondispatchtuple(c, @nospecialize(t)) =
+    c && isa(t, DataType) && t.name === Type.body.name
+let
+    src = code_typed1((Any,)) do x
+        test_single_nondispatchtuple(true, x)
+    end
+    @test all(src.code) do @nospecialize(x)
+        !(isinvoke(:test_single_nondispatchtuple, x) || iscall((src, test_single_nondispatchtuple), x))
+    end
+end
+
+# validate inlining processing
+
+@constprop :none @inline validate_unionsplit_inlining(@nospecialize(t)) = throw("invalid inlining processing detected")
+@constprop :none @noinline validate_unionsplit_inlining(i::Integer) = (println(IOBuffer(), "prevent inlining"); false)
+let
+    invoke(xs) = validate_unionsplit_inlining(xs[1])
+    @test invoke(Any[10]) === false
+end
+
+@constprop :aggressive @inline validate_unionsplit_inlining(c, @nospecialize(t)) = c && throw("invalid inlining processing detected")
+@constprop :aggressive @noinline validate_unionsplit_inlining(c, i::Integer) = c && (println(IOBuffer(), "prevent inlining"); false)
+let
+    invoke(xs) = validate_unionsplit_inlining(true, xs[1])
+    @test invoke(Any[10]) === false
+end
+
+# issue 43104
+
+@inline isGoodType(@nospecialize x::Type) =
+    x !== Any && !(@noinline Base.has_free_typevars(x))
+let # aggressive inlining of single, abstract method match
+    src = code_typed((Type, Any,)) do x, y
+        isGoodType(x), isGoodType(y)
+    end |> only |> first
+    # both callsites should be inlined
+    @test count(isinvoke(:has_free_typevars), src.code) == 2
+    # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
+    @test count(iscall((src,isGoodType)), src.code) == 1
+end
+
+@inline isGoodType2(cnd, @nospecialize x::Type) =
+    x !== Any && !(@noinline (cnd ? Core.Compiler.isType : Base.has_free_typevars)(x))
+let # aggressive inlining of single, abstract method match (with constant-prop'ed)
+    src = code_typed((Type, Any,)) do x, y
+        isGoodType2(true, x), isGoodType2(true, y)
+    end |> only |> first
+    # both callsites should be inlined with constant-prop'ed result
+    @test count(isinvoke(:isType), src.code) == 2
+    @test count(isinvoke(:has_free_typevars), src.code) == 0
+    # `isGoodType2(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
+    @test count(iscall((src,isGoodType2)), src.code) == 1
+end
+
+@noinline function checkBadType!(@nospecialize x::Type)
+    if x === Any || Base.has_free_typevars(x)
+        println(x)
+    end
+    return nothing
+end
+let # aggressive static dispatch of single, abstract method match
+    src = code_typed((Type, Any,)) do x, y
+        checkBadType!(x), checkBadType!(y)
+    end |> only |> first
+    # both callsites should be resolved statically
+    @test count(isinvoke(:checkBadType!), src.code) == 2
+    # `checkBadType!(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
+    @test count(iscall((src,checkBadType!)), src.code) == 1
+end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 3be15ef9cc317..dbffa41edc7ae 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -69,6 +69,233 @@ end
 
 # Tests for SROA
 
+import Core.Compiler: argextype, singleton_type
+const EMPTY_SPTYPES = Any[]
+
+code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::Core.CodeInfo
+get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
+
+# check if `x` is an `Expr(:new)` statement
+isnew(@nospecialize x) = Meta.isexpr(x, :new)
+
+# check if `x` is a dynamic call of a given function
+iscall(y) = @nospecialize(x) -> iscall(y, x)
+function iscall((src, f)::Tuple{Core.CodeInfo,Function}, @nospecialize(x))
+    return iscall(x) do @nospecialize x
+        singleton_type(argextype(x, src, EMPTY_SPTYPES)) === f
+    end
+end
+iscall(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1])
+
+struct ImmutableXYZ; x; y; z; end
+mutable struct MutableXYZ; x; y; z; end
+
+# should optimize away very basic cases
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        xyz.x, xyz.y, xyz.z
+    end
+    @test !any(isnew, src.code)
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        xyz.x, xyz.y, xyz.z
+    end
+    @test !any(isnew, src.code)
+end
+
+# should handle simple mutabilities
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        xyz.y = 42
+        xyz.x, xyz.y, xyz.z
+    end
+    @test !any(isnew, src.code)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), 42, #=z=# Core.Argument(4)]
+    end
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        xyz.x, xyz.z = xyz.z, xyz.x
+        xyz.x, xyz.y, xyz.z
+    end
+    @test !any(isnew, src.code)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=z=# Core.Argument(4), #=y=# Core.Argument(3), #=x=# Core.Argument(2)]
+    end
+end
+# circumvent uninitialized fields as far as there is a solid `setfield!` definition
+let src = code_typed1() do
+        r = Ref{Any}()
+        r[] = 42
+        return r[]
+    end
+    @test !any(isnew, src.code)
+end
+let src = code_typed1((Bool,)) do cond
+        r = Ref{Any}()
+        if cond
+            r[] = 42
+            return r[]
+        else
+            r[] = 32
+            return r[]
+        end
+    end
+    @test !any(isnew, src.code)
+end
+let src = code_typed1((Bool,)) do cond
+        r = Ref{Any}()
+        if cond
+            r[] = 42
+        else
+            r[] = 32
+        end
+        return r[]
+    end
+    @test !any(isnew, src.code)
+end
+let src = code_typed1((Bool,Bool,Any,Any,Any)) do c1, c2, x, y, z
+        r = Ref{Any}()
+        if c1
+            if c2
+                r[] = x
+            else
+                r[] = y
+            end
+        else
+            r[] = z
+        end
+        return r[]
+    end
+    @test !any(isnew, src.code)
+end
+let src = code_typed1((Bool,)) do cond
+        r = Ref{Any}()
+        if cond
+            r[] = 42
+        end
+        return r[]
+    end
+    # N.B. `r` should be allocated since `cond` might be `false`, in which case reading the unassigned `r[]` must throw
+    @test any(isnew, src.code)
+end
+let src = code_typed1((Bool,Bool,Any,Any)) do c1, c2, x, y
+        r = Ref{Any}()
+        if c1
+            if c2
+                r[] = x
+            end
+        else
+            r[] = y
+        end
+        return r[]
+    end
+    # N.B. `r` should be allocated since `c2` might be `false`, in which case reading the unassigned `r[]` must throw
+    @test any(isnew, src.code)
+end
+
+# should include a simple alias analysis
+struct ImmutableOuter{T}; x::T; y::T; z::T; end
+mutable struct MutableOuter{T}; x::T; y::T; z::T; end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        outer = ImmutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test !any(src.code) do @nospecialize x
+        Meta.isexpr(x, :new)
+    end
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=z=# Core.Argument(4)]
+    end
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        # #42831 forms ::PartialStruct(ImmutableOuter{Any}, Any[ImmutableXYZ, ImmutableXYZ, ImmutableXYZ])
+        # so the succeeding `getproperty`s are type stable and inlined
+        outer = ImmutableOuter{Any}(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test !any(isnew, src.code)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=z=# Core.Argument(4)]
+    end
+end
+
+# FIXME our analysis isn't yet so powerful at this moment: may be unable to handle nested objects well
+# OK: mutable(immutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        t   = (xyz,)
+        v = t[1].x
+        v, v, v
+    end
+    @test !any(isnew, src.code)
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        outer = ImmutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test !any(isnew, src.code)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=z=# Core.Argument(4)]
+    end
+end
+let # this is a simple end to end test case, which demonstrates allocation elimination
+    # by handling `mutable[RefValue{String}](immutable[Tuple](...))` case correctly
+    # NOTE this test case isn't so robust and might be subject to future changes of the broadcasting implementation,
+    # in that case you don't really need to stick to keeping this test case around
+    simple_sroa(s) = broadcast(identity, Ref(s))
+    s = Base.inferencebarrier("julia")::String
+    simple_sroa(s)
+    # NOTE don't hard-code `"julia"` in `@allocated` clause and make sure to execute the
+    # compiled code for `simple_sroa`, otherwise everything can be folded even without SROA
+    @test @allocated(simple_sroa(s)) == 0
+end
+# FIXME: immutable(mutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        outer = MutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test_broken !any(isnew, src.code)
+end
+# FIXME: mutable(mutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        outer = MutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test_broken !any(isnew, src.code)
+end
+
+# should work nicely with inlining to optimize away a complicated case
+# adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B
+struct Point
+    x::Float64
+    y::Float64
+end
+#=@inline=# add(a::Point, b::Point) = Point(a.x + b.x, a.y + b.y)
+function compute()
+    a = Point(1.5, 2.5)
+    b = Point(2.25, 4.75)
+    for i in 0:(100000000-1)
+        a = add(add(a, b), b)
+    end
+    a.x, a.y
+end
+let src = code_typed1(compute)
+    @test !any(isnew, src.code)
+end
+
 mutable struct Foo30594; x::Float64; end
 Base.copy(x::Foo30594) = Foo30594(x.x)
 function add!(p::Foo30594, off::Foo30594)
@@ -128,7 +355,7 @@ let nt = (a=1, b=2)
     @test_throws ArgumentError blah31139(nt)
 end
 
-# Expr(:new) annoted as PartialStruct
+# Expr(:new) annotated as PartialStruct
 struct FooPartial
     x
     y
@@ -180,7 +407,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any])
     @test Core.Compiler.verify_ir(ir) === nothing
-    ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir)
+    ir = @test_nowarn Core.Compiler.sroa_pass!(ir)
     @test Core.Compiler.verify_ir(ir) === nothing
 end
 
@@ -383,3 +610,65 @@ exc39508 = ErrorException("expected")
     return err
 end
 @test test39508() === exc39508
+
+let # `sroa_pass!` should work with constant globals
+    # immutable pass
+    src = @eval Module() begin
+        const REF_FLD = :x
+        struct ImmutableRef{T}
+            x::T
+        end
+
+        code_typed((Int,)) do x
+            r = ImmutableRef{Int}(x) # should be eliminated
+            x = getfield(r, REF_FLD) # should be eliminated
+            return sin(x)
+        end |> only |> first
+    end
+    @test !any(src.code) do @nospecialize(stmt)
+        Meta.isexpr(stmt, :call) || return false
+        ft = Core.Compiler.argextype(stmt.args[1], src, EMPTY_SPTYPES)
+        return Core.Compiler.widenconst(ft) == typeof(getfield)
+    end
+    @test !any(src.code) do @nospecialize(stmt)
+        return Meta.isexpr(stmt, :new)
+    end
+
+    # mutable pass
+    src = @eval Module() begin
+        const REF_FLD = :x
+        code_typed() do
+            r = Ref{Int}(42) # should be eliminated
+            x = getfield(r, REF_FLD) # should be eliminated
+            return sin(x)
+        end |> only |> first
+    end
+    @test !any(src.code) do @nospecialize(stmt)
+        Meta.isexpr(stmt, :call) || return false
+        ft = Core.Compiler.argextype(stmt.args[1], src, EMPTY_SPTYPES)
+        return Core.Compiler.widenconst(ft) == typeof(getfield)
+    end
+    @test !any(src.code) do @nospecialize(stmt)
+        return Meta.isexpr(stmt, :new)
+    end
+end
+
+let
+    # `typeassert` elimination after SROA
+    # NOTE we can remove this optimization once inference is able to reason about memory-effects
+    src = @eval Module() begin
+        mutable struct Foo; x; end
+
+        code_typed((Int,)) do a
+            x1 = Foo(a)
+            x2 = Foo(x1)
+            return typeassert(x2.x, Foo).x
+        end |> only |> first
+    end
+    # eliminate `typeassert(x2.x, Foo)`
+    @test all(src.code) do @nospecialize stmt
+        Meta.isexpr(stmt, :call) || return true
+        ft = Core.Compiler.argextype(stmt.args[1], src, EMPTY_SPTYPES)
+        return Core.Compiler.widenconst(ft) !== typeof(typeassert)
+    end
+end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index f90bb71e291d0..ffb48a9de38e9 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -310,3 +310,27 @@ let cfg = CFG(BasicBlock[
     Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
     @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 end
+
+# Issue #41975 - SSA conversion drops type check
+f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
+@test_throws TypeError f_if_typecheck()
+
+@test let # https://github.com/JuliaLang/julia/issues/42258
+    code = quote
+        function foo()
+            a = @noinline rand(rand(0:10))
+            if isempty(a)
+                err = BoundsError(a)
+                throw(err)
+                return nothing
+            end
+            return a
+        end
+        code_typed(foo; optimize=true)
+
+        code_typed(Core.Compiler.setindex!, (Core.Compiler.UseRef,Core.Compiler.NewSSAValue); optimize=true)
+    end |> string
+    cmd = `$(Base.julia_cmd()) -g 2 -e $code`
+    stderr = IOBuffer()
+    success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr)) && isempty(String(take!(stderr)))
+end
diff --git a/test/compiler/validation.jl b/test/compiler/validation.jl
index d07007069b5c8..6326cb709e30d 100644
--- a/test/compiler/validation.jl
+++ b/test/compiler/validation.jl
@@ -21,7 +21,7 @@ end
 msig = Tuple{typeof(f22938),Int,Int,Int,Int}
 world = typemax(UInt)
 match = Base._methods_by_ftype(msig, -1, world)[]
-mi = Core.Compiler.specialize_method(match, false)
+mi = Core.Compiler.specialize_method(match)
 c0 = Core.Compiler.retrieve_code_info(mi)
 
 @test isempty(Core.Compiler.validate_code(mi))
@@ -105,6 +105,14 @@ end
     @test errors[1].kind === Core.Compiler.SSAVALUETYPES_MISMATCH_UNINFERRED
 end
 
+@testset "SSAFLAGS_MISMATCH" begin
+    c = copy(c0)
+    empty!(c.ssaflags)
+    errors = Core.Compiler.validate_code(c)
+    @test length(errors) == 1
+    @test errors[1].kind === Core.Compiler.SSAFLAGS_MISMATCH
+end
+
 @testset "SIGNATURE_NARGS_MISMATCH" begin
     old_sig = mi.def.sig
     mi.def.sig = Tuple{1,2}
diff --git a/test/core.jl b/test/core.jl
index 0a92c3e1aeef3..3727d3ff8f069 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -7236,6 +7236,12 @@ end
 @test string((B40050(),)) == "($B40050(),)"
 @test_broken isbitstype(Tuple{B40050})
 
+# issue #41654
+struct X41654 <: Ref{X41654}
+end
+@test isbitstype(X41654)
+@test ('a'=>X41654(),)[1][2] isa X41654
+
 # Issue #34206/34207
 function mre34206(a, n)
     va = view(a, :)
@@ -7573,3 +7579,40 @@ const T35130 = Tuple{Vector{Int}, <:Any}
 end
 h35130(x) = A35130(Any[x][1]::Vector{T35130})
 @test h35130(T35130[([1],1)]) isa A35130
+
+# issue #41503
+let S = Tuple{Tuple{Tuple{K, UInt128} where K<:Tuple{Int64}, Int64}},
+    T = Tuple{Tuple{Tuple{Tuple{Int64}, UInt128}, Int64}}
+    @test pointer_from_objref(T) === pointer_from_objref(S)
+    @test isbitstype(T)
+end
+
+# avoid impossible normalization (don't try to form Tuple{Complex{String}} here)
+@test Tuple{Complex{T} where String<:T<:String} == Tuple{Complex{T} where String<:T<:String}
+
+# control over compilation/interpreter
+@testset "Experimental.@force_compile" begin
+    function trim_after_eval(str::AbstractString)
+        rng = findfirst("eval(", str)
+        @test !isempty(rng)
+        return str[1:first(rng)-1]
+    end
+    btc = eval(quote
+        Base.Experimental.@force_compile
+        backtrace()
+    end)
+    bti = eval(quote
+        backtrace()
+    end)
+    @test !occursin(r"(interpreter|do_call)", trim_after_eval(string(stacktrace(btc, true))))
+    @test  occursin(r"(interpreter|do_call)", trim_after_eval(string(stacktrace(bti, true))))
+end
+
+@testset "rest(svec, ...)" begin
+    x = Core.svec(1, 2, 3)
+    a..., = x
+    @test a == Core.svec(1, 2, 3)
+    a, b... = x
+    @test a == 1
+    @test b == Core.svec(2, 3)
+end
diff --git a/test/corelogging.jl b/test/corelogging.jl
index 698209661456b..9c5102d848013 100644
--- a/test/corelogging.jl
+++ b/test/corelogging.jl
@@ -341,20 +341,6 @@ end
         String(take!(io))
     end
 
-    function genmsg_out(level, message, _module, filepath, line; kws...)
-        fname = tempname()
-        f = open(fname, "w")
-        logger = SimpleLogger()
-        redirect_stdout(f) do
-            handle_message(logger, level, message, _module, :group, :id,
-                           filepath, line; kws...)
-        end
-        close(f)
-        buf = read(fname)
-        rm(fname)
-        String(buf)
-    end
-
     function genmsg_err(level, message, _module, filepath, line; kws...)
         fname = tempname()
         f = open(fname, "w")
@@ -370,7 +356,7 @@ end
     end
 
     # Simple
-    @test genmsg_out(Info, "msg", Main, "some/path.jl", 101) ==
+    @test genmsg_err(Info, "msg", Main, "some/path.jl", 101) ==
     """
     ┌ Info: msg
     └ @ Main some/path.jl:101
diff --git a/test/docs.jl b/test/docs.jl
index 05442df9d4cbe..762a481ee4801 100644
--- a/test/docs.jl
+++ b/test/docs.jl
@@ -1271,6 +1271,8 @@ end
 
 # issue #36378 (\u1e8b and x\u307 are the fully composed and decomposed forms of ẋ, respectively)
 @test sprint(repl_latex, "\u1e8b") == "\"x\u307\" can be typed by x\\dot<tab>\n\n"
+# issue 39814
+@test sprint(repl_latex, "\u2209") == "\"\u2209\" can be typed by \\notin<tab>\n\n"
 
 # issue #15684
 begin
@@ -1502,3 +1504,12 @@ end
 # Issue #13109
 eval(Expr(:block, Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), nothing, "...", Expr(:module, false, :MBareModuleEmpty, Expr(:block)))))
 @test docstrings_equal(@doc(MBareModuleEmpty), doc"...")
+
+# issue #41727
+"struct docstring"
+struct S41727
+    "x is $(2*2)"
+    x
+end
+@test S41727(1) isa S41727
+@test string(@repl S41727.x) == "x is 4\n"
diff --git a/test/embedding/embedding-test.jl b/test/embedding/embedding-test.jl
index f358ff2a74cdf..797f6dabd9a89 100644
--- a/test/embedding/embedding-test.jl
+++ b/test/embedding/embedding-test.jl
@@ -20,8 +20,7 @@ end
     close(out.in)
     close(err.in)
     out_task = @async readlines(out)
-    err = read(err, String)
-    @test err == "MethodError: no method matching this_function_has_no_methods()\n"
+    @test readline(err) == "MethodError: no method matching this_function_has_no_methods()"
     @test success(p)
     lines = fetch(out_task)
     @test length(lines) == 10
@@ -29,4 +28,5 @@ end
     @test lines[8] == "called bar"
     @test lines[9] == "calling new bar"
     @test lines[10] == "      From worker 2:\tTaking over the world..."
+    @test readline(err) == "exception caught from C"
 end
diff --git a/test/embedding/embedding.c b/test/embedding/embedding.c
index d1816947f3856..d082366c908de 100644
--- a/test/embedding/embedding.c
+++ b/test/embedding/embedding.c
@@ -32,6 +32,9 @@ jl_value_t *checked_eval_string(const char* code)
 
 int main()
 {
+    // check that setting options works
+    jl_options.opt_level = 1;
+
     jl_init();
 
     {
@@ -40,6 +43,12 @@ int main()
         checked_eval_string("println(sqrt(2.0))");
     }
 
+    if (jl_options.opt_level != 1) {
+        jl_printf(jl_stderr_stream(), "setting jl_options didn't work\n");
+        jl_atexit_hook(1);
+        exit(1);
+    }
+
     {
         // Accessing the return value
 
@@ -175,6 +184,13 @@ int main()
         checked_eval_string("f28825()");
     }
 
+    JL_TRY {
+        jl_error("exception thrown");
+    }
+    JL_CATCH {
+        jl_printf(jl_stderr_stream(), "exception caught from C\n");
+    }
+
     int ret = 0;
     jl_atexit_hook(ret);
     return ret;
diff --git a/test/errorshow.jl b/test/errorshow.jl
index 24d0241049da0..9572ccc4af224 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -48,8 +48,9 @@ include("testenv.jl")
     end
 end
 
-
-cfile = " at $(@__FILE__):"
+file = @__FILE__
+Base.stacktrace_contract_userdir() && (file = Base.contractuser(file))
+cfile = " at $file:"
 c1line = @__LINE__() + 1
 method_c1(x::Float64, s::AbstractString...) = true
 
@@ -184,6 +185,11 @@ addConstraint_15639(c::Int64; uncset=nothing) = addConstraint_15639(Int32(c), un
 Base.show_method_candidates(buf, MethodError(addConstraint_15639, (Int32(1),)), pairs((uncset = nothing,)))
 @test String(take!(buf)) == "\nClosest candidates are:\n  addConstraint_15639(::Int32)$cfile$(ac15639line + 1) got unsupported keyword argument \"uncset\"\n  addConstraint_15639(!Matched::Int64; uncset)$cfile$(ac15639line + 2)"
 
+# Busted Vararg method definitions
+bad_vararg_decl(x::Int, y::Vararg) = 1   # don't do this, instead use (x::Int, y...)
+Base.show_method_candidates(buf, try bad_vararg_decl("hello", 3) catch e e end)
+@test occursin("bad_vararg_decl(!Matched::$Int, ::Any...)", String(take!(buf)))
+
 macro except_str(expr, err_type)
     return quote
         let err = nothing
@@ -271,7 +277,7 @@ let
     @test occursin("column vector", err_str)
 end
 
-struct TypeWithIntParam{T <: Integer} end
+struct TypeWithIntParam{T<:Integer, Vector{T}<:A<:AbstractArray{T}} end
 struct Bounded  # not an AbstractArray
     bound::Int
 end
@@ -317,8 +323,14 @@ let undefvar
     @test err_str == "TypeError: in Type, in parameter, expected Type, got a value of type String"
     err_str = @except_str TypeWithIntParam{Any} TypeError
     @test err_str == "TypeError: in TypeWithIntParam, in T, expected T<:Integer, got Type{Any}"
+    err_str = @except_str TypeWithIntParam{Int64,Vector{Float64}} TypeError
+    @test err_str == "TypeError: in TypeWithIntParam, in A, expected Vector{Int64}<:A<:(AbstractArray{Int64}), got Type{Vector{Float64}}"
+    err_str = @except_str TypeWithIntParam{Int64}{Vector{Float64}} TypeError
+    @test err_str == "TypeError: in TypeWithIntParam, in A, expected Vector{Int64}<:A<:(AbstractArray{Int64}), got Type{Vector{Float64}}"
     err_str = @except_str Type{Vararg} TypeError
     @test err_str == "TypeError: in Type, in parameter, expected Type, got Vararg"
+    err_str = @except_str Ref{Vararg} TypeError
+    @test err_str == "TypeError: in Type, in parameter, expected Type, got Vararg"
 
     err_str = @except_str mod(1,0) DivideError
     @test err_str == "DivideError: integer division error"
@@ -723,7 +735,7 @@ end
 
 # Test that implementation detail of include() is hidden from the user by default
 let bt = try
-        include("testhelpers/include_error.jl")
+        @noinline include("testhelpers/include_error.jl")
     catch
         catch_backtrace()
     end
@@ -735,7 +747,7 @@ end
 # Test backtrace printing
 module B
     module C
-        f(x; y=2.0) = error()
+        @noinline f(x; y=2.0) = error()
     end
     module D
         import ..C: f
@@ -744,7 +756,8 @@ module B
 end
 
 @testset "backtrace" begin
-    bt = try B.D.g()
+    bt = try
+        B.D.g()
     catch
         catch_backtrace()
     end
@@ -772,7 +785,8 @@ if Sys.isapple() || (Sys.islinux() && Sys.ARCH === :x86_64)
     pair_repeater_b() = pair_repeater_a()
 
     @testset "repeated stack frames" begin
-        let bt = try single_repeater()
+        let bt = try
+                single_repeater()
             catch
                 catch_backtrace()
             end
@@ -780,7 +794,8 @@ if Sys.isapple() || (Sys.islinux() && Sys.ARCH === :x86_64)
             @test occursin(r"repeats \d+ times", bt_str)
         end
 
-        let bt = try pair_repeater_a()
+        let bt = try
+                pair_repeater_a()
             catch
                 catch_backtrace()
             end
@@ -789,3 +804,102 @@ if Sys.isapple() || (Sys.islinux() && Sys.ARCH === :x86_64)
         end
     end
 end  # Sys.isapple()
+
+@testset "ScheduledAfterSyncException" begin
+    t = :DummyTask
+    msg = sprint(showerror, Base.ScheduledAfterSyncException(Any[t]))
+    @test occursin(":DummyTask is registered after the end of a `@sync` block", msg)
+    msg = sprint(showerror, Base.ScheduledAfterSyncException(Any[t, t]))
+    @test occursin(
+        ":DummyTask and one more Symbol are registered after the end of a `@sync` block",
+        msg,
+    )
+    msg = sprint(showerror, Base.ScheduledAfterSyncException(Any[t, t, t]))
+    @test occursin(
+        ":DummyTask and 2 more objects are registered after the end of a `@sync` block",
+        msg,
+    )
+end
+
+@testset "error message hints relative modules #40959" begin
+    m = Module()
+    expr = :(module Foo
+        module Bar
+        end
+
+        using Bar
+    end)
+    # Case: `Bar` is a submodule of `Foo`, so the hint should point at `.Bar`.
+    # Capture the exception as a value so a non-throwing eval cannot skip the check.
+    err = try Base.eval(m, expr) catch ex; ex end
+    @test err isa Exception
+    err_str = sprint(showerror, err)
+    @test contains(err_str, "maybe you meant `import/using .Bar`")
+
+    m = Module()
+    expr = :(module Foo
+        Bar = 3
+
+        using Bar
+    end)
+    # Case: `Bar` is bound in `Foo` but is not a module, so no relative-import
+    # hint may be printed. `using Bar` must still throw.
+    err = try Base.eval(m, expr) catch ex; ex end
+    @test err isa Exception
+    err_str = sprint(showerror, err)
+    @test !contains(err_str, "maybe you meant `import/using .Bar`")
+
+    m = Module()
+    expr = :(module Foo
+        using Bar
+    end)
+    # Case: there is no `Bar` binding anywhere in `Foo`, so no relative-import
+    # hint may be printed. `using Bar` must still throw.
+    err = try Base.eval(m, expr) catch ex; ex end
+    @test err isa Exception
+    err_str = sprint(showerror, err)
+    @test !contains(err_str, "maybe you meant `import/using .Bar`")
+
+    m = Module()
+    expr = :(module Foo
+        module Bar end
+        module Buzz
+            using Bar
+        end
+    end)
+    # Case: `Bar` is a sibling of `Buzz` inside `Foo`, so from within `Buzz` the
+    # hint should point one level up, at `..Bar`.
+    err = try Base.eval(m, expr) catch ex; ex end
+    @test err isa Exception
+    err_str = sprint(showerror, err)
+    @test contains(err_str, "maybe you meant `import/using ..Bar`")
+
+    m = Module()
+    expr = :(module Foo
+        Bar = 3
+        module Buzz
+            using Bar
+        end
+    end)
+    # Case: the enclosing module binds `Bar` to a non-module value, so no `..Bar`
+    # hint may be printed. `using Bar` must still throw.
+    err = try Base.eval(m, expr) catch ex; ex end
+    @test err isa Exception
+    err_str = sprint(showerror, err)
+    @test !contains(err_str, "maybe you meant `import/using ..Bar`")
+
+    m = Module()
+    expr = :(module Foo
+        module Bar end
+        module Buzz
+            module Bar end
+            using Bar
+        end
+    end)
+    # Case: a module `Bar` exists both in `Foo` and in `Buzz`; the expected hint
+    # is the one for the `Bar` defined inside `Buzz` itself, i.e. `.Bar`.
+    err = try Base.eval(m, expr) catch ex; ex end
+    @test err isa Exception
+    err_str = sprint(showerror, err)
+    @test contains(err_str, "maybe you meant `import/using .Bar`")
+end
diff --git a/test/fastmath.jl b/test/fastmath.jl
index edaab1c6eb0cf..e93fb93330b4f 100644
--- a/test/fastmath.jl
+++ b/test/fastmath.jl
@@ -249,3 +249,13 @@ end
     @test (@fastmath "a" * "b") == "ab"
     @test (@fastmath "a" ^ 2) == "aa"
 end
+
+
+@testset "exp overflow and underflow" begin
+    for T in (Float32,Float64)
+        for func in (@fastmath exp2,exp,exp10)
+            @test func(T(2000)) == T(Inf)
+            @test func(T(-2000)) == T(0)
+        end
+    end
+end
diff --git a/test/file.jl b/test/file.jl
index 6c48cedb0ebb0..15dc5ef65bd5a 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -63,17 +63,32 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
 end
 
 if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
-    link = joinpath(dir, "afilelink.txt")
+    link = joinpath(dir, "afilesymlink.txt")
     symlink(file, link)
     @test stat(file) == stat(link)
 
     # relative link
-    rellink = joinpath(subdir, "rel_afilelink.txt")
+    rellink = joinpath(subdir, "rel_afilesymlink.txt")
     relfile = joinpath("..", "afile.txt")
     symlink(relfile, rellink)
     @test stat(rellink) == stat(file)
 end
 
+@testset "hardlink" begin
+    link = joinpath(dir, "afilehardlink.txt")
+    hardlink(file, link)
+    @test stat(file) == stat(link)
+
+    # when the destination exists
+    @test_throws Base.IOError hardlink(file, link)
+
+    rm(link)
+
+    # the source file does not exist
+    missing_file = joinpath(dir, "for-sure-missing-file.txt")
+    @test_throws Base.IOError hardlink(missing_file, link)
+end
+
 using Random
 
 @testset "that temp names are actually unique" begin
@@ -91,13 +106,15 @@ using Random
 end
 
 @testset "tempname with parent" begin
-    t = tempname()
-    @test dirname(t) == tempdir()
-    mktempdir() do d
-        t = tempname(d)
-        @test dirname(t) == d
+    withenv("TMPDIR" => nothing) do
+        t = tempname()
+        @test dirname(t) == tempdir()
+        mktempdir() do d
+            t = tempname(d)
+            @test dirname(t) == d
+        end
+        @test_throws ArgumentError tempname(randstring())
     end
-    @test_throws ArgumentError tempname(randstring())
 end
 
 child_eval(code::String) = eval(Meta.parse(readchomp(`$(Base.julia_cmd()) -E $code`)))
@@ -491,9 +508,35 @@ rm(c_tmpdir, recursive=true)
 @test_throws Base._UVError("unlink($(repr(c_tmpdir)))", Base.UV_ENOENT) rm(c_tmpdir, recursive=true)
 @test rm(c_tmpdir, force=true, recursive=true) === nothing
 
+# Some operations can fail with one of several error codes depending on the system environment.
+# `throws_matching_exception(f, acceptable_exceptions)` calls `f()` and returns `true` only when
+# it throws an exception that is `in` `acceptable_exceptions`; otherwise it logs the reason
+# with `@error` and returns `false`.
+function throws_matching_exception(f::Function, acceptable_exceptions::AbstractVector)
+    try
+        f()
+        @error "No exception was thrown."
+        return false
+    catch ex
+        ex in acceptable_exceptions && return true
+        @error "The thrown exception is not in the list of acceptable exceptions" acceptable_exceptions exception=(ex, catch_backtrace())
+    end
+    return false
+end
+# Calls `f()` and reports whether it threw `Base._UVError(pfx, code)` for one of the given `codes`.
+function throws_matching_uv_error(f::Function, pfx::AbstractString, codes::AbstractVector{<:Integer})
+    return throws_matching_exception(f, multiple_uv_errors(pfx, codes))
+end
+# Builds the vector of `Base._UVError(pfx, code)` values, one per entry of `codes`.
+multiple_uv_errors(pfx::AbstractString, codes::AbstractVector{<:Integer}) =
+    [Base._UVError(pfx, code) for code in codes]
+
 if !Sys.iswindows()
     # chown will give an error if the user does not have permissions to change files
-    if get(ENV, "USER", "") == "root" || get(ENV, "HOME", "") == "/root"
+    uid = Libc.geteuid()
+    @test stat(file).uid == uid
+    @test uid == Libc.getuid()
+    if uid == 0 # root user
         chown(file, -2, -1)  # Change the file owner to nobody
         @test stat(file).uid != 0
         chown(file, 0, -2)  # Change the file group to nogroup (and owner back to root)
@@ -503,8 +546,12 @@ if !Sys.iswindows()
         @test stat(file).gid == 0
         @test stat(file).uid == 0
     else
-        @test_throws Base._UVError("chown($(repr(file)), -2, -1)", Base.UV_EPERM) chown(file, -2, -1)  # Non-root user cannot change ownership to another user
-        @test_throws Base._UVError("chown($(repr(file)), -1, -2)", Base.UV_EPERM) chown(file, -1, -2)  # Non-root user cannot change group to a group they are not a member of (eg: nogroup)
+        @test throws_matching_uv_error("chown($(repr(file)), -2, -1)", [Base.UV_EPERM, Base.UV_EINVAL]) do
+            chown(file, -2, -1)  # Non-root user cannot change ownership to another user
+        end
+        @test throws_matching_uv_error("chown($(repr(file)), -1, -2)", [Base.UV_EPERM, Base.UV_EINVAL]) do
+            chown(file, -1, -2)  # Non-root user cannot change group to a group they are not a member of (eg: nogroup)
+        end
     end
 else
     # test that chown doesn't cause any errors for Windows
@@ -1650,3 +1697,17 @@ end
         @test !isnothing(Base.Filesystem.getgroupname(s.gid))
     end
 end
+
+@testset "diskstat() works" begin
+    # Sanity check assuming the disk is smaller than 32 PiB (1 PiB = 2^50 bytes)
+    PB = Int64(2)^50
+
+    dstat = diskstat()
+    @test dstat.total < 32PB
+    @test dstat.used + dstat.available == dstat.total
+    @test occursin(r"^DiskStat\(total=\d+, used=\d+, available=\d+\)$", sprint(show, dstat))
+    # Test diskstat(::AbstractString)
+    dstat = diskstat(pwd())
+    @test dstat.total < 32PB
+    @test dstat.used + dstat.available == dstat.total
+end
diff --git a/test/float16.jl b/test/float16.jl
index 804aba9ef741b..75f9b55b6d51c 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -202,3 +202,25 @@ const minsubf16_32 = Float32(minsubf16)
 
 # issues #33076
 @test Float16(1f5) == Inf16
+
+@testset "conversion to Float16 from" begin
+    for T in (Float32, Float64, BigFloat)
+        @testset "conversion from $T" begin
+            for i in 1:2^16
+                f = reinterpret(Float16, UInt16(i-1))
+                isfinite(f) || continue
+                if f < 0
+                    epsdown = T(eps(f))/2
+                    epsup   = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2
+                else
+                    epsup   = T(eps(f))/2
+                    epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2
+                end
+                @test isequal(f*(-1)^(f === Float16(0)),  Float16(nextfloat(T(f) - epsdown)))
+                @test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup)))
+                @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown)))
+                @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup)))
+            end
+        end
+    end
+end
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index e3f3203e3c069..7e9d8021ac5df 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -119,6 +119,23 @@ end
     @test round(Float32(1.2), sigdigits=5) === Float32(1.2)
     @test round(Float16(0.6), sigdigits=2) === Float16(0.6)
     @test round(Float16(1.1), sigdigits=70) === Float16(1.1)
+
+    # issue #37171
+    @test round(9.87654321e-308, sigdigits = 1) ≈ 1.0e-307
+    @test round(9.87654321e-308, sigdigits = 2) ≈ 9.9e-308
+    @test round(9.87654321e-308, sigdigits = 3) ≈ 9.88e-308
+    @test round(9.87654321e-308, sigdigits = 4) ≈ 9.877e-308
+    @test round(9.87654321e-308, sigdigits = 5) ≈ 9.8765e-308
+    @test round(9.87654321e-308, sigdigits = 6) ≈ 9.87654e-308
+    @test round(9.87654321e-308, sigdigits = 7) ≈ 9.876543e-308
+    @test round(9.87654321e-308, sigdigits = 8) ≈ 9.8765432e-308
+    @test round(9.87654321e-308, sigdigits = 9) ≈ 9.87654321e-308
+    @test round(9.87654321e-308, sigdigits = 10) ≈ 9.87654321e-308
+    @test round(9.87654321e-308, sigdigits = 11) ≈ 9.87654321e-308
+
+    @inferred round(Float16(1.), sigdigits=2)
+    @inferred round(Float32(1.), sigdigits=2)
+    @inferred round(Float64(1.), sigdigits=2)
 end
 
 @testset "literal pow matches runtime pow matches optimized pow" begin
diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl
index 8e77533362fe3..b155370dd6465 100644
--- a/test/generic_map_tests.jl
+++ b/test/generic_map_tests.jl
@@ -53,6 +53,30 @@ function generic_map_tests(mapf, inplace_mapf=nothing)
         @test A == map(x->x*x*x, Float64[1:10...])
         @test A === B
     end
+
+    # Issue #28382: inferrability of map with Union eltype
+    @test isequal(map(+, [1, 2], [3.0, missing]), [4.0, missing])
+    @test Core.Compiler.return_type(map, Tuple{typeof(+), Vector{Int},
+                                               Vector{Union{Float64, Missing}}}) ==
+        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
+    @test isequal(map(tuple, [1, 2], [3.0, missing]), [(1, 3.0), (2, missing)])
+    @test Core.Compiler.return_type(map, Tuple{typeof(tuple), Vector{Int},
+                                               Vector{Union{Float64, Missing}}}) ==
+        Vector{<:Tuple{Int, Any}}
+    # Check that corner cases do not throw an error
+    @test isequal(map(x -> x === 1 ? nothing : x, [1, 2, missing]),
+                  [nothing, 2, missing])
+    @test isequal(map(x -> x === 1 ? nothing : x, Any[1, 2, 3.0, missing]),
+                  [nothing, 2, 3, missing])
+    @test map((x,y)->(x==1 ? 1.0 : x, y), [1, 2, 3], ["a", "b", "c"]) ==
+        [(1.0, "a"), (2, "b"), (3, "c")]
+    @test map(typeof, [iszero, isdigit]) == [typeof(iszero), typeof(isdigit)]
+    @test map(typeof, [iszero, iszero]) == [typeof(iszero), typeof(iszero)]
+    @test isequal(map(identity, Vector{<:Union{Int, Missing}}[[1, 2],[missing, 1]]),
+                  [[1, 2],[missing, 1]])
+    @test map(x -> x < 0 ? false : x, Int[]) isa Vector{Integer}
+    @test map(i -> ((x=i, y=(i==1 ? 1 : "a")), 3), 1:4) isa
+        Vector{Tuple{NamedTuple{(:x, :y)}, Int}}
 end
 
 function testmap_equivalence(mapf, f, c...)
diff --git a/test/gmp.jl b/test/gmp.jl
index 875d8895b6b34..2eb1e9faf47da 100644
--- a/test/gmp.jl
+++ b/test/gmp.jl
@@ -278,10 +278,23 @@ end
     @test (|)(a, b, c, d) == parse(BigInt,"-1396834561")
     @test (|)(a, b, c, d, f) == parse(BigInt,"-1358954753")
     @test (|)(a, b, c, d, f, g) == parse(BigInt,"-1")
+end
+
+@testset "bit operations" begin
+    for x in (315135, 12412, 3426495623485904783478347)
+        @test trailing_ones(big(x)) == trailing_ones(x)
+        @test trailing_zeros(big(x)) == trailing_zeros(x)
+        @test count_ones(big(x)) == count_ones(x)
+        @test count_zeros(-big(x)) == count_zeros(-x)
+    end
 
-    @test trailing_ones(a) == 8
-    @test trailing_zeros(b) == 2
-    @test count_ones(a) == 14
+    @test_throws DomainError trailing_zeros(big(0))
+    @test_throws DomainError trailing_ones(big(-1)) # -1 is all ones
+
+    @test_throws DomainError count_zeros(big(0))
+    @test_throws DomainError count_zeros(big(rand(UInt)))
+    @test_throws DomainError count_ones(big(-1))
+    @test_throws DomainError count_ones(-big(rand(UInt))-1)
 end
 
 # Large Fibonacci to exercise BigInt
@@ -366,28 +379,24 @@ end
     @test_throws InexactError convert(BigInt, 2.1)
     @test_throws InexactError convert(BigInt, big(2.1))
 end
-@testset "issue #13367" begin
-    @test trunc(BigInt,2.1) == 2
-    @test round(BigInt,2.1) == 2
-    @test floor(BigInt,2.1) == 2
-    @test ceil(BigInt,2.1) == 3
-
-    @test trunc(BigInt,2.1f0) == 2
-    @test round(BigInt,2.1f0) == 2
-    @test floor(BigInt,2.1f0) == 2
-    @test ceil(BigInt,2.1f0) == 3
-
-    @test_throws InexactError trunc(BigInt,Inf)
-    @test_throws InexactError round(BigInt,Inf)
-    @test_throws InexactError floor(BigInt,Inf)
-    @test_throws InexactError ceil(BigInt,Inf)
-
-    @test string(big(3), base = 2) == "11"
-    @test string(big(9), base = 8) == "11"
-    @test string(-big(9), base = 8) == "-11"
-    @test string(big(12), base = 16) == "c"
+@testset "truncation" begin
+    # cf. issue #13367
+    for T = (Float16, Float32, Float64)
+        @test trunc(BigInt, T(2.1)) == 2
+        @test unsafe_trunc(BigInt, T(2.1)) == 2
+        @test round(BigInt, T(2.1)) == 2
+        @test floor(BigInt, T(2.1)) == 2
+        @test ceil(BigInt, T(2.1)) == 3
+
+        @test_throws InexactError trunc(BigInt, T(Inf))
+        @test_throws InexactError round(BigInt, T(Inf))
+        @test_throws InexactError floor(BigInt, T(Inf))
+        @test_throws InexactError ceil(BigInt, T(Inf))
+    end
 end
-@testset "Issue #18849" begin
+
+@testset "string(::BigInt)" begin
+    # cf. issue #18849
     # bin, oct, dec, hex should not call sizeof on BigInts
     # when padding is desired
     padding = 4
@@ -412,14 +421,19 @@ end
     @test string(-high, pad = padding, base = 8) == "-4000000"
     @test string(-high, pad = padding, base = 10) == "-1048576"
     @test string(-high, pad = padding, base = 16) == "-100000"
-end
 
-# respect 0-padding on big(0)
-for base in (2, 8, 10, 16)
-    local base
-    @test string(big(0), base=base, pad=0) == ""
+    # cf. issue #13367
+    @test string(big(3), base = 2) == "11"
+    @test string(big(9), base = 8) == "11"
+    @test string(-big(9), base = 8) == "-11"
+    @test string(big(12), base = 16) == "c"
+
+    # respect 0-padding on big(0)
+    for base in (2, 8, 10, 16)
+        @test string(big(0), base=base, pad=0) == ""
+    end
+    @test string(big(0), base = rand(2:62), pad = 0) == ""
 end
-@test string(big(0), base = rand(2:62), pad = 0) == ""
 
 @test isqrt(big(4)) == 2
 @test isqrt(big(5)) == 2
diff --git a/test/int.jl b/test/int.jl
index 52f554718645e..b75337c405767 100644
--- a/test/int.jl
+++ b/test/int.jl
@@ -324,10 +324,16 @@ end
     end
 end
 
-@testset "issue #21092" begin
+@testset "Underscores in big_str" begin
     @test big"1_0_0_0" == BigInt(1000)
     @test_throws ArgumentError big"1_0_0_0_"
     @test_throws ArgumentError big"_1_0_0_0"
+
+    @test big"1_0.2_5" == BigFloat(10.25)
+    @test_throws ArgumentError big"_1_0.2_5"
+    @test_throws ArgumentError big"1_0.2_5_"
+    @test_throws ArgumentError big"1_0_.2_5"
+    @test_throws ArgumentError big"1_0._2_5"
 end
 
 # issue #26779
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index 27dccdbca9a28..0814229a5d41b 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -479,3 +479,25 @@ end
 for b in [-100:-2; 2:100;]
     @test Base.ndigits0z(0, b) == 0
 end
+
+@testset "constant prop in gcd" begin
+    ci = code_typed(() -> gcd(14, 21))[][1]
+    @test ci.code == Any[Core.ReturnNode(7)]
+
+    ci = code_typed(() -> 14 // 21)[][1]
+    @test ci.code == Any[Core.ReturnNode(2 // 3)]
+end
+@testset "binomial" begin
+    for T in (Int8, Int16, Int32, Int64)
+        for x in rand(-isqrt(typemax(T)):isqrt(typemax(T)), 1000)
+            @test binomial(x,T(1)) == x
+            x>=0 && @test binomial(x,x-T(1)) == x
+            @test binomial(x,T(2)) == div(x*(x-1), 2)
+            x>=0 && @test binomial(x,x-T(2)) == div(x*(x-1), 2)
+        end
+        @test @inferred(binomial(one(T),one(T))) isa T
+    end
+    for x in ((false,false), (false,true), (true,false), (true,true))
+        @test binomial(x...) == (x != (false,true))
+    end
+end
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 7fa8ecb0ebe27..2f2ef0cd505d5 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -100,6 +100,8 @@ let f = Core.Intrinsics.ashr_int
     @test f(Int32(2), -1) == 0
 end
 
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+
 # issue #29929
 let p = Ptr{Nothing}(0)
     @test unsafe_store!(p, nothing) === C_NULL
@@ -107,9 +109,9 @@ let p = Ptr{Nothing}(0)
     @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing
     @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p
     @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing
-    @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === (nothing, nothing)
-    @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === (nothing, true)
-    @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === (nothing, false)
+    @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing)
+    @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true))
+    @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false))
 end
 
 struct GhostStruct end
@@ -162,8 +164,17 @@ end
     @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3)
 end
 
-@test Core.Intrinsics.atomic_fence(:sequentially_consistent) === nothing
+using Base.Experimental: @force_compile
+@test_throws ConcurrencyViolationError("invalid atomic ordering") (@force_compile; Core.Intrinsics.atomic_fence(:u)) === nothing
+@test_throws ConcurrencyViolationError("invalid atomic ordering") (@force_compile; Core.Intrinsics.atomic_fence(Symbol("u", "x"))) === nothing
+@test_throws ConcurrencyViolationError("invalid atomic ordering") Core.Intrinsics.atomic_fence(Symbol("u", "x")) === nothing
+for order in (:not_atomic, :monotonic, :acquire, :release, :acquire_release, :sequentially_consistent)
+    @test Core.Intrinsics.atomic_fence(order) === nothing
+    @test (order -> Core.Intrinsics.atomic_fence(order))(order) === nothing
+    @test Base.invokelatest(@eval () -> Core.Intrinsics.atomic_fence($(QuoteNode(order)))) === nothing
+end
 @test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) == nothing
+@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) == nothing
 
 primitive type Int256 <: Signed 256 end
 Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i)
@@ -189,8 +200,8 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co
                 @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent)
-                @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
-                @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent)
                 @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
@@ -199,24 +210,24 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10)
                 @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1)
-                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === (T(1), true)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
-                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === (T(100), false)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
-                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === (T(100), T(101))
-                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === (T(101), T(102))
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101))
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(102)
                 @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102)
-                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === (T(103), false)
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103)
             end
             if TT === Any
-                @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === (T(103), S(103))
+                @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103)
                 @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p
                 @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1)
-                @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === (S(100), false)
-                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === (S(100), true)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false))
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2)
             end
         end)(TT,)
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index 80972a7c65448..d8211aa7086b3 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -9,7 +9,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
 @testset "Read/write empty IOBuffer" begin
     io = IOBuffer()
     @test eof(io)
-    @test_throws EOFError read(io,UInt8)
+    @test_throws EOFError read(io, UInt8)
     @test write(io,"abc") === 3
     @test isreadable(io)
     @test iswritable(io)
@@ -18,7 +18,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     @test position(io) == 3
     @test eof(io)
     seek(io, 0)
-    @test read(io,UInt8) == convert(UInt8, 'a')
+    @test read(io, UInt8) == convert(UInt8, 'a')
     a = Vector{UInt8}(undef, 2)
     @test read!(io, a) == a
     @test a == UInt8['b','c']
@@ -34,22 +34,24 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     truncate(io, 10)
     @test position(io) == 0
     @test all(io.data .== 0)
-    @test write(io,Int16[1,2,3,4,5,6]) === 12
+    @test write(io, Int16[1, 2, 3, 4, 5, 6]) === 12
     seek(io, 2)
     truncate(io, 10)
     @test ioslength(io) == 10
     io.readable = false
-    @test_throws ArgumentError read!(io,UInt8[0])
+    @test_throws ArgumentError read!(io, UInt8[0])
     truncate(io, 0)
     @test write(io,"boston\ncambridge\n") > 0
     @test String(take!(io)) == "boston\ncambridge\n"
     @test String(take!(io)) == ""
     @test write(io, ComplexF64(0)) === 16
     @test write(io, Rational{Int64}(1//2)) === 16
-    close(io)
-    @test_throws ArgumentError write(io,UInt8[0])
-    @test_throws ArgumentError seek(io,0)
+    @test closewrite(io) === nothing
+    @test_throws ArgumentError write(io, UInt8[0])
     @test eof(io)
+    @test close(io) === nothing
+    @test_throws ArgumentError write(io, UInt8[0])
+    @test_throws ArgumentError seek(io, 0)
 end
 
 @testset "Read/write readonly IOBuffer" begin
@@ -237,7 +239,7 @@ end
     @test isreadable(bstream)
     @test iswritable(bstream)
     @test bytesavailable(bstream) == 0
-    @test sprint(show, bstream) == "BufferStream() bytes waiting:$(bytesavailable(bstream.buffer)), isopen:true"
+    @test sprint(show, bstream) == "BufferStream(bytes waiting=$(bytesavailable(bstream.buffer)), isopen=true)"
     a = rand(UInt8,10)
     write(bstream,a)
     @test !eof(bstream)
@@ -251,9 +253,10 @@ end
     @test !eof(bstream)
     read!(bstream,c)
     @test c == a[3:10]
-    @test close(bstream) === nothing
+    @test closewrite(bstream) === nothing
     @test eof(bstream)
     @test bytesavailable(bstream) == 0
+    @test close(bstream) === nothing
     flag = Ref{Bool}(false)
     event = Base.Event()
     bstream = Base.BufferStream()
diff --git a/test/iterators.jl b/test/iterators.jl
index fb8edcab92209..1b2498fb1f905 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -3,6 +3,7 @@
 using Base.Iterators
 using Random
 using Base: IdentityUnitRange
+using Dates: Date, Day
 
 @test Base.IteratorSize(Any) isa Base.SizeUnknown
 
@@ -123,7 +124,7 @@ end
 
 # countfrom
 # ---------
-let i = 0, k = 1
+let i = 0, k = 1, l = 0
     for j = countfrom(0, 2)
         @test j == i*2
         i += 1
@@ -134,6 +135,15 @@ let i = 0, k = 1
         k += 1
         k <= 10 || break
     end
+    # test that `start` promotes to `typeof(start+step)`
+    for j = countfrom(Int[0, 0], Float64[1.0, 2.0])
+        @test j isa Vector{Float64}
+        @test j == l*[1, 2]
+        l += 1
+        l <= 10 || break
+    end
+    # test with `start` and `step` having different types
+    @test collect(take(countfrom(Date(2020,12,25), Day(1)), 12)) == range(Date(2020,12,25), step=Day(1), length=12)
 end
 
 # take
@@ -202,6 +212,7 @@ end
     @test collect(takewhile(Returns(true),5:10)) == 5:10
     @test collect(takewhile(isodd,[1,1,2,3])) == [1,1]
     @test collect(takewhile(<(2), takewhile(<(3), [1,1,2,3]))) == [1,1]
+    @test Base.IteratorEltype(typeof(takewhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown
 end
 
 # dropwhile
@@ -214,6 +225,7 @@ end
     @test isempty(dropwhile(Returns(true), 1:3))
     @test collect(dropwhile(isodd,[1,1,2,3])) == [2,3]
     @test collect(dropwhile(iseven,dropwhile(isodd,[1,1,2,3]))) == [3]
+    @test Base.IteratorEltype(typeof(dropwhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown
 end
 
 # cycle
@@ -292,6 +304,18 @@ let (a, b) = (1:3, [4 6;
     end
 end
 
+# collect a Stateful iterator: the first collect consumes it, and the result does not keep the source's shape
+let itr
+    itr = Iterators.Stateful(Iterators.map(identity, 1:5))
+    @test collect(itr) == 1:5
+    @test collect(itr) == Int[] # already consumed by the collect above, so nothing is left
+    itr = (i+1 for i in Base.Stateful([1, 2, 3]))
+    @test collect(itr) == [2, 3, 4]
+    @test collect(itr) == Int[] # the wrapped Stateful is already consumed, so nothing is left
+    itr = (i-1 for i in Base.Stateful(zeros(Int, 0, 0)))
+    @test collect(itr) == Int[] # Stateful does not preserve shape: a 0x0 source collects to an empty vector
+end
+
 # with 1D inputs
 let a = 1:2,
     b = 1.0:10.0,
@@ -663,17 +687,26 @@ end
     for itr in (2:10, "∀ϵ>0", 1:0, "", (2,3,5,7,11), [2,3,5,7,11], rand(5,6), Z, 3, true, 'x', 4=>5,
                 eachindex("∀ϵ>0"), view(Z), view(rand(5,6),2:4,2:6), (x^2 for x in 1:10),
                 Iterators.Filter(isodd, 1:10), flatten((1:10, 50:60)), enumerate("foo"),
-                pairs(50:60), zip(1:10,21:30,51:60), product(1:3, 10:12), repeated(3.14159, 5))
-        @test squash(collect(Iterators.reverse(itr))) == reverse(squash(collect(itr)))
+                pairs(50:60), zip(1:10,21:30,51:60), product(1:3, 10:12), repeated(3.14159, 5),
+                (a=2, b=3, c=5, d=7, e=11))
+        arr = reverse(squash(collect(itr)))
+        itr = Iterators.reverse(itr)
+        @test squash(collect(itr)) == arr
+        if !isempty(arr)
+            @test first(itr) == first(arr)
+            @test last(itr) == last(arr)
+        end
     end
     @test collect(take(Iterators.reverse(cycle(1:3)), 7)) == collect(take(cycle(3:-1:1), 7))
     let r = repeated(3.14159)
         @test Iterators.reverse(r) === r
+        @test last(r) === 3.14159
     end
-    let t = (2,3,5,7,11)
+    for t in [(1,), (2, 3, 5, 7, 11), (a=1,), (a=2, b=3, c=5, d=7, e=11)]
         @test Iterators.reverse(Iterators.reverse(t)) === t
         @test first(Iterators.reverse(t)) === last(t)
         @test last(Iterators.reverse(t)) === first(t)
+        @test collect(Iterators.reverse(t)) == reverse(collect(t))
     end
 end
 
@@ -860,4 +893,9 @@ end
     @test Iterators.peel(1:10)[2] |> collect == 2:10
     @test Iterators.peel(x^2 for x in 2:4)[1] == 4
     @test Iterators.peel(x^2 for x in 2:4)[2] |> collect == [9, 16]
-end
\ No newline at end of file
+end
+
+@testset "last for iterators" begin
+    @test last(Iterators.map(identity, 1:3)) == 3
+    @test last(Iterators.filter(iseven, (Iterators.map(identity, 1:3)))) == 2
+end
diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile
index ef583fd451f07..a0b9cf977ede8 100644
--- a/test/llvmpasses/Makefile
+++ b/test/llvmpasses/Makefile
@@ -2,13 +2,13 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 JULIAHOME := $(abspath $(SRCDIR)/../..)
 include $(JULIAHOME)/Make.inc
 
-check: $(SRCDIR)
+check: .
 
 TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll))
 
-$(SRCDIR) $(TESTS):
+. $(TESTS):
 	PATH=$(build_bindir):$(build_depsbindir):$$PATH \
 	LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \
-	$(build_depsbindir)/lit/lit.py -v $@
+	$(build_depsbindir)/lit/lit.py -v $(addprefix $(SRCDIR)/,$@)
 
-.PHONY: $(TESTS) $(SRCDIR) check all
+.PHONY: $(TESTS) check all .
diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.jl
index 227569a545adb..605ff3d213276 100644
--- a/test/llvmpasses/alloc-opt-gcframe.jl
+++ b/test/llvmpasses/alloc-opt-gcframe.jl
@@ -12,14 +12,19 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 # CHECK-LABEL: @return_obj
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc
-# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
+# CHECK: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 14
+# CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
+# CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
+# CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
+# CHECK-NEXT: %v = call noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16)
+# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
 println("""
 define {} addrspace(10)* @return_obj() {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   ret {} addrspace(10)* %v
 }
 """)
@@ -35,9 +40,9 @@ define {} addrspace(10)* @return_obj() {
 println("""
 define i64 @return_load(i64 %i) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
   %v64a11 = addrspacecast i64 addrspace(10)* %v64 to i64 addrspace(11)*
   store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4
@@ -50,16 +55,15 @@ define i64 @return_load(i64 %i) {
 
 # CHECK-LABEL: @ccall_obj
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK: @jl_gc_pool_alloc
-# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
+# CHECK: @ijl_gc_pool_alloc
+# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
 println("""
 define void @ccall_obj(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %f = bitcast i8* %fptr to void ({} addrspace(10)*)*
   call void %f({} addrspace(10)* %v)
   ret void
@@ -70,7 +74,6 @@ define void @ccall_obj(i8* %fptr) {
 # CHECK-LABEL: @ccall_ptr
 # CHECK: alloca i64
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @ijl_gc_pool_alloc
 # CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
@@ -81,9 +84,9 @@ define void @ccall_obj(i8* %fptr) {
 println("""
 define void @ccall_ptr(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
   %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
   %ptr = bitcast {}* %ptrj to i8*
@@ -96,16 +99,15 @@ define void @ccall_ptr(i8* %fptr) {
 
 # CHECK-LABEL: @ccall_unknown_bundle
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK: @jl_gc_pool_alloc
-# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
+# CHECK: @ijl_gc_pool_alloc
+# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
 println("""
 define void @ccall_unknown_bundle(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
   %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
   %ptr = bitcast {}* %ptrj to i8*
@@ -119,7 +121,6 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 # CHECK-LABEL: @lifetime_branches
 # CHECK: alloca i64
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK: L1:
 # CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8,
 # CHECK: %f = bitcast i8* %fptr to void (i8*)*
@@ -136,12 +137,12 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 println("""
 define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
   br i1 %b, label %L1, label %L3
 
 L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
   %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
   %ptr = bitcast {}* %ptrj to i8*
@@ -162,16 +163,15 @@ L3:
 
 # CHECK-LABEL: @object_field
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
-# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !0
+# CHECK-NOT: @ijl_gc_pool_alloc
+# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4
 println("""
 define void @object_field({} addrspace(10)* %field) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
   %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)*
   store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8
@@ -183,7 +183,6 @@ define void @object_field({} addrspace(10)* %field) {
 # CHECK-LABEL: @memcpy_opt
 # CHECK: alloca [16 x i8], align 16
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @ijl_gc_pool_alloc
 # CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
@@ -191,9 +190,9 @@ println("""
 define void @memcpy_opt(i8* %v22) {
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %v6 = call {}*** @julia.ptls_states()
-  %v18 = bitcast {}*** %v6 to i8*
-  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag)
   %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)*
   %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)*
   call void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* %v21, i8* %v22, i64 16, i32 8, i1 false)
@@ -204,7 +203,6 @@ top:
 
 # CHECK-LABEL: @preserve_opt
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @ijl_gc_pool_alloc
 # CHECK-NOT: @llvm.lifetime.end
@@ -213,9 +211,9 @@ println("""
 define void @preserve_opt(i8* %v22) {
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %v6 = call {}*** @julia.ptls_states()
-  %v18 = bitcast {}*** %v6 to i8*
-  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag)
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag)
   %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)*
   %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)*
   %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v19)
@@ -229,7 +227,6 @@ top:
 
 # CHECK-LABEL: @preserve_branches
 # CHECK: call {}*** @julia.get_pgcstack()
-# CHECK: call {}*** @julia.ptls_states()
 # CHECK: L1:
 # CHECK-NEXT: @external_function()
 # CHECK-NEXT: br i1 %b2, label %L2, label %L3
@@ -242,12 +239,12 @@ top:
 println("""
 define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
   %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
   br i1 %b, label %L1, label %L3
 
 L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
   %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
@@ -262,13 +259,12 @@ L3:
 """)
 # CHECK-LABEL: }{{$}}
 
-# CHECK: declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*,
-# CHECK: declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*,
+# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*,
+# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*,
 println("""
 declare void @external_function()
-declare {}*** @julia.ptls_states()
 declare {}*** @julia.get_pgcstack()
-declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, $isz, {} addrspace(10)*)
 declare {}* @julia.pointer_from_objref({} addrspace(11)*)
 declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
 declare token @llvm.julia.gc_preserve_begin(...)
diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll
new file mode 100644
index 0000000000000..58de757628e4c
--- /dev/null
+++ b/test/llvmpasses/cpu-features.ll
@@ -0,0 +1,42 @@
+; RUN: opt -load libjulia-internal%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
+
+declare i1 @julia.cpu.have_fma.f64()
+declare double @with_fma(double %0, double %1, double %2)
+declare double @without_fma(double %0, double %1, double %2)
+
+; CHECK: @fma1
+define double @fma1(double %0, double %1, double %2) #0 {
+top:
+  %3 = call i1 @julia.cpu.have_fma.f64()
+  br i1 %3, label %L1, label %L2
+
+; CHECK-NOT: @julia.cpu.have_fma
+; CHECK: @with_fma
+L1:                                               ; preds = %top
+  %4 = call double @with_fma(double %0, double %1, double %2)
+  ret double %4
+
+L2:                                               ; preds = %top
+  %5 = call double @without_fma(double %0, double %1, double %2)
+  ret double %5
+}
+
+; CHECK: @fma2
+define double @fma2(double %0, double %1, double %2) #1 {
+top:
+  %3 = call i1 @julia.cpu.have_fma.f64()
+  br i1 %3, label %L1, label %L2
+
+; CHECK-NOT: @julia.cpu.have_fma
+; CHECK: @without_fma
+L1:                                               ; preds = %top
+  %4 = call double @with_fma(double %0, double %1, double %2)
+  ret double %4
+
+L2:                                               ; preds = %top
+  %5 = call double @without_fma(double %0, double %1, double %2)
+  ret double %5
+}
+
+attributes #0 = { "target-features"="+fma" }
+attributes #1 = { "target-features"="-fma" }
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index e29ada14a0d00..6caf6dead7038 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -3,11 +3,9 @@
 @tag = external addrspace(10) global {}
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
-declare {} addrspace(10)* @jl_box_int64(i64)
+declare {} addrspace(10)* @ijl_box_int64(i64)
 declare {}*** @julia.ptls_states()
 declare {}*** @julia.get_pgcstack()
-declare void @jl_safepoint()
-declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
 
 declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32)
 declare void @julia.push_gc_frame({} addrspace(10)**, i32)
@@ -34,11 +32,11 @@ top:
 ; CHECK-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)***
 ; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8
   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
-  %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
+  %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
 ; CHECK: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3
   %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1)
   store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8
-  %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
+  %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b)
 ; CHECK: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
   %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0)
   store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8
@@ -59,7 +57,7 @@ top:
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc
+; CHECK: %v = call noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc
   %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index a7b8dc7caee38..9c3b26385417f 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -4,11 +4,10 @@
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
-declare {}*** @julia.ptls_states()
 declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
-declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*)
 declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
 
 define void @gc_frame_lowering(i64 %a, i64 %b) {
@@ -39,13 +38,18 @@ define {} addrspace(10)* @gc_alloc_lowering() {
 top:
 ; CHECK-LABEL: @gc_alloc_lowering
     %pgcstack = call {}*** @julia.get_pgcstack()
-    %ptls = call {}*** @julia.ptls_states()
-    %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8)
+    %0 = bitcast {}*** %pgcstack to {}**
+    %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 14
+; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
+; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
+; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
 ; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
 ; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !0
-    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
+    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
 ; CHECK-NEXT: ret {} addrspace(10)* %v
     ret {} addrspace(10)* %v
 }
@@ -59,20 +63,25 @@ define void @gc_drop_aliasing() {
 top:
 ; CHECK-LABEL: @gc_drop_aliasing
     %pgcstack = call {}*** @julia.get_pgcstack()
-    %ptls = call {}*** @julia.ptls_states()
-    %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8)
+    %0 = bitcast {}*** %pgcstack to {}**
+    %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 14
+; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
+; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
+; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
 ; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
 ; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !0
-    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
+    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
 ; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !5
+; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !9
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !6
+; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !10
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !7, !range !5
+; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !9
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
@@ -104,12 +113,16 @@ top:
 !5 = !{!"jtbaa"}
 
 ; CHECK:      !0 = !{!1, !1, i64 0}
-; CHECK-NEXT: !1 = !{!"jtbaa_tag", !2, i64 0}
-; CHECK-NEXT: !2 = !{!"jtbaa_data", !3, i64 0}
-; CHECK-NEXT: !3 = !{!"jtbaa", !4, i64 0}
-; CHECK-NEXT: !4 = !{!"jtbaa"}
-; CHECK-NEXT: !5 = !{i64 0, i64 23}
-; CHECK-NEXT: !6 = distinct !{!6}
-; CHECK-NEXT: !7 = !{!8, !8, i64 0}
-; CHECK-NEXT: !8 = !{!"jtbaa_const", !9}
-; CHECK-NEXT: !9 = !{!"jtbaa"}
+; CHECK-NEXT: !1 = !{!"jtbaa_gcframe", !2, i64 0}
+; CHECK-NEXT: !2 = !{!"jtbaa", !3, i64 0}
+; CHECK-NEXT: !3 = !{!"jtbaa"}
+; CHECK-NEXT: !4 = !{!5, !5, i64 0}
+; CHECK-NEXT: !5 = !{!"jtbaa_tag", !6, i64 0}
+; CHECK-NEXT: !6 = !{!"jtbaa_data", !7, i64 0}
+; CHECK-NEXT: !7 = !{!"jtbaa", !8, i64 0}
+; CHECK-NEXT: !8 = !{!"jtbaa"}
+; CHECK-NEXT: !9 = !{i64 0, i64 23}
+; CHECK-NEXT: !10 = distinct !{!10}
+; CHECK-NEXT: !11 = !{!12, !12, i64 0}
+; CHECK-NEXT: !12 = !{!"jtbaa_const", !13}
+; CHECK-NEXT: !13 = !{!"jtbaa"}
diff --git a/test/llvmpasses/lit.cfg.py b/test/llvmpasses/lit.cfg.py
index 2054876ed9a5d..f53854faf2559 100644
--- a/test/llvmpasses/lit.cfg.py
+++ b/test/llvmpasses/lit.cfg.py
@@ -13,8 +13,6 @@
 config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if
     platform.system() == 'Windows' else '.so'))
 
-path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH']))
-config.environment['PATH'] = path
 config.environment['HOME'] = "/tmp"
 
 if platform.machine() == "x86_64":
diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll
index d9d5ac087b773..4191fa664c6cd 100644
--- a/test/llvmpasses/lower-handlers.ll
+++ b/test/llvmpasses/lower-handlers.ll
@@ -2,7 +2,7 @@
 
 attributes #1 = { returns_twice }
 declare i32 @julia.except_enter() #1
-declare void @jl_pop_handler(i32)
+declare void @ijl_pop_handler(i32)
 declare i8**** @julia.ptls_states()
 declare i8**** @julia.get_pgcstack()
 
@@ -10,7 +10,7 @@ define void @simple() {
 top:
     %pgcstack = call i8**** @julia.get_pgcstack()
 ; CHECK: call void @llvm.lifetime.start
-; CHECK: call void @jl_enter_handler
+; CHECK: call void @ijl_enter_handler
 ; CHECK: setjmp
     %r = call i32 @julia.except_enter()
     %cmp = icmp eq i32 %r, 0
@@ -20,7 +20,7 @@ try:
 catch:
     br label %after
 after:
-    call void @jl_pop_handler(i32 1)
+    call void @ijl_pop_handler(i32 1)
 ; CHECK: llvm.lifetime.end
     ret void
 }
diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll
index b883a53554a0c..40dd020bca260 100644
--- a/test/llvmpasses/refinements.ll
+++ b/test/llvmpasses/refinements.ll
@@ -5,7 +5,7 @@ declare {}*** @julia.ptls_states()
 declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare void @one_arg_boxed({} addrspace(10)*)
-declare {} addrspace(10)* @jl_box_int64(i64)
+declare {} addrspace(10)* @ijl_box_int64(i64)
 
 define void @argument_refinement({} addrspace(10)* %a) {
 ; CHECK-LABEL: @argument_refinement
@@ -26,7 +26,7 @@ define void @heap_refinement1(i64 %a) {
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-    %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
+    %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -43,7 +43,7 @@ define void @heap_refinement2(i64 %a) {
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-    %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
+    %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
 ; CHECK: store {} addrspace(10)* %loaded1
@@ -211,7 +211,7 @@ declare void @julia.write_barrier({} addrspace(10)*, {} addrspace(10)*) #1
 define {} addrspace(10)* @setfield({} addrspace(10)* %p) {
 ; CHECK-LABEL: @setfield(
 ; CHECK-NOT: %gcframe
-; CHECK: call void @jl_gc_queue_root
+; CHECK: call void @ijl_gc_queue_root
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %c = call {} addrspace(10)* @allocate_some_value()
diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll
index 2f34cf55ffc08..f8b6de024bfdd 100644
--- a/test/llvmpasses/remove-addrspaces.ll
+++ b/test/llvmpasses/remove-addrspaces.ll
@@ -101,3 +101,10 @@ loop:
 exit:
   ret i64 %sum
 }
+
+
+; COM: check that address spaces are removed from the type carried by the byval attribute as well as from the parameter's pointer type
+define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) {
+; CHECK: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0)
+  ret void
+}
diff --git a/test/loading.jl b/test/loading.jl
index c56f6c463a21f..b9fb50a3287c1 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -132,19 +132,54 @@ end
 @test_throws ArgumentError parse(UUID, "not a UUID")
 @test tryparse(UUID, "either is this") === nothing
 
-function subset(v::Vector{T}, m::Int) where T
-    T[v[j] for j = 1:length(v) if ((m >>> (j - 1)) & 1) == 1]
-end
+@testset "explicit_project_deps_get" begin
+    mktempdir() do dir
+        project_file = joinpath(dir, "Project.toml")
+        touch(project_file) # dummy_uuid calls realpath
+        # various UUIDs to work with
+        proj_uuid = dummy_uuid(project_file)
+        root_uuid = uuid4()
+        this_uuid = uuid4()
 
-function perm(p::Vector, i::Int)
-    for j = length(p):-1:1
-        i, k = divrem(i, j)
-        p[j], p[k+1] = p[k+1], p[j]
+        old_load_path = copy(LOAD_PATH)
+        try
+            copy!(LOAD_PATH, [project_file])
+            write(project_file, """
+            name = "Root"
+            uuid = "$root_uuid"
+            [deps]
+            This = "$this_uuid"
+            """)
+            # look up various packages by name
+            root = Base.identify_package("Root")
+            this = Base.identify_package("This")
+            that = Base.identify_package("That")
+
+            @test root.uuid == root_uuid
+            @test this.uuid == this_uuid
+            @test that == nothing
+
+            write(project_file, """
+            name = "Root"
+            This = "$this_uuid"
+            [deps]
+            """)
+            # look up various packages by name
+            root = Base.identify_package("Root")
+            this = Base.identify_package("This")
+            that = Base.identify_package("That")
+
+            @test root.uuid == proj_uuid
+            @test this == nothing
+            @test that == nothing
+        finally
+            copy!(LOAD_PATH, old_load_path)
+        end
     end
-    return p
 end
 
-@testset "explicit_project_deps_get" begin
+# extras
+@testset "extras" begin
     mktempdir() do dir
         project_file = joinpath(dir, "Project.toml")
         touch(project_file) # dummy_uuid calls realpath
@@ -152,52 +187,47 @@ end
         proj_uuid = dummy_uuid(project_file)
         root_uuid = uuid4()
         this_uuid = uuid4()
-        # project file to subset/permute
-        lines = split("""
-        name = "Root"
-        uuid = "$root_uuid"
-        [deps]
-        This = "$this_uuid"
-        """, '\n')
-        N = length(lines)
-        # test every permutation of every subset of lines
-        for m = 0:2^N-1
-            s = subset(lines, m) # each subset of lines
-            for i = 1:factorial(count_ones(m))
-                p = perm(s, i) # each permutation of the subset
-                open(project_file, write=true) do io
-                    for line in p
-                        println(io, line)
-                    end
-                end
-                # look at lines and their order
-                n = findfirst(line -> startswith(line, "name"), p)
-                u = findfirst(line -> startswith(line, "uuid"), p)
-                d = findfirst(line -> line == "[deps]", p)
-                t = findfirst(line -> startswith(line, "This"), p)
-                # look up various packages by name
-                root = Base.explicit_project_deps_get(project_file, "Root")
-                this = Base.explicit_project_deps_get(project_file, "This")
-                that = Base.explicit_project_deps_get(project_file, "That")
-                # test that the correct answers are given
-                @test root == (something(n, N+1) ≥ something(d, N+1) ? nothing :
-                               something(u, N+1) < something(d, N+1) ? root_uuid : proj_uuid)
-                @test this == (something(d, N+1) < something(t, N+1) ≤ N ? this_uuid : nothing)
-                @test that == nothing
-            end
+
+        old_load_path = copy(LOAD_PATH)
+        try
+            copy!(LOAD_PATH, [project_file])
+            write(project_file, """
+            name = "Root"
+            uuid = "$root_uuid"
+            [extras]
+            This = "$this_uuid"
+            """)
+            # look up various packages by name
+            root = Base.identify_package("Root")
+            this = Base.identify_package("This")
+            that = Base.identify_package("That")
+
+            @test root.uuid == root_uuid
+            @test this == nothing
+            @test that == nothing
+
+            @test Base.get_uuid_name(project_file, this_uuid) == "This"
+        finally
+            copy!(LOAD_PATH, old_load_path)
         end
     end
 end
 
+
 ## functional testing of package identification, location & loading ##
 
 saved_load_path = copy(LOAD_PATH)
 saved_depot_path = copy(DEPOT_PATH)
 saved_active_project = Base.ACTIVE_PROJECT[]
+watcher_counter = Ref(0)
+push!(Base.active_project_callbacks, () -> watcher_counter[] += 1)
+push!(Base.active_project_callbacks, () -> error("broken"))
 
 push!(empty!(LOAD_PATH), joinpath(@__DIR__, "project"))
 append!(empty!(DEPOT_PATH), [mktempdir(), joinpath(@__DIR__, "depot")])
-Base.ACTIVE_PROJECT[] = nothing
+@test watcher_counter[] == 0
+@test_logs (:error, r"active project callback .* failed") Base.set_active_project(nothing)
+@test watcher_counter[] == 1
 
 @test load_path() == [joinpath(@__DIR__, "project", "Project.toml")]
 
@@ -609,10 +639,10 @@ end == "opening file $(repr(joinpath(@__DIR__, "notarealfile.jl")))"
 old_act_proj = Base.ACTIVE_PROJECT[]
 pushfirst!(LOAD_PATH, "@")
 try
-    Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
+    Base.set_active_project(joinpath(@__DIR__, "TestPkg"))
     @eval using TestPkg
 finally
-    Base.ACTIVE_PROJECT[] = old_act_proj
+    Base.set_active_project(old_act_proj)
     popfirst!(LOAD_PATH)
 end
 
@@ -695,7 +725,9 @@ end
 
 append!(empty!(LOAD_PATH), saved_load_path)
 append!(empty!(DEPOT_PATH), saved_depot_path)
-Base.ACTIVE_PROJECT[] = saved_active_project
+for _ = 1:2 pop!(Base.active_project_callbacks) end
+Base.set_active_project(saved_active_project)
+@test watcher_counter[] == 3
 
 # issue #28190
 module Foo; import Libdl; end
@@ -756,3 +788,17 @@ end
         @test Base.get_deps(raw_manifest) == deps
     end
 end
+
+@testset "error message loading pkg bad module name" begin
+    mktempdir() do pkgdir
+        load_path_backup = copy(LOAD_PATH)
+        try
+            write(joinpath(pkgdir, "BadCase.jl"), "module badcase end")  # module name differs from the file name in case only
+            push!(LOAD_PATH, pkgdir)
+            @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \
+                                        check for typos in package module name") (@eval using BadCase)
+        finally
+            copy!(LOAD_PATH, load_path_backup)  # always undo the LOAD_PATH change
+        end
+    end
+end
diff --git a/test/math.jl b/test/math.jl
index 67522b9be4c7b..664a1f637314c 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -53,6 +53,11 @@ end
     @test occursin("3.14159", sprint(show, MIME"text/plain"(), π))
     @test repr(Any[pi ℯ; ℯ pi]) == "Any[π ℯ; ℯ π]"
     @test string(pi) == "π"
+
+    @test sin(π) === sinpi(1) == tan(π) == sinpi(1 // 1) == 0
+    @test cos(π) === cospi(1) == sec(π) == cospi(1 // 1) == -1
+    @test csc(π) == 1/0 && cot(π) == -1/0
+    @test sincos(π) === sincospi(1) == (0, -1)
 end
 
 @testset "frexp,ldexp,significand,exponent" begin
@@ -141,14 +146,18 @@ end
 # We compare to BigFloat instead of hard-coding
 # values, assuming that BigFloat has an independently tested implementation.
 @testset "basic math functions" begin
-    @testset "$T" for T in (Float32, Float64)
+    @testset "$T" for T in (Float16, Float32, Float64)
         x = T(1//3)
         y = T(1//2)
         yi = 4
         @testset "Random values" begin
-            @test x^y ≈ big(x)^big(y)
+            @test x^y === T(big(x)^big(y))
             @test x^1 === x
-            @test x^yi ≈ big(x)^yi
+            @test x^yi === T(big(x)^yi)
+            # test for large negative exponent where error compensation matters
+            @test 0.9999999955206014^-1.0e8 == 1.565084574870928
+            @test (-x)^yi == x^yi
+            @test (-x)^(yi+1) == -(x^(yi+1))
             @test acos(x) ≈ acos(big(x))
             @test acosh(1+x) ≈ acosh(big(1+x))
             @test asin(x) ≈ asin(big(x))
@@ -192,7 +201,7 @@ end
             @test atan(T(1),T(1)) ≈ T(pi)/4 atol=eps(T)
             @test isequal(cbrt(T(0)), T(0))
             @test isequal(cbrt(T(1)), T(1))
-            @test isequal(cbrt(T(1000000000)), T(1000))
+            @test isequal(cbrt(T(1000000000))^3, T(1000)^3)
             @test isequal(cos(T(0)), T(1))
             @test cos(T(pi)/2) ≈ T(0) atol=eps(T)
             @test isequal(cos(T(pi)), T(-1))
@@ -221,7 +230,7 @@ end
             @test sin(T(pi)) ≈ T(0) atol=eps(T)
             @test isequal(sqrt(T(0)), T(0))
             @test isequal(sqrt(T(1)), T(1))
-            @test isequal(sqrt(T(100000000)), T(10000))
+            @test isequal(sqrt(T(100000000))^2, T(10000)^2)
             @test isequal(tan(T(0)), T(0))
             @test tan(T(pi)/4) ≈ T(1) atol=eps(T)
             @test isequal(sec(T(pi)), -one(T))
@@ -230,6 +239,14 @@ end
             @test isequal(cscd(T(90)), one(T))
             @test isequal(sech(log(one(T))), one(T))
             @test isequal(csch(zero(T)), T(Inf))
+            @test zero(T)^y === zero(T)
+            @test zero(T)^zero(T) === one(T)
+            @test zero(T)^(-y) === T(Inf)
+            @test zero(T)^T(NaN) === T(NaN)
+            @test one(T)^y === one(T)
+            @test one(T)^zero(T) === one(T)
+            @test one(T)^T(NaN) === one(T)
+            @test isnan(T(NaN)^T(-.5))
         end
         @testset "Inverses" begin
             @test acos(cos(x)) ≈ x
@@ -297,47 +314,32 @@ end
     end
 end
 
-@testset "exp function" for T in (Float64, Float32)
-    @testset "$T accuracy" begin
-        X = map(T, vcat(-10:0.0002:10, -80:0.001:80, 2.0^-27, 2.0^-28, 2.0^-14, 2.0^-13))
-        for x in X
-            y, yb = exp(x), exp(big(x))
-            @test abs(y-yb) <= 1.0*eps(T(yb))
-        end
-    end
-    @testset "$T edge cases" begin
-        @test isnan_type(T, exp(T(NaN)))
-        @test exp(T(-Inf)) === T(0.0)
-        @test exp(T(Inf)) === T(Inf)
-        @test exp(T(0.0)) === T(1.0) # exact
-        @test exp(T(5000.0)) === T(Inf)
-        @test exp(T(-5000.0)) === T(0.0)
-    end
-end
-
-@testset "exp10 function" begin
-    @testset "accuracy" begin
-        X = map(Float64, vcat(-10:0.00021:10, -35:0.0023:100, -300:0.001:300))
-        for x in X
-            y, yb = exp10(x), exp10(big(x))
-            @test abs(y-yb) <= 1.2*eps(Float64(yb))
+@testset "exponential functions" for T in (Float64, Float32, Float16)
+    for (func, invfunc) in ((exp2, log2), (exp, log), (exp10, log10))
+        @testset "$T $func accuracy" begin
+            minval, maxval = invfunc(floatmin(T)),prevfloat(invfunc(floatmax(T)))
+            # Test range and extensively test numbers near 0.
+            X = Iterators.flatten((minval:T(.1):maxval,
+                                   minval/100:T(.0021):maxval/100,
+                                   minval/10000:T(.000021):maxval/10000,
+                                   nextfloat(zero(T)),
+                                   T(-100):T(1):T(100) ))
+            for x in X
+                y, yb = func(x), func(widen(x))
+                if isfinite(eps(T(yb)))
+                    @test abs(y-yb) <= 1.2*eps(T(yb))
+                end
+            end
         end
-        X = map(Float32, vcat(-10:0.00021:10, -35:0.0023:35, -35:0.001:35))
-        for x in X
-            y, yb = exp10(x), exp10(big(x))
-            @test abs(y-yb) <= 1.2*eps(Float32(yb))
+        @testset "$T $func edge cases" begin
+            @test func(T(-Inf)) === T(0.0)
+            @test func(T(Inf)) === T(Inf)
+            @test func(T(NaN)) === T(NaN)
+            @test func(T(0.0)) === T(1.0) # exact
+            @test func(T(5000.0)) === T(Inf)
+            @test func(T(-5000.0)) === T(0.0)
         end
     end
-    @testset "$T edge cases" for T in (Float64, Float32)
-        @test isnan_type(T, exp10(T(NaN)))
-        @test exp10(T(-Inf)) === T(0.0)
-        @test exp10(T(Inf)) === T(Inf)
-        @test exp10(T(0.0)) === T(1.0) # exact
-        @test exp10(T(1.0)) === T(10.0)
-        @test exp10(T(3.0)) === T(1000.0)
-        @test exp10(T(5000.0)) === T(Inf)
-        @test exp10(T(-5000.0)) === T(0.0)
-    end
 end
 
 @testset "test abstractarray trig functions" begin
@@ -652,14 +654,14 @@ end
 end
 
 @testset "modf" begin
-    @testset "$elty" for elty in (Float16, Float32, Float64)
-        @test modf( convert(elty,1.2) )[1] ≈ convert(elty,0.2)
-        @test modf( convert(elty,1.2) )[2] ≈ convert(elty,1.0)
-        @test modf( convert(elty,1.0) )[1] ≈ convert(elty,0.0)
-        @test modf( convert(elty,1.0) )[2] ≈ convert(elty,1.0)
-        @test isequal(modf( convert(elty,-Inf) ), (-0.0, -Inf))
-        @test isequal(modf( convert(elty,Inf) ), (0.0, Inf))
-        @test isequal(modf( convert(elty,NaN) ), (NaN, NaN))
+    @testset "$T" for T in (Float16, Float32, Float64)
+        @test modf(T(1.25)) === (T(0.25), T(1.0))
+        @test modf(T(1.0))  === (T(0.0), T(1.0))
+        @test modf(T(-Inf)) === (T(-0.0), T(-Inf))
+        @test modf(T(Inf))  === (T(0.0), T(Inf))
+        @test modf(T(NaN))  === (T(NaN), T(NaN))
+        @test modf(T(-0.0)) === (T(-0.0), T(-0.0))
+        @test modf(T(-1.0)) === (T(-0.0), T(-1.0))
     end
 end
 
@@ -1285,3 +1287,31 @@ end
         @test_throws MethodError f(x)
     end
 end
+
+@testset "fma" begin
+    if !(@static Sys.iswindows() && Int===Int64) # windows fma currently seems broken somehow.
+        for func in (fma, Base.fma_emulated) # the same expectations are applied to both functions
+            @test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16
+            @test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7
+            @testset "$T" for T in (Float32, Float64)
+                @test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T) # 2*floatmax(T) is not representable on its own, yet the fused result is finite
+                @test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) # one spacing above floatmax(T) overflows
+                @test func(T(Inf), T(Inf), T(Inf)) === T(Inf)
+                @test func(floatmax(T), floatmax(T), -T(Inf)) === -T(Inf)
+                @test func(floatmax(T), -floatmax(T), T(Inf)) === T(Inf)
+                @test isnan_type(T, func(T(Inf), T(1), -T(Inf))) # Inf - Inf
+                @test isnan_type(T, func(T(Inf), T(0), -T(0))) # Inf * 0
+                @test func(-zero(T), zero(T), -zero(T)) === -zero(T) # the sign of zero must be preserved
+                for _ in 1:2^18 # random bit patterns reinterpreted as T, checked against `fma`
+                    a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3))
+                    @test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c) # on mismatch the expression evaluates to the operands, so the report shows them
+                end
+            end
+            @test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292
+            @test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31
+            @test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf
+            @test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf
+            @test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6
+        end
+    end
+end
diff --git a/test/misc.jl b/test/misc.jl
index e765dc9279b86..9a7c20d880ff5 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -242,6 +242,35 @@ v11801, t11801 = @timed sin(1)
 
 @test names(@__MODULE__, all = true) == names_before_timing
 
+# Accepted @time argument formats; each form must also return the value of the timed expression
+@test @time true
+@test @time "message" true  # message given as a string literal
+let msg = "message"
+    @test @time msg true  # message given as a variable
+end
+let foo() = "message"
+    @test @time foo() true  # message given as a call expression
+end
+
+# Accepted @timev argument formats (same four forms as for @time above)
+@test @timev true
+@test @timev "message" true
+let msg = "message"
+    @test @timev msg true
+end
+let foo() = "message"
+    @test @timev foo() true
+end
+
+# @showtime must return the value of the wrapped expression, whether it is true or false
+@test @showtime true
+let foo() = true
+    @test @showtime foo()
+end
+let foo() = false
+    @test (@showtime foo()) == false  # parenthesized so that `== false` is not consumed by the macro
+end
+
 # PR #39133, ensure that @time evaluates in the same scope
 function time_macro_scope()
     try # try/throw/catch bypasses printing
@@ -263,6 +292,22 @@ function timev_macro_scope()
 end
 @test timev_macro_scope() == 1
 
+before = Base.cumulative_compile_time_ns_before();  # sampled before either task is started
+
+# exercise concurrent calls to `@time` for reentrant compilation time measurement.
+t1 = @async @time begin
+    sleep(2)
+    @eval module M ; f(x,y) = x+y ; end
+    @eval M.f(2,3)
+end
+t2 = @async begin
+    sleep(1)
+    @time 2 + 2
+end
+wait(t1); wait(t2)  # both tasks must finish (and any failure surface) before `after` is sampled
+after = Base.cumulative_compile_time_ns_after();
+@test after >= before;
+
 # interactive utilities
 
 struct ambigconvert; end # inject a problematic `convert` method to ensure it still works
@@ -992,11 +1037,30 @@ end
 # Test that read fault on a prot-none region does not incorrectly give
 # ReadOnlyMemoryEror, but rather crashes the program
 const MAP_ANONYMOUS_PRIVATE = Sys.isbsd() ? 0x1002 : 0x22
-let script = :(let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid},
-    (Ptr{Cvoid}, Csize_t, Cint, Cint, Cint, Int),
-    C_NULL, 16*1024, 0, $MAP_ANONYMOUS_PRIVATE, -1, 0)); try
-    unsafe_load(ptr)
-    catch e; println(e) end; end)
-    @test !success(`$(Base.julia_cmd()) -e $script`)
+let script = :(
+        let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid},
+                                  (Ptr{Cvoid}, Csize_t, Cint, Cint, Cint, Int),
+                                  C_NULL, 16*1024, 0, $MAP_ANONYMOUS_PRIVATE, -1, 0))
+            try
+                unsafe_load(ptr)
+            catch e
+                println(e)
+            end
+        end
+    )
+    cmd = if Sys.isunix()
+        # Set the maximum core dump size to 0 to keep this expected crash from
+        # producing a (and potentially overwriting an existing) core dump file
+        `sh -c "ulimit -c 0; $(Base.shell_escape(Base.julia_cmd())) -e '$script'"`
+    else
+        `$(Base.julia_cmd()) -e '$script'`
+    end
+    @test !success(cmd)
 end
 
+# issue #41656: a fresh julia process that evaluates `isempty(x) = true` must exit successfully
+@test success(`$(Base.julia_cmd()) -e 'isempty(x) = true'`)
+
+@testset "Base/timing.jl" begin
+    @test Base.jit_total_bytes() >= 0  # only checks that the call works and the result is non-negative
+end
diff --git a/test/missing.jl b/test/missing.jl
index e1042f76fc7a7..0be8cb8ec9be4 100644
--- a/test/missing.jl
+++ b/test/missing.jl
@@ -465,10 +465,10 @@ end
             @test_throws BoundsError x[3, 1]
             @test findfirst(==(2), x) === nothing
             @test isempty(findall(==(2), x))
-            @test_throws ArgumentError argmin(x)
-            @test_throws ArgumentError findmin(x)
-            @test_throws ArgumentError argmax(x)
-            @test_throws ArgumentError findmax(x)
+            @test_throws "reducing over an empty collection is not allowed" argmin(x)
+            @test_throws "reducing over an empty collection is not allowed" findmin(x)
+            @test_throws "reducing over an empty collection is not allowed" argmax(x)
+            @test_throws "reducing over an empty collection is not allowed" findmax(x)
         end
     end
 
@@ -525,8 +525,8 @@ end
         for n in 0:3
             itr = skipmissing(Vector{Union{Int,Missing}}(fill(missing, n)))
             @test sum(itr) == reduce(+, itr) == mapreduce(identity, +, itr) === 0
-            @test_throws ArgumentError reduce(x -> x/2, itr)
-            @test_throws ArgumentError mapreduce(x -> x/2, +, itr)
+            @test_throws "reducing over an empty collection is not allowed" reduce(x -> x/2, itr)
+            @test_throws "reducing over an empty collection is not allowed" mapreduce(x -> x/2, +, itr)
         end
 
         # issue #35504
diff --git a/test/mpfr.jl b/test/mpfr.jl
index 0b6ff30e372dc..a1039a7c5a810 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -475,7 +475,8 @@ end
     @test precision(z) == 240
     x = BigFloat(12)
     @test precision(x) == old_precision
-    @test_throws DomainError setprecision(1)
+    @test precision(setprecision(1) do; BigFloat(23); end) == 1  # minimum-precision
+    @test_throws DomainError setprecision(0)
     @test_throws DomainError BigFloat(1, precision = 0)
     @test_throws DomainError BigFloat(big(1.1), precision = 0)
     @test_throws DomainError BigFloat(2.5, precision = -900)
@@ -1021,3 +1022,16 @@ end
         @test big(typeof(complex(x, x))) == typeof(big(complex(x, x)))
     end
 end
+
+@testset "precision base" begin
+    # a 53-bit BigFloat must report the same decimal precision as Float64
+    @test precision(Float64, base=10) == setprecision(() -> precision(BigFloat, base=10), 53) == 15
+    # 100 base-10 digits and 50 base-100 digits must select the same precision
+    for (nd, radix) in ((100, 10), (50, 100))
+        setprecision(nd, base=radix) do
+            @test precision(BigFloat, base=100) == 50
+            @test precision(BigFloat, base=10) == 100
+            @test precision(BigFloat) == precision(BigFloat, base=2) == 333
+        end
+    end
+end
diff --git a/test/namedtuple.jl b/test/namedtuple.jl
index 82bbc26349a7a..3298a1c7a2562 100644
--- a/test/namedtuple.jl
+++ b/test/namedtuple.jl
@@ -330,3 +330,6 @@ end
 # issue #37926
 @test nextind((a=1,), 1) == nextind((1,), 1) == 2
 @test prevind((a=1,), 2) == prevind((1,), 2) == 1
+
+# issue #43045
+@test merge(NamedTuple(), Iterators.reverse(pairs((a=1,b=2)))) === (b = 2, a = 1)
diff --git a/test/numbers.jl b/test/numbers.jl
index b5fbd7348f860..4875de7fc3bb2 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -2523,6 +2523,17 @@ end
     @test rem(T(-1.5), T(2), RoundNearest) == 0.5
     @test rem(T(-1.5), T(2), RoundDown)    == 0.5
     @test rem(T(-1.5), T(2), RoundUp)      == -1.5
+    for mode in [RoundToZero, RoundNearest, RoundDown, RoundUp]
+        @test isnan(rem(T(1), T(0), mode))
+        @test isnan(rem(T(Inf), T(2), mode))
+        @test isnan(rem(T(1), T(NaN), mode))
+        # FIXME: The broken case erroneously returns -Inf
+        @test rem(T(4), floatmin(T) * 2, mode) == 0 broken=(T == BigFloat && mode == RoundUp)
+    end
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundToZero),  -0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundNearest), -0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundDown),    0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundUp),      0.0)
 end
 
 @testset "rem for $T RoundNearest" for T in (Int8, Int16, Int32, Int64, Int128)
@@ -2538,6 +2549,41 @@ end
     end
 end
 
+@testset "divrem rounded" begin
+    # floats: the remainder component must follow the requested rounding mode
+    for T in (Float16, Float32, Float64, BigFloat)
+        pos, neg, two = T(1.5), T(-1.5), T(2)
+        @test last(divrem(pos, two, RoundToZero))  == 1.5
+        @test last(divrem(pos, two, RoundNearest)) == -0.5
+        @test last(divrem(pos, two, RoundDown))    == 1.5
+        @test last(divrem(pos, two, RoundUp))      == -0.5
+        @test last(divrem(neg, two, RoundToZero))  == -1.5
+        @test last(divrem(neg, two, RoundNearest)) == 0.5
+        @test last(divrem(neg, two, RoundDown))    == 0.5
+        @test last(divrem(neg, two, RoundUp))      == -1.5
+    end
+    # integers: divrem must agree with separate div and rem calls,
+    # for every pair below and for the same pair with both signs flipped
+    cases = (
+        (3, 2), (5, 3), (-3, 2), (5, 2), (-5, 2), (-5, 3), (5, -3),
+    )
+    for (p, q) in cases
+        for s in (+1, -1)
+            n, d = p*s, q*s
+            @test divrem(n, d, RoundNearest) == (div(n, d, RoundNearest), rem(n, d, RoundNearest))
+        end
+    end
+
+    # operands too large for Int64 (these literals parse as Int128)
+    big_a = 122322388883338838388383888823233122323
+    small = 343443
+    big_c = 122322388883338838388383888823233122333
+    neg_d = -(big_a - 20)
+    @test divrem(big_a, small) == (div(big_a, small), rem(big_a, small))
+    @test divrem(big_a, big_c) == (div(big_a, big_c), rem(big_a, big_c))
+    @test divrem(big_a, neg_d, RoundDown) == (div(big_a, neg_d, RoundDown), rem(big_a, neg_d, RoundDown))
+end
+
 @testset "rem2pi $T" for T in (Float16, Float32, Float64, BigFloat)
     @test rem2pi(T(1), RoundToZero)  == 1
     @test rem2pi(T(1), RoundNearest) == 1
@@ -2641,6 +2687,10 @@ end
     @test !isone(triu(fill(1, 5, 5)))
     @test !isone(zeros(Int, 5, 5))
     @test isone(Matrix(1I, 5, 5))
+    @test !isone(view(rand(5,5), [1,3,4], :))
+    Dv = view(Diagonal([1,1, 1]), [1,2], 1:2)
+    @test isone(Dv)
+    @test (@allocated isone(Dv)) == 0
     @test isone(Matrix(1I, 1000, 1000)) # sizeof(X) > 2M == ISONE_CUTOFF
 end
 
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 1c7985c37256b..7621e14013627 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -801,3 +801,24 @@ end
     @test reshape(a, :) === a
     @test reshape(a, (:,)) === a
 end
+
+@testset "issue #41630: replace_ref_begin_end!/@view on offset-like arrays" begin
+    x = OffsetArray([1 2; 3 4], -10:-9, 9:10)  # 2×2 OffsetArray{...} with indices -10:-9×9:10
+
+    # begin/end with offset indices: each must resolve to the first/last index of its own axis
+    @test (@view x[begin, 9])[] == 1
+    @test (@view x[-10, end])[] == 2
+    @test (@view x[-9, begin])[] == 3
+    @test (@view x[end, 10])[] == 4
+    @test (@view x[begin, begin])[] == 1
+    @test (@view x[begin, end])[] == 2
+    @test (@view x[end, begin])[] == 3
+    @test (@view x[end, end])[] == 4
+
+    # nested usages of begin/end: inside y[...] they refer to y's axis, outside to x's axes
+    y = OffsetArray([-10, -9], (5,))  # the expectations below rely on y[begin] == -10 and y[end] == -9
+    @test (@view x[begin, -y[end]])[] == 1
+    @test (@view x[y[begin], end])[] == 2
+    @test (@view x[end, -y[end]])[] == 3
+    @test (@view x[y[end], end])[] == 4
+end
diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl
index 796aac7072c60..ddea4f974ccf1 100644
--- a/test/opaque_closure.jl
+++ b/test/opaque_closure.jl
@@ -192,7 +192,7 @@ end
 
 # OpaqueClosure ABI
 f_oc_noinline(x) = @opaque function (y)
-    @Base._noinline_meta
+    @noinline
     x + y
 end
 
@@ -206,3 +206,8 @@ end
 @test f_oc_noinline_call(1, 2) == 3
 
 @test_throws MethodError (@opaque x->x+1)(1, 2)
+
+# calling an opaque closure held in a global constant; see https://github.com/JuliaLang/julia/issues/40409
+const GLOBAL_OPAQUE_CLOSURE = @opaque () -> 123
+call_global_opaque_closure() = GLOBAL_OPAQUE_CLOSURE()  # the call goes through the constant global binding
+@test call_global_opaque_closure() == 123
diff --git a/test/operators.jl b/test/operators.jl
index d07f3382f53a5..97edebc0ea6f3 100644
--- a/test/operators.jl
+++ b/test/operators.jl
@@ -81,6 +81,14 @@ import Base.<
 @test isequal(minmax(TO23094(2), TO23094(1))[1], TO23094(1))
 @test isequal(minmax(TO23094(2), TO23094(1))[2], TO23094(2))
 
+let m = Module()  # fresh anonymous module that holds the throwaway type and function
+    @eval m begin
+        struct Foo end
+        foo(xs) = isequal(xs[1], Foo())  # compares an element of unknown type with the new singleton
+    end
+    @test !(@inferred m.foo(Any[42]))  # the result must be false, and `@inferred` additionally checks the inferred return type
+end
+
 @test isless('a','b')
 
 @testset "isgreater" begin
diff --git a/test/ordering.jl b/test/ordering.jl
index 9c9d0277c3db4..547d8d8dd0e8b 100644
--- a/test/ordering.jl
+++ b/test/ordering.jl
@@ -38,3 +38,5 @@ struct SomeOtherOrder <: Base.Order.Ordering end
 
 @test_throws ErrorException sort([1, 2, 3], lt=(a, b) -> a - b < 0, order=SomeOtherOrder())
 
+@test reverse(Forward) === Reverse
+@test reverse(Reverse) === Forward
diff --git a/test/parse.jl b/test/parse.jl
index 2deeecd516f2a..ae07936b3a18e 100644
--- a/test/parse.jl
+++ b/test/parse.jl
@@ -236,6 +236,13 @@ end
     @test_throws ArgumentError parse(Int, "2", base = 63)
 end
 
+@testset "issue #42616" begin
+    for blank in ("", " ")  # empty and whitespace-only input are not valid Bool text
+        @test tryparse(Bool, blank) === nothing
+        @test_throws ArgumentError parse(Bool, blank)
+    end
+end
+
 # issue #17333: tryparse should still throw on invalid base
 for T in (Int32, BigInt), base in (0,1,100)
     @test_throws ArgumentError tryparse(T, "0", base = base)
diff --git a/test/precompile.jl b/test/precompile.jl
index f69e7a4766f2c..999bd07c9e12b 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -823,6 +823,10 @@ precompile_test_harness("Issue #25971") do load_path
     chmod(sourcefile, 0o600)
     cachefile = Base.compilecache(Base.PkgId("Foo25971"))
     @test filemode(sourcefile) == filemode(cachefile)
+    chmod(sourcefile, 0o444)
+    cachefile = Base.compilecache(Base.PkgId("Foo25971"))
+    # Check writable
+    @test touch(cachefile) == cachefile
 end
 
 precompile_test_harness("Issue #38312") do load_path
@@ -871,3 +875,47 @@ precompile_test_harness("Renamed Imports") do load_path
     Base.compilecache(Base.PkgId("RenameImports"))
     @test (@eval (using RenameImports; RenameImports.test())) isa Module
 end
+
+# issue #41872 (example from #38983)
+precompile_test_harness("No external edges") do load_path
+    write(joinpath(load_path, "NoExternalEdges.jl"),
+          """
+          module NoExternalEdges
+          bar(x::Int) = hcat(rand())
+          @inline bar() = hcat(rand())
+          bar(x::Float64) = bar()
+          foo1() = bar(1)
+          foo2() = bar(1.0)
+          foo3() = bar()
+          foo4() = hcat(rand())
+          precompile(foo1, ())
+          precompile(foo2, ())
+          precompile(foo3, ())
+          precompile(foo4, ())
+          end
+          """)
+    Base.compilecache(Base.PkgId("NoExternalEdges"))  # build the cache for the package source written above
+    @eval begin  # `using` is only valid at top level, hence the @eval
+        using NoExternalEdges  # every fooN checked below has a `precompile` call in the module source
+        @test only(methods(NoExternalEdges.foo1)).specializations[1].cache.max_world != 0  # NOTE(review): a zero max_world presumably marks code invalidated on load — confirm
+        @test only(methods(NoExternalEdges.foo2)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo3)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo4)).specializations[1].cache.max_world != 0
+    end
+end
+
+@testset "issue 38149" begin
+    M = Module()  # fresh anonymous module so `f` has exactly the two methods defined here
+    @eval M begin
+        @nospecialize  # no arguments given: NOTE(review): presumably applies to the method definitions that follow — confirm
+        f(x, y) = x + y
+        f(x::Int, y) = 2x + y
+    end
+    precompile(M.f, (Int, Any))
+    precompile(M.f, (AbstractFloat, Any))
+    mis = map(methods(M.f)) do m  # first specialization recorded for each method
+        m.specializations[1]
+    end
+    @test any(mi -> mi.specTypes.parameters[2] === Any, mis)  # parameters[1] is the function's own type, so [2] is the first argument
+    @test all(mi -> isa(mi.cache, Core.CodeInstance), mis)  # every such specialization has a code instance attached
+end
diff --git a/test/profile_spawnmany_exec.jl b/test/profile_spawnmany_exec.jl
new file mode 100644
index 0000000000000..a061de40d5172
--- /dev/null
+++ b/test/profile_spawnmany_exec.jl
@@ -0,0 +1,14 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Profile
+
+# Recursively halve `n`: one half runs in a spawned task, the other inline; stops once n <= 2.
+function spawnmany(n)
+    n > 2 || return nothing
+    half = n ÷ 2
+    child = Threads.@spawn spawnmany(half)
+    spawnmany(half)
+    wait(child)
+end
+
+@profile spawnmany(parse(Int, get(ENV, "NTASKS", "2000000")))
diff --git a/test/ranges.jl b/test/ranges.jl
index b622fc05b3ca6..b9ac8a7899977 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -20,6 +20,20 @@ using Base.Checked: checked_length
     # the next ones use ==, because it changes the eltype
     @test r ==  range(first(r),       last(r),      length(r)       )
     @test r ==  range(start=first(r), stop=last(r), length=length(r))
+    @test r === range(                stop=last(r), length=length(r))
+
+    r = 1:5
+    o = Base.OneTo(5)
+    let start=first(r), step=step(r), stop=last(r), length=length(r)
+        @test o === range(;              stop        )
+        @test o === range(;                    length)
+        @test r === range(; start,       stop        )
+        @test r === range(;              stop, length)
+        # the next three lines use ==, because it changes the eltype
+        @test r ==  range(; start,       stop, length)
+        @test r ==  range(; start, step,       length)
+        @test r ==  range(; stop=Float64(stop))
+    end
 
     for T = (Int8, Rational{Int16}, UInt32, Float64, Char)
         @test typeof(range(start=T(5), length=3)) === typeof(range(stop=T(5), length=3))
@@ -334,6 +348,7 @@ end
         end
     end
     @testset "findfirst" begin
+        @test findfirst(==(1), Base.IdentityUnitRange(-1:1)) == 1
         @test findfirst(isequal(3), Base.OneTo(10)) == 3
         @test findfirst(==(0), Base.OneTo(10)) == nothing
         @test findfirst(==(11), Base.OneTo(10)) == nothing
@@ -403,6 +418,9 @@ end
         @test intersect(1:3, 2) === intersect(2, 1:3) === 2:2
         @test intersect(1.0:3.0, 2) == intersect(2, 1.0:3.0) == [2.0]
 
+        @test intersect(1:typemax(Int), [1, 3]) == [1, 3]
+        @test intersect([1, 3], 1:typemax(Int)) == [1, 3]
+
         @testset "Support StepRange with a non-numeric step" begin
             start = Date(1914, 7, 28)
             stop = Date(1918, 11, 11)
@@ -412,6 +430,21 @@ end
             @test intersect(start-Day(10):Day(1):stop-Day(10), start:Day(5):stop) ==
                 start:Day(5):stop-Day(10)-mod(stop-start, Day(5))
         end
+
+        @testset "Two AbstractRanges" begin
+            struct DummyRange{T} <: AbstractRange{T}
+                r
+            end
+            Base.iterate(dr::DummyRange) = iterate(dr.r)
+            Base.iterate(dr::DummyRange, state) = iterate(dr.r, state)
+            Base.length(dr::DummyRange) = length(dr.r)
+            Base.in(x::Int, dr::DummyRange) = in(x, dr.r)
+            Base.unique(dr::DummyRange) = unique(dr.r)
+            r1 = DummyRange{Int}([1, 2, 3, 3, 4, 5])
+            r2 = DummyRange{Int}([3, 3, 4, 5, 6])
+            @test intersect(r1, r2) == [3, 4, 5]
+            @test intersect(r2, r1) == [3, 4, 5]
+        end
     end
     @testset "issubset" begin
         @test issubset(1:3, 1:typemax(Int)) #32461
@@ -544,6 +577,21 @@ end
     end
 end
 
+# A number type with the overflow behavior of `UInt8`. Conversion to `Integer` returns an
+# `Int32`, i.e., a type with different `typemin`/`typemax`. See #41479
+struct OverflowingReal <: Real
+    val::UInt8
+end
+OverflowingReal(x::OverflowingReal) = x
+Base.:<(x::OverflowingReal, y::OverflowingReal) = x.val < y.val
+Base.:(==)(x::OverflowingReal, y::OverflowingReal) = x.val == y.val
+Base.:<=(x::OverflowingReal, y::OverflowingReal) = x.val <= y.val
+Base.:+(x::OverflowingReal, y::OverflowingReal) = OverflowingReal(x.val + y.val)
+Base.:-(x::OverflowingReal, y::OverflowingReal) = OverflowingReal(x.val - y.val)
+Base.round(x::OverflowingReal, ::RoundingMode) = x
+Base.Integer(x::OverflowingReal) = Int32(x.val)
+@test length(OverflowingReal(1):OverflowingReal(0)) == 0
+
 @testset "loops involving typemin/typemax" begin
     n = 0
     s = 0
@@ -642,18 +690,18 @@ end
 end
 @testset "broadcasted operations with scalars" for T in (Int, UInt, Int128)
     @test broadcast(-, T(1):3, 2) === T(1)-2:1
-    @test broadcast(-, T(1):3, 0.25) === T(1)-0.25:3-0.25
+    @test broadcast(-, T(1):3, 0.25) === range(T(1)-0.25, length=T(3)) == T(1)-0.25:3-0.25
     @test broadcast(+, T(1):3) === T(1):3
     @test broadcast(+, T(1):3, 2) === T(3):5
-    @test broadcast(+, T(1):3, 0.25) === T(1)+0.25:3+0.25
+    @test broadcast(+, T(1):3, 0.25) === range(T(1)+0.25, length=T(3)) == T(1)+0.25:3+0.25
     @test broadcast(+, T(1):2:6, 1) === T(2):2:6
-    @test broadcast(+, T(1):2:6, 0.3) === T(1)+0.3:2:5+0.3
+    @test broadcast(+, T(1):2:6, 0.3) === range(T(1)+0.3, step=2, length=T(3)) == T(1)+0.3:2:5+0.3
     @test broadcast(-, T(1):2:6, 1) === T(0):2:4
-    @test broadcast(-, T(1):2:6, 0.3) === T(1)-0.3:2:5-0.3
+    @test broadcast(-, T(1):2:6, 0.3) === range(T(1)-0.3, step=2, length=T(3)) == T(1)-0.3:2:5-0.3
     is_unsigned = T <: Unsigned
     is_unsigned && @test length(broadcast(-, T(1):3, 2)) === length(T(1)-2:T(3)-2)
-    @test broadcast(-, T(1):3) == -T(1):-1:-T(3) broken=is_unsigned
-    @test broadcast(-, 2, T(1):3) == T(1):-1:-T(1) broken=is_unsigned
+    @test broadcast(-, T(1):3) == -T(1):-T(1):-T(3)
+    @test broadcast(-, 2, T(1):3) == T(1):-T(1):-T(1)
 end
 @testset "operations between ranges and arrays" for T in (Int, UInt, Int128)
     @test all(([T(1):5;] + (T(5):-1:1)) .=== T(6))
@@ -1001,13 +1049,19 @@ end
     @test length(map(identity, UInt64(1):UInt64(5))) == 5
     @test length(map(identity, UInt128(1):UInt128(5))) == 5
 end
-@testset "issue #8531" begin
+@testset "issue #8531, issue #29801" begin
     smallint = (Int === Int64 ?
-                (Int8,UInt8,Int16,UInt16,Int32,UInt32) :
-                (Int8,UInt8,Int16,UInt16))
+                (Int8, UInt8, Int16, UInt16, Int32, UInt32) :
+                (Int8, UInt8, Int16, UInt16))
     for T in smallint
         s = typemin(T):typemax(T)
-        @test length(s) == checked_length(s) == 2^(8*sizeof(T))
+        @test length(s) === checked_length(s) === 2^(8*sizeof(T))
+        s = T(10):typemax(T):T(10)
+        @test length(s) === checked_length(s) === 1
+        s = T(10):typemax(T):T(0)
+        @test length(s) === checked_length(s) === 0
+        s = T(10):typemax(T):typemin(T)
+        @test length(s) === checked_length(s) === 0
     end
 end
 
@@ -1095,7 +1149,7 @@ end
 @testset "issue 10950" begin
     r = 1//2:3
     @test length(r) == 3
-    @test_throws MethodError checked_length(r) == 3 # this would work if checked_sub is defined on Rational
+    @test checked_length(r) == 3
     i = 1
     for x in r
         @test x == i//2
@@ -1108,10 +1162,11 @@ end
     # repr/show should display the range nicely
     # to test print_range in range.jl
     replrepr(x) = repr("text/plain", x; context=IOContext(stdout, :limit=>true, :displaysize=>(24, 80)))
+    nb = Sys.WORD_SIZE
     @test replrepr(1:4) == "1:4"
     @test repr("text/plain", 1:4) == "1:4"
     @test repr("text/plain", range(1, stop=5, length=7)) == "1.0:0.6666666666666666:5.0"
-    @test repr("text/plain", LinRange{Float64}(1,5,7)) == "7-element LinRange{Float64}:\n 1.0,1.66667,2.33333,3.0,3.66667,4.33333,5.0"
+    @test repr("text/plain", LinRange{Float64}(1,5,7)) == "7-element LinRange{Float64, Int$nb}:\n 1.0,1.66667,2.33333,3.0,3.66667,4.33333,5.0"
     @test repr(range(1, stop=5, length=7)) == "1.0:0.6666666666666666:5.0"
     @test repr(LinRange{Float64}(1,5,7)) == "range(1.0, stop=5.0, length=7)"
     @test replrepr(0:100.) == "0.0:1.0:100.0"
@@ -1119,7 +1174,7 @@ end
     # only examines spacing of the left and right edges of the range, sufficient
     # to cover the designated screen size.
     @test replrepr(range(0, stop=100, length=10000)) == "0.0:0.010001000100010001:100.0"
-    @test replrepr(LinRange{Float64}(0,100, 10000)) == "10000-element LinRange{Float64}:\n 0.0,0.010001,0.020002,0.030003,0.040004,…,99.95,99.96,99.97,99.98,99.99,100.0"
+    @test replrepr(LinRange{Float64}(0,100, 10000)) == "10000-element LinRange{Float64, Int$nb}:\n 0.0,0.010001,0.020002,0.030003,0.040004,…,99.95,99.96,99.97,99.98,99.99,100.0"
 
     @test sprint(show, UnitRange(1, 2)) == "1:2"
     @test sprint(show, StepRange(1, 2, 5)) == "1:2:5"
@@ -1443,6 +1498,7 @@ using .Main.Furlongs
 @testset "dimensional correctness" begin
     @test length(Vector(Furlong(2):Furlong(10))) == 9
     @test length(range(Furlong(2), length=9)) == checked_length(range(Furlong(2), length=9)) == 9
+    @test @inferred(length(StepRange(Furlong(2), Furlong(1), Furlong(1)))) == 0
     @test Vector(Furlong(2):Furlong(1):Furlong(10)) == Vector(range(Furlong(2), step=Furlong(1), length=9)) == Furlong.(2:10)
     @test Vector(Furlong(1.0):Furlong(0.5):Furlong(10.0)) ==
           Vector(Furlong(1):Furlong(0.5):Furlong(10)) == Furlong.(1:0.5:10)
@@ -1487,14 +1543,23 @@ end
     @test @inferred(x .\ r) === 0.5:0.5:2.5
 
     @test @inferred(2 .* (r .+ 1) .+ 2) == 6:2:14
+
+    # issue #42291
+    @test length((1:5) .- 1/7) == 5
+    @test length((1:5) .+ -1/7) == 5
+    @test length(-1/7 .+ (1:5)) == 5
 end
 
 @testset "Bad range calls" begin
     @test_throws ArgumentError range(1)
     @test_throws ArgumentError range(nothing)
     @test_throws ArgumentError range(1, step=4)
-    @test_throws ArgumentError range(nothing, length=2)
+    @test_throws ArgumentError range(; step=1, length=6)
+    @test_throws ArgumentError range(; step=2, stop=7.5)
     @test_throws ArgumentError range(1.0, step=0.25, stop=2.0, length=5)
+    @test_throws ArgumentError range(; stop=nothing)
+    @test_throws ArgumentError range(; length=nothing)
+    @test_throws TypeError range(; length=5.5)
 end
 
 @testset "issue #23300#issuecomment-371575548" begin
@@ -1516,6 +1581,8 @@ end
     @test view(1:10, 1:5) === 1:5
     @test view(1:10, 1:2:5) === 1:2:5
     @test view(1:2:9, 1:5) === 1:2:9
+    @test view(1:10, :) === 1:10
+    @test view(1:2:9, :) === 1:2:9
 
     # Ensure we don't hit a fallback `view` if there's a better `getindex` implementation
     vmt = collect(methods(view, Tuple{AbstractRange, AbstractRange}))
@@ -1864,6 +1931,40 @@ end
     @test typeof(step(StepRangeLen(Int8(1), Int8(2), 3, 2))) === Int8
 end
 
+@testset "LinRange eltype for element types that wrap integers" begin
+    struct RealWrapper{T <: Real} <: Real # minimal `Real` that wraps another real number
+        x :: T
+    end
+    Base.promote_rule(::Type{S}, ::Type{RealWrapper{T}}) where {T,S<:Real} = RealWrapper{promote_type(S, T)}
+    Base.:(-)(w::RealWrapper) = RealWrapper(-w.x)
+    for f in [:(+), :(-), :(*), :(/)]
+        @eval Base.$f(w::RealWrapper, y::RealWrapper) = RealWrapper($f(w.x, y.x))
+    end
+    for f in [:(<), :(==), :(<=)] # comparisons forward to the wrapped values
+        @eval Base.$f(w::RealWrapper, y::RealWrapper) = $f(w.x, y.x)
+    end
+    for T in [:Float32, :Float64]
+        @eval Base.$T(w::RealWrapper) = $T(w.x)
+    end
+    (::Type{RealWrapper{T}})(w::RealWrapper) where {T<:Real} = RealWrapper{T}(T(w.x))
+    (::Type{T})(w::RealWrapper{T}) where {T<:Real} = T(w.x)
+    Base.isfinite(w::RealWrapper) = isfinite(w.x)
+    Base.signbit(w::RealWrapper) = signbit(w.x)
+
+    # Ranges built from the wrapper must agree elementwise with the plain `Int` ranges.
+    x = RealWrapper(2)
+    r1 = range(x, stop = 2x, length = 10)
+    r2 = range(Int(x), stop = Int(2x), length = 10)
+    for i in eachindex(r1, r2)
+        @test r1[i] ≈ r2[i]
+    end
+    r3 = LinRange(x, 2x, 10)
+    r4 = LinRange(Int(x), Int(2x), 10) # unwrapped reference; comparing r3 with itself tests nothing
+    for i in eachindex(r3, r4)
+        @test r3[i] ≈ r4[i]
+    end
+end
+
 @testset "Bool indexing of ranges" begin
     @test_throws ArgumentError Base.OneTo(true)
     @test_throws ArgumentError Base.OneTo(true:true:true)
@@ -2045,3 +2146,83 @@ end
         @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
     end
 end
+
+@testset "non 1-based ranges indexing" begin
+    struct ZeroBasedUnitRange{T,A<:AbstractUnitRange{T}} <: AbstractUnitRange{T}
+        a :: A
+        function ZeroBasedUnitRange(a::AbstractUnitRange{T}) where {T}
+            @assert !Base.has_offset_axes(a)
+            new{T, typeof(a)}(a)
+        end
+    end
+
+    Base.parent(A::ZeroBasedUnitRange) = A.a
+    Base.first(A::ZeroBasedUnitRange) = first(parent(A))
+    Base.length(A::ZeroBasedUnitRange) = length(parent(A))
+    Base.last(A::ZeroBasedUnitRange) = last(parent(A))
+    Base.size(A::ZeroBasedUnitRange) = size(parent(A))
+    Base.axes(A::ZeroBasedUnitRange) = map(x -> Base.IdentityUnitRange(0:x-1), size(parent(A)))
+    Base.getindex(A::ZeroBasedUnitRange, i::Int) = parent(A)[i + 1]
+    Base.getindex(A::ZeroBasedUnitRange, i::Integer) = parent(A)[i + 1]
+    Base.firstindex(A::ZeroBasedUnitRange) = 0
+    function Base.show(io::IO, A::ZeroBasedUnitRange)
+        show(io, parent(A))
+        print(io, " with indices $(axes(A,1))")
+    end
+
+    r = ZeroBasedUnitRange(5:8)
+    @test r[0:2] == r[0]:r[2]
+    @test r[0:1:2] == r[0]:1:r[2]
+end
+
+@test length(range(1, 100, length=big(100)^100)) == big(100)^100
+@test length(range(big(1), big(100)^100, length=big(100)^100)) == big(100)^100
+@test length(0 * (1:big(100)^100)) == big(100)^100
+
+@testset "issue #41784" begin
+    # tests `in` when step equals 0
+    # test for Int
+    x = 41784
+    @test (x in StepRangeLen(x, 0, 0)) == false
+    @test (x in StepRangeLen(x, 0, rand(1:100))) == true
+    @test ((x - 1) in StepRangeLen(x, 0, rand(1:100))) == false
+    @test ((x + 1) in StepRangeLen(x, 0, rand(1:100))) == false
+
+    # test for Char
+    x = 'z'
+    @test (x in StepRangeLen(x, 0, 0)) == false
+    @test (x in StepRangeLen(x, 0, rand(1:100))) == true
+    @test ((x - 1) in StepRangeLen(x, 0, rand(1:100))) == false
+    @test ((x + 1) in StepRangeLen(x, 0, rand(1:100))) == false
+end
+
+@testset "issue #42528" begin
+    struct Fix42528 <: Unsigned
+        val::UInt
+    end
+    Fix42528(a::Fix42528) = a
+    Base.:(<)(a::Fix42528, b::Fix42528) = a.val < b.val
+    Base.:(>=)(a::Fix42528, b::Fix42528) = a.val >= b.val
+    Base.:(+)(a::Fix42528, b::Fix42528) = a.val+b.val
+    Base.promote_rule(::Type{Fix42528}, ::Type{<:Unsigned}) = Fix42528
+    Base.show(io::IO, ::MIME"text/plain", a::Fix42528) = print(io, "Fix42528(", a.val, ')')
+    Base.show(io::IO, a::Fix42528) = print(io, "Fix42528(", a.val, ')')
+    function Base.:(-)(a::Fix42528, b::Fix42528)
+        a.val < b.val && throw(DomainError("Can't subtract, result outside of domain"))
+        return a.val - b.val
+    end
+    Base.one(::Type{Fix42528}) = Fix42528(0x1)
+    @test Fix42528(0x0):Fix42528(0x1) == [Fix42528(0x0), Fix42528(0x01)]
+    @test iszero(length(Fix42528(0x1):Fix42528(0x0)))
+    @test_throws DomainError Fix42528(0x0) - Fix42528(0x1)
+end
+
+let r = Ptr{Cvoid}(20):-UInt(2):Ptr{Cvoid}(10)
+    @test isempty(r)
+    @test length(r) == 0
+    @test count(i -> true, r) == 0
+    @test isempty(collect(r))
+    @test first(r) === Ptr{Cvoid}(20)
+    @test step(r) === -UInt(2)
+    @test last(r) === Ptr{Cvoid}(10)
+end
diff --git a/test/rational.jl b/test/rational.jl
index a329a1ac5f93d..1618156212af7 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -630,3 +630,11 @@ end
     A=Rational[1 1 1; 2 2 2; 3 3 3]
     @test @inferred(A*A) isa Matrix{Rational}
 end
+
+@testset "issue #42560" begin
+    @test rationalize(0.5 + 0.5im) == 1//2 + 1//2*im
+    @test rationalize(float(pi)im) == 0//1 + 165707065//52746197*im
+    @test rationalize(Int8, float(pi)im) == 0//1 + 22//7*im
+    @test rationalize(1.192 + 2.233im) == 149//125 + 2233//1000*im
+    @test rationalize(Int8, 1.192 + 2.233im) == 118//99 + 67//30*im
+end
diff --git a/test/read.jl b/test/read.jl
index 78ecded83c80a..81ee7fea21fba 100644
--- a/test/read.jl
+++ b/test/read.jl
@@ -461,7 +461,7 @@ rm(f)
 io = Base.Filesystem.open(f, Base.Filesystem.JL_O_WRONLY | Base.Filesystem.JL_O_CREAT | Base.Filesystem.JL_O_EXCL, 0o000)
 @test write(io, "abc") == 3
 close(io)
-if !Sys.iswindows() && get(ENV, "USER", "") != "root" && get(ENV, "HOME", "") != "/root"
+if !Sys.iswindows() && Libc.geteuid() != 0 # euid 0 is root; only a non-root user is denied by the 0o000 mode
     # msvcrt _wchmod documentation states that all files are readable,
     # so we don't test that it correctly set the umask on windows
     @test_throws SystemError open(f)
@@ -511,7 +511,7 @@ close(f1)
 close(f2)
 @test eof(f1)
 @test_throws Base.IOError eof(f2)
-if get(ENV, "USER", "") != "root" && get(ENV, "HOME", "") != "/root"
+if Libc.geteuid() != 0 # euid 0 is root; these opens are expected to fail only for a non-root user
     @test_throws SystemError open(f, "r+")
     @test_throws Base.IOError Base.Filesystem.open(f, Base.Filesystem.JL_O_RDWR)
 else
diff --git a/test/reduce.jl b/test/reduce.jl
index 1e136af11b68a..78ac22c13f366 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -49,8 +49,8 @@ end
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1:5; init=1000) == (1000 + 1 + 2 + 3 + 4 + 5)
 @test reduce(+, 1) == 1
-@test_throws ArgumentError reduce(*, ())
-@test_throws ArgumentError reduce(*, Union{}[])
+@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, ())
+@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, Union{}[])
 
 # mapreduce
 @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3)
@@ -87,8 +87,10 @@ end
 @test mapreduce(abs2, *, Float64[]) === 1.0
 @test mapreduce(abs2, max, Float64[]) === 0.0
 @test mapreduce(abs, max, Float64[]) === 0.0
-@test_throws ArgumentError mapreduce(abs2, &, Float64[])
-@test_throws ArgumentError mapreduce(abs2, |, Float64[])
+@test_throws ["reducing over an empty collection is not allowed",
+              "consider supplying `init`"] mapreduce(abs2, &, Float64[])
+@test_throws str -> !occursin("Closest candidates are", str) mapreduce(abs2, &, Float64[])
+@test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, |, Float64[])
 
 # mapreduce() type stability
 @test typeof(mapreduce(*, +, Int8[10])) ===
@@ -138,8 +140,9 @@ fz = float(z)
 @test sum(z) === 136
 @test sum(fz) === 136.0
 
-@test_throws ArgumentError sum(Union{}[])
-@test_throws ArgumentError sum(sin, Int[])
+@test_throws "reducing with add_sum over an empty collection of element type Union{} is not allowed" sum(Union{}[])
+@test_throws ["reducing over an empty collection is not allowed",
+              "consider supplying `init`"] sum(sin, Int[])
 @test sum(sin, 3) == sin(3.0)
 @test sum(sin, [3]) == sin(3.0)
 a = sum(sin, z)
@@ -170,7 +173,7 @@ for f in (sum2, sum5, sum6, sum9, sum10)
 end
 for f in (sum3, sum4, sum7, sum8)
     @test sum(z) == f(z)
-    @test_throws ArgumentError f(Int[])
+    @test_throws "reducing over an empty" f(Int[])
     @test sum(Int[7]) == f(Int[7]) == 7
 end
 @test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
@@ -239,8 +242,8 @@ prod2(itr) = invoke(prod, Tuple{Any}, itr)
 
 # maximum & minimum & extrema
 
-@test_throws ArgumentError maximum(Int[])
-@test_throws ArgumentError minimum(Int[])
+@test_throws "reducing over an empty" maximum(Int[])
+@test_throws "reducing over an empty" minimum(Int[])
 
 @test maximum(Int[]; init=-1) == -1
 @test minimum(Int[]; init=-1) == -1
@@ -594,14 +597,22 @@ end
 # issue #18695
 test18695(r) = sum( t^2 for t in r )
 @test @inferred(test18695([1.0,2.0,3.0,4.0])) == 30.0
-@test_throws ArgumentError test18695(Any[])
+@test_throws str -> ( occursin("reducing over an empty", str) &&
+                      occursin("consider supplying `init`", str) &&
+                     !occursin("or defining", str)) test18695(Any[])
+
+# For Core.IntrinsicFunction
+@test_throws str -> ( occursin("reducing over an empty", str) &&
+                      occursin("consider supplying `init`", str) &&
+                     !occursin("or defining", str)) reduce(Base.xor_int, Int[])
 
 # issue #21107
 @test foldr(-,2:2) == 2
 
 # test neutral element not picked incorrectly for &, |
 @test @inferred(foldl(&, Int[1])) === 1
-@test_throws ArgumentError foldl(&, Int[])
+@test_throws ["reducing over an empty",
+              "consider supplying `init`"] foldl(&, Int[])
 
 # prod on Chars
 @test prod(Char[]) == ""
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 93287efc5eb1c..f009a2384ca51 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -90,7 +90,7 @@ end
 
 # Combining dims and init
 A = Array{Int}(undef, 0, 3)
-@test_throws ArgumentError maximum(A; dims=1)
+@test_throws "reducing over an empty collection is not allowed" maximum(A; dims=1)
 @test maximum(A; dims=1, init=-1) == reshape([-1,-1,-1], 1, 3)
 
 # Test reduction along first dimension; this is special-cased for
@@ -169,8 +169,9 @@ end
     A = Matrix{Int}(undef, 0,1)
     @test sum(A) === 0
     @test prod(A) === 1
-    @test_throws ArgumentError minimum(A)
-    @test_throws ArgumentError maximum(A)
+    @test_throws ["reducing over an empty",
+                  "consider supplying `init`"] minimum(A)
+    @test_throws "consider supplying `init`" maximum(A)
 
     @test isequal(sum(A, dims=1), zeros(Int, 1, 1))
     @test isequal(sum(A, dims=2), zeros(Int, 0, 1))
@@ -182,9 +183,9 @@ end
     @test isequal(prod(A, dims=3), fill(1, 0, 1))
 
     for f in (minimum, maximum)
-        @test_throws ArgumentError f(A, dims=1)
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=1)
         @test isequal(f(A, dims=2), zeros(Int, 0, 1))
-        @test_throws ArgumentError f(A, dims=(1, 2))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2))
         @test isequal(f(A, dims=3), zeros(Int, 0, 1))
     end
     for f in (findmin, findmax)
diff --git a/test/reflection.jl b/test/reflection.jl
index 4faa367817104..70dfde07048c2 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -952,3 +952,16 @@ end
     @test only(code_typed(mod.foo, (); world=world1)).second == Int
     @test only(code_typed(mod.foo, (); world=world2)).second == Float64
 end
+
+@testset "default_tt" begin
+    m = Module()
+    @eval m f1() = return
+    @test Base.default_tt(m.f1) == Tuple{}
+    @eval m f2(a) = return
+    @test Base.default_tt(m.f2) == Tuple{Any}
+    @eval m f3(a::Integer) = return
+    @test Base.default_tt(m.f3) == Tuple{Integer}
+    @eval m f4() = return
+    @eval m f4(a) = return
+    @test Base.default_tt(m.f4) == Tuple
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index ea94fca877057..bcfc280f03f29 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,7 +10,7 @@ using Base: Experimental
 include("choosetests.jl")
 include("testenv.jl")
 
-tests, net_on, exit_on_error, use_revise, seed = choosetests(ARGS)
+(; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
 tests = unique(tests)
 
 if Sys.islinux()
@@ -102,7 +102,15 @@ cd(@__DIR__) do
     #   * https://github.com/JuliaLang/julia/pull/29384
     #   * https://github.com/JuliaLang/julia/pull/40348
     n = 1
-    if net_on
+    # Parse the variable as a `Bool`; an unset, empty, or unparsable value counts as `false`.
+    JULIA_TEST_USE_MULTIPLE_WORKERS =
+        tryparse(Bool,
+                 lowercase(strip(get(ENV, "JULIA_TEST_USE_MULTIPLE_WORKERS", "")))) ===
+        true
+    # If the `JULIA_TEST_USE_MULTIPLE_WORKERS` environment variable is set to `true`, we use
+    # multiple worker processes regardless of the value of `net_on`.
+    # Otherwise, we use multiple worker processes if and only if `net_on` is true.
+    if net_on || JULIA_TEST_USE_MULTIPLE_WORKERS
         n = min(Sys.CPU_THREADS, length(tests))
         n > 1 && addprocs_with_testenv(n)
         LinearAlgebra.BLAS.set_num_threads(1)
@@ -199,6 +207,7 @@ cd(@__DIR__) do
 
     local stdin_monitor
     all_tasks = Task[]
+    o_ts_duration = nothing
     try
         # Monitor stdin and kill this task on ^C
         # but don't do this on Windows, because it may deadlock in the kernel
@@ -229,7 +238,7 @@ cd(@__DIR__) do
                 end
             end
         end
-        @Experimental.sync begin
+        o_ts_duration = @elapsed @Experimental.sync begin
             for p in workers()
                 @async begin
                     push!(all_tasks, current_task())
@@ -237,14 +246,16 @@ cd(@__DIR__) do
                         test = popfirst!(tests)
                         running_tests[test] = now()
                         wrkr = p
-                        resp = try
-                                remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
+                        before = time()
+                        resp, duration = try
+                                r = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
+                                r, time() - before
                             catch e
                                 isa(e, InterruptException) && return
-                                Any[CapturedException(e, catch_backtrace())]
+                                Any[CapturedException(e, catch_backtrace())], time() - before
                             end
                         delete!(running_tests, test)
-                        push!(results, (test, resp))
+                        push!(results, (test, resp, duration))
                         if length(resp) == 1
                             print_testworker_errored(test, wrkr, exit_on_error ? nothing : resp[1])
                             if exit_on_error
@@ -295,18 +306,20 @@ cd(@__DIR__) do
             # to the overall aggregator
             isolate = true
             t == "SharedArrays" && (isolate = false)
-            resp = try
-                    Base.invokelatest(runtests, t, test_path(t), isolate, seed=seed) # runtests is defined by the include above
+            before = time()
+            resp, duration = try
+                    r = Base.invokelatest(runtests, t, test_path(t), isolate, seed=seed) # runtests is defined by the include above
+                    r, time() - before
                 catch e
                     isa(e, InterruptException) && rethrow()
-                    Any[CapturedException(e, catch_backtrace())]
+                    Any[CapturedException(e, catch_backtrace())], time() - before
                 end
             if length(resp) == 1
                 print_testworker_errored(t, 1, resp[1])
             else
                 print_testworker_stats(t, 1, resp)
             end
-            push!(results, (t, resp))
+            push!(results, (t, resp, duration))
         end
     catch e
         isa(e, InterruptException) || rethrow()
@@ -352,16 +365,19 @@ cd(@__DIR__) do
     =#
     Test.TESTSET_PRINT_ENABLE[] = false
     o_ts = Test.DefaultTestSet("Overall")
+    o_ts.time_end = o_ts.time_start + something(o_ts_duration, 0.0) # manually populate the timing; `o_ts_duration` is still `nothing` if the run was interrupted
     Test.push_testset(o_ts)
     completed_tests = Set{String}()
-    for (testname, (resp,)) in results
+    for (testname, (resp,), duration) in results
         push!(completed_tests, testname)
         if isa(resp, Test.DefaultTestSet)
+            resp.time_end = resp.time_start + duration
             Test.push_testset(resp)
             Test.record(o_ts, resp)
             Test.pop_testset()
         elseif isa(resp, Test.TestSetException)
             fake = Test.DefaultTestSet(testname)
+            fake.time_end = fake.time_start + duration
             for i in 1:resp.pass
                 Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, LineNumberNode(@__LINE__, @__FILE__)))
             end
@@ -383,6 +399,7 @@ cd(@__DIR__) do
             # the test runner itself had some problem, so we may have hit a segfault,
             # deserialization errors or something similar.  Record this testset as Errored.
             fake = Test.DefaultTestSet(testname)
+            fake.time_end = fake.time_start + duration
             Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1)))
             Test.push_testset(fake)
             Test.record(o_ts, fake)
@@ -399,6 +416,7 @@ cd(@__DIR__) do
     end
     Test.TESTSET_PRINT_ENABLE[] = true
     println()
+    # o_ts.verbose = true # set to true to show all timings when successful
     Test.print_test_results(o_ts, 1)
     if !o_ts.anynonpass
         println("    \033[32;1mSUCCESS\033[0m")
diff --git a/test/sets.jl b/test/sets.jl
index 1033c56420ace..f7a7dc14d7387 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -22,6 +22,7 @@ using Dates
         @test isa(Set(sin(x) for x = 1:3), Set{Float64})
         @test isa(Set(f17741(x) for x = 1:3), Set{Int})
         @test isa(Set(f17741(x) for x = -1:1), Set{Integer})
+        @test isa(Set(f17741(x) for x = 1:0), Set{Integer})
     end
     let s1 = Set(["foo", "bar"]), s2 = Set(s1)
         @test s1 == s2
@@ -138,6 +139,10 @@ end
     @test !in(200,s)
 end
 
+@testset "copy(::KeySet) (issue #41537)" begin
+    @test union(keys(Dict(1=>2, 3=>4))) == copy(keys(Dict(1=>2, 3=>4))) == Set([1,3])
+end
+
 @testset "copy!" begin
     for S = (Set, BitSet)
         s = S([1, 2])
@@ -220,6 +225,16 @@ end
     s2 = Set([nothing])
     union!(s2, [nothing])
     @test s2 == Set([nothing])
+
+    @testset "promotion" begin
+        ints = [1:5, [1, 2], Set([1, 2])]
+        floats = [2:0.1:3, [2.0, 3.5], Set([2.0, 3.5])]
+
+        for a in ints, b in floats
+            @test eltype(union(a, b)) == Float64
+            @test eltype(union(b, a)) == Float64
+        end
+    end
 end
 
 @testset "intersect" begin
@@ -227,6 +242,9 @@ end
         s = S([1,2]) ∩ S([3,4])
         @test s == S()
         s = intersect(S([5,6,7,8]), S([7,8,9]))
+        slong = S(collect(3:63))
+        # test #36339 length/order short-cut
+        @test intersect(S([5,6,7,8]), slong) == intersect(slong, S([5,6,7,8]))
         @test s == S([7,8])
         @test intersect(S([2,3,1]), S([4,2,3]), S([5,4,3,2])) == S([2,3])
         let s1 = S([1,2,3])
@@ -238,7 +256,9 @@ end
         end
     end
     @test intersect(Set([1]), BitSet()) isa Set{Int}
-    @test intersect(BitSet([1]), Set()) isa BitSet
+    @test intersect(BitSet([1]), Set()) isa Set{Any}
+    @test intersect(BitSet([1]), Set([1])) isa BitSet
+    @test intersect(BitSet([1]), Set([1]), Set([1])) isa BitSet
     @test intersect([1], BitSet()) isa Vector{Int}
     # intersect must uniquify
     @test intersect([1, 2, 1]) == intersect!([1, 2, 1]) == [1, 2]
@@ -249,7 +269,22 @@ end
     y = () ∩ (42,)
     @test isempty(x)
     @test isempty(y)
-    @test eltype(x) == eltype(y) == Union{}
+
+    # Discussed in PR#41769
+    @testset "promotion" begin
+        ints = [1:5, [1, 2], Set([1, 2])]
+        floats = [2:0.1:3, [2.0, 3.5], Set([2.0, 3.5])]
+
+        for a in ints, b in floats
+            @test eltype(intersect(a, b)) == Float64
+            @test eltype(intersect(b, a)) == Float64
+            @test eltype(intersect(a, a, b)) == Float64
+        end
+    end
+
+    # 3-argument version is correctly covered
+    @test intersect(Set([1,2]), Set([2]), Set([1,2,3])) == Set([2])
+    @test intersect(Set([1,2]), Set([2]), Set([1.,2,3])) == Set([2.])
 end
 
 @testset "setdiff" begin
@@ -777,3 +812,8 @@ end
     A = empty!(A)
     @test isempty(A)
 end
+
+@testset "⊊, ⊋" begin
+    @test !((1, 2) ⊊ (1, 2, 2))
+    @test !((1, 2, 2) ⊋ (1, 2))
+end
diff --git a/test/show.jl b/test/show.jl
index e4bfd3f95af32..0c93901bef5ba 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -239,6 +239,15 @@ end
 @test repr(:(-(;x))) == ":(-(; x))"
 @test repr(:(+(1, 2;x))) == ":(+(1, 2; x))"
 @test repr(:(1:2...)) == ":(1:2...)"
+
+@test repr(:(1 := 2)) == ":(1 := 2)"
+@test repr(:(1 ≔ 2)) == ":(1 ≔ 2)"
+@test repr(:(1 ⩴ 2)) == ":(1 ⩴ 2)"
+@test repr(:(1 ≕ 2)) == ":(1 ≕ 2)"
+
+@test repr(:(∓ 1)) == ":(∓1)"
+@test repr(:(± 1)) == ":(±1)"
+
 for ex in [Expr(:call, :f, Expr(:(=), :x, 1)),
            Expr(:ref, :f, Expr(:(=), :x, 1)),
            Expr(:vect, 1, 2, Expr(:kw, :x, 1)),
@@ -259,6 +268,7 @@ end
 @test repr(Expr(:import, :Foo)) == ":(\$(Expr(:import, :Foo)))"
 @test repr(Expr(:import, Expr(:(.), ))) == ":(\$(Expr(:import, :(\$(Expr(:.))))))"
 
+
 @test repr(Expr(:using, Expr(:(.), :A))) == ":(using A)"
 @test repr(Expr(:using, Expr(:(.), :A),
                         Expr(:(.), :B))) == ":(using A, B)"
@@ -1347,6 +1357,20 @@ test_repr("(:).a")
 @test repr(Tuple{Float64, Float64, Float64, Float64}) == "NTuple{4, Float64}"
 @test repr(Tuple{Float32, Float32, Float32}) == "Tuple{Float32, Float32, Float32}"
 
+@testset "issue #42931" begin  # repr of Tuple types whose parameters are Symbol values (:A) rather than types
+    @test repr(NTuple{4, :A}) == "NTuple{4, :A}"  # four identical parameters are printed in NTuple form
+    @test repr(NTuple{3, :A}) == "Tuple{:A, :A, :A}"  # three or fewer identical parameters are printed expanded
+    @test repr(NTuple{2, :A}) == "Tuple{:A, :A}"
+    @test repr(NTuple{1, :A}) == "Tuple{:A}"
+    @test repr(NTuple{0, :A}) == "Tuple{}"
+
+    @test repr(Tuple{:A, :A, :A, :B}) == "Tuple{:A, :A, :A, :B}"  # four parameters that are not all equal stay expanded
+    @test repr(Tuple{:A, :A, :A, :A}) == "NTuple{4, :A}"  # same expected output as NTuple{4, :A} above
+    @test repr(Tuple{:A, :A, :A}) == "Tuple{:A, :A, :A}"
+    @test repr(Tuple{:A}) == "Tuple{:A}"
+    @test repr(Tuple{}) == "Tuple{}"
+end
+
 # Test that REPL/mime display of invalid UTF-8 data doesn't throw an exception:
 @test isa(repr("text/plain", String(UInt8[0x00:0xff;])), String)
 
@@ -1377,6 +1401,11 @@ let m = which(T20332{Int}(), (Int,)),
     mi = Core.Compiler.specialize_method(m, Tuple{T20332{T}, Int} where T, Core.svec())
     # test that this doesn't throw an error
     @test occursin("MethodInstance for", repr(mi))
+    # issue #41928
+    str = sprint(mi; context=:color=>true) do io, mi
+        printstyled(io, mi; color=:light_cyan)
+    end
+    @test !occursin("\U1b[0m", str)
 end
 
 @test sprint(show, Main) == "Main"
@@ -1769,7 +1798,7 @@ end
     # spurious binding resolutions
     show(IOContext(b, :module => TestShowType), Base.Pair)
     @test !Base.isbindingresolved(TestShowType, :Pair)
-    @test String(take!(b)) == "Base.Pair"
+    @test String(take!(b)) == "Core.Pair"
     show(IOContext(b, :module => TestShowType), Base.Complex)
     @test Base.isbindingresolved(TestShowType, :Complex)
     @test String(take!(b)) == "Complex"
@@ -1825,7 +1854,7 @@ end
     @test showstr(Dict(true=>false)) == "Dict{Bool, Bool}(1 => 0)"
     @test showstr(Dict((1 => 2) => (3 => 4))) == "Dict((1 => 2) => (3 => 4))"
 
-    # issue #27979 (dislaying arrays of pairs containing arrays as first member)
+    # issue #27979 (displaying arrays of pairs containing arrays as first member)
     @test replstr([[1.0]=>1.0]) == "1-element Vector{Pair{Vector{Float64}, Float64}}:\n [1.0] => 1.0"
 
     # issue #28159
@@ -1838,6 +1867,12 @@ end
     # issue #34343
     @test showstr([[1], Int[]]) == "[[1], $Int[]]"
     @test showstr([Dict(1=>1), Dict{Int,Int}()]) == "[Dict(1 => 1), Dict{$Int, $Int}()]"
+
+    # issue #42719, NamedTuple with @var_str
+    @test replstr((; var"a b"=1)) == """(var"a b" = 1,)"""
+    @test replstr((; var"#var#"=1)) == """(var"#var#" = 1,)"""
+    @test replstr((; var"a"=1, b=2)) == "(a = 1, b = 2)"
+    @test replstr((; a=1, b=2)) == "(a = 1, b = 2)"
 end
 
 @testset "#14684: `display` should print associative types in full" begin
@@ -2304,3 +2339,19 @@ end
     @test replstr([[1;;]]) == "1-element Vector{Matrix{$Int}}:\n [1;;]"
     @test replstr([[1;;;]]) == "1-element Vector{Array{$Int, 3}}:\n [1;;;]"
 end
+
+@testset "ncat and nrow" begin  # NOTE(review): @test_repr is defined outside this hunk; presumably it checks that the parsed expression prints back equivalently
+    @test_repr "[1;;]"  # untyped array literals using the ;; / ;;; / ;;;; separators
+    @test_repr "[1;;;]"
+    @test_repr "[1;; 2]"
+    @test_repr "[1;;; 2]"
+    @test_repr "[1;;; 2 3;;; 4]"
+    @test_repr "[1;;; 2;;;; 3;;; 4]"
+
+    @test_repr "T[1;;]"  # the same six literals, written with a T[...] prefix
+    @test_repr "T[1;;;]"
+    @test_repr "T[1;; 2]"
+    @test_repr "T[1;;; 2]"
+    @test_repr "T[1;;; 2 3;;; 4]"
+    @test_repr "T[1;;; 2;;;; 3;;; 4]"
+end
diff --git a/test/some.jl b/test/some.jl
index b2111c8b86085..27d50ca354a49 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -87,6 +87,11 @@ end
 
     @test @something(1, error("failed")) === 1
     @test_throws ErrorException @something(nothing, error("failed"))
+
+    # Ensure that the internal variable doesn't conflict with a user-defined variable
+    @test let val = 1
+        @something(val)
+    end == 1
 end
 
 # issue #26927
diff --git a/test/sorting.jl b/test/sorting.jl
index 718a7f819e203..e90138549afd8 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -35,6 +35,8 @@ end
     @test sort([2,3,1], rev=true) == [3,2,1] == sort([2,3,1], order=Reverse)
     @test sort(['z':-1:'a';]) == ['a':'z';]
     @test sort(['a':'z';], rev=true) == ['z':-1:'a';]
+    @test sort(OffsetVector([3,1,2], -2)) == OffsetVector([1,2,3], -2)
+    @test sort(OffsetVector([3.0,1.0,2.0], 2), rev=true) == OffsetVector([3.0,2.0,1.0], 2)
 end
 
 @testset "sortperm" begin
@@ -46,6 +48,7 @@ end
         @test r === s
     end
     @test_throws ArgumentError sortperm!(view([1,2,3,4], 1:4), [2,3,1])
+    @test sortperm(OffsetVector([8.0,-2.0,0.5], -4)) == OffsetVector([-2, -1, -3], -4)
 end
 
 @testset "misc sorting" begin
@@ -142,6 +145,26 @@ end
         @test searchsortedlast(500:1.0:600, 1.0e20) == 101
     end
 
+    @testset "issue 10966" begin  # searchsorted* on ranges where the range element type R and the needle type T may differ
+        for R in numTypes, T in numTypes  # numTypes is defined elsewhere in the test file
+            @test searchsortedfirst(R(2):R(2), T(0)) == 1  # single-element range 2:2: needle below, equal to, and above the element
+            @test searchsortedfirst(R(2):R(2), T(2)) == 1
+            @test searchsortedfirst(R(2):R(2), T(3)) == 2
+            @test searchsortedfirst(R(1):1//2:R(5), T(0)) == 1  # nine-element range 1, 3/2, ..., 5 with a Rational step
+            @test searchsortedfirst(R(1):1//2:R(5), T(2)) == 3
+            @test searchsortedfirst(R(1):1//2:R(5), T(6)) == 10  # one past the last index when the needle exceeds every element
+            @test searchsortedlast(R(2):R(2), T(0)) == 0  # one before the first index when the needle is below every element
+            @test searchsortedlast(R(2):R(2), T(2)) == 1
+            @test searchsortedlast(R(2):R(2), T(3)) == 1
+            @test searchsortedlast(R(1):1//2:R(5), T(0)) == 0
+            @test searchsortedlast(R(1):1//2:R(5), T(2)) == 3
+            @test searchsortedlast(R(1):1//2:R(5), T(6)) == 9
+            @test searchsorted(R(2):R(2), T(0)) === 1:0  # empty results are compared with ===, so the exact range value is checked
+            @test searchsorted(R(2):R(2), T(2)) == 1:1
+            @test searchsorted(R(2):R(2), T(3)) === 2:1
+        end
+    end
+
     @testset "issue 32568" begin
         for R in numTypes, T in numTypes
             for arr in Any[R[1:5;], R(1):R(5), R(1):2:R(5)]
@@ -644,4 +667,20 @@ end
     @test issorted(a)
 end
 
+@testset "sort!(::OffsetMatrix; dims)" begin
+    x = OffsetMatrix(rand(5,5), 5, -5)  # presumably shifts the two axes by 5 and -5; OffsetMatrix comes from a test helper not shown here
+    sort!(x; dims=1)  # in-place sort along the first dimension
+    for i in axes(x, 2)  # use the matrix's own column indices rather than assuming 1:5
+        @test issorted(x[:,i])  # every column must come out sorted
+    end
+end
+
+@testset "searchsortedfirst/last with generalized indexing" begin
+    o = OffsetVector(1:3, -2)  # the expected results below correspond to values 1, 2, 3 living at indices -1, 0, 1
+    @test searchsortedfirst(o, 4) == lastindex(o) + 1  # needle above every element: one past the last index
+    @test searchsortedfirst(o, 1.5) == 0  # first element >= 1.5 is the value 2
+    @test searchsortedlast(o, 0) == firstindex(o) - 1  # needle below every element: one before the first index
+    @test searchsortedlast(o, 1.5) == -1  # last element <= 1.5 is the value 1
+end
+
 end
diff --git a/test/spawn.jl b/test/spawn.jl
index 95915a5ad804b..5007b7ef0f993 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -579,8 +579,8 @@ end
 @test_throws ArgumentError run(Base.AndCmds(`$truecmd`, ``))
 
 # tests for reducing over collection of Cmd
-@test_throws ArgumentError reduce(&, Base.AbstractCmd[])
-@test_throws ArgumentError reduce(&, Base.Cmd[])
+@test_throws "reducing over an empty collection is not allowed" reduce(&, Base.AbstractCmd[])
+@test_throws "reducing over an empty collection is not allowed" reduce(&, Base.Cmd[])
 @test reduce(&, [`$echocmd abc`, `$echocmd def`, `$echocmd hij`]) == `$echocmd abc` & `$echocmd def` & `$echocmd hij`
 
 # readlines(::Cmd), accidentally broken in #20203
@@ -650,7 +650,7 @@ end
 psep = if Sys.iswindows() ";" else ":" end
 withenv("PATH" => "$(Sys.BINDIR)$(psep)$(ENV["PATH"])") do
     julia_exe = joinpath(Sys.BINDIR, Base.julia_exename())
-    @test Sys.which("julia") == abspath(julia_exe)
+    @test Sys.which(Base.julia_exename()) == abspath(julia_exe)
     @test Sys.which(julia_exe) == abspath(julia_exe)
 end
 
@@ -765,7 +765,21 @@ let text = "input-test-text"
     @test read(proc, String) == string(length(text), '\n')
     @test success(proc)
     @test String(take!(b)) == text
+
+    out = Base.BufferStream()
+    proc = run(catcmd, IOBuffer(text), out, wait=false)
+    @test proc.out === out
+    @test read(out, String) == text
+    @test success(proc)
+
+    out = PipeBuffer()
+    proc = run(catcmd, IOBuffer(SubString(text)), out)
+    @test success(proc)
+    @test proc.out === proc.err === proc.in === devnull
+    @test String(take!(out)) == text
 end
+
+
 @test repr(Base.CmdRedirect(``, devnull, 0, false)) == "pipeline(``, stdin>Base.DevNull())"
 @test repr(Base.CmdRedirect(``, devnull, 1, true)) == "pipeline(``, stdout<Base.DevNull())"
 @test repr(Base.CmdRedirect(``, devnull, 11, true)) == "pipeline(``, 11<Base.DevNull())"
@@ -803,6 +817,33 @@ end
         cmd2 = addenv(cmd, "FOO" => "foo2", "BAR" => "bar"; inherit=true)
         @test strip(String(read(cmd2))) == "foo2 bar"
     end
+    # Keys with value === nothing are deleted
+    cmd = Cmd(`$shcmd -c "echo \$FOO \$BAR"`, env=Dict("FOO" => "foo", "BAR" => "bar"))
+    cmd2 = addenv(cmd, "FOO" => nothing)
+    @test strip(String(read(cmd2))) == "bar"
+    # addenv keeps the cmd's dir (#42131)
+    dir = joinpath(pwd(), "dir")
+    cmd = addenv(setenv(`julia`; dir=dir), Dict())
+    @test cmd.dir == dir
+end
+
+@testset "setenv with dir (with tests for #42131)" begin  # only the `dir` field of each Cmd is inspected; nothing is run
+    dir1 = joinpath(pwd(), "dir1")
+    dir2 = joinpath(pwd(), "dir2")
+    cmd = Cmd(`julia`; dir=dir1)
+    @test cmd.dir == dir1
+    @test Cmd(cmd).dir == dir1  # re-wrapping without keywords keeps the dir
+    @test Cmd(cmd; dir=dir2).dir == dir2  # an explicit dir keyword replaces it
+    @test Cmd(cmd; dir="").dir == ""  # an explicit empty dir also replaces it
+    @test setenv(cmd).dir == dir1  # setenv without a dir keyword keeps cmd's dir
+    @test setenv(cmd; dir=dir2).dir == dir2
+    @test setenv(cmd; dir="").dir == ""
+    @test setenv(cmd, "FOO"=>"foo").dir == dir1  # same three cases with the environment given as a pair
+    @test setenv(cmd, "FOO"=>"foo"; dir=dir2).dir == dir2
+    @test setenv(cmd, "FOO"=>"foo"; dir="").dir == ""
+    @test setenv(cmd, Dict("FOO"=>"foo")).dir == dir1  # same three cases with the environment given as a Dict
+    @test setenv(cmd, Dict("FOO"=>"foo"); dir=dir2).dir == dir2
+    @test setenv(cmd, Dict("FOO"=>"foo"); dir="").dir == ""
 end
 
 
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 2b08e2819e33b..b313a0fa1af4a 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
+
 @testset "padding (lpad and rpad)" begin
     @test lpad("foo", 2) == "foo"
     @test rpad("foo", 2) == "foo"
@@ -486,35 +488,91 @@ end
 end
 
 @testset "chomp/chop" begin
-    @test chomp("foo\n") == "foo"
-    @test chomp("fo∀\n") == "fo∀"
-    @test chomp("foo\r\n") == "foo"
-    @test chomp("fo∀\r\n") == "fo∀"
-    @test chomp("fo∀") == "fo∀"
-    @test chop("") == ""
-    @test chop("fooε") == "foo"
-    @test chop("foεo") == "foε"
-    @test chop("∃∃∃∃") == "∃∃∃"
-    @test chop("∀ϵ∃Δ", head=0, tail=0) == "∀ϵ∃Δ"
-    @test chop("∀ϵ∃Δ", head=0, tail=1) == "∀ϵ∃"
-    @test chop("∀ϵ∃Δ", head=0, tail=2) == "∀ϵ"
-    @test chop("∀ϵ∃Δ", head=0, tail=3) == "∀"
-    @test chop("∀ϵ∃Δ", head=0, tail=4) == ""
-    @test chop("∀ϵ∃Δ", head=0, tail=5) == ""
-    @test chop("∀ϵ∃Δ", head=1, tail=0) == "ϵ∃Δ"
-    @test chop("∀ϵ∃Δ", head=2, tail=0) == "∃Δ"
-    @test chop("∀ϵ∃Δ", head=3, tail=0) == "Δ"
-    @test chop("∀ϵ∃Δ", head=4, tail=0) == ""
-    @test chop("∀ϵ∃Δ", head=5, tail=0) == ""
-    @test chop("∀ϵ∃Δ", head=1, tail=1) == "ϵ∃"
-    @test chop("∀ϵ∃Δ", head=2, tail=2) == ""
-    @test chop("∀ϵ∃Δ", head=3, tail=3) == ""
-    @test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=3)
-    @test_throws ArgumentError chop("∀ϵ∃Δ", head=3, tail=-3)
-    @test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=-3)
-
-    @test isa(chomp("foo"), SubString)
-    @test isa(chop("foo"), SubString)
+    for S in (String, SubStr, Test.GenericString)
+        @test chomp(S("foo\n")) == "foo"
+        @test chomp(S("fo∀\n")) == "fo∀"
+        @test chomp(S("foo\r\n")) == "foo"
+        @test chomp(S("fo∀\r\n")) == "fo∀"
+        @test chomp(S("fo∀")) == "fo∀"
+        @test chop(S("")) == ""
+        @test chop(S("fooε")) == "foo"
+        @test chop(S("foεo")) == "foε"
+        @test chop(S("∃∃∃∃")) == "∃∃∃"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=0) == "∀ϵ∃Δ"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=1) == "∀ϵ∃"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=2) == "∀ϵ"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=3) == "∀"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=4) == ""
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=5) == ""
+        @test chop(S("∀ϵ∃Δ"), head=1, tail=0) == "ϵ∃Δ"
+        @test chop(S("∀ϵ∃Δ"), head=2, tail=0) == "∃Δ"
+        @test chop(S("∀ϵ∃Δ"), head=3, tail=0) == "Δ"
+        @test chop(S("∀ϵ∃Δ"), head=4, tail=0) == ""
+        @test chop(S("∀ϵ∃Δ"), head=5, tail=0) == ""
+        @test chop(S("∀ϵ∃Δ"), head=1, tail=1) == "ϵ∃"
+        @test chop(S("∀ϵ∃Δ"), head=2, tail=2) == ""
+        @test chop(S("∀ϵ∃Δ"), head=3, tail=3) == ""
+        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=3)
+        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=3, tail=-3)
+        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=-3)
+
+        for T in (String, SubStr, Test.GenericString, Regex)
+            S === Test.GenericString && T === Regex && continue # not supported
+            @test chopprefix(S("fo∀\n"), T("bog")) == "fo∀\n"
+            @test chopprefix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
+            @test chopprefix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
+            @test chopprefix(S("fo∀\n"), T("f")) == "o∀\n"
+            @test chopprefix(S("fo∀\n"), T("fo")) == "∀\n"
+            @test chopprefix(S("fo∀\n"), T("fo∀")) == "\n"
+            @test chopprefix(S("fo∀\n"), T("fo∀\n")) == ""
+            @test chopprefix(S("\nfo∀"), T("bog")) == "\nfo∀"
+            @test chopprefix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
+            @test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopprefix(S("\nfo∀"), T("\n")) == "fo∀"
+            @test chopprefix(S("\nfo∀"), T("\nf")) == "o∀"
+            @test chopprefix(S("\nfo∀"), T("\nfo")) == "∀"
+            @test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopprefix(S(""), T("")) == ""
+            @test chopprefix(S(""), T("asdf")) == ""
+            @test chopprefix(S(""), T("∃∃∃")) == ""
+            @test chopprefix(S("εfoo"), T("ε")) == "foo"
+            @test chopprefix(S("ofoε"), T("o")) == "foε"
+            @test chopprefix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
+            @test chopprefix(S("∃∃∃∃"), T("")) == "∃∃∃∃"
+
+            @test chopsuffix(S("fo∀\n"), T("bog")) == "fo∀\n"
+            @test chopsuffix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
+            @test chopsuffix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
+            @test chopsuffix(S("fo∀\n"), T("\n")) == "fo∀"
+            @test chopsuffix(S("fo∀\n"), T("∀\n")) == "fo"
+            @test chopsuffix(S("fo∀\n"), T("o∀\n")) == "f"
+            @test chopsuffix(S("fo∀\n"), T("fo∀\n")) == ""
+            @test chopsuffix(S("\nfo∀"), T("bog")) == "\nfo∀"
+            @test chopsuffix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
+            @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopsuffix(S("\nfo∀"), T("∀")) == "\nfo"
+            @test chopsuffix(S("\nfo∀"), T("o∀")) == "\nf"
+            @test chopsuffix(S("\nfo∀"), T("fo∀")) == "\n"
+            @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopsuffix(S(""), T("")) == ""
+            @test chopsuffix(S(""), T("asdf")) == ""
+            @test chopsuffix(S(""), T("∃∃∃")) == ""
+            @test chopsuffix(S("fooε"), T("ε")) == "foo"
+            @test chopsuffix(S("εofo"), T("o")) == "εof"
+            @test chopsuffix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
+            @test chopsuffix(S("∃∃∃∃"), T("")) == "∃∃∃∃"
+        end
+        @test isa(chomp(S("foo")), SubString)
+        @test isa(chop(S("foo")), SubString)
+
+        if S !== Test.GenericString
+            @test chopprefix(S("∃∃∃b∃"), r"∃+") == "b∃"
+            @test chopsuffix(S("∃b∃∃∃"), r"∃+") == "∃b"
+        end
+
+        @test isa(chopprefix(S("foo"), "fo"), SubString)
+        @test isa(chopsuffix(S("foo"), "oo"), SubString)
+    end
 end
 
 @testset "bytes2hex and hex2bytes" begin
diff --git a/test/subarray.jl b/test/subarray.jl
index 334211d3e3975..cc8aab94e4c42 100644
--- a/test/subarray.jl
+++ b/test/subarray.jl
@@ -718,3 +718,22 @@ end
     s = @view v[1]
     @test copy(s) == fill([1])
 end
+
+@testset "issue 40314: views of CartesianIndices" begin
+    c = CartesianIndices((1:2, 1:4))
+    @test (@view c[c]) === c  # viewing c with itself must be === to c
+    for inds in Any[(1:1, 1:2), (1:1:1, 1:2)]  # first index given as a unit range, then as a step range
+        c2 = @view c[inds...]
+        @test c2 isa CartesianIndices{2}  # the view is itself a 2-d CartesianIndices
+        for i2 in inds[2], i1 in inds[1]
+            @test c2[i1, i2] == c[i1, i2]  # elements agree with the parent at the same indices
+        end
+    end
+    for inds in Any[(Colon(), 1:2), (Colon(), 1:1:2)]  # Colon in the first dimension; second as unit range, then step range
+        c2 = @view c[inds...]
+        @test c2 isa CartesianIndices{2}
+        for i2 in inds[2], i1 in axes(c, 1)  # Colon selects the whole first axis of c
+            @test c2[i1, i2] == c[i1, i2]
+        end
+    end
+end
diff --git a/test/subtype.jl b/test/subtype.jl
index 244ce3c4b7900..3eca685aee84c 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -587,7 +587,7 @@ function test_old()
     @test !(Type{Tuple{Nothing}} <: Tuple{Type{Nothing}})
 end
 
-const menagerie =
+const easy_menagerie =
     Any[Bottom, Any, Int, Int8, Integer, Real,
         Array{Int,1}, AbstractArray{Int,1},
         Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{},
@@ -607,12 +607,14 @@ const menagerie =
         Array{(@UnionAll T<:Int T), 1},
         (@UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S}),
         Union{Int,Ref{Union{Int,Int8}}},
-        (@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}}),
         ]
 
-let new = Any[]
-    # add variants of each type
-    for T in menagerie
+const hard_menagerie =
+    Any[(@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}})]
+
+function add_variants!(types)
+    new = Any[]
+    for T in types
         push!(new, Ref{T})
         push!(new, Tuple{T})
         push!(new, Tuple{T,T})
@@ -620,9 +622,14 @@ let new = Any[]
         push!(new, @UnionAll S<:T S)
         push!(new, @UnionAll S<:T Ref{S})
     end
-    append!(menagerie, new)
+    append!(types, new)
 end
 
+add_variants!(easy_menagerie)
+add_variants!(hard_menagerie)
+
+const menagerie = [easy_menagerie; hard_menagerie]
+
 function test_properties()
     x→y = !x || y
     ¬T = @UnionAll X>:T Ref{X}
@@ -1057,14 +1064,15 @@ function test_intersection()
 end
 
 function test_intersection_properties()
-    approx = Tuple{Vector{Vector{T}} where T, Vector{Vector{T}} where T}
-    for T in menagerie
-        for S in menagerie
+    for i in eachindex(menagerie)
+        T = menagerie[i]
+        for j in eachindex(menagerie)
+            S = menagerie[j]
             I = _type_intersect(T,S)
             I2 = _type_intersect(S,T)
             @test isequal_type(I, I2)
-            if I == approx
-                # TODO: some of these cases give a conservative answer
+            if i > length(easy_menagerie) || j > length(easy_menagerie)
+                # TODO: these cases give a conservative answer
                 @test issub(I, T) || issub(I, S)
             else
                 @test issub(I, T) && issub(I, S)
@@ -1569,7 +1577,7 @@ f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1
                Tuple{Type{Val{T}},Int,T} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T,
                Tuple{Type,Int,Integer},
-               Tuple{Type{Val{T}},Int,T} where T<:Integer)
+               Tuple{Type{Val{T}},Int,Integer} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T>:Integer,
                Tuple{Type,Int,Integer},
                Tuple{Type{Val{T}},Int,Integer} where T>:Integer)
@@ -1796,7 +1804,7 @@ let X1 = Tuple{AlmostLU, Vector{T}} where T,
     # TODO: the quality of this intersection is not great; for now just test that it
     # doesn't stack overflow
     @test I<:X1 || I<:X2
-    actual = Tuple{AlmostLU{S, X} where X<:Matrix{S}, Vector{S}} where S<:Union{Float32, Float64}
+    actual = Tuple{Union{AlmostLU{S, X} where X<:Matrix{S}, AlmostLU{S, <:Matrix}}, Vector{S}} where S<:Union{Float32, Float64}
     @test I == actual
 end
 
@@ -1858,7 +1866,7 @@ let A = Tuple{Type{T} where T<:Ref, Ref, Union{T, Union{Ref{T}, T}} where T<:Ref
     I = typeintersect(A,B)
     # this was a case where <: disagreed with === (due to a badly-normalized type)
     @test I == typeintersect(A,B)
-    @test I == Tuple{Type{T}, Ref{T}, Union{Ref{T}, T}} where T<:Ref
+    @test I == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
 end
 
 # issue #39218
@@ -1898,8 +1906,8 @@ end
 # issue #39948
 let A = Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
     I = typeintersect(A, Tuple{Vararg{Vector{T}}} where T)
-    @test_broken I <: A
-    @test_broken !Base.has_free_typevars(I)
+    @test I <: A
+    @test !Base.has_free_typevars(I)
 end
 
 # issue #8915
@@ -1914,3 +1922,57 @@ end
 f18985(x::T, y...) where {T<:Union{Int32,Int64}} = (length(y), f18985(y[1], y[2:end]...)...)
 f18985(x::T) where {T<:Union{Int32,Int64}} = 100
 @test f18985(1, 2, 3) == (2, 1, 100)
+
+# issue #40048
+let A = Tuple{Ref{T}, Vararg{T}} where T,
+    B = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Int}, Union{Ref{S}, S}} where S where U,
+    C = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Ref{W}}, Union{Ref{S}, W, V}} where V<:AbstractArray where W where S where U
+    I = typeintersect(A, B)
+    @test I != Union{}
+    @test I <: A
+    @test I <: B
+    # avoid stack overflow
+    J = typeintersect(A, C)
+    @test_broken J != Union{}
+end
+
+let A = Tuple{Dict{I,T}, I, T} where T where I,
+    B = Tuple{AbstractDict{I,T}, T, I} where T where I
+    # TODO: we should probably have I == T here
+    @test typeintersect(A, B) == Tuple{Dict{I,T}, I, T} where {I, T}
+end
+
+let A = Tuple{UnionAll, Vector{Any}},
+    B = Tuple{Type{T}, T} where T<:AbstractArray,
+    I = typeintersect(A, B)
+    @test !isconcretetype(I)
+    @test I == Tuple{Type{T}, Vector{Any}} where T<:AbstractArray
+end
+
+@testintersect(Tuple{Type{Vector{<:T}}, T} where {T<:Integer},
+               Tuple{Type{T}, AbstractArray} where T<:Array,
+               Bottom)
+
+struct S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}
+end
+
+@testintersect(Tuple{Type{S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}} where _Z14 where _Z13 where _Z12 where _Z11 where _Z10 where _Z9 where _Z8 where _Z7 where _Z6 where _Z5 where _Z4 where _Z3 where _Z2 where _Z1 where _Z where _Y where _X where _W where _V where _U where _T where _S where _R where _Q where _P where _O where _N where _M where _L where _K where _J where _I where _H where _G where _F where _E where _D where _C where _B where _A, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any},
+               Tuple{Type{S40{A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1}, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1,
+               Bottom)
+
+let A = Tuple{Any, Type{Ref{_A}} where _A},
+    B = Tuple{Type{T}, Type{<:Union{Ref{T}, T}}} where T,
+    I = typeintersect(A, B)
+    @test I != Union{}
+    # TODO: this intersection result is still too narrow
+    @test_broken Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I
+end
+
+@testintersect(Tuple{Type{T}, T} where T<:(Tuple{Vararg{_A, _B}} where _B where _A),
+               Tuple{Type{Tuple{Vararg{_A, N}} where _A<:F}, Pair{N, F}} where F where N,
+               Bottom)
+
+# issue #42409
+@testintersect(Tuple{Type{Pair{_A, S} where S<:AbstractArray{<:_A, 2}}, Dict} where _A,
+               Tuple{Type{Pair{_A, S} where S<:AbstractArray{<:_A, 2}} where _A, Union{Array, Pair}},
+               Bottom)
diff --git a/test/syntax.jl b/test/syntax.jl
index 95242b44b4871..beff019d72e80 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -1509,8 +1509,18 @@ let ex = Meta.parse("@test27521(2) do y; y; end")
     @test macroexpand(@__MODULE__, ex) == Expr(:tuple, fex, 2)
 end
 
+# issue #43018
+module M43018  # holds a macro so it can be invoked through a module-qualified name
+    macro test43018(fn)  # takes one expression argument
+        quote $(fn)() end  # expands to code that calls `fn` with no arguments
+    end
+end
+@test :(@M43018.test43018() do; end) == :(M43018.@test43018() do; end)
+@test @macroexpand(@M43018.test43018() do; end) == @macroexpand(M43018.@test43018() do; end)
+@test @M43018.test43018() do; 43018 end == 43018
+
 # issue #27129
-f27129(x = 1) = (@Base._inline_meta; x)
+f27129(x = 1) = (@inline; x)
 for meth in methods(f27129)
     @test ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), meth, C_NULL, meth.source).inlineable
 end
@@ -2493,6 +2503,10 @@ import .Mod: x as x2
 @test x2 == 1
 @test !@isdefined(x)
 
+module_names = names(@__MODULE__; all=true, imported=true)
+@test :x2 ∈ module_names
+@test :x ∉ module_names
+
 import .Mod2.y as y2
 
 @test y2 == 2
@@ -2959,3 +2973,118 @@ end
     ex.args = fill!(Vector{Any}(undef, 600000), 1)
     @test_throws ErrorException("syntax: expression too large") eval(ex)
 end
+
+# issue 25678
+@generated f25678(x::T) where {T} = code_lowered(sin, Tuple{x})[]
+@test f25678(pi/6) === sin(pi/6)
+
+@generated g25678(x) = return :x
+@test g25678(7) === 7
+
+# issue #19012
+@test Meta.parse("\U2200", raise=false) == Symbol("∀")
+@test Meta.parse("\U2203", raise=false) == Symbol("∃")
+@test Meta.parse("a\U2203", raise=false) == Symbol("a∃")
+@test Meta.parse("\U2204", raise=false) == Symbol("∄")
+
+# issue 42220
+macro m42220()  # returns a quoted definition of `foo`, a method with a defaulted type argument and a `where` clause
+    return quote
+        function foo(::Type{T}=Float64) where {T}  # T defaults to Float64 when no argument is passed
+            return Vector{T}(undef, 10)  # uninitialized 10-element vector with element type T
+        end
+    end
+end
+@test @m42220()() isa Vector{Float64}
+@test @m42220()(Bool) isa Vector{Bool}
+
+@testset "try else" begin
+    fails(f) = try f() catch; true else false end  # true when f() throws, false when it returns normally
+    @test fails(error)
+    @test !fails(() -> 1 + 2)
+
+    @test_throws ParseError Meta.parse("try foo() else bar() end")  # `else` without any `catch`
+    @test_throws ParseError Meta.parse("try foo() else bar() catch; baz() end")  # `else` placed before `catch`
+    @test_throws ParseError Meta.parse("try foo() catch; baz() finally foobar() else bar() end")  # `else` placed after `finally`
+    @test_throws ParseError Meta.parse("try foo() finally foobar() else bar() catch; baz() end")  # `finally` placed first
+
+    err = try  # case 1: the body succeeds and the `else` branch throws
+        try
+            1 + 2
+        catch
+        else
+            error("foo")  # not handled by the sibling `catch`; reaches the outer try
+        end
+    catch e
+        e
+    end
+    @test err == ErrorException("foo")
+
+    x = 0
+    err = try  # case 2: as case 1, with a `finally` clause added
+        try
+            1 + 2
+        catch
+        else
+            error("foo")
+        finally
+            x += 1  # must run even though `else` threw
+        end
+    catch e
+        e
+    end
+    @test err == ErrorException("foo")
+    @test x == 1
+
+    x = 0
+    err = try  # case 3: nothing throws
+        try
+            1 + 2
+        catch
+            5 + 6
+        else
+            3 + 4
+        finally
+            x += 1
+        end
+    catch e
+        e
+    end
+    @test err == 3 + 4  # the inner try evaluates to the `else` branch's value
+    @test x == 1
+
+    x = 0
+    err = try  # case 4: the body throws
+        try
+            error()
+        catch
+            5 + 6
+        else
+            3 + 4
+        finally
+            x += 1
+        end
+    catch e
+        e
+    end
+    @test err == 5 + 6  # the `catch` value is used and `else` is skipped
+    @test x == 1
+end
+
+@test_throws ParseError Meta.parse("""
+function checkUserAccess(u::User)
+	if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066"
+		return true
+	end
+	return false
+end
+""")
+
+@test_throws ParseError Meta.parse("""
+function checkUserAccess(u::User)
+	#=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =#
+		return true
+	#= end admin only \u202e \u2066end\u2069 \u2066=#
+	return false
+end
+""")
diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl
index f3583a532215a..67c2023a0bc84 100644
--- a/test/testhelpers/Furlongs.jl
+++ b/test/testhelpers/Furlongs.jl
@@ -14,7 +14,7 @@ struct Furlong{p,T<:Number} <: Number
 end
 Furlong(x::T) where {T<:Number} = Furlong{1,T}(x)
 Furlong(x::Furlong) = x
-(::Type{T})(x::Furlong) where {T<:Number} = T(x.val)::T
+(::Type{T})(x::Furlong{0}) where {T<:Number} = T(x.val)::T
 Furlong{p}(v::Number) where {p} = Furlong{p,typeof(v)}(v)
 Furlong{p}(x::Furlong{q}) where {p,q} = (@assert(p==q); Furlong{p,typeof(x.val)}(x.val))
 Furlong{p,T}(x::Furlong{q}) where {T,p,q} = (@assert(p==q); Furlong{p,T}(T(x.val)))
diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl
new file mode 100644
index 0000000000000..64c816f740fb2
--- /dev/null
+++ b/test/testhelpers/SizedArrays.jl
@@ -0,0 +1,40 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# SizedArrays
+
+# This test file defines an array wrapper with a static size. It can be used to
+# test the action of LinearAlgebra with non-number eltypes.
+
+module SizedArrays
+
+import Base: +, *, ==
+
+export SizedArray
+
+struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}  # SZ is the size tuple, carried as a type parameter
+    data::A  # wrapped array that stores the elements
+    function SizedArray{SZ}(data::AbstractArray{T,N}) where {SZ,T,N}  # takes T, N and the storage type from `data`
+        SZ == size(data) || throw(ArgumentError("size mismatch!"))
+        new{SZ,T,N,typeof(data)}(data)
+    end
+    function SizedArray{SZ,T,N,A}(data::AbstractArray{T,N}) where {SZ,T,N,A}  # fully parameterized form
+        SZ == size(data) || throw(ArgumentError("size mismatch!"))
+        new{SZ,T,N,A}(A(data))  # converts `data` to the requested storage type A
+    end
+end
+Base.convert(::Type{SizedArray{SZ,T,N,A}}, data::AbstractArray) where {SZ,T,N,A} = SizedArray{SZ,T,N,A}(data)
+
+# Minimal AbstractArray interface: size is read from the SZ type parameter, indexing forwards to `data`
+Base.size(a::SizedArray) = size(typeof(a))
+Base.size(::Type{<:SizedArray{SZ}}) where {SZ} = SZ
+Base.getindex(A::SizedArray, i...) = getindex(A.data, i...)
+Base.zero(::Type{T}) where T <: SizedArray = SizedArray{size(T)}(zeros(eltype(T), size(T)))  # zero is built from the type alone
++(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = SizedArray{SZ}(S1.data + S2.data)  # defined only for operands with equal SZ
+==(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = S1.data == S2.data
+function *(S1::SizedArray, S2::SizedArray)  # product of the wrapped arrays, for 1- or 2-dimensional operands only
+    0 < ndims(S1) < 3 && 0 < ndims(S2) < 3 && size(S1, 2) == size(S2, 1) || throw(ArgumentError("size mismatch!"))
+    data = S1.data * S2.data
+    SZ = ndims(data) == 1 ? (size(S1, 1), ) : (size(S1, 1), size(S2, 2))  # 1-tuple for a vector result, 2-tuple otherwise
+    SizedArray{SZ}(data)
+end
+end
diff --git a/test/testhelpers/coverage_file.info b/test/testhelpers/coverage_file.info
index 9b4b1c1f2f96e..c83e75dee8060 100644
--- a/test/testhelpers/coverage_file.info
+++ b/test/testhelpers/coverage_file.info
@@ -4,14 +4,15 @@ DA:4,1
 DA:5,0
 DA:7,1
 DA:8,1
-DA:9,5
+DA:9,3
+DA:10,5
 DA:11,1
 DA:12,1
 DA:14,0
 DA:17,1
-DA:19,1
+DA:19,2
 DA:20,1
 DA:22,1
-LH:10
-LF:13
+LH:12
+LF:14
 end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad b/test/testhelpers/coverage_file.info.bad
index 44e33a9df68c7..311f6379381ee 100644
--- a/test/testhelpers/coverage_file.info.bad
+++ b/test/testhelpers/coverage_file.info.bad
@@ -4,16 +4,17 @@ DA:4,1
 DA:5,0
 DA:7,1
 DA:8,1
-DA:9,5
+DA:9,3
+DA:10,5
 DA:11,1
 DA:12,1
 DA:14,0
 DA:17,1
 DA:18,0
-DA:19,1
+DA:19,2
 DA:20,1
 DA:22,1
 DA:1234,0
-LH:11
-LF:15
+LH:12
+LF:16
 end_of_record
diff --git a/test/threads.jl b/test/threads.jl
index 736cecada3cd8..4464c2a2c8859 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -2,6 +2,27 @@
 
 using Test
 
+# simple sanity tests for locks under cooperative concurrent access
+let lk = ReentrantLock()
+    c1 = Base.Event()
+    c2 = Base.Event()
+    @test trylock(lk)
+    @test trylock(lk)  # second acquisition by the same task is expected to succeed
+    t1 = @async (notify(c1); lock(lk); unlock(lk); trylock(lk))  # t1 has to wait in lock(lk) while this task holds lk
+    t2 = @async (notify(c2); trylock(lk))  # t2 makes a single non-blocking attempt
+    wait(c1)  # both tasks have started once c1 and c2 are notified
+    wait(c2)
+    @test t1.queue === lk.cond_wait.waitq  # t1 is parked on the lock's wait queue
+    @test t2.queue !== lk.cond_wait.waitq
+    @test istaskdone(t2)
+    @test !fetch(t2)  # t2's trylock must have failed
+    unlock(lk)  # first of the two unlocks matching the two trylocks above
+    @test t1.queue === lk.cond_wait.waitq  # t1 must still be waiting after only one unlock
+    unlock(lk)  # second unlock
+    @test t1.queue !== lk.cond_wait.waitq  # t1 has left the wait queue
+    @test fetch(t1)  # t1's final trylock succeeded
+end
+
 let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no threads_exec.jl`
     for test_nthreads in (1, 2, 4, 4) # run once to try single-threaded mode, then try a couple times to trigger bad races
         new_env = copy(ENV)
@@ -147,3 +168,49 @@ end
 
 # We don't need the watchdog anymore
 close(proc.in)
+
+# https://github.com/JuliaLang/julia/pull/42973
+@testset "spawn and wait *a lot* of tasks in @profile" begin
+    # Not using threads_exec.jl for better isolation, reproducibility, and a
+    # tighter timeout.
+    script = "profile_spawnmany_exec.jl"
+    cmd_base = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $script`
+    @testset for n in [20000, 200000, 2000000]  # one child process per task count
+        cmd = ignorestatus(setenv(cmd_base, "NTASKS" => n; dir = @__DIR__))  # non-zero exit must not throw; checked via success(proc) below
+        cmd = pipeline(cmd; stdout = stderr, stderr)  # child's stdout and stderr both go to our stderr
+        proc = run(cmd; wait = false)  # start without blocking so the timer below can supervise it
+        done = Threads.Atomic{Bool}(false)  # set after wait(proc) returns; polled by the timer callback
+        timeout = false
+        timer = Timer(100) do _  # watchdog callback, armed with a delay of 100
+            timeout = true
+            for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL]  # tried in this order
+                for _ in 1:1000  # up to 1000 kill attempts per signal
+                    kill(proc, sig)
+                    if done[]
+                        if sig != Base.SIGTERM
+                            @warn "Terminating `$script` required signal $sig"
+                        end
+                        return
+                    end
+                    sleep(0.001)
+                end
+            end
+        end
+        try
+            wait(proc)
+        finally
+            done[] = true  # tell a running watchdog callback to stop signalling
+            close(timer)
+        end
+        if ( !success(proc) ) || ( timeout )  # log diagnostics before the assertions below
+            @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout
+        end
+        if Sys.iswindows()
+            # Known failure: https://github.com/JuliaLang/julia/issues/43124
+            @test_skip success(proc)
+        else
+            @test success(proc)
+            @test !timeout
+        end
+    end
+end
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index f3d2dc9577c64..6f43ad917a484 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -27,6 +27,27 @@ end
 # (expected test duration is about 18-180 seconds)
 Timer(t -> killjob("KILLING BY THREAD TEST WATCHDOG\n"), 1200)
 
+# basic lock check
+if nthreads() > 1  # only exercised when more than one thread is available
+    let lk = Base.Threads.SpinLock()
+        c1 = Base.Event()
+        c2 = Base.Event()
+        @test trylock(lk)
+        @test !trylock(lk)  # a second trylock on the held lock must fail
+        t1 = Threads.@spawn (notify(c1); lock(lk); unlock(lk); trylock(lk))  # t1 has to wait in lock(lk) until the unlock below
+        t2 = Threads.@spawn (notify(c2); trylock(lk))  # t2 makes a single non-blocking attempt
+        Libc.systemsleep(0.1) # block our thread from scheduling for a bit
+        wait(c1)
+        wait(c2)
+        @test !fetch(t2)  # t2's trylock must have failed
+        @test istaskdone(t2)
+        @test !istaskdone(t1)  # t1 must not finish while lk is held here
+        unlock(lk)
+        @test fetch(t1)  # t1's final trylock succeeded
+        @test istaskdone(t1)
+    end
+end
+
 # threading constructs
 
 let a = zeros(Int, 2 * nthreads())
@@ -496,15 +517,6 @@ if cfunction_closure
     test_thread_cfunction()
 end
 
-# Compare the two ways of checking if threading is enabled.
-# `jl_tls_states` should only be defined on non-threading build.
-if ccall(:jl_threading_enabled, Cint, ()) == 0
-    @test nthreads() == 1
-    cglobal(:jl_tls_states) != C_NULL
-else
-    @test_throws ErrorException cglobal(:jl_tls_states)
-end
-
 function test_thread_range()
     a = zeros(Int, nthreads())
     @threads for i in 1:threadid()
@@ -912,3 +924,92 @@ end
         @test reproducible_rand(r, 10) == val
     end
 end
+
+# @spawn racing with sync_end
+
+hidden_spawn(f) = Threads.@spawn f()  # runs f in a spawned task and returns that task; called by sync_end_race
+
+function sync_end_race()  # returns :completed, :notscheduled, or the initial :notset
+    y = Ref(:notset)  # overwritten with :completed if the inner spawned task runs
+    local t
+    @sync begin
+        for _ in 1:6  # tweaked to maximize `nerror` below
+            Threads.@spawn nothing
+        end
+        t = hidden_spawn() do  # t is the task created inside hidden_spawn
+            Threads.@spawn y[] = :completed  # second spawn, issued from within task t
+        end
+    end
+    try
+        wait(t)
+    catch
+        return :notscheduled  # any exception from wait(t) is reported this way
+    end
+    return y[]
+end
+
+function check_sync_end_race()  # returns nothing, or (; i, y) on the first unexpected result
+    @sync begin
+        done = Threads.Atomic{Bool}(false)  # NOTE(review): written in `finally` but never read in this function
+        try
+            # `Threads.@spawn` must fail to be scheduled or complete its execution:
+            ncompleted = 0
+            nnotscheduled = 0
+            nerror = 0
+            for i in 1:1000
+                y = try
+                    yield()
+                    sync_end_race()
+                catch err
+                    if err isa CompositeException
+                        if err.exceptions[1] isa Base.ScheduledAfterSyncException
+                            nerror += 1
+                            continue  # tolerated outcome: count it and move to the next iteration
+                        end
+                    end
+                    rethrow()  # any other exception propagates to the caller
+                end
+                y in (:completed, :notscheduled) || return (; i, y)  # unexpected value: report iteration and value
+                ncompleted += y === :completed
+                nnotscheduled += y === :notscheduled
+            end
+            # Useful for tuning the test:
+            @debug "`check_sync_end_race` done" nthreads() ncompleted nnotscheduled nerror
+        finally
+            done[] = true
+        end
+    end
+    return nothing
+end
+
+@testset "Racy `@spawn`" begin
+    @test check_sync_end_race() === nothing  # a non-nothing return is the (; i, y) of an unexpected result
+end
+
+# issue #41546, thread-safe package loading
+@testset "package loading" begin
+    ch = Channel{Bool}(nthreads())  # each spawned task puts one value here before waiting
+    barrier = Base.Event()  # notified after all tasks have checked in through ch
+    old_act_proj = Base.ACTIVE_PROJECT[]  # saved so `finally` can restore it
+    try
+        pushfirst!(LOAD_PATH, "@")  # undone by popfirst! in `finally`
+        Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
+        @sync begin
+            for _ in 1:nthreads()
+                Threads.@spawn begin
+                    put!(ch, true)
+                    wait(barrier)
+                    @eval using TestPkg  # every task issues the same `using` once the barrier is notified
+                end
+            end
+            for _ in 1:nthreads()
+                take!(ch)  # one take per spawned task
+            end
+            notify(barrier)
+        end
+        @test Base.root_module(@__MODULE__, :TestPkg) isa Module
+    finally
+        Base.ACTIVE_PROJECT[] = old_act_proj
+        popfirst!(LOAD_PATH)
+    end
+end
diff --git a/test/tuple.jl b/test/tuple.jl
index 9b44e421184d3..e7f0eb24ae100 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -283,7 +283,7 @@ end
     @test mapfoldl(abs, =>, (-1,-2,-3,-4), init=-10) == ((((-10=>1)=>2)=>3)=>4)
     @test mapfoldl(abs, =>, (), init=-10) == -10
     @test mapfoldl(abs, Pair{Any,Any}, (-30:-1...,)) == mapfoldl(abs, Pair{Any,Any}, [-30:-1...,])
-    @test_throws ArgumentError mapfoldl(abs, =>, ())
+    @test_throws "reducing over an empty collection" mapfoldl(abs, =>, ())
 end
 
 @testset "filter" begin
@@ -361,6 +361,24 @@ end
     @test prod(()) === 1
     @test prod((1,2,3)) === 6
 
+    # issue 39182
+    @test sum((0xe1, 0x1f)) === sum([0xe1, 0x1f])
+    @test sum((Int8(3),)) === Int(3)
+    @test sum((UInt8(3),)) === UInt(3)
+    @test sum((3,)) === Int(3)
+    @test sum((3.0,)) === 3.0
+    @test sum(("a",)) == sum(["a"])
+    @test sum((0xe1, 0x1f), init=0x0) == sum([0xe1, 0x1f], init=0x0)
+
+    # issue 39183
+    @test prod((Int8(100), Int8(100))) === 10000
+    @test prod((Int8(3),)) === Int(3)
+    @test prod((UInt8(3),)) === UInt(3)
+    @test prod((3,)) === Int(3)
+    @test prod((3.0,)) === 3.0
+    @test prod(("a",)) == prod(["a"])
+    @test prod((0xe1, 0x1f), init=0x1) == prod([0xe1, 0x1f], init=0x1)
+
     @testset "all" begin
         @test all(()) === true
         @test all((false,)) === false
@@ -506,6 +524,20 @@ end
         @test findnext(isequal(1), (1, 1), UInt(2)) isa Int
         @test findprev(isequal(1), (1, 1), UInt(1)) isa Int
     end
+
+    # recursive implementation should allow constant-folding for small tuples
+    @test Base.return_types() do
+        findfirst(==(2), (1.0,2,3f0))
+    end == Any[Int]
+    @test Base.return_types() do
+        findfirst(==(0), (1.0,2,3f0))
+    end == Any[Nothing]
+    @test Base.return_types() do
+        findlast(==(2), (1.0,2,3f0))
+    end == Any[Int]
+    @test Base.return_types() do
+        findlast(==(0), (1.0,2,3f0))
+    end == Any[Nothing]
 end
 
 @testset "properties" begin
@@ -639,3 +671,12 @@ end
 
 # https://github.com/JuliaLang/julia/issues/40814
 @test Base.return_types(NTuple{3,Int}, (Vector{Int},)) == Any[NTuple{3,Int}]
+
+# issue #42457
+f42457(a::NTuple{3,Int}, b::Tuple)::Bool = Base.isequal(a, Base.inferencebarrier(b)::Tuple)  # compares a against b passed through inferencebarrier
+@test f42457((1, 1, 1), (1, 1, 1))
+@test !isempty(methods(Base._isequal, (NTuple{3, Int}, Tuple)))  # some Base._isequal method must match these argument types
+g42457(a, b) = Base.isequal(a, b) ? 1 : 2.0  # Int on the true branch, Float64 on the false branch
+@test only(Base.return_types(g42457, (NTuple{3, Int}, Tuple))) === Union{Float64, Int}
+@test only(Base.return_types(g42457, (NTuple{3, Int}, NTuple))) === Union{Float64, Int}
+@test only(Base.return_types(g42457, (NTuple{3, Int}, NTuple{4}))) === Float64  # with NTuple{4}, only the Float64 branch is expected
diff --git a/test/vecelement.jl b/test/vecelement.jl
index 5652ea10d3aa6..6638f06f4f358 100644
--- a/test/vecelement.jl
+++ b/test/vecelement.jl
@@ -96,7 +96,7 @@ const _llvmtypes = Dict{DataType, String}(
     ret <$(N) x $(llvmT)> %3
     """
     return quote
-        Base.@_inline_meta
+        Base.@inline
         Core.getfield(Base, :llvmcall)($exp, Vec{$N, $T}, Tuple{Vec{$N, $T}, Vec{$N, $T}}, x, y)
     end
 end
diff --git a/test/worlds.jl b/test/worlds.jl
index 2b4f575e1905a..8a0c936d3df8d 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -107,8 +107,24 @@ end
 
 g265() = [f265(x) for x in 1:3.]
 wc265 = get_world_counter()
-f265(::Any) = 1.0
-@test wc265 + 1 == get_world_counter()
+wc265_41332a = Task(tls_world_age)  # task created before the new f265 method is defined
+@test tls_world_age() == wc265
+(function ()
+    global wc265_41332b = Task(tls_world_age)  # created inside the function, before the @eval
+    @eval f265(::Any) = 1.0
+    global wc265_41332c = Base.invokelatest(Task, tls_world_age)  # created through invokelatest, after the @eval
+    global wc265_41332d = Task(tls_world_age)  # created by a direct call, after the @eval
+    nothing
+end)()
+@test wc265 + 2 == get_world_counter() == tls_world_age()  # counter is expected to have advanced by 2 since wc265
+schedule(wc265_41332a)
+schedule(wc265_41332b)
+schedule(wc265_41332c)
+schedule(wc265_41332d)
+@test wc265 == fetch(wc265_41332a)  # each fetch yields the task's tls_world_age() result
+@test wc265 + 1 == fetch(wc265_41332b)
+@test wc265 + 2 == fetch(wc265_41332c)  # only the invokelatest-created task is expected to report wc265 + 2
+@test wc265 + 1 == fetch(wc265_41332d)  # same expected value as b, although created after the @eval
 chnls, tasks = Base.channeled_tasks(2, wfunc)
 t265 = tasks[1]
 
@@ -136,7 +152,9 @@ f265(::Int) = 1
 
 # test for method errors
 h265() = true
-loc_h265 = "$(@__FILE__):$(@__LINE__() - 1)"
+file = @__FILE__
+Base.stacktrace_contract_userdir() && (file = Base.contractuser(file))  # presumably matches the path form printed in stack traces; confirm
+loc_h265 = "$file:$(@__LINE__() - 3)"  # points at the `h265() = true` line, 3 lines above this one
 @test h265()
 @test_throws TaskFailedException(t265) put_n_take!(h265, ())
 @test_throws TaskFailedException(t265) fetch(t265)