diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp
index 0feda605ea66bf..50ff149e38776c 100644
--- a/clang-tools-extra/clang-tidy/ClangTidy.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp
@@ -529,24 +529,8 @@ runClangTidy(clang::tidy::ClangTidyContext &Context,
         return AdjustedArgs;
       };
 
-  // Remove plugins arguments.
-  ArgumentsAdjuster PluginArgumentsRemover =
-      [](const CommandLineArguments &Args, StringRef Filename) {
-        CommandLineArguments AdjustedArgs;
-        for (size_t I = 0, E = Args.size(); I < E; ++I) {
-          if (I + 4 < Args.size() && Args[I] == "-Xclang" &&
-              (Args[I + 1] == "-load" || Args[I + 1] == "-add-plugin" ||
-               StringRef(Args[I + 1]).startswith("-plugin-arg-")) &&
-              Args[I + 2] == "-Xclang") {
-            I += 3;
-          } else
-            AdjustedArgs.push_back(Args[I]);
-        }
-        return AdjustedArgs;
-      };
-
   Tool.appendArgumentsAdjuster(PerFileExtraArgumentsInserter);
-  Tool.appendArgumentsAdjuster(PluginArgumentsRemover);
+  Tool.appendArgumentsAdjuster(getStripPluginsAdjuster());
   Context.setEnableProfiling(EnableCheckProfile);
   Context.setProfileStoragePrefix(StoreCheckProfile);
 
diff --git a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp
index c0bdbfbfe04248..bac7f898785a01 100644
--- a/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp
@@ -23,8 +23,8 @@ AST_MATCHER(CXXRecordDecl, hasMethods) {
   return std::distance(Node.method_begin(), Node.method_end()) != 0;
 }
 
-AST_MATCHER(CXXRecordDecl, hasNonStaticMethod) {
-  return hasMethod(unless(isStaticStorageClass()))
+AST_MATCHER(CXXRecordDecl, hasNonStaticNonImplicitMethod) {
+  return hasMethod(unless(anyOf(isStaticStorageClass(), isImplicit())))
       .matches(Node, Finder, Builder);
 }
 
@@ -67,10 +67,11 @@ void NonPrivateMemberVariablesInClassesCheck::registerMatchers(
       IgnorePublicMemberVariables ? isProtected() : unless(isPrivate()));
 
   // We only want the records that not only contain the mutable data (non-static
-  // member variables), but also have some logic (non-static member functions).
-  // We may optionally ignore records where all the member variables are public.
+  // member variables), but also have some logic (non-static, non-implicit
+  // member functions).  We may optionally ignore records where all the member
+  // variables are public.
   Finder->addMatcher(cxxRecordDecl(anyOf(isStruct(), isClass()), hasMethods(),
-                                   hasNonStaticMethod(),
+                                   hasNonStaticNonImplicitMethod(),
                                    unless(ShouldIgnoreRecord),
                                    forEach(InterestingField.bind("field")))
                          .bind("record"),
diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp
index dd19483295e8db..6ef45ee855244b 100644
--- a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp
@@ -31,6 +31,12 @@ AST_MATCHER(clang::RecordDecl, isExternCContext) {
   return Node.isExternCContext();
 }
 
+AST_MATCHER(clang::ParmVarDecl, isArgvOfMain) {
+  const clang::DeclContext *DC = Node.getDeclContext();
+  const auto *FD = llvm::dyn_cast<clang::FunctionDecl>(DC);
+  return FD ? FD->isMain() : false;
+}
+
 } // namespace
 
 namespace clang {
@@ -44,7 +50,8 @@ void AvoidCArraysCheck::registerMatchers(MatchFinder *Finder) {
 
   Finder->addMatcher(
       typeLoc(hasValidBeginLoc(), hasType(arrayType()),
-              unless(anyOf(hasParent(varDecl(isExternC())),
+              unless(anyOf(hasParent(parmVarDecl(isArgvOfMain())),
+                           hasParent(varDecl(isExternC())),
                            hasParent(fieldDecl(
                                hasParent(recordDecl(isExternCContext())))),
                            hasAncestor(functionDecl(isExternC())))))
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 80792eddc69a9b..97a5e89c47837b 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -290,7 +290,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
   if (UseDirBasedCDB)
     BaseCDB = llvm::make_unique<DirectoryBasedGlobalCompilationDatabase>(
         CompileCommandsDir);
-  CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags);
+  CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags,
+              ClangdServerOpts.ResourceDir);
   Server.emplace(*CDB, FSProvider, static_cast<DiagnosticsConsumer &>(*this),
                  ClangdServerOpts);
   applyConfiguration(Params.initializationOptions.ConfigSettings);
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 53a27bdb91d5f4..a4ab622e101ed6 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -38,11 +38,6 @@ namespace clang {
 namespace clangd {
 namespace {
 
-std::string getStandardResourceDir() {
-  static int Dummy; // Just an address in this process.
-  return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy);
-}
-
 class RefactoringResultCollector final
     : public tooling::RefactoringResultConsumer {
 public:
@@ -108,8 +103,6 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB,
                            DiagnosticsConsumer &DiagConsumer,
                            const Options &Opts)
     : CDB(CDB), FSProvider(FSProvider),
-      ResourceDir(Opts.ResourceDir ? *Opts.ResourceDir
-                                   : getStandardResourceDir()),
       DynamicIdx(Opts.BuildDynamicSymbolIndex
                      ? new FileIndex(Opts.HeavyweightDynamicSymbolIndex)
                      : nullptr),
@@ -137,7 +130,7 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB,
     AddIndex(Opts.StaticIndex);
   if (Opts.BackgroundIndex) {
     BackgroundIdx = llvm::make_unique<BackgroundIndex>(
-        Context::current().clone(), ResourceDir, FSProvider, CDB,
+        Context::current().clone(), FSProvider, CDB,
         BackgroundIndexStorage::createDiskBackedStorageFactory(),
         Opts.BackgroundIndexRebuildPeriodMs);
     AddIndex(BackgroundIdx.get());
@@ -462,10 +455,6 @@ tooling::CompileCommand ClangdServer::getCompileCommand(PathRef File) {
   llvm::Optional<tooling::CompileCommand> C = CDB.getCompileCommand(File);
   if (!C) // FIXME: Suppress diagnostics? Let the user know?
     C = CDB.getFallbackCommand(File);
-
-  // Inject the resource dir.
-  // FIXME: Don't overwrite it if it's already there.
-  C->CommandLine.push_back("-resource-dir=" + ResourceDir);
   return std::move(*C);
 }
 
diff --git a/clang-tools-extra/clangd/ExpectedTypes.cpp b/clang-tools-extra/clangd/ExpectedTypes.cpp
index 4bbf0651260b6e..59d9e149162a69 100644
--- a/clang-tools-extra/clangd/ExpectedTypes.cpp
+++ b/clang-tools-extra/clangd/ExpectedTypes.cpp
@@ -35,8 +35,10 @@ static llvm::Optional<QualType>
 typeOfCompletion(const CodeCompletionResult &R) {
   auto *VD = dyn_cast_or_null<ValueDecl>(R.Declaration);
   if (!VD)
-    return None; // We handle only variables and functions below.
+    return llvm::None; // We handle only variables and functions below.
   auto T = VD->getType();
+  if (T.isNull())
+    return llvm::None;
   if (auto FuncT = T->getAs<FunctionType>()) {
     // Functions are a special case. They are completed as 'foo()' and we want
     // to match their return type rather than the function type itself.
diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
index c2fff7b20f37bb..8c7aa194a1f8e2 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
@@ -9,12 +9,36 @@
 
 #include "GlobalCompilationDatabase.h"
 #include "Logger.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Tooling/ArgumentsAdjusters.h"
 #include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 
 namespace clang {
 namespace clangd {
+namespace {
+
+void adjustArguments(tooling::CompileCommand &Cmd,
+                     llvm::StringRef ResourceDir) {
+  // Strip plugin related command line arguments. Clangd does
+  // not support plugins currently. Therefore it breaks if
+  // compiler tries to load plugins.
+  Cmd.CommandLine =
+      tooling::getStripPluginsAdjuster()(Cmd.CommandLine, Cmd.Filename);
+  // Inject the resource dir.
+  // FIXME: Don't overwrite it if it's already there.
+  if (!ResourceDir.empty())
+    Cmd.CommandLine.push_back(("-resource-dir=" + ResourceDir).str());
+}
+
+std::string getStandardResourceDir() {
+  static int Dummy; // Just an address in this process.
+  return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy);
+}
+
+} // namespace
 
 static std::string getFallbackClangPath() {
   static int Dummy;
@@ -106,8 +130,11 @@ DirectoryBasedGlobalCompilationDatabase::getCDBForFile(
 }
 
 OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
-                       std::vector<std::string> FallbackFlags)
-    : Base(Base), FallbackFlags(std::move(FallbackFlags)) {
+                       std::vector<std::string> FallbackFlags,
+                       llvm::Optional<std::string> ResourceDir)
+    : Base(Base), ResourceDir(ResourceDir ? std::move(*ResourceDir)
+                                          : getStandardResourceDir()),
+      FallbackFlags(std::move(FallbackFlags)) {
   if (Base)
     BaseChanged = Base->watch([this](const std::vector<std::string> Changes) {
       OnCommandChanged.broadcast(Changes);
@@ -116,16 +143,22 @@ OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
 
 llvm::Optional<tooling::CompileCommand>
 OverlayCDB::getCompileCommand(PathRef File, ProjectInfo *Project) const {
+  llvm::Optional<tooling::CompileCommand> Cmd;
   {
     std::lock_guard<std::mutex> Lock(Mutex);
     auto It = Commands.find(File);
     if (It != Commands.end()) {
       if (Project)
         Project->SourceRoot = "";
-      return It->second;
+      Cmd = It->second;
     }
   }
-  return Base ? Base->getCompileCommand(File, Project) : None;
+  if (!Cmd && Base)
+    Cmd = Base->getCompileCommand(File, Project);
+  if (!Cmd)
+    return llvm::None;
+  adjustArguments(*Cmd, ResourceDir);
+  return Cmd;
 }
 
 tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h
index 181b1781f73b35..6411fd48ced006 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h
@@ -12,6 +12,7 @@
 
 #include "Function.h"
 #include "Path.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringMap.h"
 #include <memory>
 #include <mutex>
@@ -98,7 +99,8 @@ class OverlayCDB : public GlobalCompilationDatabase {
   // Base may be null, in which case no entries are inherited.
   // FallbackFlags are added to the fallback compile command.
   OverlayCDB(const GlobalCompilationDatabase *Base,
-             std::vector<std::string> FallbackFlags = {});
+             std::vector<std::string> FallbackFlags = {},
+             llvm::Optional<std::string> ResourceDir = llvm::None);
 
   llvm::Optional<tooling::CompileCommand>
   getCompileCommand(PathRef File, ProjectInfo * = nullptr) const override;
@@ -113,6 +115,7 @@ class OverlayCDB : public GlobalCompilationDatabase {
   mutable std::mutex Mutex;
   llvm::StringMap<tooling::CompileCommand> Commands; /* GUARDED_BY(Mut) */
   const GlobalCompilationDatabase *Base;
+  std::string ResourceDir;
   std::vector<std::string> FallbackFlags;
   CommandChanged::Subscription BaseChanged;
 };
diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index b9b945c52bdaf4..752d5c2876e000 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -127,13 +127,12 @@ llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
 } // namespace
 
 BackgroundIndex::BackgroundIndex(
-    Context BackgroundContext, llvm::StringRef ResourceDir,
-    const FileSystemProvider &FSProvider, const GlobalCompilationDatabase &CDB,
+    Context BackgroundContext, const FileSystemProvider &FSProvider,
+    const GlobalCompilationDatabase &CDB,
     BackgroundIndexStorage::Factory IndexStorageFactory,
     size_t BuildIndexPeriodMs, size_t ThreadPoolSize)
-    : SwapIndex(llvm::make_unique<MemIndex>()), ResourceDir(ResourceDir),
-      FSProvider(FSProvider), CDB(CDB),
-      BackgroundContext(std::move(BackgroundContext)),
+    : SwapIndex(llvm::make_unique<MemIndex>()), FSProvider(FSProvider),
+      CDB(CDB), BackgroundContext(std::move(BackgroundContext)),
       BuildIndexPeriodMs(BuildIndexPeriodMs),
       SymbolsUpdatedSinceLastIndex(false),
       IndexStorageFactory(std::move(IndexStorageFactory)),
@@ -230,7 +229,6 @@ void BackgroundIndex::enqueue(tooling::CompileCommand Cmd,
                               BackgroundIndexStorage *Storage) {
   enqueueTask(Bind(
                   [this, Storage](tooling::CompileCommand Cmd) {
-                    Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
                     // We can't use llvm::StringRef here since we are going to
                     // move from Cmd during the call below.
                     const std::string FileName = Cmd.Filename;
diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h
index 1a1fee68a571ef..81675be55b5a05 100644
--- a/clang-tools-extra/clangd/index/Background.h
+++ b/clang-tools-extra/clangd/index/Background.h
@@ -68,9 +68,7 @@ class BackgroundIndex : public SwapIndex {
   /// If BuildIndexPeriodMs is greater than 0, the symbol index will only be
   /// rebuilt periodically (one per \p BuildIndexPeriodMs); otherwise, index is
   /// rebuilt for each indexed file.
-  // FIXME: resource-dir injection should be hoisted somewhere common.
-  BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir,
-                  const FileSystemProvider &,
+  BackgroundIndex(Context BackgroundContext, const FileSystemProvider &,
                   const GlobalCompilationDatabase &CDB,
                   BackgroundIndexStorage::Factory IndexStorageFactory,
                   size_t BuildIndexPeriodMs = 0,
@@ -99,7 +97,6 @@ class BackgroundIndex : public SwapIndex {
               BackgroundIndexStorage *IndexStorage);
 
   // configuration
-  std::string ResourceDir;
   const FileSystemProvider &FSProvider;
   const GlobalCompilationDatabase &CDB;
   Context BackgroundContext;
diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst
new file mode 100644
index 00000000000000..6d61809ecb337a
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst
@@ -0,0 +1,512 @@
+================
+Getting Involved
+================
+
+:program:`clang-tidy` ships several checks of its own and can run Clang static
+analyzer checks, but its power is in the ability to easily write custom checks.
+
+Checks are organized in modules, which can be linked into :program:`clang-tidy`
+with minimal or no code changes in :program:`clang-tidy`.
+
+Checks can plug into the analysis on the preprocessor level using `PPCallbacks`_
+or on the AST level using `AST Matchers`_. When an error is found, checks can
+report it in a way similar to how Clang diagnostics work. A fix-it hint can be
+attached to a diagnostic message.
+
+The interface provided by :program:`clang-tidy` makes it easy to write useful
+and precise checks in just a few lines of code. If you have an idea for a good
+check, the rest of this document explains how to do this.
+
+There are a few tools particularly useful when developing clang-tidy checks:
+  * ``add_new_check.py`` is a script to automate the process of adding a new
+    check, it will create the check, update the CMake file and create a test;
+  * ``rename_check.py`` does what the script name suggests, renames an existing
+    check;
+  * :program:`clang-query` is invaluable for interactive prototyping of AST
+    matchers and exploration of the Clang AST;
+  * `clang-check`_ with the ``-ast-dump`` (and optionally ``-ast-dump-filter``)
+    provides a convenient way to dump AST of a C++ program.
+
+If CMake is configured with ``CLANG_ENABLE_STATIC_ANALYZER=OFF``,
+:program:`clang-tidy` will not be built with support for the
+``clang-analyzer-*`` checks or the ``mpi-*`` checks.
+
+
+.. _AST Matchers: https://clang.llvm.org/docs/LibASTMatchers.html
+.. _PPCallbacks: https://clang.llvm.org/doxygen/classclang_1_1PPCallbacks.html
+.. _clang-check: https://clang.llvm.org/docs/ClangCheck.html
+
+
+Choosing the Right Place for your Check
+---------------------------------------
+
+If you have an idea for a check, you should decide whether it should be
+implemented as a:
+
++ *Clang diagnostic*: if the check is generic enough, targets code patterns that
+  most probably are bugs (rather than style or readability issues), can be
+  implemented effectively and with extremely low false positive rate, it may
+  make a good Clang diagnostic.
+
++ *Clang static analyzer check*: if the check requires some sort of control flow
+  analysis, it should probably be implemented as a static analyzer check.
+
++ *clang-tidy check* is a good choice for linter-style checks, checks that are
+  related to a certain coding style, checks that address code readability, etc.
+
+
+Preparing your Workspace
+------------------------
+
+If you are new to LLVM development, you should read the `Getting Started with
+the LLVM System`_, `Using Clang Tools`_ and `How To Setup Clang Tooling For
+LLVM`_ documents to check out and build LLVM, Clang and Clang Extra Tools with
+CMake.
+
+Once you are done, change to the ``llvm/tools/clang/tools/extra`` directory, and
+let's start!
+
+.. _Getting Started with the LLVM System: https://llvm.org/docs/GettingStarted.html
+.. _Using Clang Tools: https://clang.llvm.org/docs/ClangTools.html
+.. _How To Setup Clang Tooling For LLVM: https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
+
+
+The Directory Structure
+-----------------------
+
+:program:`clang-tidy` source code resides in the
+``llvm/tools/clang/tools/extra`` directory and is structured as follows:
+
+::
+
+  clang-tidy/                       # Clang-tidy core.
+  |-- ClangTidy.h                   # Interfaces for users and checks.
+  |-- ClangTidyModule.h             # Interface for clang-tidy modules.
+  |-- ClangTidyModuleRegistry.h     # Interface for registering of modules.
+     ...
+  |-- google/                       # Google clang-tidy module.
+  |-+
+    |-- GoogleTidyModule.cpp
+    |-- GoogleTidyModule.h
+          ...
+  |-- llvm/                         # LLVM clang-tidy module.
+  |-+
+    |-- LLVMTidyModule.cpp
+    |-- LLVMTidyModule.h
+          ...
+  |-- objc/                         # Objective-C clang-tidy module.
+  |-+
+    |-- ObjCTidyModule.cpp
+    |-- ObjCTidyModule.h
+          ...
+  |-- tool/                         # Sources of the clang-tidy binary.
+          ...
+  test/clang-tidy/                  # Integration tests.
+      ...
+  unittests/clang-tidy/             # Unit tests.
+  |-- ClangTidyTest.h
+  |-- GoogleModuleTest.cpp
+  |-- LLVMModuleTest.cpp
+  |-- ObjCModuleTest.cpp
+      ...
+
+
+Writing a clang-tidy Check
+--------------------------
+
+So you have an idea of a useful check for :program:`clang-tidy`.
+
+First, if you're not familiar with LLVM development, read through the `Getting
+Started with LLVM`_ document for instructions on setting up your workflow and
+the `LLVM Coding Standards`_ document to familiarize yourself with the coding
+style used in the project. For code reviews we mostly use `LLVM Phabricator`_.
+
+.. _Getting Started with LLVM: https://llvm.org/docs/GettingStarted.html
+.. _LLVM Coding Standards: https://llvm.org/docs/CodingStandards.html
+.. _LLVM Phabricator: https://llvm.org/docs/Phabricator.html
+
+Next, you need to decide which module the check belongs to. Modules
+are located in subdirectories of `clang-tidy/
+<https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/>`_
+and contain checks targeting a certain aspect of code quality (performance,
+readability, etc.), certain coding style or standard (Google, LLVM, CERT, etc.)
+or a widely used API (e.g. MPI). Their names are the same as the user-facing
+check group names listed in the :ref:`groups table <checks-groups-table>`.
+
+After choosing the module and the name for the check, run the
+``clang-tidy/add_new_check.py`` script to create the skeleton of the check and
+plug it into :program:`clang-tidy`. It's the recommended way of adding new checks.
+
+If we want to create a ``readability-awesome-function-names`` check, we would run:
+
+.. code-block:: console
+
+  $ clang-tidy/add_new_check.py readability awesome-function-names
+
+
+The ``add_new_check.py`` script will:
+  * create the class for your check inside the specified module's directory and
+    register it in the module and in the build system;
+  * create a lit test file in the ``test/clang-tidy/`` directory;
+  * create a documentation file and include it in
+    ``docs/clang-tidy/checks/list.rst``.
+
+Let's look in more detail at the check class definition:
+
+.. code-block:: c++
+
+  ...
+
+  #include "../ClangTidy.h"
+
+  namespace clang {
+  namespace tidy {
+  namespace readability {
+
+  ...
+  class AwesomeFunctionNamesCheck : public ClangTidyCheck {
+  public:
+    AwesomeFunctionNamesCheck(StringRef Name, ClangTidyContext *Context)
+        : ClangTidyCheck(Name, Context) {}
+    void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+    void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  };
+
+  } // namespace readability
+  } // namespace tidy
+  } // namespace clang
+
+  ...
+
+The constructor of the check receives the ``Name`` and ``Context`` parameters
+and must forward them to the ``ClangTidyCheck`` constructor.
+
+In our case the check needs to operate on the AST level and it overrides the
+``registerMatchers`` and ``check`` methods. If we wanted to analyze code on the
+preprocessor level, we'd need instead to override the ``registerPPCallbacks``
+method.
+
+In the ``registerMatchers`` method we create an AST Matcher (see `AST Matchers`_
+for more information) that will find the pattern in the AST that we want to
+inspect. The results of the matching are passed to the ``check`` method, which
+can further inspect them and report diagnostics.
+
+.. code-block:: c++
+
+  using namespace ast_matchers;
+
+  void AwesomeFunctionNamesCheck::registerMatchers(MatchFinder *Finder) {
+    Finder->addMatcher(functionDecl().bind("x"), this);
+  }
+
+  void AwesomeFunctionNamesCheck::check(const MatchFinder::MatchResult &Result) {
+    const auto *MatchedDecl = Result.Nodes.getNodeAs<FunctionDecl>("x");
+    if (MatchedDecl->getName().startswith("awesome_"))
+      return;
+    diag(MatchedDecl->getLocation(), "function %0 is insufficiently awesome")
+        << MatchedDecl
+        << FixItHint::CreateInsertion(MatchedDecl->getLocation(), "awesome_");
+  }
+
+(If you want to see an example of a useful check, look at
+`clang-tidy/google/ExplicitConstructorCheck.h
+<https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/google/ExplicitConstructorCheck.h>`_
+and `clang-tidy/google/ExplicitConstructorCheck.cpp
+<https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/google/ExplicitConstructorCheck.cpp>`_).
+
+
+Registering your Check
+----------------------
+
+(The ``add_new_check.py`` script takes care of registering the check in an
+existing module. To create a new module or learn the details, read on.)
+
+The check should be registered in the corresponding module with a distinct name:
+
+.. code-block:: c++
+
+  class MyModule : public ClangTidyModule {
+   public:
+    void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+      CheckFactories.registerCheck<ExplicitConstructorCheck>(
+          "my-explicit-constructor");
+    }
+  };
+
+Now we need to register the module in the ``ClangTidyModuleRegistry`` using a
+statically initialized variable:
+
+.. code-block:: c++
+
+  static ClangTidyModuleRegistry::Add<MyModule> X("my-module",
+                                                  "Adds my lint checks.");
+
+
+When using the LLVM build system, we need to use the following hack to ensure
+the module is linked into the :program:`clang-tidy` binary:
+
+Add this near the ``ClangTidyModuleRegistry::Add<MyModule>`` variable:
+
+.. code-block:: c++
+
+  // This anchor is used to force the linker to link in the generated object file
+  // and thus register the MyModule.
+  volatile int MyModuleAnchorSource = 0;
+
+And this to the main translation unit of the :program:`clang-tidy` binary (or
+the binary you link the ``clang-tidy`` library in)
+``clang-tidy/tool/ClangTidyMain.cpp``:
+
+.. code-block:: c++
+
+  // This anchor is used to force the linker to link the MyModule.
+  extern volatile int MyModuleAnchorSource;
+  static int MyModuleAnchorDestination = MyModuleAnchorSource;
+
+
+Configuring Checks
+------------------
+
+If a check needs configuration options, it can access check-specific options
+using the ``Options.get<Type>("SomeOption", DefaultValue)`` call in the check
+constructor. In this case the check should also override the
+``ClangTidyCheck::storeOptions`` method to make the options provided by the
+check discoverable. This method lets :program:`clang-tidy` know which options
+the check implements and what the current values are (e.g. for the
+``-dump-config`` command line option).
+
+.. code-block:: c++
+
+  class MyCheck : public ClangTidyCheck {
+    const unsigned SomeOption1;
+    const std::string SomeOption2;
+
+  public:
+    MyCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context),
+        SomeOption1(Options.get("SomeOption1", -1U)),
+        SomeOption2(Options.get("SomeOption2", "some default")) {}
+
+    void storeOptions(ClangTidyOptions::OptionMap &Opts) override {
+      Options.store(Opts, "SomeOption1", SomeOption1);
+      Options.store(Opts, "SomeOption2", SomeOption2);
+    }
+    ...
+
+Assuming the check is registered with the name "my-check", the options can then
+be set in a ``.clang-tidy`` file in the following way:
+
+.. code-block:: yaml
+
+  CheckOptions:
+    - key: my-check.SomeOption1
+      value: 123
+    - key: my-check.SomeOption2
+      value: 'some other value'
+
+If you need to specify check options on a command line, you can use the inline
+YAML format:
+
+.. code-block:: console
+
+  $ clang-tidy -config="{CheckOptions: [{key: a, value: b}, {key: x, value: y}]}" ...
+
+
+Testing Checks
+--------------
+
+To run tests for :program:`clang-tidy` use the command:
+
+.. code-block:: console
+
+  $ ninja check-clang-tools
+
+:program:`clang-tidy` checks can be tested using either unit tests or
+`lit`_ tests. Unit tests may be more convenient to test complex replacements
+with strict checks. `Lit`_ tests allow using partial text matching and regular
+expressions which makes them more suitable for writing compact tests for
+diagnostic messages.
+
+The ``check_clang_tidy.py`` script provides an easy way to test both
+diagnostic messages and fix-its. It filters out ``CHECK`` lines from the test
+file, runs :program:`clang-tidy` and verifies messages and fixes with two
+separate `FileCheck`_ invocations: once with FileCheck's directive
+prefix set to ``CHECK-MESSAGES``, validating the diagnostic messages,
+and once with the directive prefix set to ``CHECK-FIXES``, running
+against the fixed code (i.e., the code after generated fix-its are
+applied). In particular, ``CHECK-FIXES:`` can be used to check
+that code was not modified by fix-its, by checking that it is present
+unchanged in the fixed code. The full set of `FileCheck`_ directives
+is available (e.g., ``CHECK-MESSAGES-SAME:``, ``CHECK-MESSAGES-NOT:``), though
+typically the basic ``CHECK`` forms (``CHECK-MESSAGES`` and ``CHECK-FIXES``)
+are sufficient for clang-tidy tests. Note that the `FileCheck`_
+documentation mostly assumes the default prefix (``CHECK``), and hence
+describes the directive as ``CHECK:``, ``CHECK-SAME:``, ``CHECK-NOT:``, etc.
+Replace ``CHECK`` by either ``CHECK-FIXES`` or ``CHECK-MESSAGES`` for
+clang-tidy tests.
+
+An additional check enabled by ``check_clang_tidy.py`` ensures that
+if ``CHECK-MESSAGES:`` is used in a file then every warning or error
+must have an associated CHECK in that file. Or, you can use ``CHECK-NOTES:``
+instead, if you want to **also** ensure that all the notes are checked.
+
+To use the ``check_clang_tidy.py`` script, put a .cpp file with the
+appropriate ``RUN`` line in the ``test/clang-tidy`` directory. Use
+``CHECK-MESSAGES:`` and ``CHECK-FIXES:`` lines to write checks against
+diagnostic messages and fixed code.
+
+It's advised to make the checks as specific as possible to avoid checks matching
+incorrect parts of the input. Use ``[[@LINE+X]]``/``[[@LINE-X]]``
+substitutions and distinct function and variable names in the test code.
+
+Here's an example of a test using the ``check_clang_tidy.py`` script (the full
+source code is at `test/clang-tidy/google-readability-casting.cpp`_):
+
+.. code-block:: c++
+
+  // RUN: %check_clang_tidy %s google-readability-casting %t
+
+  void f(int a) {
+    int b = (int)a;
+    // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: redundant cast to the same type [google-readability-casting]
+    // CHECK-FIXES: int b = a;
+  }
+
+To check more than one scenario in the same test file use
+``-check-suffix=SUFFIX-NAME`` on ``check_clang_tidy.py`` command line or
+``-check-suffixes=SUFFIX-NAME-1,SUFFIX-NAME-2,...``.
+With ``-check-suffix[es]=SUFFIX-NAME`` you need to replace your ``CHECK-*``
+directives with ``CHECK-MESSAGES-SUFFIX-NAME`` and ``CHECK-FIXES-SUFFIX-NAME``.
+
+Here's an example:
+
+.. code-block:: c++
+
+   // RUN: %check_clang_tidy -check-suffix=USING-A %s misc-unused-using-decls %t -- -- -DUSING_A
+   // RUN: %check_clang_tidy -check-suffix=USING-B %s misc-unused-using-decls %t -- -- -DUSING_B
+   // RUN: %check_clang_tidy %s misc-unused-using-decls %t
+   ...
+   // CHECK-MESSAGES-USING-A: :[[@LINE-8]]:10: warning: using decl 'A' {{.*}}
+   // CHECK-MESSAGES-USING-B: :[[@LINE-7]]:10: warning: using decl 'B' {{.*}}
+   // CHECK-MESSAGES: :[[@LINE-6]]:10: warning: using decl 'C' {{.*}}
+   // CHECK-FIXES-USING-A-NOT: using a::A;$
+   // CHECK-FIXES-USING-B-NOT: using a::B;$
+   // CHECK-FIXES-NOT: using a::C;$
+
+
+There are many dark corners in the C++ language, and it may be difficult to make
+your check work perfectly in all cases, especially if it issues fix-it hints. The
+most frequent pitfalls are macros and templates:
+
+1. code written in a macro body/template definition may have a different meaning
+   depending on the macro expansion/template instantiation;
+2. multiple macro expansions/template instantiations may result in the same code
+   being inspected by the check multiple times (possibly, with different
+   meanings, see 1), and the same warning (or a slightly different one) may be
+   issued by the check multiple times; :program:`clang-tidy` will deduplicate
+   *identical* warnings, but if the warnings are slightly different, all of them
+   will be shown to the user (and used for applying fixes, if any);
+3. making replacements to a macro body/template definition may be fine for some
+   macro expansions/template instantiations, but easily break some other
+   expansions/instantiations.
+
+.. _lit: https://llvm.org/docs/CommandGuide/lit.html
+.. _FileCheck: https://llvm.org/docs/CommandGuide/FileCheck.html
+.. _test/clang-tidy/google-readability-casting.cpp: https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/test/clang-tidy/google-readability-casting.cpp
+
+
+Running clang-tidy on LLVM
+--------------------------
+
+To test a check it's best to try it out on a larger code base. LLVM and Clang
+are the natural targets as you already have the source code around. The most
+convenient way to run :program:`clang-tidy` is with a compile command database.
+CMake can automatically generate one; for a description of how to enable it, see
+`How To Setup Clang Tooling For LLVM`_. Once ``compile_commands.json`` is in
+place and a working version of :program:`clang-tidy` is in ``PATH`` the entire
+code base can be analyzed with ``clang-tidy/tool/run-clang-tidy.py``. The script
+executes :program:`clang-tidy` with the default set of checks on every
+translation unit in the compile command database and displays the resulting
+warnings and errors. The script provides multiple configuration flags.
+
+.. _How To Setup Clang Tooling For LLVM: https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
+
+
+* The default set of checks can be overridden using the ``-checks`` argument,
+  which uses the same format as :program:`clang-tidy` does. For example,
+  ``-checks=-*,modernize-use-override`` will run the ``modernize-use-override``
+  check only.
+
+* To restrict the files examined you can provide one or more regex arguments
+  that the file names are matched against.
+  ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze clang-tidy
+  checks. It may also be necessary to restrict the header files that warnings
+  are displayed from by using the ``-header-filter`` flag. It has the same behavior
+  as the corresponding :program:`clang-tidy` flag.
+
+* To apply suggested fixes ``-fix`` can be passed as an argument. This gathers
+  all changes in a temporary directory and applies them. Passing ``-format``
+  will run clang-format over changed lines.
+
+
+On checks profiling
+-------------------
+
+:program:`clang-tidy` can collect per-check profiling info, and output it
+for each processed source file (translation unit).
+
+To enable profiling info collection, use the ``-enable-check-profile`` argument.
+The timings will be output to ``stderr`` as a table. Example output:
+
+.. code-block:: console
+
+  $ clang-tidy -enable-check-profile -checks=-*,readability-function-size source.cpp
+  ===-------------------------------------------------------------------------===
+                            clang-tidy checks profiling
+  ===-------------------------------------------------------------------------===
+    Total Execution Time: 1.0282 seconds (1.0258 wall clock)
+
+     ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
+     0.9136 (100.0%)   0.1146 (100.0%)   1.0282 (100.0%)   1.0258 (100.0%)  readability-function-size
+     0.9136 (100.0%)   0.1146 (100.0%)   1.0282 (100.0%)   1.0258 (100.0%)  Total
+
+It can also store that data as JSON files for further processing. Example output:
+
+.. code-block:: console
+
+  $ clang-tidy -enable-check-profile -store-check-profile=. -checks=-*,readability-function-size source.cpp
+  $ # Note that there won't be timings table printed to the console.
+  $ ls *.json
+  20180516161318717446360-source.cpp.json
+  $ cat 20180516161318717446360-source.cpp.json
+  {
+  "file": "/path/to/source.cpp",
+  "timestamp": "2018-05-16 16:13:18.717446360",
+  "profile": {
+    "time.clang-tidy.readability-function-size.wall": 1.0421266555786133e+00,
+    "time.clang-tidy.readability-function-size.user": 9.2088400000005421e-01,
+    "time.clang-tidy.readability-function-size.sys": 1.2418899999999974e-01
+  }
+  }
+
+There is only one argument that controls profile storage:
+
+* ``-store-check-profile=<prefix>``
+
+  By default reports are printed in tabulated format to stderr. When this option
+  is passed, these per-TU profiles are instead stored as JSON.
+  If the prefix is not an absolute path, it is considered to be relative to the
+  directory from where you have run :program:`clang-tidy`. All ``.`` and ``..``
+  patterns in the path are collapsed, and symlinks are resolved.
+
+  Example:
+  Let's suppose you have a source file named ``example.cpp``, located in the
+  ``/source`` directory. Only the input filename is used, not the full path
+  to the source file. Additionally, it is prefixed with the current timestamp.
+
+  * If you specify ``-store-check-profile=/tmp``, then the profile will be saved
+    to ``/tmp/<ISO8601-like timestamp>-example.cpp.json``
+
+  * If you run :program:`clang-tidy` from within ``/foo`` directory, and specify
+    ``-store-check-profile=.``, then the profile will still be saved to
+    ``/foo/<ISO8601-like timestamp>-example.cpp.json``
diff --git a/clang-tools-extra/docs/clang-tidy/Integrations.rst b/clang-tools-extra/docs/clang-tidy/Integrations.rst
new file mode 100644
index 00000000000000..2d1e195645f5df
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/Integrations.rst
@@ -0,0 +1,117 @@
+==================================
+Clang-tidy IDE/Editor Integrations
+==================================
+
+.. _Clangd: https://clang.llvm.org/extra/clangd.html
+
+Apart from being a standalone tool, :program:`clang-tidy` is integrated into
+various IDEs, code analyzers, and editors. Besides, it is currently being
+integrated into Clangd_. The following table shows the most
+well-known :program:`clang-tidy` integrations in detail.
+
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|                                      |        Feature                                                                                                                                           |
++======================================+========================+=================================+==========================+=========================================+==========================+
+|  **Tool**                            | On-the-fly inspection  | Check list configuration (GUI)  | Options to checks (GUI)  | Configuration via ``.clang-tidy`` files | Custom clang-tidy binary |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|A.L.E. for Vim                        |         \+\            |               \-\               |           \-\            |                 \-\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|Clang Power Tools for Visual Studio   |         \-\            |               \+\               |           \-\            |                 \+\                     |           \-\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|Clangd                                |         \+\            |               \-\               |           \-\            |                 \-\                     |           \-\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|CLion IDE                             |         \+\            |               \+\               |           \+\            |                 \+\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|CodeChecker                           |         \-\            |               \-\               |           \-\            |                 \-\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|CPPCheck                              |         \-\            |               \-\               |           \-\            |                 \-\                     |           \-\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|CPPDepend                             |         \-\            |               \-\               |           \-\            |                 \-\                     |           \-\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|Flycheck for Emacs                    |         \+\            |               \-\               |           \-\            |                 \+\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|KDevelop IDE                          |         \-\            |               \+\               |           \+\            |                 \+\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|Qt Creator IDE                        |         \+\            |               \+\               |           \-\            |                 \-\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|ReSharper C++ for Visual Studio       |         \+\            |               \+\               |           \-\            |                 \+\                     |           \-\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|Syntastic for Vim                     |         \+\            |               \-\               |           \-\            |                 \-\                     |           \+\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+|Visual Assist for Visual Studio       |         \+\            |               \+\               |           \-\            |                 \-\                     |           \-\            |
++--------------------------------------+------------------------+---------------------------------+--------------------------+-----------------------------------------+--------------------------+
+
+**IDEs**
+
+.. _CLion: https://www.jetbrains.com/clion/
+.. _integrates clang-tidy: https://www.jetbrains.com/help/clion/clang-tidy-checks-support.html
+
+CLion_ 2017.2 and later `integrates clang-tidy`_ as an extension to the
+built-in code analyzer. Starting from 2018.2 EAP, CLion allows using
+:program:`clang-tidy` via Clangd. Inspections and applicable quick-fixes are
+performed on the fly, and checks can be configured in standard command line
+format. In this integration, you can switch to a :program:`clang-tidy`
+binary different from the bundled one, pass the configuration in
+``.clang-tidy`` files instead of using the IDE settings, and configure
+options for particular checks.
+
+.. _KDevelop: https://www.kdevelop.org/
+.. _kdev-clang-tidy: https://github.com/KDE/kdev-clang-tidy/
+
+KDevelop_ with the kdev-clang-tidy_ plugin, starting from version 5.1, performs
+static analysis using :program:`clang-tidy`. The plugin launches the
+:program:`clang-tidy` binary from the specified location and parses its
+output to provide a list of issues.
+
+.. _QtCreator: https://www.qt.io/
+.. _Clang Code Model: http://doc.qt.io/qtcreator/creator-clang-codemodel.html
+
+QtCreator_ 4.6 integrates :program:`clang-tidy` warnings into the editor
+diagnostics under the `Clang Code Model`_. To employ :program:`clang-tidy`
+inspection in QtCreator, you need to create a copy of one of the presets and
+choose the checks to be performed in the Clang Code Model Warnings menu.
+
+.. _MS Visual Studio: https://visualstudio.microsoft.com/
+.. _ReSharper C++: https://www.jetbrains.com/help/resharper/Clang_Tidy_Integration.html
+.. _Visual Assist: https://docs.wholetomato.com/default.asp?W761
+.. _Clang Power Tools: https://marketplace.visualstudio.com/items?itemName=caphyon.ClangPowerTools
+.. _clang-tidy-vs: https://github.com/llvm-mirror/clang-tools-extra/tree/master/clang-tidy-vs
+
+`MS Visual Studio`_ has a native clang-tidy-vs_ plugin and also can integrate
+:program:`clang-tidy` by means of three other tools. The `ReSharper C++`_
+extension, version 2017.3 and later, provides seamless :program:`clang-tidy`
+integration: checks and quick-fixes run alongside native inspections. Apart
+from that, ReSharper C++ incorporates :program:`clang-tidy` as a separate
+step of its code clean-up process. `Visual Assist`_ build 2210 includes a
+subset of the :program:`clang-tidy` checks to inspect the code as you edit.
+Another way to bring :program:`clang-tidy` functionality to Visual Studio is
+the `Clang Power Tools`_ plugin, which includes most of the
+:program:`clang-tidy` checks and runs them during compilation or as a separate
+step of code analysis.
+
+**Editors**
+
+.. _Flycheck: https://github.com/ch1bo/flycheck-clang-tidy
+.. _Syntastic: https://github.com/vim-syntastic/syntastic
+.. _A.L.E.: https://github.com/w0rp/ale
+.. _Emacs24: https://www.gnu.org/s/emacs/
+.. _Vim: https://www.vim.org/
+
+Emacs24_, when extended with the Flycheck_ plugin, incorporates the
+:program:`clang-tidy` inspection into the syntax analyzer. For Vim_, you can
+use Syntastic_, which includes :program:`clang-tidy`, or `A.L.E.`_,
+a lint engine that applies :program:`clang-tidy` along with other linters.
+
+**Analyzers**
+
+.. _CPPDepend: https://www.cppdepend.com/cppdependv2018
+.. _CPPCheck: https://sourceforge.net/p/cppcheck/news/
+.. _CodeChecker: https://github.com/Ericsson/codechecker
+.. _plugin: https://github.com/Ericsson/CodeCheckerEclipsePlugin
+
+:program:`clang-tidy` is integrated in CPPDepend_ starting from version 2018.1
+and CPPCheck_ 1.82. CPPCheck integration lets you import Visual Studio
+solutions and run the :program:`clang-tidy` inspection on them. The
+CodeChecker_ application of version 5.3 or later, which also comes as a plugin_
+for Eclipse, supports :program:`clang-tidy` as a static analysis instrument and
+allows using a custom :program:`clang-tidy` binary.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst
index db88c9b1cffd81..57990622e60cdd 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/misc-non-private-member-variables-in-classes.rst
@@ -6,11 +6,11 @@ misc-non-private-member-variables-in-classes
 `cppcoreguidelines-non-private-member-variables-in-classes` redirects here
 as an alias for this check.
 
-Finds classes that contain non-static data members in addition to non-static
-member functions and diagnose all data members declared with a non-``public``
-access specifier. The data members should be declared as ``private`` and
-accessed through member functions instead of exposed to derived classes or
-class consumers.
+Finds classes that contain non-static data members in addition to user-declared
+non-static member functions and diagnoses all data members declared with a
+non-``private`` access specifier. The data members should be declared as
+``private`` and accessed through member functions instead of exposed to derived
+classes or class consumers.
 
 Options
 -------
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst
index 8f856a524b2d51..d7bc7474e27f87 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-c-arrays.rst
@@ -54,3 +54,7 @@ such headers between C code, and C++ code.
   }
 
   }
+
+Similarly, the ``main()`` function is ignored. Its second and third parameters
+can be either ``char* argv[]`` or ``char** argv``, but cannot be
+``std::array<>``.
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index 20b18b4bf5989a..4172d13487c53c 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -10,6 +10,8 @@ See also:
    :maxdepth: 1
 
    The list of clang-tidy checks <checks/list>
+   Clang-tidy IDE/Editor Integrations <Integrations>
+   Getting Involved <Contributing>
 
 :program:`clang-tidy` is a clang-based C++ "linter" tool. Its purpose is to
 provide an extensible framework for diagnosing and fixing typical programming
@@ -310,511 +312,3 @@ the parenthesis) whitespaces can be used and will be ignored.
 
 .. _LibTooling: http://clang.llvm.org/docs/LibTooling.html
 .. _How To Setup Tooling For LLVM: http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
-
-
-Getting Involved
-================
-
-:program:`clang-tidy` has several own checks and can run Clang static analyzer
-checks, but its power is in the ability to easily write custom checks.
-
-Checks are organized in modules, which can be linked into :program:`clang-tidy`
-with minimal or no code changes in :program:`clang-tidy`.
-
-Checks can plug into the analysis on the preprocessor level using `PPCallbacks`_
-or on the AST level using `AST Matchers`_. When an error is found, checks can
-report them in a way similar to how Clang diagnostics work. A fix-it hint can be
-attached to a diagnostic message.
-
-The interface provided by :program:`clang-tidy` makes it easy to write useful
-and precise checks in just a few lines of code. If you have an idea for a good
-check, the rest of this document explains how to do this.
-
-There are a few tools particularly useful when developing clang-tidy checks:
-  * ``add_new_check.py`` is a script to automate the process of adding a new
-    check, it will create the check, update the CMake file and create a test;
-  * ``rename_check.py`` does what the script name suggests, renames an existing
-    check;
-  * :program:`clang-query` is invaluable for interactive prototyping of AST
-    matchers and exploration of the Clang AST;
-  * `clang-check`_ with the ``-ast-dump`` (and optionally ``-ast-dump-filter``)
-    provides a convenient way to dump AST of a C++ program.
-
-If CMake is configured with ``CLANG_ENABLE_STATIC_ANALYZER``,
-:program:`clang-tidy` will not be built with support for the 
-``clang-analyzer-*`` checks or the ``mpi-*`` checks.
-
-
-.. _AST Matchers: http://clang.llvm.org/docs/LibASTMatchers.html
-.. _PPCallbacks: http://clang.llvm.org/doxygen/classclang_1_1PPCallbacks.html
-.. _clang-check: http://clang.llvm.org/docs/ClangCheck.html
-
-
-Choosing the Right Place for your Check
----------------------------------------
-
-If you have an idea of a check, you should decide whether it should be
-implemented as a:
-
-+ *Clang diagnostic*: if the check is generic enough, targets code patterns that
-  most probably are bugs (rather than style or readability issues), can be
-  implemented effectively and with extremely low false positive rate, it may
-  make a good Clang diagnostic.
-
-+ *Clang static analyzer check*: if the check requires some sort of control flow
-  analysis, it should probably be implemented as a static analyzer check.
-
-+ *clang-tidy check* is a good choice for linter-style checks, checks that are
-  related to a certain coding style, checks that address code readability, etc.
-
-
-Preparing your Workspace
-------------------------
-
-If you are new to LLVM development, you should read the `Getting Started with
-the LLVM System`_, `Using Clang Tools`_ and `How To Setup Tooling For LLVM`_
-documents to check out and build LLVM, Clang and Clang Extra Tools with CMake.
-
-Once you are done, change to the ``llvm/tools/clang/tools/extra`` directory, and
-let's start!
-
-.. _Getting Started with the LLVM System: http://llvm.org/docs/GettingStarted.html
-.. _Using Clang Tools: http://clang.llvm.org/docs/ClangTools.html
-
-
-The Directory Structure
------------------------
-
-:program:`clang-tidy` source code resides in the
-``llvm/tools/clang/tools/extra`` directory and is structured as follows:
-
-::
-
-  clang-tidy/                       # Clang-tidy core.
-  |-- ClangTidy.h                   # Interfaces for users and checks.
-  |-- ClangTidyModule.h             # Interface for clang-tidy modules.
-  |-- ClangTidyModuleRegistry.h     # Interface for registering of modules.
-     ...
-  |-- google/                       # Google clang-tidy module.
-  |-+
-    |-- GoogleTidyModule.cpp
-    |-- GoogleTidyModule.h
-          ...
-  |-- llvm/                         # LLVM clang-tidy module.
-  |-+
-    |-- LLVMTidyModule.cpp
-    |-- LLVMTidyModule.h
-          ...
-  |-- objc/                         # Objective-C clang-tidy module.
-  |-+
-    |-- ObjCTidyModule.cpp
-    |-- ObjCTidyModule.h
-          ...
-  |-- tool/                         # Sources of the clang-tidy binary.
-          ...
-  test/clang-tidy/                  # Integration tests.
-      ...
-  unittests/clang-tidy/             # Unit tests.
-  |-- ClangTidyTest.h
-  |-- GoogleModuleTest.cpp
-  |-- LLVMModuleTest.cpp
-  |-- ObjCModuleTest.cpp
-      ...
-
-
-Writing a clang-tidy Check
---------------------------
-
-So you have an idea of a useful check for :program:`clang-tidy`.
-
-First, if you're not familiar with LLVM development, read through the `Getting
-Started with LLVM`_ document for instructions on setting up your workflow and
-the `LLVM Coding Standards`_ document to familiarize yourself with the coding
-style used in the project. For code reviews we mostly use `LLVM Phabricator`_.
-
-.. _Getting Started with LLVM: http://llvm.org/docs/GettingStarted.html
-.. _LLVM Coding Standards: http://llvm.org/docs/CodingStandards.html
-.. _LLVM Phabricator: http://llvm.org/docs/Phabricator.html
-
-Next, you need to decide which module the check belongs to. Modules
-are located in subdirectories of `clang-tidy/
-<http://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/>`_
-and contain checks targeting a certain aspect of code quality (performance,
-readability, etc.), certain coding style or standard (Google, LLVM, CERT, etc.)
-or a widely used API (e.g. MPI). Their names are same as user-facing check
-groups names described :ref:`above <checks-groups-table>`.
-
-After choosing the module and the name for the check, run the
-``clang-tidy/add_new_check.py`` script to create the skeleton of the check and
-plug it to :program:`clang-tidy`. It's the recommended way of adding new checks.
-
-If we want to create a `readability-awesome-function-names`, we would run:
-
-.. code-block:: console
-
-  $ clang-tidy/add_new_check.py readability awesome-function-names
-
-
-The ``add_new_check.py`` script will:
-  * create the class for your check inside the specified module's directory and
-    register it in the module and in the build system;
-  * create a lit test file in the ``test/clang-tidy/`` directory;
-  * create a documentation file and include it into the
-    ``docs/clang-tidy/checks/list.rst``.
-
-Let's see in more detail at the check class definition:
-
-.. code-block:: c++
-
-  ...
-
-  #include "../ClangTidy.h"
-
-  namespace clang {
-  namespace tidy {
-  namespace readability {
-
-  ...
-  class AwesomeFunctionNamesCheck : public ClangTidyCheck {
-  public:
-    AwesomeFunctionNamesCheck(StringRef Name, ClangTidyContext *Context)
-        : ClangTidyCheck(Name, Context) {}
-    void registerMatchers(ast_matchers::MatchFinder *Finder) override;
-    void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
-  };
-
-  } // namespace readability
-  } // namespace tidy
-  } // namespace clang
-
-  ...
-
-Constructor of the check receives the ``Name`` and ``Context`` parameters, and
-must forward them to the ``ClangTidyCheck`` constructor.
-
-In our case the check needs to operate on the AST level and it overrides the
-``registerMatchers`` and ``check`` methods. If we wanted to analyze code on the
-preprocessor level, we'd need instead to override the ``registerPPCallbacks``
-method.
-
-In the ``registerMatchers`` method we create an AST Matcher (see `AST Matchers`_
-for more information) that will find the pattern in the AST that we want to
-inspect. The results of the matching are passed to the ``check`` method, which
-can further inspect them and report diagnostics.
-
-.. code-block:: c++
-
-  using namespace ast_matchers;
-
-  void AwesomeFunctionNamesCheck::registerMatchers(MatchFinder *Finder) {
-    Finder->addMatcher(functionDecl().bind("x"), this);
-  }
-
-  void AwesomeFunctionNamesCheck::check(const MatchFinder::MatchResult &Result) {
-    const auto *MatchedDecl = Result.Nodes.getNodeAs<FunctionDecl>("x");
-    if (MatchedDecl->getName().startswith("awesome_"))
-      return;
-    diag(MatchedDecl->getLocation(), "function %0 is insufficiently awesome")
-        << MatchedDecl
-        << FixItHint::CreateInsertion(MatchedDecl->getLocation(), "awesome_");
-  }
-
-(If you want to see an example of a useful check, look at
-`clang-tidy/google/ExplicitConstructorCheck.h
-<http://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/google/ExplicitConstructorCheck.h>`_
-and `clang-tidy/google/ExplicitConstructorCheck.cpp
-<http://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/google/ExplicitConstructorCheck.cpp>`_).
-
-
-Registering your Check
-----------------------
-
-(The ``add_new_check.py`` takes care of registering the check in an existing
-module. If you want to create a new module or know the details, read on.)
-
-The check should be registered in the corresponding module with a distinct name:
-
-.. code-block:: c++
-
-  class MyModule : public ClangTidyModule {
-   public:
-    void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
-      CheckFactories.registerCheck<ExplicitConstructorCheck>(
-          "my-explicit-constructor");
-    }
-  };
-
-Now we need to register the module in the ``ClangTidyModuleRegistry`` using a
-statically initialized variable:
-
-.. code-block:: c++
-
-  static ClangTidyModuleRegistry::Add<MyModule> X("my-module",
-                                                  "Adds my lint checks.");
-
-
-When using LLVM build system, we need to use the following hack to ensure the
-module is linked into the :program:`clang-tidy` binary:
-
-Add this near the ``ClangTidyModuleRegistry::Add<MyModule>`` variable:
-
-.. code-block:: c++
-
-  // This anchor is used to force the linker to link in the generated object file
-  // and thus register the MyModule.
-  volatile int MyModuleAnchorSource = 0;
-
-And this to the main translation unit of the :program:`clang-tidy` binary (or
-the binary you link the ``clang-tidy`` library in)
-``clang-tidy/tool/ClangTidyMain.cpp``:
-
-.. code-block:: c++
-
-  // This anchor is used to force the linker to link the MyModule.
-  extern volatile int MyModuleAnchorSource;
-  static int MyModuleAnchorDestination = MyModuleAnchorSource;
-
-
-Configuring Checks
-------------------
-
-If a check needs configuration options, it can access check-specific options
-using the ``Options.get<Type>("SomeOption", DefaultValue)`` call in the check
-constructor. In this case the check should also override the
-``ClangTidyCheck::storeOptions`` method to make the options provided by the
-check discoverable. This method lets :program:`clang-tidy` know which options
-the check implements and what the current values are (e.g. for the
-``-dump-config`` command line option).
-
-.. code-block:: c++
-
-  class MyCheck : public ClangTidyCheck {
-    const unsigned SomeOption1;
-    const std::string SomeOption2;
-
-  public:
-    MyCheck(StringRef Name, ClangTidyContext *Context)
-      : ClangTidyCheck(Name, Context),
-        SomeOption(Options.get("SomeOption1", -1U)),
-        SomeOption(Options.get("SomeOption2", "some default")) {}
-
-    void storeOptions(ClangTidyOptions::OptionMap &Opts) override {
-      Options.store(Opts, "SomeOption1", SomeOption1);
-      Options.store(Opts, "SomeOption2", SomeOption2);
-    }
-    ...
-
-Assuming the check is registered with the name "my-check", the option can then
-be set in a ``.clang-tidy`` file in the following way:
-
-.. code-block:: yaml
-
-  CheckOptions:
-    - key: my-check.SomeOption1
-      value: 123
-    - key: my-check.SomeOption2
-      value: 'some other value'
-
-If you need to specify check options on a command line, you can use the inline
-YAML format:
-
-.. code-block:: console
-
-  $ clang-tidy -config="{CheckOptions: [{key: a, value: b}, {key: x, value: y}]}" ...
-
-
-Testing Checks
---------------
-
-To run tests for :program:`clang-tidy` use the command:
-
-.. code-block:: console
-
-  $ ninja check-clang-tools
-
-:program:`clang-tidy` checks can be tested using either unit tests or
-`lit`_ tests. Unit tests may be more convenient to test complex replacements
-with strict checks. `Lit`_ tests allow using partial text matching and regular
-expressions which makes them more suitable for writing compact tests for
-diagnostic messages.
-
-The ``check_clang_tidy.py`` script provides an easy way to test both
-diagnostic messages and fix-its. It filters out ``CHECK`` lines from the test
-file, runs :program:`clang-tidy` and verifies messages and fixes with two
-separate `FileCheck`_ invocations: once with FileCheck's directive
-prefix set to ``CHECK-MESSAGES``, validating the diagnostic messages,
-and once with the directive prefix set to ``CHECK-FIXES``, running
-against the fixed code (i.e., the code after generated fix-its are
-applied). In particular, ``CHECK-FIXES:`` can be used to check
-that code was not modified by fix-its, by checking that it is present
-unchanged in the fixed code. The full set of `FileCheck`_ directives
-is available (e.g., ``CHECK-MESSAGES-SAME:``, ``CHECK-MESSAGES-NOT:``), though
-typically the basic ``CHECK`` forms (``CHECK-MESSAGES`` and ``CHECK-FIXES``)
-are sufficient for clang-tidy tests. Note that the `FileCheck`_
-documentation mostly assumes the default prefix (``CHECK``), and hence
-describes the directive as ``CHECK:``, ``CHECK-SAME:``, ``CHECK-NOT:``, etc.
-Replace ``CHECK`` by either ``CHECK-FIXES`` or ``CHECK-MESSAGES`` for
-clang-tidy tests.
-
-An additional check enabled by ``check_clang_tidy.py`` ensures that
-if `CHECK-MESSAGES:` is used in a file then every warning or error
-must have an associated CHECK in that file. Or, you can use ``CHECK-NOTES:``
-instead, if you want to **also** ensure that all the notes are checked.
-
-To use the ``check_clang_tidy.py`` script, put a .cpp file with the
-appropriate ``RUN`` line in the ``test/clang-tidy`` directory. Use
-``CHECK-MESSAGES:`` and ``CHECK-FIXES:`` lines to write checks against
-diagnostic messages and fixed code.
-
-It's advised to make the checks as specific as possible to avoid checks matching
-to incorrect parts of the input. Use ``[[@LINE+X]]``/``[[@LINE-X]]``
-substitutions and distinct function and variable names in the test code.
-
-Here's an example of a test using the ``check_clang_tidy.py`` script (the full
-source code is at `test/clang-tidy/google-readability-casting.cpp`_):
-
-.. code-block:: c++
-
-  // RUN: %check_clang_tidy %s google-readability-casting %t
-
-  void f(int a) {
-    int b = (int)a;
-    // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: redundant cast to the same type [google-readability-casting]
-    // CHECK-FIXES: int b = a;
-  }
-
-To check more than one scenario in the same test file use
-``-check-suffix=SUFFIX-NAME`` on ``check_clang_tidy.py`` command line or
-``-check-suffixes=SUFFIX-NAME-1,SUFFIX-NAME-2,...``.
-With ``-check-suffix[es]=SUFFIX-NAME`` you need to replace your ``CHECK-*``
-directives with ``CHECK-MESSAGES-SUFFIX-NAME`` and ``CHECK-FIXES-SUFFIX-NAME``.
-
-Here's an example:
-
-.. code-block:: c++
-
-   // RUN: %check_clang_tidy -check-suffix=USING-A %s misc-unused-using-decls %t -- -- -DUSING_A
-   // RUN: %check_clang_tidy -check-suffix=USING-B %s misc-unused-using-decls %t -- -- -DUSING_B
-   // RUN: %check_clang_tidy %s misc-unused-using-decls %t
-   ...
-   // CHECK-MESSAGES-USING-A: :[[@LINE-8]]:10: warning: using decl 'A' {{.*}}
-   // CHECK-MESSAGES-USING-B: :[[@LINE-7]]:10: warning: using decl 'B' {{.*}}
-   // CHECK-MESSAGES: :[[@LINE-6]]:10: warning: using decl 'C' {{.*}}
-   // CHECK-FIXES-USING-A-NOT: using a::A;$
-   // CHECK-FIXES-USING-B-NOT: using a::B;$
-   // CHECK-FIXES-NOT: using a::C;$
-
-
-There are many dark corners in the C++ language, and it may be difficult to make
-your check work perfectly in all cases, especially if it issues fix-it hints. The
-most frequent pitfalls are macros and templates:
-
-1. code written in a macro body/template definition may have a different meaning
-   depending on the macro expansion/template instantiation;
-2. multiple macro expansions/template instantiations may result in the same code
-   being inspected by the check multiple times (possibly, with different
-   meanings, see 1), and the same warning (or a slightly different one) may be
-   issued by the check multiple times; :program:`clang-tidy` will deduplicate
-   _identical_ warnings, but if the warnings are slightly different, all of them
-   will be shown to the user (and used for applying fixes, if any);
-3. making replacements to a macro body/template definition may be fine for some
-   macro expansions/template instantiations, but easily break some other
-   expansions/instantiations.
-
-.. _lit: http://llvm.org/docs/CommandGuide/lit.html
-.. _FileCheck: http://llvm.org/docs/CommandGuide/FileCheck.html
-.. _test/clang-tidy/google-readability-casting.cpp: http://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/test/clang-tidy/google-readability-casting.cpp
-
-
-Running clang-tidy on LLVM
---------------------------
-
-To test a check it's best to try it out on a larger code base. LLVM and Clang
-are the natural targets as you already have the source code around. The most
-convenient way to run :program:`clang-tidy` is with a compile command database;
-CMake can automatically generate one, for a description of how to enable it see
-`How To Setup Tooling For LLVM`_. Once ``compile_commands.json`` is in place and
-a working version of :program:`clang-tidy` is in ``PATH`` the entire code base
-can be analyzed with ``clang-tidy/tool/run-clang-tidy.py``. The script executes
-:program:`clang-tidy` with the default set of checks on every translation unit
-in the compile command database and displays the resulting warnings and errors.
-The script provides multiple configuration flags.
-
-* The default set of checks can be overridden using the ``-checks`` argument,
-  taking the identical format as :program:`clang-tidy` does. For example
-  ``-checks=-*,modernize-use-override`` will run the ``modernize-use-override``
-  check only.
-
-* To restrict the files examined you can provide one or more regex arguments
-  that the file names are matched against.
-  ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze clang-tidy
-  checks. It may also be necessary to restrict the header files warnings are
-  displayed from using the ``-header-filter`` flag. It has the same behavior
-  as the corresponding :program:`clang-tidy` flag.
-
-* To apply suggested fixes ``-fix`` can be passed as an argument. This gathers
-  all changes in a temporary directory and applies them. Passing ``-format``
-  will run clang-format over changed lines.
-
-
-On checks profiling
--------------------
-
-:program:`clang-tidy` can collect per-check profiling info, and output it
-for each processed source file (translation unit).
-
-To enable profiling info collection, use the ``-enable-check-profile`` argument.
-The timings will be output to ``stderr`` as a table. Example output:
-
-.. code-block:: console
-
-  $ clang-tidy -enable-check-profile -checks=-*,readability-function-size source.cpp
-  ===-------------------------------------------------------------------------===
-                            clang-tidy checks profiling
-  ===-------------------------------------------------------------------------===
-    Total Execution Time: 1.0282 seconds (1.0258 wall clock)
-
-     ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
-     0.9136 (100.0%)   0.1146 (100.0%)   1.0282 (100.0%)   1.0258 (100.0%)  readability-function-size
-     0.9136 (100.0%)   0.1146 (100.0%)   1.0282 (100.0%)   1.0258 (100.0%)  Total
-
-It can also store that data as JSON files for further processing. Example output:
-
-.. code-block:: console
-
-  $ clang-tidy -enable-check-profile -store-check-profile=.  -checks=-*,readability-function-size source.cpp
-  $ # Note that there won't be timings table printed to the console.
-  $ ls /tmp/out/
-  20180516161318717446360-source.cpp.json
-  $ cat 20180516161318717446360-source.cpp.json
-  {
-  "file": "/path/to/source.cpp",
-  "timestamp": "2018-05-16 16:13:18.717446360",
-  "profile": {
-    "time.clang-tidy.readability-function-size.wall": 1.0421266555786133e+00,
-    "time.clang-tidy.readability-function-size.user": 9.2088400000005421e-01,
-    "time.clang-tidy.readability-function-size.sys": 1.2418899999999974e-01
-  }
-  }
-
-There is only one argument that controls profile storage:
-
-* ``-store-check-profile=<prefix>``
-
-  By default reports are printed in tabulated format to stderr. When this option
-  is passed, these per-TU profiles are instead stored as JSON.
-  If the prefix is not an absolute path, it is considered to be relative to the
-  directory from where you have run :program:`clang-tidy`. All ``.`` and ``..``
-  patterns in the path are collapsed, and symlinks are resolved.
-
-  Example:
-  Let's suppose you have a source file named ``example.cpp``, located in the
-  ``/source`` directory. Only the input filename is used, not the full path
-  to the source file. Additionally, it is prefixed with the current timestamp.
-
-  * If you specify ``-store-check-profile=/tmp``, then the profile will be saved
-    to ``/tmp/<ISO8601-like timestamp>-example.cpp.json``
-
-  * If you run :program:`clang-tidy` from within ``/foo`` directory, and specify
-    ``-store-check-profile=.``, then the profile will still be saved to
-    ``/foo/<ISO8601-like timestamp>-example.cpp.json``
diff --git a/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp b/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp
index 31052716d28733..2a93ff6a18c38d 100644
--- a/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp
+++ b/clang-tools-extra/test/clang-tidy/misc-non-private-member-variables-in-classes.cpp
@@ -35,6 +35,23 @@ class S1 {
   int S1_v3;
 };
 
+// Records with only data members plus implicit or static methods: do not warn.
+
+class C { // Helper type with a user-provided constructor and destructor.
+public:
+  C() {}
+  ~C() {}
+};
+
+struct S1Implicit { // Declares no methods itself; only holds a C member.
+  C S1Implicit_v0;
+};
+
+struct S1ImplicitAndStatic { // Same as above plus one static method.
+  C S1Implicit_v0;
+  static void s() {}
+};
+
 //----------------------------------------------------------------------------//
 
 // All functions are static, do not warn.
diff --git a/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp
new file mode 100644
index 00000000000000..6549422f393aaa
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-main.cpp
@@ -0,0 +1,18 @@
+// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t
+
+int not_main(int argc, char *argv[]) { // Not main: the array parameter is flagged.
+  // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead
+  int f4[] = {1, 2};
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
+}
+
+int main(int argc, char *argv[]) { // No diagnostic is expected for main's argv.
+  int f5[] = {1, 2};
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
+
+  auto not_main = [](int argc, char *argv[]) { // A lambda is not main either.
+    // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead
+    int f6[] = {1, 2};
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead
+  };
+}
diff --git a/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp
new file mode 100644
index 00000000000000..22a4016f79f4da
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/modernize-avoid-c-arrays-ignores-three-arg-main.cpp
@@ -0,0 +1,20 @@
+// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t
+
+int not_main(int argc, char *argv[], char *argw[]) { // Not main: both arrays are flagged.
+  // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead
+  // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use std::array<> instead
+  int f4[] = {1, 2};
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
+}
+
+int main(int argc, char *argv[], char *argw[]) { // Three-parameter main: no diagnostic expected here.
+  int f5[] = {1, 2};
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
+
+  auto not_main = [](int argc, char *argv[], char *argw[]) { // A lambda is not main either.
+    // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead
+    // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use std::array<> instead
+    int f6[] = {1, 2};
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead
+  };
+}
diff --git a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp
new file mode 100644
index 00000000000000..b2b858f9345b5e
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-float16.cpp
@@ -0,0 +1,51 @@
+// RUN: %check_clang_tidy %s readability-uppercase-literal-suffix %t -- -- -target aarch64-linux-gnu -I %S
+
+#include "readability-uppercase-literal-suffix.h"
+
+void float16_normal_literals() {
+  // _Float16 decimal literals: a lowercase suffix is diagnosed, an uppercase one is accepted.
+
+  static constexpr auto v14 = 1.f16;
+  // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase
+  // CHECK-MESSAGES-NEXT: static constexpr auto v14 = 1.f16;
+  // CHECK-MESSAGES-NEXT: ^ ~
+  // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}}
+  // CHECK-FIXES: static constexpr auto v14 = 1.F16;
+  static_assert(is_same<decltype(v14), const _Float16>::value, "");
+  static_assert(v14 == 1.F16, "");
+
+  static constexpr auto v15 = 1.e0f16;
+  // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase
+  // CHECK-MESSAGES-NEXT: static constexpr auto v15 = 1.e0f16;
+  // CHECK-MESSAGES-NEXT: ^ ~
+  // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}}
+  // CHECK-FIXES: static constexpr auto v15 = 1.e0F16;
+  static_assert(is_same<decltype(v15), const _Float16>::value, "");
+  static_assert(v15 == 1.F16, "");
+
+  static constexpr auto v16 = 1.F16; // OK.
+  static_assert(is_same<decltype(v16), const _Float16>::value, "");
+  static_assert(v16 == 1.F16, "");
+
+  static constexpr auto v17 = 1.e0F16; // OK.
+  static_assert(is_same<decltype(v17), const _Float16>::value, "");
+  static_assert(v17 == 1.F16, "");
+}
+
+void float16_hexadecimal_literals() {
+// _Float16 hexadecimal literals: a lowercase suffix is diagnosed, an uppercase one is accepted.
+
+  static constexpr auto v13 = 0xfp0f16;
+  // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase
+  // CHECK-MESSAGES-NEXT: static constexpr auto v13 = 0xfp0f16;
+  // CHECK-MESSAGES-NEXT: ^    ~
+  // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}}
+  // CHECK-FIXES: static constexpr auto v13 = 0xfp0F16;
+  static_assert(is_same<decltype(v13), const _Float16>::value, "");
+  static_assert(v13 == 0xfp0F16, "");
+
+  static constexpr auto v14 = 0xfp0F16; // OK.
+  static_assert(is_same<decltype(v14), const _Float16>::value, "");
+  static_assert(v14 == 0xfp0F16, "");
+
+}
diff --git a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp
index 4d41db7a5ec646..50e75fae6ae40f 100644
--- a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp
+++ b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-floating-point.cpp
@@ -97,34 +97,6 @@ void floating_point_suffix() {
   static constexpr auto v13 = 1.e0Q; // OK.
   static_assert(is_same<decltype(v13), const __float128>::value, "");
   static_assert(v13 == 1., "");
-
-  // _Float16
-
-  static constexpr auto v14 = 1.f16;
-  // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase
-  // CHECK-MESSAGES-NEXT: static constexpr auto v14 = 1.f16;
-  // CHECK-MESSAGES-NEXT: ^ ~
-  // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}}
-  // CHECK-FIXES: static constexpr auto v14 = 1.F16;
-  static_assert(is_same<decltype(v14), const _Float16>::value, "");
-  static_assert(v14 == 1.F16, "");
-
-  static constexpr auto v15 = 1.e0f16;
-  // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase
-  // CHECK-MESSAGES-NEXT: static constexpr auto v15 = 1.e0f16;
-  // CHECK-MESSAGES-NEXT: ^ ~
-  // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}}
-  // CHECK-FIXES: static constexpr auto v15 = 1.e0F16;
-  static_assert(is_same<decltype(v15), const _Float16>::value, "");
-  static_assert(v15 == 1.F16, "");
-
-  static constexpr auto v16 = 1.F16; // OK.
-  static_assert(is_same<decltype(v16), const _Float16>::value, "");
-  static_assert(v16 == 1.F16, "");
-
-  static constexpr auto v17 = 1.e0F16; // OK.
-  static_assert(is_same<decltype(v17), const _Float16>::value, "");
-  static_assert(v17 == 1.F16, "");
 }
 
 void floating_point_complex_suffix() {
diff --git a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp
index 4cc9d6d2a70406..415c6d8e7915f9 100644
--- a/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp
+++ b/clang-tools-extra/test/clang-tidy/readability-uppercase-literal-suffix-hexadecimal-floating-point.cpp
@@ -93,21 +93,6 @@ void floating_point_suffix() {
   static constexpr auto v12 = 0xfp0Q; // OK.
   static_assert(is_same<decltype(v12), const __float128>::value, "");
   static_assert(v12 == 0xfp0, "");
-
-  // _Float16
-
-  static constexpr auto v13 = 0xfp0f16;
-  // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: floating point literal has suffix 'f16', which is not uppercase
-  // CHECK-MESSAGES-NEXT: static constexpr auto v13 = 0xfp0f16;
-  // CHECK-MESSAGES-NEXT: ^    ~
-  // CHECK-MESSAGES-NEXT: {{^ *}}F16{{$}}
-  // CHECK-FIXES: static constexpr auto v13 = 0xfp0F16;
-  static_assert(is_same<decltype(v13), const _Float16>::value, "");
-  static_assert(v13 == 0xfp0F16, "");
-
-  static constexpr auto v14 = 0xfp0F16; // OK.
-  static_assert(is_same<decltype(v14), const _Float16>::value, "");
-  static_assert(v14 == 0xfp0F16, "");
 }
 
 void floating_point_complex_suffix() {
diff --git a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
index 639d35c876ac7f..09a117dbbe7d32 100644
--- a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
@@ -76,7 +76,7 @@ TEST_F(BackgroundIndexTest, NoCrashOnErrorFile) {
   size_t CacheHits = 0;
   MemoryShardStorage MSS(Storage, CacheHits);
   OverlayCDB CDB(/*Base=*/nullptr);
-  BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+  BackgroundIndex Idx(Context::empty(), FS, CDB,
                       [&](llvm::StringRef) { return &MSS; });
 
   tooling::CompileCommand Cmd;
@@ -113,7 +113,7 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) {
   size_t CacheHits = 0;
   MemoryShardStorage MSS(Storage, CacheHits);
   OverlayCDB CDB(/*Base=*/nullptr);
-  BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+  BackgroundIndex Idx(Context::empty(), FS, CDB,
                       [&](llvm::StringRef) { return &MSS; });
 
   tooling::CompileCommand Cmd;
@@ -168,7 +168,7 @@ TEST_F(BackgroundIndexTest, ShardStorageTest) {
   // Check nothing is loaded from Storage, but A.cc and A.h has been stored.
   {
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -178,7 +178,7 @@ TEST_F(BackgroundIndexTest, ShardStorageTest) {
 
   {
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -224,7 +224,7 @@ TEST_F(BackgroundIndexTest, DirectIncludesTest) {
   Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
   {
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -262,7 +262,7 @@ TEST_F(BackgroundIndexTest, DISABLED_PeriodicalIndex) {
   MemoryShardStorage MSS(Storage, CacheHits);
   OverlayCDB CDB(/*Base=*/nullptr);
   BackgroundIndex Idx(
-      Context::empty(), "", FS, CDB, [&](llvm::StringRef) { return &MSS; },
+      Context::empty(), FS, CDB, [&](llvm::StringRef) { return &MSS; },
       /*BuildIndexPeriodMs=*/500);
 
   FS.Files[testPath("root/A.cc")] = "#include \"A.h\"";
@@ -310,7 +310,7 @@ TEST_F(BackgroundIndexTest, ShardStorageLoad) {
   // Check nothing is loaded from Storage, but A.cc and A.h has been stored.
   {
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -325,7 +325,7 @@ TEST_F(BackgroundIndexTest, ShardStorageLoad) {
       )cpp";
   {
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -343,7 +343,7 @@ TEST_F(BackgroundIndexTest, ShardStorageLoad) {
   {
     CacheHits = 0;
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -384,7 +384,7 @@ TEST_F(BackgroundIndexTest, ShardStorageEmptyFile) {
   // Check that A.cc, A.h and B.h has been stored.
   {
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -400,7 +400,7 @@ TEST_F(BackgroundIndexTest, ShardStorageEmptyFile) {
   {
     CacheHits = 0;
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
@@ -416,7 +416,7 @@ TEST_F(BackgroundIndexTest, ShardStorageEmptyFile) {
   {
     CacheHits = 0;
     OverlayCDB CDB(/*Base=*/nullptr);
-    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+    BackgroundIndex Idx(Context::empty(), FS, CDB,
                         [&](llvm::StringRef) { return &MSS; });
     CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
     ASSERT_TRUE(Idx.blockUntilIdleForTest());
diff --git a/clang-tools-extra/unittests/clangd/ClangdTests.cpp b/clang-tools-extra/unittests/clangd/ClangdTests.cpp
index c1cc623a05a112..356efe529dde0e 100644
--- a/clang-tools-extra/unittests/clangd/ClangdTests.cpp
+++ b/clang-tools-extra/unittests/clangd/ClangdTests.cpp
@@ -10,6 +10,7 @@
 #include "Annotations.h"
 #include "ClangdLSPServer.h"
 #include "ClangdServer.h"
+#include "GlobalCompilationDatabase.h"
 #include "Matchers.h"
 #include "SyncAPI.h"
 #include "TestFS.h"
@@ -1037,6 +1038,28 @@ TEST(ClangdTests, PreambleVFSStatCache) {
 }
 #endif
 
+TEST_F(ClangdVFSTest, FlagsWithPlugins) { // An AST must still be built.
+  MockFSProvider FS;
+  ErrorCheckingDiagConsumer DiagConsumer;
+  MockCompilationDatabase CDB;
+  CDB.ExtraClangFlags = { // Flags asking clang to add a plugin named below.
+      "-Xclang",
+      "-add-plugin",
+      "-Xclang",
+      "random-plugin",
+  };
+  OverlayCDB OCDB(&CDB);
+  ClangdServer Server(OCDB, FS, DiagConsumer, ClangdServer::optsForTest());
+
+  auto FooCpp = testPath("foo.cpp");
+  const auto SourceContents = "int main() { return 0; }";
+  FS.Files[FooCpp] = SourceContents; // Mock file holds the source, not its path.
+  Server.addDocument(FooCpp, SourceContents);
+  auto Result = dumpASTWithoutMemoryLocs(Server, FooCpp);
+  EXPECT_TRUE(Server.blockUntilIdleForTest()) << "Waiting for diagnostics";
+  EXPECT_NE(Result, "<no-ast>"); // "<no-ast>" would mean no AST was produced.
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp
index 02f12eab7dd6a2..f26181decb80fa 100644
--- a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp
+++ b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp
@@ -2320,6 +2320,17 @@ TEST(CompletionTest, ObjectiveCMethodTwoArgumentsFromMiddle) {
   EXPECT_THAT(C, ElementsAre(SnippetSuffix("${1:(unsigned int)}")));
 }
 
+TEST(CompletionTest, WorksWithNullType) { // loopVar must complete even though range y is unresolved.
+  auto R = completions(R"cpp(
+    int main() {
+      for (auto [loopVar] : y ) { // y has to be unresolved.
+        int z = loopV^;
+      }
+    }
+  )cpp");
+  EXPECT_THAT(R.Completions, ElementsAre(Named("loopVar")));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp
index b0052c751c27b9..41c28bf35d5a34 100644
--- a/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp
+++ b/clang-tools-extra/unittests/clangd/GlobalCompilationDatabaseTests.cpp
@@ -65,7 +65,7 @@ class OverlayCDBTest : public ::testing::Test {
 };
 
 TEST_F(OverlayCDBTest, GetCompileCommand) {
-  OverlayCDB CDB(Base.get());
+  OverlayCDB CDB(Base.get(), {}, std::string(""));
   EXPECT_EQ(CDB.getCompileCommand(testPath("foo.cc")),
             Base->getCompileCommand(testPath("foo.cc")));
   EXPECT_EQ(CDB.getCompileCommand(testPath("missing.cc")), llvm::None);
@@ -85,7 +85,7 @@ TEST_F(OverlayCDBTest, GetFallbackCommand) {
 }
 
 TEST_F(OverlayCDBTest, NoBase) {
-  OverlayCDB CDB(nullptr, {"-DA=6"});
+  OverlayCDB CDB(nullptr, {"-DA=6"}, std::string(""));
   EXPECT_EQ(CDB.getCompileCommand(testPath("bar.cc")), None);
   auto Override = cmd(testPath("bar.cc"), "-DA=5");
   CDB.setCompileCommand(testPath("bar.cc"), Override);
diff --git a/clang-tools-extra/unittests/clangd/XRefsTests.cpp b/clang-tools-extra/unittests/clangd/XRefsTests.cpp
index 88394b6cc5126e..8616fbdb179eff 100644
--- a/clang-tools-extra/unittests/clangd/XRefsTests.cpp
+++ b/clang-tools-extra/unittests/clangd/XRefsTests.cpp
@@ -1082,8 +1082,6 @@ TEST(GoToInclude, All) {
   // Test include outside of preamble.
   Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("6"));
   ASSERT_TRUE(bool(Locations)) << "findDefinitions returned an error";
-  EXPECT_THAT(*Locations,
-              ElementsAre(FileRange(FooH, HeaderAnnotations.range())));
 
   // Test a few positions that do not result in Locations.
   Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("4"));
@@ -1092,13 +1090,9 @@ TEST(GoToInclude, All) {
 
   Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("5"));
   ASSERT_TRUE(bool(Locations)) << "findDefinitions returned an error";
-  EXPECT_THAT(*Locations,
-              ElementsAre(FileRange(FooH, HeaderAnnotations.range())));
 
   Locations = runFindDefinitions(Server, FooCpp, SourceAnnotations.point("7"));
   ASSERT_TRUE(bool(Locations)) << "findDefinitions returned an error";
-  EXPECT_THAT(*Locations,
-              ElementsAre(FileRange(FooH, HeaderAnnotations.range())));
 }
 
 TEST(GoToDefinition, WithPreamble) {
diff --git a/clang/docs/AttributeReference.rst b/clang/docs/AttributeReference.rst
index a763ddeaeb106d..01938f64f56c15 100644
--- a/clang/docs/AttributeReference.rst
+++ b/clang/docs/AttributeReference.rst
@@ -1,13 +1,5176 @@
 ..
   -------------------------------------------------------------------
   NOTE: This file is automatically generated by running clang-tblgen
-  -gen-attr-docs. Do not edit this file by hand!! The contents for
-  this file are automatically generated by a server-side process.
-  
-  Please do not commit this file. The file exists for local testing
-  purposes only.
+  -gen-attr-docs. Do not edit this file by hand!!
   -------------------------------------------------------------------
 
 ===================
 Attributes in Clang
-===================
\ No newline at end of file
+===================
+.. contents::
+   :local:
+
+.. |br| raw:: html
+
+  <br/>
+
+Introduction
+============
+
+This page lists the attributes currently supported by Clang.
+
+Function Attributes
+===================
+
+
+#pragma omp declare simd
+------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","","``omp declare simd``",""
+
+The `declare simd` construct can be applied to a function to enable the creation
+of one or more versions that can process multiple arguments using SIMD
+instructions from a single invocation in a SIMD loop. The `declare simd`
+directive is a declarative directive. There may be multiple `declare simd`
+directives for a function. The use of a `declare simd` construct on a function
+enables the creation of SIMD versions of the associated function that can be
+used to process multiple arguments from a single invocation from a SIMD loop
+concurrently.
+The syntax of the `declare simd` construct is as follows:
+
+  .. code-block:: none
+
+    #pragma omp declare simd [clause[[,] clause] ...] new-line
+    [#pragma omp declare simd [clause[[,] clause] ...] new-line]
+    [...]
+    function definition or declaration
+
+where clause is one of the following:
+
+  .. code-block:: none
+
+    simdlen(length)
+    linear(argument-list[:constant-linear-step])
+    aligned(argument-list[:alignment])
+    uniform(argument-list)
+    inbranch
+    notinbranch
+
+
+#pragma omp declare target
+--------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","","``omp declare target``",""
+
+The `declare target` directive specifies that variables and functions are mapped
+to a device for OpenMP offload mechanism.
+
+The syntax of the declare target directive is as follows:
+
+  .. code-block:: c
+
+    #pragma omp declare target new-line
+    declarations-definition-seq
+    #pragma omp end declare target new-line
+
+
+_Noreturn
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``_Noreturn``","",""
+
+A function declared as ``_Noreturn`` shall not return to its caller. The
+compiler will generate a diagnostic for a function declared as ``_Noreturn``
+that appears to be capable of returning to its caller.
+
+
+abi_tag
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``abi_tag``","``gnu::abi_tag``","","","","","Yes"
+
+The ``abi_tag`` attribute can be applied to a function, variable, class or
+inline namespace declaration to modify the mangled name of the entity. It gives
+the ability to distinguish between different versions of the same entity but
+with different ABI versions supported. For example, a newer version of a class
+could have a different set of data members and thus have a different size. Using
+the ``abi_tag`` attribute, it is possible to have different mangled names for
+a global variable of the class type. Therefore, the old code could keep using
+the old mangled name and the new code will use the new mangled name with tags.
+
+
+acquire_capability, acquire_shared_capability
+---------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``acquire_capability`` |br| ``acquire_shared_capability`` |br| ``exclusive_lock_function`` |br| ``shared_lock_function``","``clang::acquire_capability`` |br| ``clang::acquire_shared_capability``","","","","",""
+
+Marks a function as acquiring a capability.
+
+
+alloc_align
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``alloc_align``","``gnu::alloc_align``","","","","",""
+
+Use ``__attribute__((alloc_align(<alignment>)))`` on a function
+declaration to specify that the return value of the function (which must be a
+pointer type) is at least as aligned as the value of the indicated parameter. The
+parameter is given by its index in the list of formal parameters; the first
+parameter has index 1 unless the function is a C++ non-static member function,
+in which case the first parameter has index 2 to account for the implicit ``this``
+parameter.
+
+.. code-block:: c++
+
+  // The returned pointer has the alignment specified by the first parameter.
+  void *a(size_t align) __attribute__((alloc_align(1)));
+
+  // The returned pointer has the alignment specified by the second parameter.
+  void *b(void *v, size_t align) __attribute__((alloc_align(2)));
+
+  // The returned pointer has the alignment specified by the second visible
+  // parameter, however it must be adjusted for the implicit 'this' parameter.
+  void *Foo::b(void *v, size_t align) __attribute__((alloc_align(3)));
+
+Note that this attribute merely informs the compiler that a function always
+returns a sufficiently aligned pointer. It does not cause the compiler to
+emit code to enforce that alignment.  The behavior is undefined if the returned
+pointer is not sufficiently aligned.
+
+
+alloc_size
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``alloc_size``","``gnu::alloc_size``","","","","","Yes"
+
+The ``alloc_size`` attribute can be placed on functions that return pointers in
+order to hint to the compiler how many bytes of memory will be available at the
+returned pointer. ``alloc_size`` takes one or two arguments.
+
+- ``alloc_size(N)`` implies that argument number N equals the number of
+  available bytes at the returned pointer.
+- ``alloc_size(N, M)`` implies that the product of argument number N and
+  argument number M equals the number of available bytes at the returned
+  pointer.
+
+Argument numbers are 1-based.
+
+An example of how to use ``alloc_size``
+
+.. code-block:: c
+
+  void *my_malloc(int a) __attribute__((alloc_size(1)));
+  void *my_calloc(int a, int b) __attribute__((alloc_size(1, 2)));
+
+  int main() {
+    void *const p = my_malloc(100);
+    assert(__builtin_object_size(p, 0) == 100);
+    void *const a = my_calloc(20, 5);
+    assert(__builtin_object_size(a, 0) == 100);
+  }
+
+.. Note:: This attribute works differently in clang than it does in GCC.
+  Specifically, clang will only trace ``const`` pointers (as above); we give up
+  on pointers that are not marked as ``const``. In the vast majority of cases,
+  this is unimportant, because LLVM has support for the ``alloc_size``
+  attribute. However, this may cause mildly unintuitive behavior when used with
+  other attributes, such as ``enable_if``.
+
+
+artificial
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``artificial``","``gnu::artificial``","","","","",""
+
+The ``artificial`` attribute can be applied to an inline function. If such a
+function is inlined, the attribute indicates that debuggers should associate
+the resulting instructions with the call site, rather than with the
+corresponding line within the inlined callee.
+
+
+assert_capability, assert_shared_capability
+-------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``assert_capability`` |br| ``assert_shared_capability``","``clang::assert_capability`` |br| ``clang::assert_shared_capability``","","","","",""
+
+Marks a function that dynamically tests whether a capability is held, and halts
+the program if it is not held.
+
+
+assume_aligned
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``assume_aligned``","``gnu::assume_aligned``","","","","","Yes"
+
+Use ``__attribute__((assume_aligned(<alignment>[,<offset>])))`` on a function
+declaration to specify that the return value of the function (which must be a
+pointer type) has the specified offset, in bytes, from an address with the
+specified alignment. The offset is taken to be zero if omitted.
+
+.. code-block:: c++
+
+  // The returned pointer value has 32-byte alignment.
+  void *a() __attribute__((assume_aligned (32)));
+
+  // The returned pointer value is 4 bytes greater than an address having
+  // 32-byte alignment.
+  void *b() __attribute__((assume_aligned (32, 4)));
+
+Note that this attribute provides information to the compiler regarding a
+condition that the code already ensures is true. It does not cause the compiler
+to enforce the provided alignment assumption.
+
+
+availability
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``availability``","``clang::availability``","``clang::availability``","","","","Yes"
+
+The ``availability`` attribute can be placed on declarations to describe the
+lifecycle of that declaration relative to operating system versions.  Consider
+the function declaration for a hypothetical function ``f``:
+
+.. code-block:: c++
+
+  void f(void) __attribute__((availability(macos,introduced=10.4,deprecated=10.6,obsoleted=10.7)));
+
+The availability attribute states that ``f`` was introduced in macOS 10.4,
+deprecated in macOS 10.6, and obsoleted in macOS 10.7.  This information
+is used by Clang to determine when it is safe to use ``f``: for example, if
+Clang is instructed to compile code for macOS 10.5, a call to ``f()``
+succeeds.  If Clang is instructed to compile code for macOS 10.6, the call
+succeeds but Clang emits a warning specifying that the function is deprecated.
+Finally, if Clang is instructed to compile code for macOS 10.7, the call
+fails because ``f()`` is no longer available.
+
+The availability attribute is a comma-separated list starting with the
+platform name and then including clauses specifying important milestones in the
+declaration's lifetime (in any order) along with additional information.  Those
+clauses can be:
+
+introduced=\ *version*
+  The first version in which this declaration was introduced.
+
+deprecated=\ *version*
+  The first version in which this declaration was deprecated, meaning that
+  users should migrate away from this API.
+
+obsoleted=\ *version*
+  The first version in which this declaration was obsoleted, meaning that it
+  was removed completely and can no longer be used.
+
+unavailable
+  This declaration is never available on this platform.
+
+message=\ *string-literal*
+  Additional message text that Clang will provide when emitting a warning or
+  error about use of a deprecated or obsoleted declaration.  Useful to direct
+  users to replacement APIs.
+
+replacement=\ *string-literal*
+  Additional message text that Clang will use to provide Fix-It when emitting
+  a warning about use of a deprecated declaration. The Fix-It will replace
+  the deprecated declaration with the new declaration specified.
+
+Multiple availability attributes can be placed on a declaration, which may
+correspond to different platforms.  Only the availability attribute with the
+platform corresponding to the target platform will be used; any others will be
+ignored.  If no availability attribute specifies availability for the current
+target platform, the availability attributes are ignored.  Supported platforms
+are:
+
+``ios``
+  Apple's iOS operating system.  The minimum deployment target is specified by
+  the ``-mios-version-min=*version*`` or ``-miphoneos-version-min=*version*``
+  command-line arguments.
+
+``macos``
+  Apple's macOS operating system.  The minimum deployment target is
+  specified by the ``-mmacosx-version-min=*version*`` command-line argument.
+  ``macosx`` is supported for backward-compatibility reasons, but it is
+  deprecated.
+
+``tvos``
+  Apple's tvOS operating system.  The minimum deployment target is specified by
+  the ``-mtvos-version-min=*version*`` command-line argument.
+
+``watchos``
+  Apple's watchOS operating system.  The minimum deployment target is specified by
+  the ``-mwatchos-version-min=*version*`` command-line argument.
+
+A declaration can typically be used even when deploying back to a platform
+version prior to when the declaration was introduced.  When this happens, the
+declaration is `weakly linked
+<https://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPFrameworks/Concepts/WeakLinking.html>`_,
+as if the ``weak_import`` attribute were added to the declaration.  A
+weakly-linked declaration may or may not be present at run-time, and a program
+can determine whether the declaration is present by checking whether the
+address of that declaration is non-NULL.
+
+The flag ``strict`` disallows using API when deploying back to a
+platform version prior to when the declaration was introduced.  An
+attempt to use such API before its introduction causes a hard error.
+Weakly-linking is almost always a better API choice, since it allows
+users to query availability at runtime.
+
+If there are multiple declarations of the same entity, the availability
+attributes must either match on a per-platform basis or later
+declarations must not have availability attributes for that
+platform. For example:
+
+.. code-block:: c
+
+  void g(void) __attribute__((availability(macos,introduced=10.4)));
+  void g(void) __attribute__((availability(macos,introduced=10.4))); // okay, matches
+  void g(void) __attribute__((availability(ios,introduced=4.0))); // okay, adds a new platform
+  void g(void); // okay, inherits both macos and ios availability from above.
+  void g(void) __attribute__((availability(macos,introduced=10.5))); // error: mismatch
+
+When one method overrides another, the overriding method can be more widely available than the overridden method, e.g.:
+
+.. code-block:: objc
+
+  @interface A
+  - (id)method __attribute__((availability(macos,introduced=10.4)));
+  - (id)method2 __attribute__((availability(macos,introduced=10.4)));
+  @end
+
+  @interface B : A
+  - (id)method __attribute__((availability(macos,introduced=10.3))); // okay: method moved into base class later
+  - (id)method __attribute__((availability(macos,introduced=10.5))); // error: this method was available via the base class in 10.4
+  @end
+
+Starting with the macOS 10.12 SDK, the ``API_AVAILABLE`` macro from
+``<os/availability.h>`` can simplify the spelling:
+
+.. code-block:: objc
+
+  @interface A
+  - (id)method API_AVAILABLE(macos(10.11));
+  - (id)otherMethod API_AVAILABLE(macos(10.11), ios(11.0));
+  @end
+
+Also see the documentation for `@available
+<http://clang.llvm.org/docs/LanguageExtensions.html#objective-c-available>`_
+
+
+carries_dependency
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``carries_dependency``","``carries_dependency``","","","","","Yes"
+
+The ``carries_dependency`` attribute specifies dependency propagation into and
+out of functions.
+
+When specified on a function or Objective-C method, the ``carries_dependency``
+attribute means that the return value carries a dependency out of the function,
+so that the implementation need not constrain ordering upon return from that
+function. Implementations of the function and its caller may choose to preserve
+dependencies instead of emitting memory ordering instructions such as fences.
+
+Note, this attribute does not change the meaning of the program, but may result
+in generation of more efficient code.
+
+
+cf_consumed
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``cf_consumed``","``clang::cf_consumed``","``clang::cf_consumed``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+cf_returns_not_retained
+-----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``cf_returns_not_retained``","``clang::cf_returns_not_retained``","``clang::cf_returns_not_retained``","","","",""
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+cf_returns_retained
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``cf_returns_retained``","``clang::cf_returns_retained``","``clang::cf_returns_retained``","","","",""
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+code_seg
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","``code_seg``","","",""
+
+The ``__declspec(code_seg)`` attribute enables the placement of code into separate
+named segments that can be paged or locked in memory individually. This attribute
+is used to control the placement of instantiated templates and compiler-generated
+code. See the documentation for `__declspec(code_seg)`_ on MSDN.
+
+.. _`__declspec(code_seg)`: http://msdn.microsoft.com/en-us/library/dn636922.aspx
+
+
+convergent
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``convergent``","``clang::convergent``","``clang::convergent``","","","","Yes"
+
+The ``convergent`` attribute can be placed on a function declaration. It is
+translated into the LLVM ``convergent`` attribute, which indicates that the call
+instructions of a function with this attribute cannot be made control-dependent
+on any additional values.
+
+In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA,
+the call instructions of a function with this attribute must be executed by
+all work items or threads in a work group or sub group.
+
+This attribute is different from ``noduplicate`` because it allows duplicating
+function calls if it can be proved that the duplicated function calls are
+not made control-dependent on any additional values, e.g., unrolling a loop
+executed by all work items.
+
+Sample usage:
+.. code-block:: c
+
+  void convfunc(void) __attribute__((convergent));
+  // Setting it as a C++11 attribute is also valid in a C++ program.
+  // void convfunc(void) [[clang::convergent]];
+
+
+cpu_dispatch
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``cpu_dispatch``","``clang::cpu_dispatch``","``clang::cpu_dispatch``","``cpu_dispatch``","","","Yes"
+
+The ``cpu_specific`` and ``cpu_dispatch`` attributes are used to define and
+resolve multiversioned functions. This form of multiversioning provides a
+mechanism for declaring versions across translation units and manually
+specifying the resolved function list. A specified CPU defines a set of minimum
+features that are required for the function to be called. The result of this is
+that future processors execute the most restrictive version of the function the
+new processor can execute.
+
+Function versions are defined with ``cpu_specific``, which takes one or more CPU
+names as a parameter. For example:
+
+.. code-block:: c
+
+  // Declares and defines the ivybridge version of single_cpu.
+  __attribute__((cpu_specific(ivybridge)))
+  void single_cpu(void){}
+
+  // Declares and defines the atom version of single_cpu.
+  __attribute__((cpu_specific(atom)))
+  void single_cpu(void){}
+
+  // Declares and defines both the ivybridge and atom version of multi_cpu.
+  __attribute__((cpu_specific(ivybridge, atom)))
+  void multi_cpu(void){}
+
+A dispatching (or resolving) function can be declared anywhere in a project's
+source code with ``cpu_dispatch``. This attribute takes one or more CPU names
+as a parameter (like ``cpu_specific``). Functions marked with ``cpu_dispatch``
+are not expected to be defined, only declared. If such a marked function has a
+definition, any side effects of the function are ignored; trivial function
+bodies are permissible for ICC compatibility.
+
+.. code-block:: c
+
+  // Creates a resolver for single_cpu above.
+  __attribute__((cpu_dispatch(ivybridge, atom)))
+  void single_cpu(void){}
+
+  // Creates a resolver for multi_cpu, but adds a 3rd version defined in another
+  // translation unit.
+  __attribute__((cpu_dispatch(ivybridge, atom, sandybridge)))
+  void multi_cpu(void){}
+
+Note that it is possible to have a resolving function that dispatches based on
+more or fewer options than are present in the program. Specifying fewer will
+result in the omitted options not being considered during resolution. Specifying
+a version for resolution that isn't defined in the program will result in a
+linking failure.
+
+It is also possible to specify a CPU name of ``generic`` which will be resolved
+if the executing processor doesn't satisfy the features required in the CPU
+name. The behavior of a program executing on a processor that doesn't satisfy
+any option of a multiversioned function is undefined.
+
+
+cpu_specific
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``cpu_specific``","``clang::cpu_specific``","``clang::cpu_specific``","``cpu_specific``","","","Yes"
+
+The ``cpu_specific`` and ``cpu_dispatch`` attributes are used to define and
+resolve multiversioned functions. This form of multiversioning provides a
+mechanism for declaring versions across translation units and manually
+specifying the resolved function list. A specified CPU defines a set of minimum
+features that are required for the function to be called. The result of this is
+that future processors execute the most restrictive version of the function the
+new processor can execute.
+
+Function versions are defined with ``cpu_specific``, which takes one or more CPU
+names as a parameter. For example:
+
+.. code-block:: c
+
+  // Declares and defines the ivybridge version of single_cpu.
+  __attribute__((cpu_specific(ivybridge)))
+  void single_cpu(void){}
+
+  // Declares and defines the atom version of single_cpu.
+  __attribute__((cpu_specific(atom)))
+  void single_cpu(void){}
+
+  // Declares and defines both the ivybridge and atom version of multi_cpu.
+  __attribute__((cpu_specific(ivybridge, atom)))
+  void multi_cpu(void){}
+
+A dispatching (or resolving) function can be declared anywhere in a project's
+source code with ``cpu_dispatch``. This attribute takes one or more CPU names
+as a parameter (like ``cpu_specific``). Functions marked with ``cpu_dispatch``
+are not expected to be defined, only declared. If such a marked function has a
+definition, any side effects of the function are ignored; trivial function
+bodies are permissible for ICC compatibility.
+
+.. code-block:: c
+
+  // Creates a resolver for single_cpu above.
+  __attribute__((cpu_dispatch(ivybridge, atom)))
+  void single_cpu(void){}
+
+  // Creates a resolver for multi_cpu, but adds a 3rd version defined in another
+  // translation unit.
+  __attribute__((cpu_dispatch(ivybridge, atom, sandybridge)))
+  void multi_cpu(void){}
+
+Note that it is possible to have a resolving function that dispatches based on
+more or fewer options than are present in the program. Specifying fewer will
+result in the omitted options not being considered during resolution. Specifying
+a version for resolution that isn't defined in the program will result in a
+linking failure.
+
+It is also possible to specify a CPU name of ``generic`` which will be resolved
+if the executing processor doesn't satisfy the features required in the CPU
+name. The behavior of a program executing on a processor that doesn't satisfy
+any option of a multiversioned function is undefined.
+
+
+deprecated
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``deprecated``","``gnu::deprecated`` |br| ``deprecated``","``deprecated``","``deprecated``","","",""
+
+The ``deprecated`` attribute can be applied to a function, a variable, or a
+type. This is useful when identifying functions, variables, or types that are
+expected to be removed in a future version of a program.
+
+Consider the function declaration for a hypothetical function ``f``:
+
+.. code-block:: c++
+
+  void f(void) __attribute__((deprecated("message", "replacement")));
+
+When spelled as ``__attribute__((deprecated))``, the deprecated attribute can have
+two optional string arguments. The first one is the message to display when
+emitting the warning; the second one enables the compiler to provide a Fix-It
+to replace the deprecated name with a new name. Otherwise, when spelled as
+``[[gnu::deprecated]]`` or ``[[deprecated]]``, the attribute can have one optional
+string argument which is the message to display when emitting the warning.
+
+
+diagnose_if
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``diagnose_if``","","","","","",""
+
+The ``diagnose_if`` attribute can be placed on function declarations to emit
+warnings or errors at compile-time if calls to the attributed function meet
+certain user-defined criteria. For example:
+
+.. code-block:: c
+
+  int abs(int a)
+    __attribute__((diagnose_if(a >= 0, "Redundant abs call", "warning")));
+  int must_abs(int a)
+    __attribute__((diagnose_if(a >= 0, "Redundant abs call", "error")));
+
+  int val = abs(1); // warning: Redundant abs call
+  int val2 = must_abs(1); // error: Redundant abs call
+  int val3 = abs(val);
+  int val4 = must_abs(val); // Because run-time checks are not emitted for
+                            // diagnose_if attributes, this executes without
+                            // issue.
+
+
+``diagnose_if`` is closely related to ``enable_if``, with a few key differences:
+
+* Overload resolution is not aware of ``diagnose_if`` attributes: they're
+  considered only after we select the best candidate from a given candidate set.
+* Function declarations that differ only in their ``diagnose_if`` attributes are
+  considered to be redeclarations of the same function (not overloads).
+* If the condition provided to ``diagnose_if`` cannot be evaluated, no
+  diagnostic will be emitted.
+
+Otherwise, ``diagnose_if`` is essentially the logical negation of ``enable_if``.
+
+As a result of bullet number two, ``diagnose_if`` attributes will stack on the
+same function. For example:
+
+.. code-block:: c
+
+  int foo() __attribute__((diagnose_if(1, "diag1", "warning")));
+  int foo() __attribute__((diagnose_if(1, "diag2", "warning")));
+
+  int bar = foo(); // warning: diag1
+                   // warning: diag2
+  int (*fooptr)(void) = foo; // warning: diag1
+                             // warning: diag2
+
+  constexpr int supportsAPILevel(int N) { return N < 5; }
+  int baz(int a)
+    __attribute__((diagnose_if(!supportsAPILevel(10),
+                               "Upgrade to API level 10 to use baz", "error")));
+  int baz(int a)
+    __attribute__((diagnose_if(!a, "0 is not recommended.", "warning")));
+
+  int (*bazptr)(int) = baz; // error: Upgrade to API level 10 to use baz
+  int v = baz(0); // error: Upgrade to API level 10 to use baz
+
+Query for this feature with ``__has_attribute(diagnose_if)``.
+
+
+disable_tail_calls
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``disable_tail_calls``","``clang::disable_tail_calls``","``clang::disable_tail_calls``","","","","Yes"
+
+The ``disable_tail_calls`` attribute instructs the backend to not perform tail call optimization inside the marked function.
+
+For example:
+
+  .. code-block:: c
+
+    int callee(int);
+
+    int foo(int a) __attribute__((disable_tail_calls)) {
+      return callee(a); // This call is not tail-call optimized.
+    }
+
+Marking virtual functions as ``disable_tail_calls`` is legal.
+
+  .. code-block:: c++
+
+    int callee(int);
+
+    class Base {
+    public:
+      [[clang::disable_tail_calls]] virtual int foo1() {
+        return callee(); // This call is not tail-call optimized.
+      }
+    };
+
+    class Derived1 : public Base {
+    public:
+      int foo1() override {
+        return callee(); // This call is tail-call optimized.
+      }
+    };
+
+
+enable_if
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``enable_if``","","","","","","Yes"
+
+.. Note:: Some features of this attribute are experimental. The meaning of
+  multiple enable_if attributes on a single declaration is subject to change in
+  a future version of clang. Also, the ABI is not standardized and the name
+  mangling may change in future versions. To avoid that, use asm labels.
+
+The ``enable_if`` attribute can be placed on function declarations to control
+which overload is selected based on the values of the function's arguments.
+When combined with the ``overloadable`` attribute, this feature is also
+available in C.
+
+.. code-block:: c++
+
+  int isdigit(int c);
+  int isdigit(int c) __attribute__((enable_if(c <= -1 || c > 255, "chosen when 'c' is out of range"))) __attribute__((unavailable("'c' must have the value of an unsigned char or EOF")));
+
+  void foo(char c) {
+    isdigit(c);
+    isdigit(10);
+    isdigit(-10);  // results in a compile-time error.
+  }
+
+The enable_if attribute takes two arguments, the first is an expression written
+in terms of the function parameters, the second is a string explaining why this
+overload candidate could not be selected to be displayed in diagnostics. The
+expression is part of the function signature for the purposes of determining
+whether it is a redeclaration (following the rules used when determining
+whether a C++ template specialization is ODR-equivalent), but is not part of
+the type.
+
+The enable_if expression is evaluated as if it were the body of a
+bool-returning constexpr function declared with the arguments of the function
+it is being applied to, then called with the parameters at the call site. If the
+result is false or could not be determined through constant expression
+evaluation, then this overload will not be chosen and the provided string may
+be used in a diagnostic if the compile fails as a result.
+
+Because the enable_if expression is an unevaluated context, there are no global
+state changes, nor the ability to pass information from the enable_if
+expression to the function body. For example, suppose we want calls to
+strnlen(strbuf, maxlen) to resolve to strnlen_chk(strbuf, maxlen, size of
+strbuf) only if the size of strbuf can be determined:
+
+.. code-block:: c++
+
+  __attribute__((always_inline))
+  static inline size_t strnlen(const char *s, size_t maxlen)
+    __attribute__((overloadable))
+    __attribute__((enable_if(__builtin_object_size(s, 0) != -1,
+                             "chosen when the buffer size is known but 'maxlen' is not")))
+  {
+    return strnlen_chk(s, maxlen, __builtin_object_size(s, 0));
+  }
+
+Multiple enable_if attributes may be applied to a single declaration. In this
+case, the enable_if expressions are evaluated from left to right in the
+following manner. First, the candidates whose enable_if expressions evaluate to
+false or cannot be evaluated are discarded. If the remaining candidates do not
+share ODR-equivalent enable_if expressions, the overload resolution is
+ambiguous. Otherwise, enable_if overload resolution continues with the next
+enable_if attribute on the candidates that have not been discarded and have
+remaining enable_if attributes. In this way, we pick the most specific
+overload out of a number of viable overloads using enable_if.
+
+.. code-block:: c++
+
+  void f() __attribute__((enable_if(true, "")));  // #1
+  void f() __attribute__((enable_if(true, ""))) __attribute__((enable_if(true, "")));  // #2
+
+  void g(int i, int j) __attribute__((enable_if(i, "")));  // #1
+  void g(int i, int j) __attribute__((enable_if(j, ""))) __attribute__((enable_if(true, "")));  // #2
+
+In this example, a call to f() is always resolved to #2, as the first enable_if
+expression is ODR-equivalent for both declarations, but #1 does not have another
+enable_if expression to continue evaluating, so the next round of evaluation has
+only a single candidate. In a call to g(1, 1), the call is ambiguous even though
+#2 has more enable_if attributes, because the first enable_if expressions are
+not ODR-equivalent.
+
+Query for this feature with ``__has_attribute(enable_if)``.
+
+Note that functions with one or more ``enable_if`` attributes may not have
+their address taken, unless all of the conditions specified by said
+``enable_if`` are constants that evaluate to ``true``. For example:
+
+.. code-block:: c
+
+  const int TrueConstant = 1;
+  const int FalseConstant = 0;
+  int f(int a) __attribute__((enable_if(a > 0, "")));
+  int g(int a) __attribute__((enable_if(a == 0 || a != 0, "")));
+  int h(int a) __attribute__((enable_if(1, "")));
+  int i(int a) __attribute__((enable_if(TrueConstant, "")));
+  int j(int a) __attribute__((enable_if(FalseConstant, "")));
+
+  void fn() {
+    int (*ptr)(int);
+    ptr = &f; // error: 'a > 0' is not always true
+    ptr = &g; // error: 'a == 0 || a != 0' is not a truthy constant
+    ptr = &h; // OK: 1 is a truthy constant
+    ptr = &i; // OK: 'TrueConstant' is a truthy constant
+    ptr = &j; // error: 'FalseConstant' is a constant, but not truthy
+  }
+
+Because ``enable_if`` evaluation happens during overload resolution,
+``enable_if`` may give unintuitive results when used with templates, depending
+on when overloads are resolved. In the example below, clang will emit a
+diagnostic about no viable overloads for ``foo`` in ``bar``, but not in ``baz``:
+
+.. code-block:: c++
+
+  double foo(int i) __attribute__((enable_if(i > 0, "")));
+  void *foo(int i) __attribute__((enable_if(i <= 0, "")));
+  template <int I>
+  auto bar() { return foo(I); }
+
+  template <typename T>
+  auto baz() { return foo(T::number); }
+
+  struct WithNumber { constexpr static int number = 1; };
+  void callThem() {
+    bar<sizeof(WithNumber)>();
+    baz<WithNumber>();
+  }
+
+This is because, in ``bar``, ``foo`` is resolved prior to template
+instantiation, so the value for ``I`` isn't known (thus, both ``enable_if``
+conditions for ``foo`` fail). However, in ``baz``, ``foo`` is resolved during
+template instantiation, so the value for ``T::number`` is known.
+
+
+exclude_from_explicit_instantiation
+-----------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``exclude_from_explicit_instantiation``","``clang::exclude_from_explicit_instantiation``","``clang::exclude_from_explicit_instantiation``","","","","Yes"
+
+The ``exclude_from_explicit_instantiation`` attribute opts out a member of a
+class template from being part of explicit template instantiations of that
+class template. This means that an explicit instantiation will not instantiate
+members of the class template marked with the attribute, but also that code
+where an extern template declaration of the enclosing class template is visible
+will not take for granted that an external instantiation of the class template
+would provide those members (which would otherwise be a link error, since the
+explicit instantiation won't provide those members). For example, let's say we
+don't want the ``data()`` method to be part of libc++'s ABI. To make sure it
+is not exported from the dylib, we give it hidden visibility:
+
+  .. code-block:: c++
+
+    // in <string>
+    template <class CharT>
+    class basic_string {
+    public:
+      __attribute__((__visibility__("hidden")))
+      const value_type* data() const noexcept { ... }
+    };
+
+    template class basic_string<char>;
+
+Since an explicit template instantiation declaration for ``basic_string<char>``
+is provided, the compiler is free to assume that ``basic_string<char>::data()``
+will be provided by another translation unit, and it is free to produce an
+external call to this function. However, since ``data()`` has hidden visibility
+and the explicit template instantiation is provided in a shared library (as
+opposed to simply another translation unit), ``basic_string<char>::data()``
+won't be found and a link error will ensue. This happens because the compiler
+assumes that ``basic_string<char>::data()`` is part of the explicit template
+instantiation declaration, when it really isn't. To tell the compiler that
+``data()`` is not part of the explicit template instantiation declaration, the
+``exclude_from_explicit_instantiation`` attribute can be used:
+
+  .. code-block:: c++
+
+    // in <string>
+    template <class CharT>
+    class basic_string {
+    public:
+      __attribute__((__visibility__("hidden")))
+      __attribute__((exclude_from_explicit_instantiation))
+      const value_type* data() const noexcept { ... }
+    };
+
+    template class basic_string<char>;
+
+Now, the compiler won't assume that ``basic_string<char>::data()`` is provided
+externally despite there being an explicit template instantiation declaration:
+the compiler will implicitly instantiate ``basic_string<char>::data()`` in the
+TUs where it is used.
+
+This attribute can be used on static and non-static member functions of class
+templates, static data members of class templates and member classes of class
+templates.
+
+
+external_source_symbol
+----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``external_source_symbol``","``clang::external_source_symbol``","``clang::external_source_symbol``","","","","Yes"
+
+The ``external_source_symbol`` attribute specifies that a declaration originates
+from an external source and describes the nature of that source.
+
+The fact that Clang is capable of recognizing declarations that were defined
+externally can be used to provide better tooling support for mixed-language
+projects or projects that rely on auto-generated code. For instance, an IDE that
+uses Clang and that supports mixed-language projects can use this attribute to
+provide a correct 'jump-to-definition' feature. For a concrete example,
+consider a protocol that's defined in a Swift file:
+
+.. code-block:: swift
+
+  @objc public protocol SwiftProtocol {
+    func method()
+  }
+
+This protocol can be used from Objective-C code by including a header file that
+was generated by the Swift compiler. The declarations in that header can use
+the ``external_source_symbol`` attribute to make Clang aware of the fact
+that ``SwiftProtocol`` actually originates from a Swift module:
+
+.. code-block:: objc
+
+  __attribute__((external_source_symbol(language="Swift",defined_in="module")))
+  @protocol SwiftProtocol
+  @required
+  - (void) method;
+  @end
+
+Consequently, when 'jump-to-definition' is performed at a location that
+references ``SwiftProtocol``, the IDE can jump to the original definition in
+the Swift source file rather than jumping to the Objective-C declaration in the
+auto-generated header file.
+
+The ``external_source_symbol`` attribute is a comma-separated list that includes
+clauses that describe the origin and the nature of the particular declaration.
+Those clauses can be:
+
+language=\ *string-literal*
+  The name of the source language in which this declaration was defined.
+
+defined_in=\ *string-literal*
+  The name of the source container in which the declaration was defined. The
+  exact definition of source container is language-specific, e.g. Swift's
+  source containers are modules, so ``defined_in`` should specify the Swift
+  module name.
+
+generated_declaration
+  This declaration was automatically generated by some tool.
+
+The clauses can be specified in any order. The clauses that are listed above are
+all optional, but the attribute has to have at least one clause.
+
+
+flatten
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``flatten``","``gnu::flatten``","","","","","Yes"
+
+The ``flatten`` attribute causes calls within the attributed function to
+be inlined unless it is impossible to do so, for example if the body of the
+callee is unavailable or if the callee has the ``noinline`` attribute.
+
+
+force_align_arg_pointer
+-----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``force_align_arg_pointer``","``gnu::force_align_arg_pointer``","","","","",""
+
+Use this attribute to force stack alignment.
+
+Legacy x86 code uses 4-byte stack alignment. Newer aligned SSE instructions
+(like 'movaps') that work with the stack require operands to be 16-byte aligned.
+This attribute realigns the stack in the function prologue to make sure the
+stack can be used with SSE instructions.
+
+Note that the x86_64 ABI forces 16-byte stack alignment at the call site.
+Because of this, 'force_align_arg_pointer' is not needed on x86_64, except in
+rare cases where the caller does not align the stack properly (e.g. flow
+jumps from i386 arch code).
+
+  .. code-block:: c
+
+    __attribute__ ((force_align_arg_pointer))
+    void f () {
+      ...
+    }
+
+
+format
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``format``","``gnu::format``","","","","",""
+
+Clang supports the ``format`` attribute, which indicates that the function
+accepts a ``printf`` or ``scanf``-like format string and corresponding
+arguments or a ``va_list`` that contains these arguments.
+
+Please see `GCC documentation about format attribute
+<http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html>`_ to find details
+about attribute syntax.
+
+Clang implements two kinds of checks with this attribute.
+
+#. Clang checks that the function with the ``format`` attribute is called with
+   a format string that uses format specifiers that are allowed, and that
+   arguments match the format string.  This is the ``-Wformat`` warning, it is
+   on by default.
+
+#. Clang checks that the format string argument is a literal string.  This is
+   the ``-Wformat-nonliteral`` warning, it is off by default.
+
+   Clang implements this mostly the same way as GCC, but there is a difference
+   for functions that accept a ``va_list`` argument (for example, ``vprintf``).
+   GCC does not emit ``-Wformat-nonliteral`` warning for calls to such
+   functions.  Clang does not warn if the format string comes from a function
+   parameter, where the function is annotated with a compatible attribute,
+   otherwise it warns.  For example:
+
+   .. code-block:: c
+
+     __attribute__((__format__ (__scanf__, 1, 3)))
+     void foo(const char* s, char *buf, ...) {
+       va_list ap;
+       va_start(ap, buf);
+
+       vprintf(s, ap); // warning: format string is not a string literal
+     }
+
+   In this case we warn because ``s`` contains a format string for a
+   ``scanf``-like function, but it is passed to a ``printf``-like function.
+
+   If the attribute is removed, clang still warns, because the format string is
+   not a string literal.
+
+   Another example:
+
+   .. code-block:: c
+
+     __attribute__((__format__ (__printf__, 1, 3)))
+     void foo(const char* s, char *buf, ...) {
+       va_list ap;
+       va_start(ap, buf);
+
+       vprintf(s, ap); // no warning
+     }
+
+   In this case Clang does not warn because the format string ``s`` and
+   the corresponding arguments are annotated.  If the arguments are
+   incorrect, the caller of ``foo`` will receive a warning.
+
+
+gnu_inline
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``gnu_inline``","``gnu::gnu_inline``","","","","","Yes"
+
+The ``gnu_inline`` attribute changes the meaning of ``extern inline`` to use GNU inline
+semantics, meaning:
+
+* If any declaration that is declared ``inline`` is not declared ``extern``,
+  then the ``inline`` keyword is just a hint. In particular, an out-of-line
+  definition is still emitted for a function with external linkage, even if all
+  call sites are inlined, unlike in C99 and C++ inline semantics.
+
+* If all declarations that are declared ``inline`` are also declared
+  ``extern``, then the function body is present only for inlining and no
+  out-of-line version is emitted.
+
+Some important consequences: ``static inline`` emits an out-of-line
+version if needed, a plain ``inline`` definition emits an out-of-line version
+always, and an ``extern inline`` definition (in a header) followed by a
+(non-``extern``) ``inline`` declaration in a source file emits an out-of-line
+version of the function in that source file but provides the function body for
+inlining to all includers of the header.
+
+Either ``__GNUC_GNU_INLINE__`` (GNU inline semantics) or
+``__GNUC_STDC_INLINE__`` (C99 semantics) will be defined (they are mutually
+exclusive). If ``__GNUC_STDC_INLINE__`` is defined, then the ``gnu_inline``
+function attribute can be used to get GNU inline semantics on a per function
+basis. If ``__GNUC_GNU_INLINE__`` is defined, then the translation unit is
+already being compiled with GNU inline semantics as the implied default. It is
+unspecified which macro is defined in a C++ compilation.
+
+GNU inline semantics are the default behavior with ``-std=gnu89``,
+``-std=c89``, ``-std=c94``, or ``-fgnu89-inline``.
+
+
+ifunc
+-----
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ifunc``","``gnu::ifunc``","","","","","Yes"
+
+``__attribute__((ifunc("resolver")))`` is used to mark that the address of a declaration should be resolved at runtime by calling a resolver function.
+
+The symbol name of the resolver function is given in quotes.  A function with this name (after mangling) must be defined in the current translation unit; it may be ``static``.  The resolver function should return a pointer.
+
+The ``ifunc`` attribute may only be used on a function declaration.  A function declaration with an ``ifunc`` attribute is considered to be a definition of the declared entity.  The entity must not have weak linkage; for example, in C++, it cannot be applied to a declaration if a definition at that location would be considered inline.
+
+Not all targets support this attribute. ELF target support depends on both the linker and runtime linker, and is available in at least lld 4.0 and later, binutils 2.20.1 and later, glibc v2.11.1 and later, and FreeBSD 9.1 and later. Non-ELF targets currently do not support this attribute.
+
+
+internal_linkage
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``internal_linkage``","``clang::internal_linkage``","``clang::internal_linkage``","","","","Yes"
+
+The ``internal_linkage`` attribute changes the linkage type of the declaration to internal.
+This is similar to C-style ``static``, but can be used on classes and class methods. When applied to a class definition,
+this attribute affects all methods and static data members of that class.
+This can be used to contain the ABI of a C++ library by excluding unwanted class methods from the export tables.
+
+
+interrupt (ARM)
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``interrupt``","``gnu::interrupt``","","","","",""
+
+Clang supports the GNU style ``__attribute__((interrupt("TYPE")))`` attribute on
+ARM targets. This attribute may be attached to a function definition and
+instructs the backend to generate appropriate function entry/exit code so that
+it can be used directly as an interrupt service routine.
+
+The parameter passed to the interrupt attribute is optional, but if
+provided it must be a string literal with one of the following values: "IRQ",
+"FIQ", "SWI", "ABORT", "UNDEF".
+
+The semantics are as follows:
+
+- If the function is AAPCS, Clang instructs the backend to realign the stack to
+  8 bytes on entry. This is a general requirement of the AAPCS at public
+  interfaces, but may not hold when an exception is taken. Doing this allows
+  other AAPCS functions to be called.
+- If the CPU is M-class this is all that needs to be done since the architecture
+  itself is designed in such a way that functions obeying the normal AAPCS ABI
+  constraints are valid exception handlers.
+- If the CPU is not M-class, the prologue and epilogue are modified to save all
+  non-banked registers that are used, so that upon return the user-mode state
+  will not be corrupted. Note that to avoid unnecessary overhead, only
+  general-purpose (integer) registers are saved in this way. If VFP operations
+  are needed, that state must be saved manually.
+
+  Specifically, interrupt kinds other than "FIQ" will save all core registers
+  except "lr" and "sp". "FIQ" interrupts will save r0-r7.
+- If the CPU is not M-class, the return instruction is changed to one of the
+  canonical sequences permitted by the architecture for exception return. Where
+  possible the function itself will make the necessary "lr" adjustments so that
+  the "preferred return address" is selected.
+
+  Unfortunately the compiler is unable to make this guarantee for an "UNDEF"
+  handler, where the offset from "lr" to the preferred return address depends on
+  the execution state of the code which generated the exception. In this case
+  a sequence equivalent to "movs pc, lr" will be used.
+
+
+interrupt (AVR)
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``interrupt``","``gnu::interrupt``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((interrupt))`` attribute on
+AVR targets. This attribute may be attached to a function definition and instructs
+the backend to generate appropriate function entry/exit code so that it can be used
+directly as an interrupt service routine.
+
+On the AVR, the hardware globally disables interrupts when an interrupt is executed.
+The first instruction of an interrupt handler declared with this attribute is a SEI
+instruction to re-enable interrupts. See also the signal attribute that
+does not insert a SEI instruction.
+
+
+interrupt (MIPS)
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``interrupt``","``gnu::interrupt``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((interrupt("ARGUMENT")))`` attribute on
+MIPS targets. This attribute may be attached to a function definition and instructs
+the backend to generate appropriate function entry/exit code so that it can be used
+directly as an interrupt service routine.
+
+By default, the compiler will produce a function prologue and epilogue suitable for
+an interrupt service routine that handles an External Interrupt Controller (eic)
+generated interrupt. This behaviour can be explicitly requested with the "eic"
+argument.
+
+Otherwise, for use with vectored interrupt mode, the argument passed should be
+of the form "vector=LEVEL" where LEVEL is one of the following values:
+"sw0", "sw1", "hw0", "hw1", "hw2", "hw3", "hw4", "hw5". The compiler will
+then set the interrupt mask to the corresponding level which will mask all
+interrupts up to and including the argument.
+
+The semantics are as follows:
+
+- The prologue is modified so that the Exception Program Counter (EPC) and
+  Status coprocessor registers are saved to the stack. The interrupt mask is
+  set so that the function can only be interrupted by a higher priority
+  interrupt. The epilogue will restore the previous values of EPC and Status.
+
+- The prologue and epilogue are modified to save and restore all non-kernel
+  registers as necessary.
+
+- The FPU is disabled in the prologue, as the floating point registers are not
+  spilled to the stack.
+
+- The function return sequence is changed to use an exception return instruction.
+
+- The parameter sets the interrupt mask for the function corresponding to the
+  interrupt level specified. If no mask is specified the interrupt mask
+  defaults to "eic".
+
+
+interrupt (RISCV)
+-----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``interrupt``","``gnu::interrupt``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((interrupt))`` attribute on RISCV
+targets. This attribute may be attached to a function definition and instructs
+the backend to generate appropriate function entry/exit code so that it can be
+used directly as an interrupt service routine.
+
+Permissible values for this parameter are ``user``, ``supervisor``,
+and ``machine``. If there is no parameter, then it defaults to machine.
+
+Repeated interrupt attribute on the same declaration will cause a warning
+to be emitted. In case of repeated declarations, the last one prevails.
+
+Refer to:
+https://gcc.gnu.org/onlinedocs/gcc/RISC-V-Function-Attributes.html
+https://riscv.org/specifications/privileged-isa/
+The RISC-V Instruction Set Manual Volume II: Privileged Architecture
+Version 1.10.
+
+
+kernel
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``kernel``","","","","","","Yes"
+
+``__attribute__((kernel))`` is used to mark a ``kernel`` function in
+RenderScript.
+
+In RenderScript, ``kernel`` functions are used to express data-parallel
+computations.  The RenderScript runtime efficiently parallelizes ``kernel``
+functions to run on computational resources such as multi-core CPUs and GPUs.
+See the RenderScript_ documentation for more information.
+
+.. _RenderScript: https://developer.android.com/guide/topics/renderscript/compute.html
+
+
+lifetimebound
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``lifetimebound``","``clang::lifetimebound``","","","","",""
+
+The ``lifetimebound`` attribute indicates that a resource owned by
+a function parameter or implicit object parameter
+is retained by the return value of the annotated function
+(or, for a parameter of a constructor, in the value of the constructed object).
+It is only supported in C++.
+
+This attribute provides an experimental implementation of the facility
+described in the C++ committee paper `P0936R0 <http://wg21.link/p0936r0>`_,
+and is subject to change as the design of the corresponding functionality
+changes.
+
+
+long_call, far
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``long_call`` |br| ``far``","``gnu::long_call`` |br| ``gnu::far``","","","","","Yes"
+
+Clang supports the ``__attribute__((long_call))``, ``__attribute__((far))``,
+and ``__attribute__((near))`` attributes on MIPS targets. These attributes may
+only be added to function declarations and change the code generated
+by the compiler when directly calling the function. The ``near`` attribute
+allows calls to the function to be made using the ``jal`` instruction, which
+requires the function to be located in the same naturally aligned 256MB
+segment as the caller.  The ``long_call`` and ``far`` attributes are synonyms
+and require the use of a different call sequence that works regardless
+of the distance between the functions.
+
+These attributes have no effect for position-independent code.
+
+These attributes take priority over command line switches such
+as ``-mlong-calls`` and ``-mno-long-calls``.
+
+
+micromips
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``micromips``","``gnu::micromips``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((micromips))`` and
+``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes
+may be attached to a function definition and instruct the backend to generate
+or not to generate microMIPS code for that function.
+
+These attributes override the ``-mmicromips`` and ``-mno-micromips`` options
+on the command line.
+
+
+min_vector_width
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``min_vector_width``","``clang::min_vector_width``","``clang::min_vector_width``","","","","Yes"
+
+Clang supports the ``__attribute__((min_vector_width(width)))`` attribute. This
+attribute may be attached to a function and informs the backend that this
+function desires vectors of at least this width to be generated. Target-specific
+maximum vector widths still apply. This means even if you ask for something
+larger than the target supports, you will only get what the target supports.
+This attribute is meant to be a hint to control target heuristics that may
+generate narrower vectors than what the target hardware supports.
+
+This is currently used by the X86 target to allow some CPUs that support 512-bit
+vectors to be limited to using 256-bit vectors to avoid frequency penalties.
+This is currently enabled with the ``-mprefer-vector-width=256`` command line
+option. The ``min_vector_width`` attribute can be used to prevent the backend
+from trying to split vector operations to match the ``prefer-vector-width``. All
+X86 vector intrinsics from x86intrin.h already set this attribute. Additionally,
+use of any of the X86-specific vector builtins will implicitly set this
+attribute on the calling function. The intent is that explicitly writing vector
+code using the X86 intrinsics will prevent ``prefer-vector-width`` from
+affecting the code.
+
+
+no_caller_saved_registers
+-------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_caller_saved_registers``","``gnu::no_caller_saved_registers``","","","","",""
+
+Use this attribute to indicate that the specified function has no
+caller-saved registers. That is, all registers are callee-saved except for
+registers used for passing parameters to the function or returning parameters
+from the function.
+The compiler saves and restores any modified registers that were not used for
+passing or returning arguments to the function.
+
+The user can call functions specified with the 'no_caller_saved_registers'
+attribute from an interrupt handler without saving and restoring all
+call-clobbered registers.
+
+Note that 'no_caller_saved_registers' attribute is not a calling convention.
+In fact, it only overrides the decision of which registers should be saved by
+the caller, but not how the parameters are passed from the caller to the callee.
+
+For example:
+
+  .. code-block:: c
+
+    __attribute__ ((no_caller_saved_registers, fastcall))
+    void f (int arg1, int arg2) {
+      ...
+    }
+
+  In this case parameters 'arg1' and 'arg2' will be passed in registers.
+  In this case, on 32-bit x86 targets, the function 'f' will use ECX and EDX as
+  register parameters. However, it will not assume any scratch registers and
+  should save and restore any modified registers except for ECX and EDX.
+
+
+no_sanitize
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_sanitize``","``clang::no_sanitize``","``clang::no_sanitize``","","","","Yes"
+
+Use the ``no_sanitize`` attribute on a function or a global variable
+declaration to specify that a particular instrumentation or set of
+instrumentations should not be applied. The attribute takes a list of
+string literals, which have the same meaning as values accepted by the
+``-fno-sanitize=`` flag. For example,
+``__attribute__((no_sanitize("address", "thread")))`` specifies that
+AddressSanitizer and ThreadSanitizer should not be applied to the
+function or variable.
+
+See :ref:`Controlling Code Generation <controlling-code-generation>` for a
+full list of supported sanitizer flags.
+
+
+no_sanitize_address, no_address_safety_analysis
+-----------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_address_safety_analysis`` |br| ``no_sanitize_address`` |br| ``no_sanitize_thread`` |br| ``no_sanitize_memory``","``gnu::no_address_safety_analysis`` |br| ``gnu::no_sanitize_address`` |br| ``gnu::no_sanitize_thread`` |br| ``clang::no_sanitize_memory``","``clang::no_sanitize_memory``","","","","Yes"
+
+.. _langext-address_sanitizer:
+
+Use ``__attribute__((no_sanitize_address))`` on a function or a global
+variable declaration to specify that address safety instrumentation
+(e.g. AddressSanitizer) should not be applied.
+
+
+no_sanitize_memory
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_address_safety_analysis`` |br| ``no_sanitize_address`` |br| ``no_sanitize_thread`` |br| ``no_sanitize_memory``","``gnu::no_address_safety_analysis`` |br| ``gnu::no_sanitize_address`` |br| ``gnu::no_sanitize_thread`` |br| ``clang::no_sanitize_memory``","``clang::no_sanitize_memory``","","","","Yes"
+
+.. _langext-memory_sanitizer:
+
+Use ``__attribute__((no_sanitize_memory))`` on a function declaration to
+specify that checks for uninitialized memory should not be inserted
+(e.g. by MemorySanitizer). The function may still be instrumented by the tool
+to avoid false positives in other places.
+
+
+no_sanitize_thread
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_address_safety_analysis`` |br| ``no_sanitize_address`` |br| ``no_sanitize_thread`` |br| ``no_sanitize_memory``","``gnu::no_address_safety_analysis`` |br| ``gnu::no_sanitize_address`` |br| ``gnu::no_sanitize_thread`` |br| ``clang::no_sanitize_memory``","``clang::no_sanitize_memory``","","","","Yes"
+
+.. _langext-thread_sanitizer:
+
+Use ``__attribute__((no_sanitize_thread))`` on a function declaration to
+specify that checks for data races on plain (non-atomic) memory accesses should
+not be inserted by ThreadSanitizer. The function is still instrumented by the
+tool to avoid false positives and provide meaningful stack traces.
+
+
+no_split_stack
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_split_stack``","``gnu::no_split_stack``","","","","","Yes"
+
+The ``no_split_stack`` attribute disables the emission of the split stack
+preamble for a particular function. It has no effect if ``-fsplit-stack``
+is not specified.
+
+
+no_stack_protector
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_stack_protector``","``clang::no_stack_protector``","``clang::no_stack_protector``","","","","Yes"
+
+Clang supports the ``__attribute__((no_stack_protector))`` attribute which disables
+the stack protector on the specified function. This attribute is useful for
+selectively disabling the stack protector on some functions when building with
+``-fstack-protector`` compiler option.
+
+For example, it disables the stack protector for the function ``foo`` but function
+``bar`` will still be built with the stack protector with the ``-fstack-protector``
+option.
+
+.. code-block:: c
+
+    int __attribute__((no_stack_protector))
+    foo (int x); // stack protection will be disabled for foo.
+
+    int bar(int y); // bar can be built with the stack protector.
+
+
+noalias
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","``noalias``","","",""
+
+The ``noalias`` attribute indicates that the only memory accesses inside the
+function are loads and stores from objects pointed to by its pointer-typed
+arguments, with arbitrary offsets.
+
+
+nocf_check
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``nocf_check``","``gnu::nocf_check``","","","","","Yes"
+
+Jump Oriented Programming attacks rely on tampering with addresses used by
+indirect call / jmp, e.g. redirect control-flow to non-programmer
+intended bytes in the binary.
+X86 supports Indirect Branch Tracking (IBT) as part of Control-Flow
+Enforcement Technology (CET). IBT instruments ENDBR instructions used to
+specify valid targets of indirect call / jmp.
+The ``nocf_check`` attribute has two roles:
+
+1. Appertains to a function - do not add ENDBR instruction at the beginning of
+   the function.
+2. Appertains to a function pointer - do not track the target function of this
+   pointer (by adding nocf_check prefix to the indirect-call instruction).
+
+
+nodiscard, warn_unused_result
+-----------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``warn_unused_result``","``nodiscard`` |br| ``clang::warn_unused_result`` |br| ``gnu::warn_unused_result``","``nodiscard``","","","","Yes"
+
+Clang supports the ability to diagnose when the results of a function call
+expression are discarded under suspicious circumstances. A diagnostic is
+generated when a function or its return type is marked with ``[[nodiscard]]``
+(or ``__attribute__((warn_unused_result))``) and the function call appears as a
+potentially-evaluated discarded-value expression that is not explicitly cast to
+``void``.
+
+.. code-block:: c++
+
+  struct [[nodiscard]] error_info { /*...*/ };
+  error_info enable_missile_safety_mode();
+
+  void launch_missiles();
+  void test_missiles() {
+    enable_missile_safety_mode(); // diagnoses
+    launch_missiles();
+  }
+  error_info &foo();
+  void f() { foo(); } // Does not diagnose, error_info is a reference.
+
+
+noduplicate
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``noduplicate``","``clang::noduplicate``","``clang::noduplicate``","","","","Yes"
+
+The ``noduplicate`` attribute can be placed on function declarations to control
+whether function calls to this function can be duplicated or not as a result of
+optimizations. This is required for the implementation of functions with
+certain special requirements, like the OpenCL "barrier" function, that might
+need to be run concurrently by all the threads that are executing in lockstep
+on the hardware. For example this attribute applied on the function
+"nodupfunc" in the code below avoids that:
+
+.. code-block:: c
+
+  void nodupfunc() __attribute__((noduplicate));
+  // Setting it as a C++11 attribute is also valid
+  // void nodupfunc() [[clang::noduplicate]];
+  void foo();
+  void bar();
+
+  nodupfunc();
+  if (a > n) {
+    foo();
+  } else {
+    bar();
+  }
+
+gets possibly modified by some optimizations into code similar to this:
+
+.. code-block:: c
+
+  if (a > n) {
+    nodupfunc();
+    foo();
+  } else {
+    nodupfunc();
+    bar();
+  }
+
+where the call to "nodupfunc" is duplicated and sunk into the two branches
+of the condition.
+
+
+nomicromips
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``nomicromips``","``gnu::nomicromips``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((micromips))`` and
+``__attribute__((nomicromips))`` attributes on MIPS targets. These attributes
+may be attached to a function definition and instruct the backend to generate
+or not to generate microMIPS code for that function.
+
+These attributes override the ``-mmicromips`` and ``-mno-micromips`` options
+on the command line.
+
+
+noreturn
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","``noreturn``","","","","","Yes"
+
+A function declared as ``[[noreturn]]`` shall not return to its caller. The
+compiler will generate a diagnostic for a function declared as ``[[noreturn]]``
+that appears to be capable of returning to its caller.
+
+
+not_tail_called
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``not_tail_called``","``clang::not_tail_called``","``clang::not_tail_called``","","","","Yes"
+
+The ``not_tail_called`` attribute prevents tail-call optimization on statically bound calls. It has no effect on indirect calls. Virtual functions, Objective-C methods, and functions marked as ``always_inline`` cannot be marked as ``not_tail_called``.
+
+For example, it prevents tail-call optimization in the following case:
+
+  .. code-block:: c
+
+    int __attribute__((not_tail_called)) foo1(int);
+
+    int foo2(int a) {
+      return foo1(a); // No tail-call optimization on direct calls.
+    }
+
+However, it doesn't prevent tail-call optimization in this case:
+
+  .. code-block:: c
+
+    int __attribute__((not_tail_called)) foo1(int);
+
+    int foo2(int a) {
+      int (*fn)(int) = &foo1;
+
+      // not_tail_called has no effect on an indirect call even if the call can be
+      // resolved at compile time.
+      return (*fn)(a);
+    }
+
+Marking virtual functions as ``not_tail_called`` is an error:
+
+  .. code-block:: c++
+
+    class Base {
+    public:
+      // not_tail_called on a virtual function is an error.
+      [[clang::not_tail_called]] virtual int foo1();
+
+      virtual int foo2();
+
+      // Non-virtual functions can be marked ``not_tail_called``.
+      [[clang::not_tail_called]] int foo3();
+    };
+
+    class Derived1 : public Base {
+    public:
+      int foo1() override;
+
+      // not_tail_called on a virtual function is an error.
+      [[clang::not_tail_called]] int foo2() override;
+    };
+
+
+nothrow
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``nothrow``","``gnu::nothrow``","","``nothrow``","","","Yes"
+
+Clang supports the GNU style ``__attribute__((nothrow))`` and Microsoft style
+``__declspec(nothrow)`` attributes as an equivalent of ``noexcept`` on function
+declarations. This attribute informs the compiler that the annotated function
+does not throw an exception. This prevents exception-unwinding. This attribute
+is particularly useful on functions in the C Standard Library that are
+guaranteed to not throw an exception.
+
+
+ns_consumed
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ns_consumed``","``clang::ns_consumed``","``clang::ns_consumed``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+ns_consumes_self
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ns_consumes_self``","``clang::ns_consumes_self``","``clang::ns_consumes_self``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+ns_returns_autoreleased
+-----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ns_returns_autoreleased``","``clang::ns_returns_autoreleased``","``clang::ns_returns_autoreleased``","","","",""
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+ns_returns_not_retained
+-----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ns_returns_not_retained``","``clang::ns_returns_not_retained``","``clang::ns_returns_not_retained``","","","",""
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+ns_returns_retained
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ns_returns_retained``","``clang::ns_returns_retained``","``clang::ns_returns_retained``","","","",""
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+objc_boxable
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_boxable``","``clang::objc_boxable``","``clang::objc_boxable``","","","","Yes"
+
+Structs and unions marked with the ``objc_boxable`` attribute can be used
+with the Objective-C boxed expression syntax, ``@(...)``.
+
+**Usage**: ``__attribute__((objc_boxable))``. This attribute
+can only be placed on a declaration of a trivially-copyable struct or union:
+
+.. code-block:: objc
+
+  struct __attribute__((objc_boxable)) some_struct {
+    int i;
+  };
+  union __attribute__((objc_boxable)) some_union {
+    int i;
+    float f;
+  };
+  typedef struct __attribute__((objc_boxable)) _some_struct some_struct;
+
+  // ...
+
+  some_struct ss;
+  NSValue *boxed = @(ss);
+
+
+objc_method_family
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_method_family``","``clang::objc_method_family``","``clang::objc_method_family``","","","","Yes"
+
+Many methods in Objective-C have conventional meanings determined by their
+selectors. It is sometimes useful to be able to mark a method as having a
+particular conventional meaning despite not having the right selector, or as
+not having the conventional meaning that its selector would suggest. For these
+use cases, we provide an attribute to specifically describe the "method family"
+that a method belongs to.
+
+**Usage**: ``__attribute__((objc_method_family(X)))``, where ``X`` is one of
+``none``, ``alloc``, ``copy``, ``init``, ``mutableCopy``, or ``new``.  This
+attribute can only be placed at the end of a method declaration:
+
+.. code-block:: objc
+
+  - (NSString *)initMyStringValue __attribute__((objc_method_family(none)));
+
+Users who do not wish to change the conventional meaning of a method, and who
+merely want to document its non-standard retain and release semantics, should
+use the retaining behavior attributes (``ns_returns_retained``,
+``ns_returns_not_retained``, etc).
+
+Query for this feature with ``__has_attribute(objc_method_family)``.
+
+
+objc_requires_super
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_requires_super``","``clang::objc_requires_super``","``clang::objc_requires_super``","","","","Yes"
+
+Some Objective-C classes allow a subclass to override a particular method in a
+parent class but expect that the overriding method also calls the overridden
+method in the parent class. For these cases, we provide an attribute to
+designate that a method requires a "call to ``super``" in the overriding
+method in the subclass.
+
+**Usage**: ``__attribute__((objc_requires_super))``.  This attribute can only
+be placed at the end of a method declaration:
+
+.. code-block:: objc
+
+  - (void)foo __attribute__((objc_requires_super));
+
+This attribute can only be applied to method declarations within a class, and
+not a protocol.  Currently this attribute does not enforce any placement of
+where the call occurs in the overriding method (such as in the case of
+``-dealloc`` where the call must appear at the end).  It checks only that it
+exists.
+
+Note that on both OS X and iOS, the Foundation framework provides a
+convenience macro ``NS_REQUIRES_SUPER`` that provides syntactic sugar for this
+attribute:
+
+.. code-block:: objc
+
+  - (void)foo NS_REQUIRES_SUPER;
+
+This macro is conditionally defined depending on the compiler's support for
+this attribute.  If the compiler does not support the attribute the macro
+expands to nothing.
+
+Operationally, when a method has this annotation the compiler will warn if the
+implementation of an override in a subclass does not call super.  For example:
+
+.. code-block:: objc
+
+   warning: method possibly missing a [super AnnotMeth] call
+   - (void) AnnotMeth{};
+                      ^
+
+
+objc_runtime_name
+-----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_runtime_name``","``clang::objc_runtime_name``","``clang::objc_runtime_name``","","","","Yes"
+
+By default, the Objective-C interface or protocol identifier is used
+in the metadata name for that object. The ``objc_runtime_name``
+attribute allows annotated interfaces or protocols to use the
+specified string argument in the object's metadata name instead of the
+default name.
+
+**Usage**: ``__attribute__((objc_runtime_name("MyLocalName")))``.  This attribute
+can only be placed before an @protocol or @interface declaration:
+
+.. code-block:: objc
+
+  __attribute__((objc_runtime_name("MyLocalName")))
+  @interface Message
+  @end
+
+
+objc_runtime_visible
+--------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_runtime_visible``","``clang::objc_runtime_visible``","``clang::objc_runtime_visible``","","","","Yes"
+
+This attribute specifies that the Objective-C class to which it applies is visible to the Objective-C runtime but not to the linker. Classes annotated with this attribute cannot be subclassed and cannot have categories defined for them.
+
+
+optnone
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``optnone``","``clang::optnone``","``clang::optnone``","","","","Yes"
+
+The ``optnone`` attribute suppresses essentially all optimizations
+on a function or method, regardless of the optimization level applied to
+the compilation unit as a whole.  This is particularly useful when you
+need to debug a particular function, but it is infeasible to build the
+entire application without optimization.  Avoiding optimization on the
+specified function can improve the quality of the debugging information
+for that function.
+
+This attribute is incompatible with the ``always_inline`` and ``minsize``
+attributes.
+
+
+os_consumed
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``os_consumed``","``clang::os_consumed``","``clang::os_consumed``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+os_consumes_this
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``os_consumes_this``","``clang::os_consumes_this``","``clang::os_consumes_this``","","","",""
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+os_returns_not_retained
+-----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``os_returns_not_retained``","``clang::os_returns_not_retained``","``clang::os_returns_not_retained``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+os_returns_retained
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``os_returns_retained``","``clang::os_returns_retained``","``clang::os_returns_retained``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+os_returns_retained_on_non_zero
+-------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``os_returns_retained_on_non_zero``","``clang::os_returns_retained_on_non_zero``","``clang::os_returns_retained_on_non_zero``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+os_returns_retained_on_zero
+---------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``os_returns_retained_on_zero``","``clang::os_returns_retained_on_zero``","``clang::os_returns_retained_on_zero``","","","","Yes"
+
+The behavior of a function with respect to reference counting for Foundation
+(Objective-C), CoreFoundation (C) and OSObject (C++) is determined by a naming
+convention (e.g. functions starting with "get" are assumed to return at
+``+0``).
+
+It can be overridden using a family of the following attributes.  In
+Objective-C, the annotation ``__attribute__((ns_returns_retained))`` applied to
+a function communicates that the object is returned at ``+1``, and the caller
+is responsible for freeing it.
+Similarly, the annotation ``__attribute__((ns_returns_not_retained))``
+specifies that the object is returned at ``+0`` and the ownership remains with
+the callee.
+The annotation ``__attribute__((ns_consumes_self))`` specifies that
+the Objective-C method call consumes the reference to ``self``, e.g. by
+attaching it to a supplied parameter.
+Additionally, parameters can have an annotation
+``__attribute__((ns_consumed))``, which specifies that passing an owned object
+as that parameter effectively transfers the ownership, and the caller is no
+longer responsible for it.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+In C programs using CoreFoundation, a similar set of attributes:
+``__attribute__((cf_returns_not_retained))``,
+``__attribute__((cf_returns_retained))`` and ``__attribute__((cf_consumed))``
+have the same respective semantics when applied to CoreFoundation objects.
+These attributes affect code generation when interacting with ARC code, and
+they are used by the Clang Static Analyzer.
+
+Finally, in C++ interacting with XNU kernel (objects inheriting from OSObject),
+the same attribute family is present:
+``__attribute__((os_returns_not_retained))``,
+``__attribute__((os_returns_retained))`` and ``__attribute__((os_consumed))``,
+with the same respective semantics.
+Similar to ``__attribute__((ns_consumes_self))``,
+``__attribute__((os_consumes_this))`` specifies that the method call consumes
+the reference to "this" (e.g., when attaching it to a different object supplied
+as a parameter).
+Out parameters (parameters the function is meant to write into,
+either via pointers-to-pointers or references-to-pointers)
+may be annotated with ``__attribute__((os_returns_retained))``
+or ``__attribute__((os_returns_not_retained))`` which specifies that the object
+written into the out parameter should (or respectively should not) be released
+after use.
+Since often out parameters may or may not be written depending on the exit
+code of the function,
+annotations ``__attribute__((os_returns_retained_on_zero))``
+and ``__attribute__((os_returns_retained_on_non_zero))`` specify that
+an out parameter at ``+1`` is written if and only if the function returns a zero
+(respectively non-zero) error code.
+Observe that return-code-dependent out parameter annotations are only
+available for retained out parameters, as non-retained objects do not have to be
+released by the callee.
+These attributes are only used by the Clang Static Analyzer.
+
+The family of attributes ``X_returns_X_retained`` can be added to functions,
+C++ methods, and Objective-C methods and properties.
+Attributes ``X_consumed`` can be added to parameters of methods, functions,
+and Objective-C methods.
+
+
+overloadable
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``overloadable``","``clang::overloadable``","``clang::overloadable``","","","","Yes"
+
+Clang provides support for C++ function overloading in C.  Function overloading
+in C is introduced using the ``overloadable`` attribute.  For example, one
+might provide several overloaded versions of a ``tgsin`` function that invokes
+the appropriate standard function computing the sine of a value with ``float``,
+``double``, or ``long double`` precision:
+
+.. code-block:: c
+
+  #include <math.h>
+  float __attribute__((overloadable)) tgsin(float x) { return sinf(x); }
+  double __attribute__((overloadable)) tgsin(double x) { return sin(x); }
+  long double __attribute__((overloadable)) tgsin(long double x) { return sinl(x); }
+
+Given these declarations, one can call ``tgsin`` with a ``float`` value to
+receive a ``float`` result, with a ``double`` to receive a ``double`` result,
+etc.  Function overloading in C follows the rules of C++ function overloading
+to pick the best overload given the call arguments, with a few C-specific
+semantics:
+
+* Conversion from ``float`` or ``double`` to ``long double`` is ranked as a
+  floating-point promotion (per C99) rather than as a floating-point conversion
+  (as in C++).
+
+* A conversion from a pointer of type ``T*`` to a pointer of type ``U*`` is
+  considered a pointer conversion (with conversion rank) if ``T`` and ``U`` are
+  compatible types.
+
+* A conversion from type ``T`` to a value of type ``U`` is permitted if ``T``
+  and ``U`` are compatible types.  This conversion is given "conversion" rank.
+
+* If no viable candidates are otherwise available, we allow a conversion from a
+  pointer of type ``T*`` to a pointer of type ``U*``, where ``T`` and ``U`` are
+  incompatible. This conversion is ranked below all other types of conversions.
+  Please note: ``U`` lacking qualifiers that are present on ``T`` is sufficient
+  for ``T`` and ``U`` to be incompatible.
+
+The declaration of ``overloadable`` functions is restricted to function
+declarations and definitions.  If a function is marked with the ``overloadable``
+attribute, then all declarations and definitions of functions with that name,
+except for at most one (see the note below about unmarked overloads), must have
+the ``overloadable`` attribute.  In addition, redeclarations of a function with
+the ``overloadable`` attribute must have the ``overloadable`` attribute, and
+redeclarations of a function without the ``overloadable`` attribute must *not*
+have the ``overloadable`` attribute. e.g.,
+
+.. code-block:: c
+
+  int f(int) __attribute__((overloadable));
+  float f(float); // error: declaration of "f" must have the "overloadable" attribute
+  int f(int); // error: redeclaration of "f" must have the "overloadable" attribute
+
+  int g(int) __attribute__((overloadable));
+  int g(int) { } // error: redeclaration of "g" must also have the "overloadable" attribute
+
+  int h(int);
+  int h(int) __attribute__((overloadable)); // error: declaration of "h" must not
+                                            // have the "overloadable" attribute
+
+Functions marked ``overloadable`` must have prototypes.  Therefore, the
+following code is ill-formed:
+
+.. code-block:: c
+
+  int h() __attribute__((overloadable)); // error: h does not have a prototype
+
+However, ``overloadable`` functions are allowed to use an ellipsis even if there
+are no named parameters (as is permitted in C++).  This feature is particularly
+useful when combined with the ``unavailable`` attribute:
+
+.. code-block:: c++
+
+  void honeypot(...) __attribute__((overloadable, unavailable)); // calling me is an error
+
+Functions declared with the ``overloadable`` attribute have their names mangled
+according to the same rules as C++ function names.  For example, the three
+``tgsin`` functions in our motivating example get the mangled names
+``_Z5tgsinf``, ``_Z5tgsind``, and ``_Z5tgsine``, respectively.  There are two
+caveats to this use of name mangling:
+
+* Future versions of Clang may change the name mangling of functions overloaded
+  in C, so you should not depend on a specific mangling.  To be completely
+  safe, we strongly urge the use of ``static inline`` with ``overloadable``
+  functions.
+
+* The ``overloadable`` attribute has almost no meaning when used in C++,
+  because names will already be mangled and functions are already overloadable.
+  However, when an ``overloadable`` function occurs within an ``extern "C"``
+  linkage specification, its name *will* be mangled in the same way as it
+  would in C.
+
+For the purpose of backwards compatibility, at most one function with the same
+name as other ``overloadable`` functions may omit the ``overloadable``
+attribute. In this case, the function without the ``overloadable`` attribute
+will not have its name mangled.
+
+For example:
+
+.. code-block:: c
+
+  // Notes with mangled names assume Itanium mangling.
+  int f(int);
+  int f(double) __attribute__((overloadable));
+  void foo() {
+    f(5); // Emits a call to f (not _Z1fi, as it would with an overload that
+          // was marked with overloadable).
+    f(1.0); // Emits a call to _Z1fd.
+  }
+
+Support for unmarked overloads is not present in some versions of clang. You may
+query for it using ``__has_extension(overloadable_unmarked)``.
+
+Query for this attribute with ``__has_attribute(overloadable)``.
+
+
+reinitializes
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``reinitializes``","``clang::reinitializes``","","","","",""
+
+The ``reinitializes`` attribute can be applied to a non-static, non-const C++
+member function to indicate that this member function reinitializes the entire
+object to a known state, independent of the previous state of the object.
+
+This attribute can be interpreted by static analyzers that warn about uses of an
+object that has been left in an indeterminate state by a move operation. If a
+member function marked with the ``reinitializes`` attribute is called on a
+moved-from object, the analyzer can conclude that the object is no longer in an
+indeterminate state.
+
+A typical example where this attribute would be used is on functions that clear
+a container class:
+
+.. code-block:: c++
+
+  template <class T>
+  class Container {
+  public:
+    ...
+    [[clang::reinitializes]] void Clear();
+    ...
+  };
+
+
+release_capability, release_shared_capability
+---------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``release_capability`` |br| ``release_shared_capability`` |br| ``release_generic_capability`` |br| ``unlock_function``","``clang::release_capability`` |br| ``clang::release_shared_capability`` |br| ``clang::release_generic_capability`` |br| ``clang::unlock_function``","","","","",""
+
+Marks a function as releasing a capability.
+
+
+short_call, near
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``short_call`` |br| ``near``","``gnu::short_call`` |br| ``gnu::near``","","","","","Yes"
+
+Clang supports the ``__attribute__((long_call))``, ``__attribute__((far))``,
+``__attribute__((short_call))``, and ``__attribute__((near))`` attributes
+on MIPS targets. These attributes may only be added to function declarations
+and change the code generated by the compiler when directly calling
+the function. The ``short_call`` and ``near`` attributes are synonyms and
+allow calls to the function to be made using the ``jal`` instruction, which
+requires the function to be located in the same naturally aligned 256MB segment
+as the caller.  The ``long_call`` and ``far`` attributes are synonyms and
+require the use of a different call sequence that works regardless
+of the distance between the functions.
+
+These attributes have no effect for position-independent code.
+
+These attributes take priority over command line switches such
+as ``-mlong-calls`` and ``-mno-long-calls``.
+
+
+signal
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``signal``","``gnu::signal``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((signal))`` attribute on
+AVR targets. This attribute may be attached to a function definition and instructs
+the backend to generate appropriate function entry/exit code so that it can be used
+directly as an interrupt service routine.
+
+Interrupt handler functions defined with the signal attribute do not re-enable interrupts.
+
+
+speculative_load_hardening
+--------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``speculative_load_hardening``","``clang::speculative_load_hardening``","``clang::speculative_load_hardening``","","","","Yes"
+
+This attribute can be applied to a function declaration in order to indicate
+that `Speculative Load Hardening <https://llvm.org/docs/SpeculativeLoadHardening.html>`_
+should be enabled for the function body. This can also be applied to a method
+in Objective C.
+
+Speculative Load Hardening is a best-effort mitigation against
+information leak attacks that make use of control flow
+misspeculation - specifically misspeculation of whether a branch
+is taken or not. Typically vulnerabilities enabling such attacks are
+classified as "Spectre variant #1". Notably, this does not attempt to
+mitigate against misspeculation of branch target, classified as
+"Spectre variant #2" vulnerabilities.
+
+When inlining, the attribute is sticky. Inlining a function that
+carries this attribute will cause the caller to gain the
+attribute. This is intended to provide a maximally conservative model
+where the code in a function annotated with this attribute will always
+(even after inlining) end up hardened.
+
+
+target
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``target``","``gnu::target``","","","","","Yes"
+
+Clang supports the GNU style ``__attribute__((target("OPTIONS")))`` attribute.
+This attribute may be attached to a function definition and instructs
+the backend to use different code generation options than were passed on the
+command line.
+
+The current set of options corresponds to the existing "subtarget features" for
+the target with or without a "-mno-" in front corresponding to the absence
+of the feature, as well as ``arch="CPU"`` which will change the default "CPU"
+for the function.
+
+Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2",
+"avx", "xop" and largely correspond to the machine specific options handled by
+the front end.
+
+Additionally, this attribute supports function multiversioning for ELF based
+x86/x86-64 targets, which can be used to create multiple implementations of the
+same function that will be resolved at runtime based on the priority of their
+``target`` attribute strings. A function is considered a multiversioned function
+if either two declarations of the function have different ``target`` attribute
+strings, or if it has a ``target`` attribute string of ``default``.  For
+example:
+
+  .. code-block:: c++
+
+    __attribute__((target("arch=atom")))
+    void foo() {} // will be called on 'atom' processors.
+    __attribute__((target("default")))
+    void foo() {} // will be called on any other processors.
+
+All multiversioned functions must contain a ``default`` (fallback)
+implementation, otherwise usages of the function are considered invalid.
+Additionally, a function may not become multiversioned after its first use.
+
+
+try_acquire_capability, try_acquire_shared_capability
+-----------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``try_acquire_capability`` |br| ``try_acquire_shared_capability``","``clang::try_acquire_capability`` |br| ``clang::try_acquire_shared_capability``","","","","",""
+
+Marks a function that attempts to acquire a capability. This function may fail to
+actually acquire the capability; it accepts a Boolean value determining
+whether acquiring the capability means success (true), or failing to acquire
+the capability means success (false).
+
+
+xray_always_instrument, xray_never_instrument, xray_log_args
+------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``xray_always_instrument`` |br| ``xray_never_instrument``","``clang::xray_always_instrument`` |br| ``clang::xray_never_instrument``","``clang::xray_always_instrument`` |br| ``clang::xray_never_instrument``","","","","Yes"
+
+``__attribute__((xray_always_instrument))`` or ``[[clang::xray_always_instrument]]`` is used to mark member functions (in C++), methods (in Objective C), and free functions (in C, C++, and Objective C) to be instrumented with XRay. This will cause the function to always have space at the beginning and exit points to allow for runtime patching.
+
+Conversely, ``__attribute__((xray_never_instrument))`` or ``[[clang::xray_never_instrument]]`` will inhibit the insertion of these instrumentation points.
+
+If a function has neither of these attributes, it becomes subject to the XRay heuristics used to determine whether a function should be instrumented or otherwise.
+
+``__attribute__((xray_log_args(N)))`` or ``[[clang::xray_log_args(N)]]`` is used to preserve N function arguments for the logging function.  Currently, only N==1 is supported.
+
+
+xray_always_instrument, xray_never_instrument, xray_log_args
+------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``xray_log_args``","``clang::xray_log_args``","``clang::xray_log_args``","","","","Yes"
+
+``__attribute__((xray_always_instrument))`` or ``[[clang::xray_always_instrument]]`` is used to mark member functions (in C++), methods (in Objective C), and free functions (in C, C++, and Objective C) to be instrumented with XRay. This will cause the function to always have space at the beginning and exit points to allow for runtime patching.
+
+Conversely, ``__attribute__((xray_never_instrument))`` or ``[[clang::xray_never_instrument]]`` will inhibit the insertion of these instrumentation points.
+
+If a function has neither of these attributes, it becomes subject to the XRay heuristics used to determine whether a function should be instrumented or otherwise.
+
+``__attribute__((xray_log_args(N)))`` or ``[[clang::xray_log_args(N)]]`` is used to preserve N function arguments for the logging function.  Currently, only N==1 is supported.
+
+
+Variable Attributes
+===================
+
+
+always_destroy
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``always_destroy``","``clang::always_destroy``","","","","","Yes"
+
+The ``always_destroy`` attribute specifies that a variable with static or thread
+storage duration should have its exit-time destructor run. This attribute is the
+default unless clang was invoked with -fno-c++-static-destructors.
+
+
+dllexport
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``dllexport``","``gnu::dllexport``","","``dllexport``","","","Yes"
+
+The ``__declspec(dllexport)`` attribute declares a variable, function, or
+Objective-C interface to be exported from the module.  It is available under the
+``-fdeclspec`` flag for compatibility with various compilers.  The primary use
+is for COFF object files which explicitly specify what interfaces are available
+for external use.  See the dllexport_ documentation on MSDN for more
+information.
+
+.. _dllexport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
+
+
+dllimport
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``dllimport``","``gnu::dllimport``","","``dllimport``","","","Yes"
+
+The ``__declspec(dllimport)`` attribute declares a variable, function, or
+Objective-C interface to be imported from an external module.  It is available
+under the ``-fdeclspec`` flag for compatibility with various compilers.  The
+primary use is for COFF object files which explicitly specify what interfaces
+are imported from external modules.  See the dllimport_ documentation on MSDN
+for more information.
+
+.. _dllimport: https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx
+
+
+init_seg
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","","``init_seg``",""
+
+The attribute applied by ``pragma init_seg()`` controls the section into
+which global initialization function pointers are emitted.  It is only
+available with ``-fms-extensions``.  Typically, this function pointer is
+emitted into ``.CRT$XCU`` on Windows.  The user can change the order of
+initialization by using a different section name with the same
+``.CRT$XC`` prefix and a suffix that sorts lexicographically before or
+after the standard ``.CRT$XCU`` sections.  See the init_seg_
+documentation on MSDN for more information.
+
+.. _init_seg: http://msdn.microsoft.com/en-us/library/7977wcck(v=vs.110).aspx
+
+
+maybe_unused, unused
+--------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``unused``","``maybe_unused`` |br| ``gnu::unused``","``maybe_unused``","","","",""
+
+When passing the ``-Wunused`` flag to Clang, entities that are unused by the
+program may be diagnosed. The ``[[maybe_unused]]`` (or
+``__attribute__((unused))``) attribute can be used to silence such diagnostics
+when the entity cannot be removed. For instance, a local variable may exist
+solely for use in an ``assert()`` statement, which makes the local variable
+unused when ``NDEBUG`` is defined.
+
+The attribute may be applied to the declaration of a class, a typedef, a
+variable, a function or method, a function parameter, an enumeration, an
+enumerator, a non-static data member, or a label.
+
+.. code-block:: c++
+
+  #include <cassert>
+
+  [[maybe_unused]] void f([[maybe_unused]] bool thing1,
+                          [[maybe_unused]] bool thing2) {
+    [[maybe_unused]] bool b = thing1 && thing2;
+    assert(b);
+  }
+
+
+no_destroy
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``no_destroy``","``clang::no_destroy``","","","","","Yes"
+
+The ``no_destroy`` attribute specifies that a variable with static or thread
+storage duration shouldn't have its exit-time destructor run. Annotating every
+static and thread duration variable with this attribute is equivalent to
+invoking clang with -fno-c++-static-destructors.
+
+
+nodebug
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``nodebug``","``gnu::nodebug``","","","","","Yes"
+
+The ``nodebug`` attribute allows you to suppress debugging information for a
+function or method, or for a variable that is not a parameter or a non-static
+data member.
+
+
+noescape
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``noescape``","``clang::noescape``","``clang::noescape``","","","","Yes"
+
+``noescape`` placed on a function parameter of a pointer type is used to inform
+the compiler that the pointer cannot escape: that is, no reference to the object
+the pointer points to that is derived from the parameter value will survive
+after the function returns. Users are responsible for making sure parameters
+annotated with ``noescape`` do not actually escape.
+
+For example:
+
+.. code-block:: c
+
+  int *gp;
+
+  void nonescapingFunc(__attribute__((noescape)) int *p) {
+    *p += 100; // OK.
+  }
+
+  void escapingFunc(__attribute__((noescape)) int *p) {
+    gp = p; // Not OK.
+  }
+
+Additionally, when the parameter is a `block pointer
+<https://clang.llvm.org/docs/BlockLanguageSpec.html>`_, the same restriction
+applies to copies of the block. For example:
+
+.. code-block:: c
+
+  typedef void (^BlockTy)();
+  BlockTy g0, g1;
+
+  void nonescapingFunc(__attribute__((noescape)) BlockTy block) {
+    block(); // OK.
+  }
+
+  void escapingFunc(__attribute__((noescape)) BlockTy block) {
+    g0 = block; // Not OK.
+    g1 = Block_copy(block); // Not OK either.
+  }
+
+
+nosvm
+-----
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``nosvm``","","","","","","Yes"
+
+OpenCL 2.0 supports the optional ``__attribute__((nosvm))`` qualifier for
+pointer variables. It informs the compiler that the pointer does not refer
+to a shared virtual memory region. See OpenCL v2.0 s6.7.2 for details.
+
+Since it is not widely used and has been removed from OpenCL 2.1, it is ignored
+by Clang.
+
+
+objc_externally_retained
+------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_externally_retained``","``clang::objc_externally_retained``","``clang::objc_externally_retained``","","","","Yes"
+
+The ``objc_externally_retained`` attribute can be applied to strong local
+variables, functions, methods, or blocks to opt into
+`externally-retained semantics
+<https://clang.llvm.org/docs/AutomaticReferenceCounting.html#externally-retained-variables>`_.
+
+When applied to the definition of a function, method, or block, every parameter
+of the function with implicit strong retainable object pointer type is
+considered externally-retained, and becomes ``const``. By explicitly annotating
+a parameter with ``__strong``, you can opt back into the default
+non-externally-retained behaviour for that parameter. For instance,
+``first_param`` is externally-retained below, but not ``second_param``:
+
+.. code-block:: objc
+
+  __attribute__((objc_externally_retained))
+  void f(NSArray *first_param, __strong NSArray *second_param) {
+    // ...
+  }
+
+Likewise, when applied to a strong local variable, that variable becomes
+``const`` and is considered externally-retained.
+
+When compiled without ``-fobjc-arc``, this attribute is ignored.
+
+
+pass_object_size
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``pass_object_size``","``clang::pass_object_size``","``clang::pass_object_size``","","","","Yes"
+
+.. Note:: The mangling of functions with parameters that are annotated with
+  ``pass_object_size`` is subject to change. You can get around this by
+  using ``__asm__("foo")`` to explicitly name your functions, thus preserving
+  your ABI; also, non-overloadable C functions with ``pass_object_size`` are
+  not mangled.
+
+The ``pass_object_size(Type)`` attribute can be placed on function parameters to
+instruct clang to call ``__builtin_object_size(param, Type)`` at each callsite
+of said function, and implicitly pass the result of this call in as an invisible
+argument of type ``size_t`` directly after the parameter annotated with
+``pass_object_size``. Clang will also replace any calls to
+``__builtin_object_size(param, Type)`` in the function by said implicit
+parameter.
+
+Example usage:
+
+.. code-block:: c
+
+  int bzero1(char *const p __attribute__((pass_object_size(0))))
+      __attribute__((noinline)) {
+    int i = 0;
+    for (/**/; i < (int)__builtin_object_size(p, 0); ++i) {
+      p[i] = 0;
+    }
+    return i;
+  }
+
+  int main() {
+    char chars[100];
+    int n = bzero1(&chars[0]);
+    assert(n == sizeof(chars));
+    return 0;
+  }
+
+If successfully evaluating ``__builtin_object_size(param, Type)`` at the
+callsite is not possible, then the "failed" value is passed in. So, using the
+definition of ``bzero1`` from above, the following code would exit cleanly:
+
+.. code-block:: c
+
+  int main2(int argc, char *argv[]) {
+    int n = bzero1(argv);
+    assert(n == -1);
+    return 0;
+  }
+
+``pass_object_size`` plays a part in overload resolution. If two overload
+candidates are otherwise equally good, then the overload with one or more
+parameters with ``pass_object_size`` is preferred. This implies that the choice
+between two identical overloads both with ``pass_object_size`` on one or more
+parameters will always be ambiguous; for this reason, having two such overloads
+is illegal. For example:
+
+.. code-block:: c++
+
+  #define PS(N) __attribute__((pass_object_size(N)))
+  // OK
+  void Foo(char *a, char *b); // Overload A
+  // OK -- overload A has no parameters with pass_object_size.
+  void Foo(char *a PS(0), char *b PS(0)); // Overload B
+  // Error -- Same signature (sans pass_object_size) as overload B, and both
+  // overloads have one or more parameters with the pass_object_size attribute.
+  void Foo(void *a PS(0), void *b);
+
+  // OK
+  void Bar(void *a PS(0)); // Overload C
+  // OK
+  void Bar(char *c PS(1)); // Overload D
+
+  void main() {
+    char known[10], *unknown;
+    Foo(unknown, unknown); // Calls overload B
+    Foo(known, unknown); // Calls overload B
+    Foo(unknown, known); // Calls overload B
+    Foo(known, known); // Calls overload B
+
+    Bar(known); // Calls overload D
+    Bar(unknown); // Calls overload D
+  }
+
+Currently, ``pass_object_size`` is a bit restricted in terms of its usage:
+
+* Only one use of ``pass_object_size`` is allowed per parameter.
+
+* It is an error to take the address of a function with ``pass_object_size`` on
+  any of its parameters. If you wish to do this, you can create an overload
+  without ``pass_object_size`` on any parameters.
+
+* It is an error to apply the ``pass_object_size`` attribute to parameters that
+  are not pointers. Additionally, any parameter that ``pass_object_size`` is
+  applied to must be marked ``const`` at its function's definition.
+
+
+require_constant_initialization
+-------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``require_constant_initialization``","``clang::require_constant_initialization``","","","","","Yes"
+
+This attribute specifies that the variable to which it is attached is intended
+to have a `constant initializer <http://en.cppreference.com/w/cpp/language/constant_initialization>`_
+according to the rules of [basic.start.static]. The variable is required to
+have static or thread storage duration. If the initialization of the variable
+is not a constant initializer an error will be produced. This attribute may
+only be used in C++.
+
+Note that in C++03 strict constant expression checking is not done. Instead
+the attribute reports if Clang can emit the variable as a constant, even if it's
+not technically a 'constant initializer'. This behavior is non-portable.
+
+Static storage duration variables with constant initializers avoid hard-to-find
+bugs caused by the indeterminate order of dynamic initialization. They can also
+be safely used during dynamic initialization across translation units.
+
+This attribute acts as a compile time assertion that the requirements
+for constant initialization have been met. Since these requirements change
+between dialects and have subtle pitfalls it's important to fail fast instead
+of silently falling back on dynamic initialization.
+
+.. code-block:: c++
+
+  // -std=c++14
+  #define SAFE_STATIC [[clang::require_constant_initialization]]
+  struct T {
+    constexpr T(int) {}
+    ~T(); // non-trivial
+  };
+  SAFE_STATIC T x = {42}; // Initialization OK. Doesn't check destructor.
+  SAFE_STATIC T y = 42; // error: variable does not have a constant initializer
+  // copy initialization is not a constant expression on a non-literal type.
+
+
+section, __declspec(allocate)
+-----------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``section``","``gnu::section``","","``allocate``","","","Yes"
+
+The ``section`` attribute allows you to specify a specific section a
+global variable or function should be in after translation.
+
+
+swift_context
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``swift_context``","``clang::swift_context``","``clang::swift_context``","","","","Yes"
+
+The ``swift_context`` attribute marks a parameter of a ``swiftcall``
+function as having the special context-parameter ABI treatment.
+
+This treatment generally passes the context value in a special register
+which is normally callee-preserved.
+
+A ``swift_context`` parameter must either be the last parameter or must be
+followed by a ``swift_error_result`` parameter (which itself must always be
+the last parameter).
+
+A context parameter must have pointer or reference type.
+
+
+swift_error_result
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``swift_error_result``","``clang::swift_error_result``","``clang::swift_error_result``","","","","Yes"
+
+The ``swift_error_result`` attribute marks a parameter of a ``swiftcall``
+function as having the special error-result ABI treatment.
+
+This treatment generally passes the underlying error value in and out of
+the function through a special register which is normally callee-preserved.
+This is modeled in C by pretending that the register is addressable memory:
+
+- The caller appears to pass the address of a variable of pointer type.
+  The current value of this variable is copied into the register before
+  the call; if the call returns normally, the value is copied back into the
+  variable.
+
+- The callee appears to receive the address of a variable.  This address
+  is actually a hidden location in its own stack, initialized with the
+  value of the register upon entry.  When the function returns normally,
+  the value in that hidden location is written back to the register.
+
+A ``swift_error_result`` parameter must be the last parameter, and it must be
+preceded by a ``swift_context`` parameter.
+
+A ``swift_error_result`` parameter must have type ``T**`` or ``T*&`` for some
+type T.  Note that no qualifiers are permitted on the intermediate level.
+
+It is undefined behavior if the caller does not pass a pointer or
+reference to a valid object.
+
+The standard convention is that the error value itself (that is, the
+value stored in the apparent argument) will be null upon function entry,
+but this is not enforced by the ABI.
+
+
+swift_indirect_result
+---------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``swift_indirect_result``","``clang::swift_indirect_result``","``clang::swift_indirect_result``","","","","Yes"
+
+The ``swift_indirect_result`` attribute marks a parameter of a ``swiftcall``
+function as having the special indirect-result ABI treatment.
+
+This treatment gives the parameter the target's normal indirect-result
+ABI treatment, which may involve passing it differently from an ordinary
+parameter.  However, only the first indirect result will receive this
+treatment.  Furthermore, low-level lowering may decide that a direct result
+must be returned indirectly; if so, this will take priority over the
+``swift_indirect_result`` parameters.
+
+A ``swift_indirect_result`` parameter must either be the first parameter or
+follow another ``swift_indirect_result`` parameter.
+
+A ``swift_indirect_result`` parameter must have type ``T*`` or ``T&`` for
+some object type ``T``.  If ``T`` is a complete type at the point of
+definition of a function, it is undefined behavior if the argument
+value does not point to storage of adequate size and alignment for a
+value of type ``T``.
+
+Making indirect results explicit in the signature allows C functions to
+directly construct objects into them without relying on language
+optimizations like C++'s named return value optimization (NRVO).
+
+
+swiftcall
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``swiftcall``","``clang::swiftcall``","``clang::swiftcall``","","","",""
+
+The ``swiftcall`` attribute indicates that a function should be called
+using the Swift calling convention for a function or function pointer.
+
+The lowering for the Swift calling convention, as described by the Swift
+ABI documentation, occurs in multiple phases.  The first, "high-level"
+phase breaks down the formal parameters and results into innately direct
+and indirect components, adds implicit parameters for the generic
+signature, and assigns the context and error ABI treatments to parameters
+where applicable.  The second phase breaks down the direct parameters
+and results from the first phase and assigns them to registers or the
+stack.  The ``swiftcall`` convention only handles this second phase of
+lowering; the C function type must accurately reflect the results
+of the first phase, as follows:
+
+- Results classified as indirect by high-level lowering should be
+  represented as parameters with the ``swift_indirect_result`` attribute.
+
+- Results classified as direct by high-level lowering should be represented
+  as follows:
+
+  - First, remove any empty direct results.
+
+  - If there are no direct results, the C result type should be ``void``.
+
+  - If there is one direct result, the C result type should be a type with
+    the exact layout of that result type.
+
+  - If there are multiple direct results, the C result type should be
+    a struct type with the exact layout of a tuple of those results.
+
+- Parameters classified as indirect by high-level lowering should be
+  represented as parameters of pointer type.
+
+- Parameters classified as direct by high-level lowering should be
+  omitted if they are empty types; otherwise, they should be represented
+  as a parameter type with a layout exactly matching the layout of the
+  Swift parameter type.
+
+- The context parameter, if present, should be represented as a trailing
+  parameter with the ``swift_context`` attribute.
+
+- The error result parameter, if present, should be represented as a
+  trailing parameter (always following a context parameter) with the
+  ``swift_error_result`` attribute.
+
+``swiftcall`` does not support variadic arguments or unprototyped functions.
+
+The parameter ABI treatment attributes are aspects of the function type.
+A function type which applies an ABI treatment attribute to a
+parameter is a different type from an otherwise-identical function type
+that does not.  A single parameter may not have multiple ABI treatment
+attributes.
+
+Support for this feature is target-dependent, although it should be
+supported on every target that Swift supports.  Query for this support
+with ``__has_attribute(swiftcall)``.  This implies support for the
+``swift_context``, ``swift_error_result``, and ``swift_indirect_result``
+attributes.
+
+
+thread
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","``thread``","","",""
+
+The ``__declspec(thread)`` attribute declares a variable with thread local
+storage.  It is available under the ``-fms-extensions`` flag for MSVC
+compatibility.  See the documentation for `__declspec(thread)`_ on MSDN.
+
+.. _`__declspec(thread)`: http://msdn.microsoft.com/en-us/library/9w1sdazb.aspx
+
+In Clang, ``__declspec(thread)`` is generally equivalent in functionality to the
+GNU ``__thread`` keyword.  The variable must not have a destructor and must have
+a constant initializer, if any.  The attribute only applies to variables
+declared with static storage duration, such as globals, class static data
+members, and static locals.
+
+
+tls_model
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``tls_model``","``gnu::tls_model``","","","","","Yes"
+
+The ``tls_model`` attribute allows you to specify which thread-local storage
+model to use. It accepts the following strings:
+
+* global-dynamic
+* local-dynamic
+* initial-exec
+* local-exec
+
+TLS models are mutually exclusive.
+
+
+trivial_abi
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``trivial_abi``","``clang::trivial_abi``","","","","","Yes"
+
+The ``trivial_abi`` attribute can be applied to a C++ class, struct, or union.
+It instructs the compiler to pass and return the type using the C ABI for the
+underlying type when the type would otherwise be considered non-trivial for the
+purpose of calls.
+A class annotated with ``trivial_abi`` can have non-trivial destructors or copy/move constructors without automatically becoming non-trivial for the purposes of calls. For example:
+
+  .. code-block:: c++
+
+    // A is trivial for the purposes of calls because `trivial_abi` makes the
+    // user-provided special functions trivial.
+    struct __attribute__((trivial_abi)) A {
+      ~A();
+      A(const A &);
+      A(A &&);
+      int x;
+    };
+
+    // B's destructor and copy/move constructor are considered trivial for the
+    // purpose of calls because A is trivial.
+    struct B {
+      A a;
+    };
+
+If a type is trivial for the purposes of calls, has a non-trivial destructor,
+and is passed as an argument by value, the convention is that the callee will
+destroy the object before returning.
+
+Attribute ``trivial_abi`` has no effect in the following cases:
+
+- The class directly declares a virtual base or virtual methods.
+- The class has a base class that is non-trivial for the purposes of calls.
+- The class has a non-static data member whose type is non-trivial for the purposes of calls, which includes:
+
+  - classes that are non-trivial for the purposes of calls
+  - __weak-qualified types in Objective-C++
+  - arrays of any of the above
+
+
+uninitialized
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``uninitialized``","``clang::uninitialized``","","","","",""
+
+The command-line parameter ``-ftrivial-auto-var-init=*`` can be used to
+initialize trivial automatic stack variables. By default, trivial automatic
+stack variables are uninitialized. This attribute is used to override the
+command-line parameter, forcing variables to remain uninitialized. It has no
+semantic meaning in that using uninitialized values is undefined behavior;
+rather, it documents the programmer's intent.
+
+
+Type Attributes
+===============
+
+
+__single_inheritance, __multiple_inheritance, __virtual_inheritance
+-------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__single_inheritance`` |br| ``__multiple_inheritance`` |br| ``__virtual_inheritance`` |br| ``__unspecified_inheritance``","",""
+
+This collection of keywords is enabled under ``-fms-extensions`` and controls
+the pointer-to-member representation used on ``*-*-win32`` targets.
+
+The ``*-*-win32`` targets utilize a pointer-to-member representation which
+varies in size and alignment depending on the definition of the underlying
+class.
+
+However, this is problematic when only a forward declaration is available and
+no definition has been made yet.  In such cases, Clang is forced to utilize the
+most general representation that is available to it.
+
+These keywords make it possible to use a pointer-to-member representation other
+than the most general one regardless of whether or not the definition will ever
+be present in the current translation unit.
+
+This family of keywords belongs between the ``class-key`` and ``class-name``:
+
+.. code-block:: c++
+
+  struct __single_inheritance S;
+  int S::*i;
+  struct S {};
+
+This keyword can be applied to class templates but only has an effect when used
+on full specializations:
+
+.. code-block:: c++
+
+  template <typename T, typename U> struct __single_inheritance A; // warning: inheritance model ignored on primary template
+  template <typename T> struct __multiple_inheritance A<T, T>; // warning: inheritance model ignored on partial specialization
+  template <> struct __single_inheritance A<int, float>;
+
+Note that choosing an inheritance model less general than strictly necessary is
+an error:
+
+.. code-block:: c++
+
+  struct __multiple_inheritance S; // error: inheritance model does not match definition
+  int S::*i;
+  struct S {};
+
+
+align_value
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``align_value``","","","","","","Yes"
+
+The align_value attribute can be added to the typedef of a pointer type or the
+declaration of a variable of pointer or reference type. It specifies that the
+pointer will point to, or the reference will bind to, only objects with at
+least the provided alignment. This alignment value must be some positive power
+of 2.
+
+   .. code-block:: c
+
+     typedef double * aligned_double_ptr __attribute__((align_value(64)));
+     void foo(double & x  __attribute__((align_value(128))),
+              aligned_double_ptr y) { ... }
+
+If the pointer value does not have the specified alignment at runtime, the
+behavior of the program is undefined.
+
+
+empty_bases
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","``empty_bases``","","",""
+
+The empty_bases attribute permits the compiler to utilize the
+empty-base-optimization more frequently.
+This attribute only applies to struct, class, and union types.
+It is only supported when using the Microsoft C++ ABI.
+
+
+enum_extensibility
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``enum_extensibility``","``clang::enum_extensibility``","``clang::enum_extensibility``","","","","Yes"
+
+Attribute ``enum_extensibility`` is used to distinguish between enum definitions
+that are extensible and those that are not. The attribute can take either
+``closed`` or ``open`` as an argument. ``closed`` indicates a variable of the
+enum type takes a value that corresponds to one of the enumerators listed in the
+enum definition or, when the enum is annotated with ``flag_enum``, a value that
+can be constructed using values corresponding to the enumerators. ``open``
+indicates a variable of the enum type can take any values allowed by the
+standard and instructs clang to be more lenient when issuing warnings.
+
+.. code-block:: c
+
+  enum __attribute__((enum_extensibility(closed))) ClosedEnum {
+    A0, A1
+  };
+
+  enum __attribute__((enum_extensibility(open))) OpenEnum {
+    B0, B1
+  };
+
+  enum __attribute__((enum_extensibility(closed),flag_enum)) ClosedFlagEnum {
+    C0 = 1 << 0, C1 = 1 << 1
+  };
+
+  enum __attribute__((enum_extensibility(open),flag_enum)) OpenFlagEnum {
+    D0 = 1 << 0, D1 = 1 << 1
+  };
+
+  void foo1() {
+    enum ClosedEnum ce;
+    enum OpenEnum oe;
+    enum ClosedFlagEnum cfe;
+    enum OpenFlagEnum ofe;
+
+    ce = A1;           // no warnings
+    ce = 100;          // warning issued
+    oe = B1;           // no warnings
+    oe = 100;          // no warnings
+    cfe = C0 | C1;     // no warnings
+    cfe = C0 | C1 | 4; // warning issued
+    ofe = D0 | D1;     // no warnings
+    ofe = D0 | D1 | 4; // no warnings
+  }
+
+
+flag_enum
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``flag_enum``","``clang::flag_enum``","``clang::flag_enum``","","","","Yes"
+
+This attribute can be added to an enumerator to signal to the compiler that it
+is intended to be used as a flag type. This will cause the compiler to assume
+that the range of the type includes all of the values that you can get by
+manipulating bits of the enumerator when issuing warnings.
+
+
+layout_version
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","``layout_version``","","",""
+
+The layout_version attribute requests that the compiler utilize the class
+layout rules of a particular compiler version.
+This attribute only applies to struct, class, and union types.
+It is only supported when using the Microsoft C++ ABI.
+
+
+lto_visibility_public
+---------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``lto_visibility_public``","``clang::lto_visibility_public``","``clang::lto_visibility_public``","","","","Yes"
+
+See :doc:`LTOVisibility`.
+
+
+noderef
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``noderef``","``clang::noderef``","``clang::noderef``","","","",""
+
+The ``noderef`` attribute causes clang to diagnose dereferences of annotated pointer types.
+This is ideally used with pointers that point to special memory which cannot be read
+from or written to, but allowing for the pointer to be used in pointer arithmetic.
+The following are examples of valid expressions where dereferences are diagnosed:
+
+.. code-block:: c
+
+  int __attribute__((noderef)) *p;
+  int x = *p;  // warning
+
+  int __attribute__((noderef)) **p2;
+  x = **p2;  // warning
+
+  int * __attribute__((noderef)) *p3;
+  p = *p3;  // warning
+
+  struct S {
+    int a;
+  };
+  struct S __attribute__((noderef)) *s;
+  x = s->a;    // warning
+  x = (*s).a;  // warning
+
+A dereference may not be diagnosed if the value pointed to by the pointer is not actually
+accessed. The following are examples of valid expressions that may not be diagnosed:
+
+.. code-block:: c
+
+  int *q;
+  int __attribute__((noderef)) *p;
+  q = &*p;
+  q = *&p;
+
+  struct S {
+    int a;
+  };
+  struct S __attribute__((noderef)) *s;
+  p = &s->a;
+  p = &(*s).a;
+
+``noderef`` is currently only supported for pointers and arrays and not usable for
+references or Objective-C object pointers.
+
+.. code-block:: c++
+
+  int x = 2;
+  int __attribute__((noderef)) &y = x;  // warning: 'noderef' can only be used on an array or pointer type
+
+.. code-block:: objc
+
+  id __attribute__((noderef)) obj = [NSObject new]; // warning: 'noderef' can only be used on an array or pointer type
+
+
+novtable
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","``novtable``","","",""
+
+This attribute can be added to a class declaration or definition to signal to
+the compiler that constructors and destructors will not reference the virtual
+function table. It is only supported when using the Microsoft C++ ABI.
+
+
+objc_subclassing_restricted
+---------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``objc_subclassing_restricted``","``clang::objc_subclassing_restricted``","``clang::objc_subclassing_restricted``","","","","Yes"
+
+This attribute can be added to an Objective-C ``@interface`` declaration to
+ensure that this class cannot be subclassed.
+
+
+selectany
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``selectany``","``gnu::selectany``","","``selectany``","","",""
+
+This attribute appertains to a global symbol, causing it to have a weak
+definition (
+`linkonce <https://llvm.org/docs/LangRef.html#linkage-types>`_
+), allowing the linker to select any definition.
+
+For more information see
+`gcc documentation <https://gcc.gnu.org/onlinedocs/gcc-7.2.0/gcc/Microsoft-Windows-Variable-Attributes.html>`_
+or `msvc documentation <https://docs.microsoft.com/en-us/cpp/cpp/selectany>`_.
+
+
+transparent_union
+-----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``transparent_union``","``gnu::transparent_union``","","","","",""
+
+This attribute can be applied to a union to change the behaviour of calls to
+functions that have an argument with a transparent union type. The compiler
+behaviour is changed in the following manner:
+
+- A value whose type is any member of the transparent union can be passed as an
+  argument without the need to cast that value.
+
+- The argument is passed to the function using the calling convention of the
+  first member of the transparent union. Consequently, all the members of the
+  transparent union should have the same calling convention as its first member.
+
+Transparent unions are not supported in C++.
+
+
+Statement Attributes
+====================
+
+
+#pragma clang loop
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","","``clang loop`` |br| ``unroll`` |br| ``nounroll`` |br| ``unroll_and_jam`` |br| ``nounroll_and_jam``",""
+
+The ``#pragma clang loop`` directive allows loop optimization hints to be
+specified for the subsequent loop. The directive allows pipelining to be
+disabled, or vectorization, interleaving, and unrolling to be enabled or disabled.
+Vector width, interleave count, unrolling count, and the initiation interval
+for pipelining can be explicitly specified. See `language extensions
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
+for details.
+
+
+#pragma unroll, #pragma nounroll
+--------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","","``clang loop`` |br| ``unroll`` |br| ``nounroll`` |br| ``unroll_and_jam`` |br| ``nounroll_and_jam``",""
+
+Loop unrolling optimization hints can be specified with ``#pragma unroll`` and
+``#pragma nounroll``. The pragma is placed immediately before a for, while,
+do-while, or C++11 range-based for loop.
+
+Specifying ``#pragma unroll`` without a parameter directs the loop unroller to
+attempt to fully unroll the loop if the trip count is known at compile time and
+attempt to partially unroll the loop if the trip count is not known at compile
+time:
+
+.. code-block:: c++
+
+  #pragma unroll
+  for (...) {
+    ...
+  }
+
+Specifying the optional parameter, ``#pragma unroll _value_``, directs the
+unroller to unroll the loop ``_value_`` times.  The parameter may optionally be
+enclosed in parentheses:
+
+.. code-block:: c++
+
+  #pragma unroll 16
+  for (...) {
+    ...
+  }
+
+  #pragma unroll(16)
+  for (...) {
+    ...
+  }
+
+Specifying ``#pragma nounroll`` indicates that the loop should not be unrolled:
+
+.. code-block:: c++
+
+  #pragma nounroll
+  for (...) {
+    ...
+  }
+
+``#pragma unroll`` and ``#pragma unroll _value_`` have identical semantics to
+``#pragma clang loop unroll(full)`` and
+``#pragma clang loop unroll_count(_value_)`` respectively. ``#pragma nounroll``
+is equivalent to ``#pragma clang loop unroll(disable)``.  See
+`language extensions
+<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
+for further details including limitations of the unroll hints.
+
+
+__read_only, __write_only, __read_write (read_only, write_only, read_write)
+---------------------------------------------------------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__read_only`` |br| ``read_only`` |br| ``__write_only`` |br| ``write_only`` |br| ``__read_write`` |br| ``read_write``","",""
+
+The access qualifiers must be used with image object arguments or pipe arguments
+to declare if they are being read or written by a kernel or function.
+
+The read_only/__read_only, write_only/__write_only and read_write/__read_write
+names are reserved for use as access qualifiers and shall not be used otherwise.
+
+.. code-block:: c
+
+  kernel void
+  foo (read_only image2d_t imageA,
+       write_only image2d_t imageB) {
+    ...
+  }
+
+In the above example imageA is a read-only 2D image object, and imageB is a
+write-only 2D image object.
+
+The read_write (or __read_write) qualifier cannot be used with pipe.
+
+More details can be found in the OpenCL C language Spec v2.0, Section 6.6.
+
+
+fallthrough
+-----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","``fallthrough`` |br| ``clang::fallthrough``","``fallthrough``","","","",""
+
+The ``fallthrough`` (or ``clang::fallthrough``) attribute is used
+to annotate intentional fall-through
+between switch labels.  It can only be applied to a null statement placed at a
+point of execution between any statement and the next switch label.  It is
+common to mark these places with a specific comment, but this attribute is
+meant to replace comments with a more strict annotation, which can be checked
+by the compiler.  This attribute doesn't change semantics of the code and can
+be used wherever an intended fall-through occurs.  It is designed to mimic
+control-flow statements like ``break;``, so it can be placed in most places
+where ``break;`` can, but only if there are no statements on the execution path
+between it and the next switch label.
+
+By default, Clang does not warn on unannotated fallthrough from one ``switch``
+case to another. Diagnostics on fallthrough without a corresponding annotation
+can be enabled with the ``-Wimplicit-fallthrough`` argument.
+
+Here is an example:
+
+.. code-block:: c++
+
+  // compile with -Wimplicit-fallthrough
+  switch (n) {
+  case 22:
+  case 33:  // no warning: no statements between case labels
+    f();
+  case 44:  // warning: unannotated fall-through
+    g();
+    [[clang::fallthrough]];
+  case 55:  // no warning
+    if (x) {
+      h();
+      break;
+    }
+    else {
+      i();
+      [[clang::fallthrough]];
+    }
+  case 66:  // no warning
+    p();
+    [[clang::fallthrough]]; // warning: fallthrough annotation does not
+                            //          directly precede case label
+    q();
+  case 77:  // warning: unannotated fall-through
+    r();
+  }
+
+
+intel_reqd_sub_group_size
+-------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``intel_reqd_sub_group_size``","","","","","","Yes"
+
+The optional attribute intel_reqd_sub_group_size can be used to indicate that
+the kernel must be compiled and executed with the specified subgroup size. When
+this attribute is present, get_max_sub_group_size() is guaranteed to return the
+specified integer value. This is important for the correctness of many subgroup
+algorithms, and in some cases may be used by the compiler to generate more optimal
+code. See `cl_intel_required_subgroup_size
+<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`_
+for details.
+
+
+opencl_unroll_hint
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``opencl_unroll_hint``","","","","","",""
+
+The opencl_unroll_hint attribute qualifier can be used to specify that a loop
+(for, while and do loops) can be unrolled. This attribute qualifier can be
+used to specify full unrolling or partial unrolling by a specified amount.
+This is a compiler hint and the compiler may ignore this directive. See
+`OpenCL v2.0 <https://www.khronos.org/registry/cl/specs/opencl-2.0.pdf>`_
+s6.11.5 for details.
+
+
+suppress
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","``gsl::suppress``","","","","",""
+
+The ``[[gsl::suppress]]`` attribute suppresses specific
+clang-tidy diagnostics for rules of the `C++ Core Guidelines`_ in a portable
+way. The attribute can be attached to declarations, statements, and at
+namespace scope.
+
+.. code-block:: c++
+
+  [[gsl::suppress("Rh-public")]]
+  void f_() {
+    int *p;
+    [[gsl::suppress("type")]] {
+      p = reinterpret_cast<int*>(7);
+    }
+  }
+  namespace N {
+    [[gsl::suppress("type", "bounds")]];
+    ...
+  }
+
+.. _`C++ Core Guidelines`: https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#inforce-enforcement
+
+
+AMD GPU Attributes
+==================
+
+
+amdgpu_flat_work_group_size
+---------------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``amdgpu_flat_work_group_size``","``clang::amdgpu_flat_work_group_size``","","","","","Yes"
+
+The flat work-group size is the number of work-items in the work-group size
+specified when the kernel is dispatched. It is the product of the sizes of the
+x, y, and z dimension of the work-group.
+
+Clang supports the
+``__attribute__((amdgpu_flat_work_group_size(<min>, <max>)))`` attribute for the
+AMDGPU target. This attribute may be attached to a kernel function definition
+and is an optimization hint.
+
+``<min>`` parameter specifies the minimum flat work-group size, and ``<max>``
+parameter specifies the maximum flat work-group size (must be greater than
+``<min>``) to which all dispatches of the kernel will conform. Passing ``0, 0``
+as ``<min>, <max>`` implies the default behavior (``128, 256``).
+
+If specified, the AMDGPU target backend might be able to produce better machine
+code for barriers and perform scratch promotion by estimating available group
+segment size.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes.
+
+
+amdgpu_num_sgpr
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``amdgpu_num_sgpr``","``clang::amdgpu_num_sgpr``","","","","","Yes"
+
+Clang supports the ``__attribute__((amdgpu_num_sgpr(<num_sgpr>)))`` and
+``__attribute__((amdgpu_num_vgpr(<num_vgpr>)))`` attributes for the AMDGPU
+target. These attributes may be attached to a kernel function definition and are
+an optimization hint.
+
+If these attributes are specified, then the AMDGPU target backend will attempt
+to limit the number of SGPRs and/or VGPRs used to the specified value(s). The
+number of used SGPRs and/or VGPRs may further be rounded up to satisfy the
+allocation requirements or constraints of the subtarget. Passing ``0`` as
+``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits).
+
+These attributes can be used to test the AMDGPU target backend. It is
+recommended that the ``amdgpu_waves_per_eu`` attribute be used to control
+resources such as SGPRs and VGPRs since it is aware of the limits for different
+subtargets.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
+
+
+amdgpu_num_vgpr
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``amdgpu_num_vgpr``","``clang::amdgpu_num_vgpr``","","","","","Yes"
+
+Clang supports the ``__attribute__((amdgpu_num_sgpr(<num_sgpr>)))`` and
+``__attribute__((amdgpu_num_vgpr(<num_vgpr>)))`` attributes for the AMDGPU
+target. These attributes may be attached to a kernel function definition and are
+an optimization hint.
+
+If these attributes are specified, then the AMDGPU target backend will attempt
+to limit the number of SGPRs and/or VGPRs used to the specified value(s). The
+number of used SGPRs and/or VGPRs may further be rounded up to satisfy the
+allocation requirements or constraints of the subtarget. Passing ``0`` as
+``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits).
+
+These attributes can be used to test the AMDGPU target backend. It is
+recommended that the ``amdgpu_waves_per_eu`` attribute be used to control
+resources such as SGPRs and VGPRs since it is aware of the limits for different
+subtargets.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
+
+
+amdgpu_waves_per_eu
+-------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``amdgpu_waves_per_eu``","``clang::amdgpu_waves_per_eu``","","","","","Yes"
+
+A compute unit (CU) is responsible for executing the wavefronts of a work-group.
+It is composed of one or more execution units (EU), which are responsible for
+executing the wavefronts. An EU can have enough resources to maintain the state
+of more than one executing wavefront. This allows an EU to hide latency by
+switching between wavefronts in a similar way to simultaneous multithreading on
+a CPU. In order to allow the state for multiple wavefronts to fit on an EU, the
+resources used by a single wavefront, such as the number of SGPRs and VGPRs,
+have to be limited. Limiting such resources can allow greater latency hiding,
+but can result in having to spill some register state to memory.
+
+Clang supports the ``__attribute__((amdgpu_waves_per_eu(<min>[, <max>])))``
+attribute for the AMDGPU target. This attribute may be attached to a kernel
+function definition and is an optimization hint.
+
+The ``<min>`` parameter specifies the requested minimum number of waves per EU,
+and the *optional* ``<max>`` parameter specifies the requested maximum number
+of waves per EU (must be greater than ``<min>`` if specified). If ``<max>`` is
+omitted, then there is no restriction on the maximum number of waves per EU
+other than the one dictated by the hardware for which the kernel is compiled.
+Passing ``0, 0`` as ``<min>, <max>`` implies the default behavior (no limits).
+
+If specified, this attribute allows an advanced developer to tune the number of
+wavefronts that are capable of fitting within the resources of an EU. The AMDGPU
+target backend can use this information to limit resources, such as number of
+SGPRs, number of VGPRs, size of available group and private memory segments, in
+such a way that guarantees that at least ``<min>`` wavefronts and at most
+``<max>`` wavefronts are able to fit within the resources of an EU. Requesting
+more wavefronts can hide memory latency but limits available registers which
+can result in spilling. Requesting fewer wavefronts can help reduce cache
+thrashing, but can reduce memory latency hiding.
+
+This attribute controls the machine code generated by the AMDGPU target backend
+to ensure it is capable of meeting the requested values. However, when the
+kernel is executed, there may be other reasons that prevent meeting the request,
+for example, there may be wavefronts from other kernels executing on the EU.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
+
+
+OpenCL Address Spaces
+=====================
+The address space qualifier may be used to specify the region of memory that is
+used to allocate the object. OpenCL supports the following address spaces:
+``__generic`` (``generic``), ``__global`` (``global``),
+``__local`` (``local``), ``__private`` (``private``),
+``__constant`` (``constant``).
+
+  .. code-block:: c
+
+    __constant int c = ...;
+
+    __generic int* foo(global int* g) {
+      __local int* l;
+      private int p;
+      ...
+      return l;
+    }
+
+More details can be found in the OpenCL C language Spec v2.0, Section 6.5.
+
+constant
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__constant`` |br| ``constant``","",""
+
+The constant address space attribute signals that an object is located in
+a constant (non-modifiable) memory region. It is available to all work items.
+Any type can be annotated with the constant address space attribute. Objects
+with the constant address space qualifier can be declared in any scope and must
+have an initializer.
+
+
+generic
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__generic`` |br| ``generic``","",""
+
+The generic address space attribute is only available with OpenCL v2.0 and later.
+It can be used with pointer types. Variables in global and local scope and
+function parameters in non-kernel functions can have the generic address space
+type attribute. It is intended to be a placeholder for any other address space
+except for ``__constant`` in OpenCL code which can be used with multiple address
+spaces.
+
+
+global
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__global`` |br| ``global``","",""
+
+The global address space attribute specifies that an object is allocated in
+global memory, which is accessible by all work items. The content stored in this
+memory area persists between kernel executions. Pointer types to the global
+address space are allowed as function parameters or local variables. Starting
+with OpenCL v2.0, the global address space can be used with global (program
+scope) variables and static local variables as well.
+
+
+local
+-----
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__local`` |br| ``local``","",""
+
+The local address space specifies that an object is allocated in the local (work
+group) memory area, which is accessible to all work items in the same work
+group. The content stored in this memory region is not accessible after
+the kernel execution ends. In a kernel function scope, any variable can be in
+the local address space. In other scopes, only pointer types to the local address
+space are allowed. Local address space variables cannot have an initializer.
+
+
+private
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``__private`` |br| ``private``","",""
+
+The private address space specifies that an object is allocated in the private
+(work item) memory. Other work items cannot access the same memory area and its
+content is destroyed after work item execution ends. Local variables can be
+declared in the private address space. Function arguments are always in the
+private address space. Kernel function arguments of a pointer or an array type
+cannot point to the private address space.
+
+
+Calling Conventions
+===================
+Clang supports several different calling conventions, depending on the target
+platform and architecture. The calling convention used for a function determines
+how parameters are passed, how results are returned to the caller, and other
+low-level details of calling a function.
+
+aarch64_vector_pcs
+------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``aarch64_vector_pcs``","``clang::aarch64_vector_pcs``","``clang::aarch64_vector_pcs``","","","",""
+
+On AArch64 targets, this attribute changes the calling convention of a
+function to preserve additional floating-point and Advanced SIMD registers
+relative to the default calling convention used for AArch64.
+
+This means it is more efficient to call such functions from code that performs
+extensive floating-point and vector calculations, because fewer live SIMD and FP
+registers need to be saved. This property makes it well-suited for e.g.
+floating-point or vector math library functions, which are typically leaf
+functions that require a small number of registers.
+
+However, using this attribute also means that it is more expensive to call
+a function that adheres to the default calling convention from within such
+a function. Therefore, it is recommended that this attribute is only used
+for leaf functions.
+
+For more information, see the documentation for `aarch64_vector_pcs`_ on
+the Arm Developer website.
+
+.. _`aarch64_vector_pcs`: https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi
+
+
+fastcall
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``fastcall``","``gnu::fastcall``","","","``__fastcall`` |br| ``_fastcall``","",""
+
+On 32-bit x86 targets, this attribute changes the calling convention of a
+function to use ECX and EDX as register parameters and clear parameters off of
+the stack on return. This convention does not support variadic calls or
+unprototyped functions in C, and has no effect on x86_64 targets. This calling
+convention is supported primarily for compatibility with existing code. Users
+seeking register parameters should use the ``regparm`` attribute, which does
+not require callee-cleanup.  See the documentation for `__fastcall`_ on MSDN.
+
+.. _`__fastcall`: http://msdn.microsoft.com/en-us/library/6xa169sk.aspx
+
+
+ms_abi
+------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``ms_abi``","``gnu::ms_abi``","","","","",""
+
+On non-Windows x86_64 targets, this attribute changes the calling convention of
+a function to match the default convention used on Windows x86_64. This
+attribute has no effect on Windows targets or non-x86_64 targets.
+
+
+pcs
+---
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``pcs``","``gnu::pcs``","","","","",""
+
+On ARM targets, this attribute can be used to select calling conventions
+similar to ``stdcall`` on x86. Valid parameter values are "aapcs" and
+"aapcs-vfp".
+
+
+preserve_all
+------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``preserve_all``","``clang::preserve_all``","``clang::preserve_all``","","","",""
+
+On X86-64 and AArch64 targets, this attribute changes the calling convention of
+a function. The ``preserve_all`` calling convention attempts to make the code
+in the caller even less intrusive than the ``preserve_most`` calling convention.
+This calling convention also behaves identically to the ``C`` calling convention
+on how arguments and return values are passed, but it uses a different set of
+caller/callee-saved registers. This removes the burden of saving and
+recovering a large register set before and after the call in the caller. If
+the arguments are passed in callee-saved registers, then they will be
+preserved by the callee across the call. This doesn't apply for values
+returned in callee-saved registers.
+
+- On X86-64 the callee preserves all general purpose registers, except for
+  R11. R11 can be used as a scratch register. Furthermore it also preserves
+  all floating-point registers (XMMs/YMMs).
+
+The idea behind this convention is to support calls to runtime functions
+that don't need to call out to any other functions.
+
+This calling convention, like the ``preserve_most`` calling convention, will be
+used by a future version of the Objective-C runtime and should be considered
+experimental at this time.
+
+
+preserve_most
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``preserve_most``","``clang::preserve_most``","``clang::preserve_most``","","","",""
+
+On X86-64 and AArch64 targets, this attribute changes the calling convention of
+a function. The ``preserve_most`` calling convention attempts to make the code
+in the caller as unintrusive as possible. This convention behaves identically
+to the ``C`` calling convention on how arguments and return values are passed,
+but it uses a different set of caller/callee-saved registers. This alleviates
+the burden of saving and recovering a large register set before and after the
+call in the caller. If the arguments are passed in callee-saved registers,
+then they will be preserved by the callee across the call. This doesn't
+apply for values returned in callee-saved registers.
+
+- On X86-64 the callee preserves all general purpose registers, except for
+  R11. R11 can be used as a scratch register. Floating-point registers
+  (XMMs/YMMs) are not preserved and need to be saved by the caller.
+
+The idea behind this convention is to support calls to runtime functions
+that have a hot path and a cold path. The hot path is usually a small piece
+of code that doesn't use many registers. The cold path might need to call out to
+another function and therefore only needs to preserve the caller-saved
+registers, which haven't already been saved by the caller. The
+``preserve_most`` calling convention is very similar to the ``cold`` calling
+convention in terms of caller/callee-saved registers, but they are used for
+different types of function calls. ``coldcc`` is for function calls that are
+rarely executed, whereas ``preserve_most`` function calls are intended to be
+on the hot path and definitely executed a lot. Furthermore, ``preserve_most``
+doesn't prevent the inliner from inlining the function call.
+
+This calling convention will be used by a future version of the Objective-C
+runtime and should therefore still be considered experimental at this time.
+Although this convention was created to optimize certain runtime calls to
+the Objective-C runtime, it is not limited to this runtime and might be used
+by other runtimes in the future too. The current implementation only
+supports X86-64 and AArch64, but the intention is to support more architectures
+in the future.
+
+
+regcall
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``regcall``","``gnu::regcall``","","","``__regcall``","",""
+
+On x86 targets, this attribute changes the calling convention to
+`__regcall`_ convention. This convention aims to pass as many arguments
+as possible in registers. It also tries to utilize registers for the
+return value whenever it is possible.
+
+.. _`__regcall`: https://software.intel.com/en-us/node/693069
+
+
+regparm
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``regparm``","``gnu::regparm``","","","","",""
+
+On 32-bit x86 targets, the regparm attribute causes the compiler to pass
+the first three integer parameters in EAX, EDX, and ECX instead of on the
+stack. This attribute has no effect on variadic functions, for which all
+parameters are passed via the stack as normal.
+
+
+stdcall
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``stdcall``","``gnu::stdcall``","","","``__stdcall`` |br| ``_stdcall``","",""
+
+On 32-bit x86 targets, this attribute changes the calling convention of a
+function to clear parameters off of the stack on return. This convention does
+not support variadic calls or unprototyped functions in C, and has no effect on
+x86_64 targets. This calling convention is used widely by the Windows API and
+COM applications.  See the documentation for `__stdcall`_ on MSDN.
+
+.. _`__stdcall`: http://msdn.microsoft.com/en-us/library/zxk0tw93.aspx
+
+
+thiscall
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``thiscall``","``gnu::thiscall``","","","``__thiscall`` |br| ``_thiscall``","",""
+
+On 32-bit x86 targets, this attribute changes the calling convention of a
+function to use ECX for the first parameter (typically the implicit ``this``
+parameter of C++ methods) and clear parameters off of the stack on return. This
+convention does not support variadic calls or unprototyped functions in C, and
+has no effect on x86_64 targets. See the documentation for `__thiscall`_ on
+MSDN.
+
+.. _`__thiscall`: http://msdn.microsoft.com/en-us/library/ek8tkfbw.aspx
+
+
+vectorcall
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``vectorcall``","``clang::vectorcall``","``clang::vectorcall``","","``__vectorcall`` |br| ``_vectorcall``","",""
+
+On 32-bit x86 *and* x86_64 targets, this attribute changes the calling
+convention of a function to pass vector parameters in SSE registers.
+
+On 32-bit x86 targets, this calling convention is similar to ``__fastcall``.
+The first two integer parameters are passed in ECX and EDX. Subsequent integer
+parameters are passed in memory, and callee clears the stack.  On x86_64
+targets, the callee does *not* clear the stack, and integer parameters are
+passed in RCX, RDX, R8, and R9 as is done for the default Windows x64 calling
+convention.
+
+On both 32-bit x86 and x86_64 targets, vector and floating point arguments are
+passed in XMM0-XMM5. Homogeneous vector aggregates of up to four elements are
+passed in sequential SSE registers if enough are available. If AVX is enabled,
+256-bit vectors are passed in YMM0-YMM5. Any vector or aggregate type that
+cannot be passed in registers for any reason is passed by reference, which
+allows the caller to align the parameter memory.
+
+See the documentation for `__vectorcall`_ on MSDN for more details.
+
+.. _`__vectorcall`: http://msdn.microsoft.com/en-us/library/dn375768.aspx
+
+
+Consumed Annotation Checking
+============================
+Clang supports additional attributes for checking basic resource management
+properties, specifically for unique objects that have a single owning reference.
+The following attributes are currently supported, although **the implementation
+for these annotations is currently in development and is subject to change.**
+
+callable_when
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``callable_when``","``clang::callable_when``","","","","","Yes"
+
+Use ``__attribute__((callable_when(...)))`` to indicate what states a method
+may be called in.  Valid states are unconsumed, consumed, or unknown.  Each
+argument to this attribute must be a quoted string.  E.g.:
+
+``__attribute__((callable_when("unconsumed", "unknown")))``
+
+
+consumable
+----------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``consumable``","``clang::consumable``","","","","","Yes"
+
+Each ``class`` that uses any of the typestate annotations must first be marked
+using the ``consumable`` attribute.  Failure to do so will result in a warning.
+
+This attribute accepts a single parameter that must be one of the following:
+``unknown``, ``consumed``, or ``unconsumed``.
+
+
+param_typestate
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``param_typestate``","``clang::param_typestate``","","","","","Yes"
+
+This attribute specifies expectations about function parameters.  Calls to a
+function with annotated parameters will issue a warning if the corresponding
+argument isn't in the expected state.  The attribute is also used to set the
+initial state of the parameter when analyzing the function's body.
+
+
+return_typestate
+----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``return_typestate``","``clang::return_typestate``","","","","","Yes"
+
+The ``return_typestate`` attribute can be applied to functions or parameters.
+When applied to a function the attribute specifies the state of the returned
+value.  The function's body is checked to ensure that it always returns a value
+in the specified state.  On the caller side, values returned by the annotated
+function are initialized to the given state.
+
+When applied to a function parameter it modifies the state of an argument after
+a call to the function returns.  The function's body is checked to ensure that
+the parameter is in the expected state before returning.
+
+
+set_typestate
+-------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``set_typestate``","``clang::set_typestate``","","","","","Yes"
+
+Annotate methods that transition an object into a new state with
+``__attribute__((set_typestate(new_state)))``.  The new state must be
+unconsumed, consumed, or unknown.
+
+
+test_typestate
+--------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``test_typestate``","``clang::test_typestate``","","","","","Yes"
+
+Use ``__attribute__((test_typestate(tested_state)))`` to indicate that a method
+returns true if the object is in the specified state.
+
+
+Type Safety Checking
+====================
+Clang supports additional attributes to enable checking type safety properties
+that can't be enforced by the C type system. To see warnings produced by these
+checks, ensure that ``-Wtype-safety`` is enabled. Use cases include:
+
+* MPI library implementations, where these attributes enable checking that
+  the buffer type matches the passed ``MPI_Datatype``;
+* for HDF5 library there is a similar use case to MPI;
+* checking types of variadic functions' arguments for functions like
+  ``fcntl()`` and ``ioctl()``.
+
+You can detect support for these attributes with ``__has_attribute()``.  For
+example:
+
+.. code-block:: c++
+
+  #if defined(__has_attribute)
+  #  if __has_attribute(argument_with_type_tag) && \
+        __has_attribute(pointer_with_type_tag) && \
+        __has_attribute(type_tag_for_datatype)
+  #    define ATTR_MPI_PWT(buffer_idx, type_idx) __attribute__((pointer_with_type_tag(mpi,buffer_idx,type_idx)))
+  /* ... other macros ...  */
+  #  endif
+  #endif
+
+  #if !defined(ATTR_MPI_PWT)
+  # define ATTR_MPI_PWT(buffer_idx, type_idx)
+  #endif
+
+  int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */)
+      ATTR_MPI_PWT(1,3);
+
+argument_with_type_tag
+----------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``argument_with_type_tag`` |br| ``pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","","","",""
+
+Use ``__attribute__((argument_with_type_tag(arg_kind, arg_idx,
+type_tag_idx)))`` on a function declaration to specify that the function
+accepts a type tag that determines the type of some other argument.
+
+This attribute is primarily useful for checking arguments of variadic functions
+(``pointer_with_type_tag`` can be used in most non-variadic cases).
+
+In the attribute prototype above:
+  * ``arg_kind`` is an identifier that should be used when annotating all
+    applicable type tags.
+  * ``arg_idx`` provides the position of a function argument. The expected type of
+    this function argument will be determined by the function argument specified
+    by ``type_tag_idx``. In the code example below, "3" means that the type of the
+    function's third argument will be determined by ``type_tag_idx``.
+  * ``type_tag_idx`` provides the position of a function argument. This function
+    argument will be a type tag. The type tag will determine the expected type of
+    the argument specified by ``arg_idx``. In the code example below, "2" means
+    that the type tag associated with the function's second argument should agree
+    with the type of the argument specified by ``arg_idx``.
+
+For example:
+
+.. code-block:: c++
+
+  int fcntl(int fd, int cmd, ...)
+      __attribute__(( argument_with_type_tag(fcntl,3,2) ));
+  // The function's second argument will be a type tag; this type tag will
+  // determine the expected type of the function's third argument.
+
+
+pointer_with_type_tag
+---------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``argument_with_type_tag`` |br| ``pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","``clang::argument_with_type_tag`` |br| ``clang::pointer_with_type_tag``","","","",""
+
+Use ``__attribute__((pointer_with_type_tag(ptr_kind, ptr_idx, type_tag_idx)))``
+on a function declaration to specify that the function accepts a type tag that
+determines the pointee type of some other pointer argument.
+
+In the attribute prototype above:
+  * ``ptr_kind`` is an identifier that should be used when annotating all
+    applicable type tags.
+  * ``ptr_idx`` provides the position of a function argument; this function
+    argument will have a pointer type. The expected pointee type of this pointer
+    type will be determined by the function argument specified by
+    ``type_tag_idx``. In the code example below, "1" means that the pointee type
+    of the function's first argument will be determined by ``type_tag_idx``.
+  * ``type_tag_idx`` provides the position of a function argument; this function
+    argument will be a type tag. The type tag will determine the expected pointee
+    type of the pointer argument specified by ``ptr_idx``. In the code example
+    below, "3" means that the type tag associated with the function's third
+    argument should agree with the pointee type of the pointer argument specified
+    by ``ptr_idx``.
+
+For example:
+
+.. code-block:: c++
+
+  typedef int MPI_Datatype;
+  int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */)
+      __attribute__(( pointer_with_type_tag(mpi,1,3) ));
+  // The function's 3rd argument will be a type tag; this type tag will
+  // determine the expected pointee type of the function's 1st argument.
+
+
+type_tag_for_datatype
+---------------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``type_tag_for_datatype``","``clang::type_tag_for_datatype``","``clang::type_tag_for_datatype``","","","",""
+
+When declaring a variable, use
+``__attribute__((type_tag_for_datatype(kind, type)))`` to create a type tag that
+is tied to the ``type`` argument given to the attribute.
+
+In the attribute prototype above:
+  * ``kind`` is an identifier that should be used when annotating all applicable
+    type tags.
+  * ``type`` indicates the name of the type.
+
+Clang supports annotating type tags of two forms.
+
+  * **Type tag that is a reference to a declared identifier.**
+    Use ``__attribute__((type_tag_for_datatype(kind, type)))`` when declaring that
+    identifier:
+
+    .. code-block:: c++
+
+      typedef int MPI_Datatype;
+      extern struct mpi_datatype mpi_datatype_int
+          __attribute__(( type_tag_for_datatype(mpi,int) ));
+      #define MPI_INT ((MPI_Datatype) &mpi_datatype_int)
+      // &mpi_datatype_int is a type tag. It is tied to type "int".
+
+  * **Type tag that is an integral literal.**
+    Declare a ``static const`` variable with an initializer value and attach
+    ``__attribute__((type_tag_for_datatype(kind, type)))`` on that declaration:
+
+    .. code-block:: c++
+
+      typedef int MPI_Datatype;
+      static const MPI_Datatype mpi_datatype_int
+          __attribute__(( type_tag_for_datatype(mpi,int) )) = 42;
+      #define MPI_INT ((MPI_Datatype) 42)
+      // The number 42 is a type tag. It is tied to type "int".
+
+
+The ``type_tag_for_datatype`` attribute also accepts an optional third argument
+that determines how the type of the function argument specified by either
+``arg_idx`` or ``ptr_idx`` is compared against the type associated with the type
+tag. (Recall that for the ``argument_with_type_tag`` attribute, the type of the
+function argument specified by ``arg_idx`` is compared against the type
+associated with the type tag. Also recall that for the ``pointer_with_type_tag``
+attribute, the pointee type of the function argument specified by ``ptr_idx`` is
+compared against the type associated with the type tag.) There are two supported
+values for this optional third argument:
+
+  * ``layout_compatible`` will cause types to be compared according to
+    layout-compatibility rules (see C++11 [class.mem] p17 and p18 for the
+    layout-compatibility rules for two standard-layout struct types and for two
+    standard-layout union types). This is useful when creating a type tag
+    associated with a struct or union type. For example:
+
+    .. code-block:: c++
+
+      /* In mpi.h */
+      typedef int MPI_Datatype;
+      struct internal_mpi_double_int { double d; int i; };
+      extern struct mpi_datatype mpi_datatype_double_int
+          __attribute__(( type_tag_for_datatype(mpi,
+                          struct internal_mpi_double_int, layout_compatible) ));
+
+      #define MPI_DOUBLE_INT ((MPI_Datatype) &mpi_datatype_double_int)
+
+      int MPI_Send(void *buf, int count, MPI_Datatype datatype, ...)
+          __attribute__(( pointer_with_type_tag(mpi,1,3) ));
+
+      /* In user code */
+      struct my_pair { double a; int b; };
+      struct my_pair *buffer;
+      MPI_Send(buffer, 1, MPI_DOUBLE_INT /*, ...  */); // no warning because the
+                                                       // layout of my_pair is
+                                                       // compatible with that of
+                                                       // internal_mpi_double_int
+
+      struct my_int_pair { int a; int b; };
+      struct my_int_pair *buffer2;
+      MPI_Send(buffer2, 1, MPI_DOUBLE_INT /*, ...  */); // warning because the
+                                                        // layout of my_int_pair
+                                                        // does not match that of
+                                                        // internal_mpi_double_int
+
+  * ``must_be_null`` specifies that the function argument specified by either
+    ``arg_idx`` (for the ``argument_with_type_tag`` attribute) or ``ptr_idx`` (for
+    the ``pointer_with_type_tag`` attribute) should be a null pointer constant.
+    The second argument to the ``type_tag_for_datatype`` attribute is ignored. For
+    example:
+
+    .. code-block:: c++
+
+      /* In mpi.h */
+      typedef int MPI_Datatype;
+      extern struct mpi_datatype mpi_datatype_null
+          __attribute__(( type_tag_for_datatype(mpi, void, must_be_null) ));
+
+      #define MPI_DATATYPE_NULL ((MPI_Datatype) &mpi_datatype_null)
+      int MPI_Send(void *buf, int count, MPI_Datatype datatype, ...)
+          __attribute__(( pointer_with_type_tag(mpi,1,3) ));
+
+      /* In user code */
+      struct my_pair { double a; int b; };
+      struct my_pair *buffer;
+      MPI_Send(buffer, 1, MPI_DATATYPE_NULL /*, ...  */); // warning: MPI_DATATYPE_NULL
+                                                          // was specified but buffer
+                                                          // is not a null pointer
+
+
+Nullability Attributes
+======================
+Whether a particular pointer may be "null" is an important concern when working with pointers in the C family of languages. The various nullability attributes indicate whether a particular pointer can be null or not, which makes APIs more expressive and can help static analysis tools identify bugs involving null pointers. Clang supports several kinds of nullability attributes: the ``nonnull`` and ``returns_nonnull`` attributes indicate which function or method parameters and result types can never be null, while nullability type qualifiers indicate which pointer types can be null (``_Nullable``) or cannot be null (``_Nonnull``).
+
+The nullability (type) qualifiers express whether a value of a given pointer type can be null (the ``_Nullable`` qualifier), doesn't have a defined meaning for null (the ``_Nonnull`` qualifier), or for which the purpose of null is unclear (the ``_Null_unspecified`` qualifier). Because nullability qualifiers are expressed within the type system, they are more general than the ``nonnull`` and ``returns_nonnull`` attributes, allowing one to express (for example) a nullable pointer to an array of nonnull pointers. Nullability qualifiers are written to the right of the pointer to which they apply. For example:
+
+  .. code-block:: c
+
+    // No meaningful result when 'ptr' is null (here, it happens to be undefined behavior).
+    int fetch(int * _Nonnull ptr) { return *ptr; }
+
+    // 'ptr' may be null.
+    int fetch_or_zero(int * _Nullable ptr) {
+      return ptr ? *ptr : 0;
+    }
+
+    // A nullable pointer to non-null pointers to const characters.
+    const char *join_strings(const char * _Nonnull * _Nullable strings, unsigned n);
+
+In Objective-C, there is an alternate spelling for the nullability qualifiers that can be used in Objective-C methods and properties using context-sensitive, non-underscored keywords. For example:
+
+  .. code-block:: objective-c
+
+    @interface NSView : NSResponder
+      - (nullable NSView *)ancestorSharedWithView:(nonnull NSView *)aView;
+      @property (assign, nullable) NSView *superview;
+      @property (readonly, nonnull) NSArray *subviews;
+    @end
+
+_Nonnull
+--------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``_Nonnull``","",""
+
+The ``_Nonnull`` nullability qualifier indicates that null is not a meaningful value for a value of the ``_Nonnull`` pointer type. For example, given a declaration such as:
+
+  .. code-block:: c
+
+    int fetch(int * _Nonnull ptr);
+
+a caller of ``fetch`` should not provide a null value, and the compiler will produce a warning if it sees a literal null value passed to ``fetch``. Note that, unlike the declaration attribute ``nonnull``, the presence of ``_Nonnull`` does not imply that passing null is undefined behavior: ``fetch`` is free to consider null undefined behavior or (perhaps for backward-compatibility reasons) defensively handle null.
+
+
+_Null_unspecified
+-----------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``_Null_unspecified``","",""
+
+The ``_Null_unspecified`` nullability qualifier indicates that neither the ``_Nonnull`` nor ``_Nullable`` qualifiers make sense for a particular pointer type. It is used primarily to indicate that the role of null with specific pointers in a nullability-annotated header is unclear, e.g., due to overly-complex implementations or historical factors with a long-lived API.
+
+
+_Nullable
+---------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "","","","","``_Nullable``","",""
+
+The ``_Nullable`` nullability qualifier indicates that a value of the ``_Nullable`` pointer type can be null. For example, given:
+
+  .. code-block:: c
+
+    int fetch_or_zero(int * _Nullable ptr);
+
+a caller of ``fetch_or_zero`` can provide null.
+
+
+nonnull
+-------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``nonnull``","``gnu::nonnull``","","","","",""
+
+The ``nonnull`` attribute indicates that some function parameters must not be null, and can be used in several different ways. Its original usage (`from GCC <https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes>`_) is as a function (or Objective-C method) attribute that specifies which parameters of the function are nonnull in a comma-separated list. For example:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest, const void *src, size_t len)
+                    __attribute__((nonnull (1, 2)));
+
+Here, the ``nonnull`` attribute indicates that parameters 1 and 2
+cannot have a null value. Omitting the parenthesized list of parameter indices means that all parameters of pointer type cannot be null:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest, const void *src, size_t len)
+                    __attribute__((nonnull));
+
+Clang also allows the ``nonnull`` attribute to be placed directly on a function (or Objective-C method) parameter, eliminating the need to specify the parameter index ahead of time. For example:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest __attribute__((nonnull)),
+                             const void *src __attribute__((nonnull)), size_t len);
+
+Note that the ``nonnull`` attribute indicates that passing null to a non-null parameter is undefined behavior, which the optimizer may take advantage of to, e.g., remove null checks. The ``_Nonnull`` type qualifier indicates that a pointer cannot be null in a more general manner (because it is part of the type system) and does not imply undefined behavior, making it more widely applicable.
+
+
+returns_nonnull
+---------------
+.. csv-table:: Supported Syntaxes
+   :header: "GNU", "C++11", "C2x", "``__declspec``", "Keyword", "``#pragma``", "``#pragma clang attribute``"
+
+   "``returns_nonnull``","``gnu::returns_nonnull``","","","","","Yes"
+
+The ``returns_nonnull`` attribute indicates that a particular function (or Objective-C method) always returns a non-null pointer. For example, a particular system ``malloc`` might be defined to terminate a process when memory is not available rather than returning a null pointer:
+
+  .. code-block:: c
+
+    extern void * malloc (size_t size) __attribute__((returns_nonnull));
+
+The ``returns_nonnull`` attribute implies that returning a null pointer is undefined behavior, which the optimizer may take advantage of. The ``_Nonnull`` type qualifier indicates that a pointer cannot be null in a more general manner (because it is part of the type system) and does not imply undefined behavior, making it more widely applicable.
+
+
diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index e852c3e3879855..3dafac54a02bda 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -198,6 +198,10 @@ Filename (or -) to write dependency output to
 
 Emit Clang AST files for source inputs
 
+.. option:: -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang<arg>
+
+Trivial automatic variable initialization to zero is only here for benchmarks, it'll eventually be removed, and I'm OK with that because I'm only using it to benchmark
+
 .. option:: -exported\_symbols\_list <arg>
 
 .. option:: -faligned-new=<arg>
@@ -210,10 +214,6 @@ Use approximate transcendental functions
 
 Flush denormal floating point values to zero in CUDA device mode.
 
-.. option:: -fcuda-rdc, -fno-cuda-rdc
-
-Generate relocatable device code, also known as separate compilation mode.
-
 .. option:: -fcuda-short-ptr, -fno-cuda-short-ptr
 
 Use 32-bit pointers for accessing const/local/shared address spaces.
@@ -222,6 +222,10 @@ Use 32-bit pointers for accessing const/local/shared address spaces.
 
 Reserve register r19 (Hexagon only)
 
+.. option:: -fgpu-rdc, -fcuda-rdc, -fno-gpu-rdc
+
+Generate relocatable device code, also known as separate compilation mode.
+
 .. option:: -fheinous-gnu-extensions
 
 .. option:: -flat\_namespace
@@ -254,6 +258,10 @@ Use the gcc toolchain at the given directory
 
 Generate CodeView debug information
 
+.. option:: -gcodeview-ghash, -gno-codeview-ghash
+
+Emit type record hashes in a .debug$H section
+
 .. option:: -headerpad\_max\_install\_names<arg>
 
 .. option:: -help, --help
@@ -288,6 +296,10 @@ Make the next included directory (-I or -F) an indexer header map
 
 .. option:: -mbig-endian, -EB
 
+.. option:: -mbranch-protection=<arg>
+
+Enforce targets of indirect branches and function returns
+
 .. option:: --migrate
 
 Run the migrator
@@ -792,15 +804,7 @@ Don't use blacklist file for sanitizers
 
 .. option:: -fparse-all-comments
 
-.. option:: -frecord-command-line, -frecord-gcc-switches, -fno-record-command-line, -fno-record-gcc-switches
-
-Generate a section named ".GCC.command.line" containing the clang driver
-command-line. After linking, the section may contain multiple command lines,
-which will be individually terminated by null bytes. Separate arguments within
-a command line are combined with spaces; spaces and backslashes within an
-argument are escaped with backslashes. This format differs from the format of
-the equivalent section produced by GCC with the -frecord-gcc-switches flag.
-This option is currently only supported on ELF targets.
+.. option:: -frecord-command-line, -fno-record-command-line, -frecord-gcc-switches
 
 .. option:: -fsanitize-address-field-padding=<arg>
 
@@ -810,20 +814,18 @@ Level of field padding for AddressSanitizer
 
 Enable linker dead stripping of globals in AddressSanitizer
 
-.. option:: -fsanitize-address-use-odr-indicator, -fno-sanitize-address-use-odr-indicator
-
-Enable ODR indicator globals to avoid false ODR violation reports in partially sanitized programs at the cost of an increase in binary size
-
 .. option:: -fsanitize-address-poison-custom-array-cookie, -fno-sanitize-address-poison-custom-array-cookie
 
-Enable "poisoning" array cookies when allocating arrays with a custom operator new\[\] in Address Sanitizer, preventing accesses to the cookies from user code. An array cookie is a small implementation-defined header added to certain array allocations to record metadata such as the length of the array. Accesses to array cookies from user code are technically allowed by the standard but are more likely to be the result of an out-of-bounds array access.
-
-An operator new\[\] is "custom" if it is not one of the allocation functions provided by the C++ standard library. Array cookies from non-custom allocation functions are always poisoned.
+Enable poisoning array cookies when using custom operator new\[\] in AddressSanitizer
 
 .. option:: -fsanitize-address-use-after-scope, -fno-sanitize-address-use-after-scope
 
 Enable use-after-scope detection in AddressSanitizer
 
+.. option:: -fsanitize-address-use-odr-indicator, -fno-sanitize-address-use-odr-indicator
+
+Enable ODR indicator globals to avoid false ODR violation reports in partially sanitized programs at the cost of an increase in binary size
+
 .. option:: -fsanitize-blacklist=<arg>
 
 Path to blacklist file for sanitizers
@@ -840,6 +842,10 @@ Generalize pointers in CFI indirect call type signature checks
 
 Specify the type of coverage instrumentation for Sanitizers
 
+.. option:: -fsanitize-hwaddress-abi=<arg>
+
+Select the HWAddressSanitizer ABI to target (interceptor or platform, default interceptor)
+
 .. option:: -fsanitize-link-c++-runtime
 
 .. option:: -fsanitize-memory-track-origins, -fno-sanitize-memory-track-origins
@@ -1072,6 +1078,10 @@ Set directory to include search path with prefix
 
 Add directory to SYSTEM include search path, absolute paths are relative to -isysroot
 
+.. option:: --libomptarget-nvptx-path=<arg>
+
+Path to libomptarget-nvptx libraries
+
 .. option:: --ptxas-path=<arg>
 
 Path to ptxas (used for compiling CUDA code)
@@ -1283,6 +1293,8 @@ Enable C++ static destructor registration (the default)
 
 Instrument control-flow architecture protection. Options: return, branch, full, none.
 
+.. option:: -fcf-runtime-abi=<arg>
+
 .. option:: -fchar8\_t, -fno-char8\_t
 
 Enable C++ builtin type char8\_t
@@ -1343,6 +1355,10 @@ Emit macro debug information
 
 remap file source paths in debug info
 
+.. option:: -fdebug-ranges-base-address, -fno-debug-ranges-base-address
+
+Use DWARF base address selection entries in debug\_ranges
+
 .. option:: -fdebug-types-section, -fno-debug-types-section
 
 Place debug types in their own section (ELF Only)
@@ -1651,6 +1667,8 @@ Synthesize retain and release calls for Objective-C pointers
 
 Use EH-safe code when synthesizing retains and releases in -fobjc-arc
 
+.. option:: -fobjc-convert-messages-to-runtime-calls, -fno-objc-convert-messages-to-runtime-calls
+
 .. option:: -fobjc-exceptions, -fno-objc-exceptions
 
 Enable Objective-C exceptions
@@ -1737,6 +1755,14 @@ Load the named plugin (dynamic shared object)
 
 .. option:: -fprofile-dir=<arg>
 
+.. option:: -fprofile-exclude-files=<arg>
+
+Instrument only functions from files where names don't match all the regexes separated by a semi-colon
+
+.. option:: -fprofile-filter-files=<arg>
+
+Instrument only functions from files where names match any regex separated by a semi-colon
+
 .. option:: -fprofile-generate, -fno-profile-generate
 
 Generate instrumented code to collect execution counts into default.profraw (overridden by LLVM\_PROFILE\_FILE env var)
@@ -1765,6 +1791,10 @@ Generate instrumented code to collect execution counts into <file> (overridden b
 
 Use instrumentation data for profile-guided optimization
 
+.. option:: -fprofile-remapping-file=<file>, -fprofile-remapping-file <arg>
+
+Use the remappings described in <file> to match the profile data against names in the program
+
 .. option:: -fprofile-sample-accurate, -fauto-profile-accurate, -fno-profile-sample-accurate
 
 Specifies that the sample profile is accurate. If the sample
@@ -1876,19 +1906,23 @@ Enable the superword-level parallelism vectorization passes
 
 Provide minimal debug info in the object/executable to facilitate online symbolication/stack traces in the absence of .dwo/.dwp files when using Split DWARF
 
+.. option:: -fsplit-lto-unit, -fno-split-lto-unit
+
+Enables splitting of the LTO unit.
+
 .. option:: -fsplit-stack
 
 .. option:: -fstack-protector, -fno-stack-protector
 
-Enable stack protectors for functions potentially vulnerable to stack smashing
+Enable stack protectors for some functions vulnerable to stack smashing. This uses a loose heuristic which considers functions vulnerable if they contain a char (or 8bit integer) array or constant sized calls to alloca, which are of greater size than ssp-buffer-size (default: 8 bytes). All variable sized calls to alloca are considered vulnerable
 
 .. option:: -fstack-protector-all
 
-Force the usage of stack protectors for all functions
+Enable stack protectors for all functions
 
 .. option:: -fstack-protector-strong
 
-Use a strong heuristic to apply stack protectors to functions
+Enable stack protectors for some functions vulnerable to stack smashing. Compared to -fstack-protector, this uses a stronger heuristic that includes functions containing arrays of any size (and any type), as well as any calls to alloca or the taking of an address from a local variable
 
 .. option:: -fstack-size-section, -fno-stack-size-section
 
@@ -1962,6 +1996,10 @@ Specify the function to be called on overflow
 
 Process trigraph sequences
 
+.. option:: -ftrivial-auto-var-init=<arg>
+
+Initialize trivial automatic stack variables: uninitialized (default) \| pattern
+
 .. option:: -funique-section-names, -fno-unique-section-names
 
 Use unique names for text and data sections (ELF Only)
@@ -2000,6 +2038,10 @@ Enable the loop vectorization passes
 
 .. option:: -fverbose-asm, -fno-verbose-asm
 
+.. option:: -fvisibility-global-new-delete-hidden
+
+Give global C++ operator new and delete declarations hidden visibility
+
 .. option:: -fvisibility-inlines-hidden
 
 Give inline C++ member functions hidden visibility by default
@@ -2162,7 +2204,7 @@ Link stack frames through backchain on System Z
 
 .. option:: -mconsole<arg>
 
-.. option:: -mcpu=<arg>, -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65)
+.. option:: -mcpu=<arg>, -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65), -mv66 (equivalent to -mcpu=hexagonv66)
 
 .. option:: -mcrc, -mno-crc
 
@@ -2196,6 +2238,8 @@ Enable merging of globals
 
 .. option:: -mhwdiv=<arg>, --mhwdiv <arg>, --mhwdiv=<arg>
 
+.. option:: -mhwmult=<arg>
+
 .. option:: -miamcu, -mno-iamcu
 
 Use Intel MCU ABI
@@ -2272,6 +2316,8 @@ Select return address signing scope
 
 Use software floating point
 
+.. option:: -mspeculative-load-hardening, -mno-speculative-load-hardening
+
 .. option:: -mstack-alignment=<arg>
 
 Set the stack alignment
@@ -2296,6 +2342,10 @@ The thread model to use, e.g. posix, single (posix by default)
 
 .. option:: -mthumb, -mno-thumb
 
+.. option:: -mtls-direct-seg-refs, -mno-tls-direct-seg-refs
+
+Enable direct TLS access through segment registers (default)
+
 .. option:: -mtune=<arg>
 
 .. option:: -mtvos-version-min=<arg>, -mappletvos-version-min=<arg>
@@ -2314,41 +2364,33 @@ The thread model to use, e.g. posix, single (posix by default)
 
 AARCH64
 -------
-.. option:: -ffixed-x1
-
-Reserve the x1 register (AArch64 only)
-
-.. option:: -ffixed-x2
-
-Reserve the x2 register (AArch64 only)
-
-.. option:: -ffixed-x3
+.. option:: -fcall-saved-x10
 
-Reserve the x3 register (AArch64 only)
+Make the x10 register call-saved (AArch64 only)
 
-.. option:: -ffixed-x4
+.. option:: -fcall-saved-x11
 
-Reserve the x4 register (AArch64 only)
+Make the x11 register call-saved (AArch64 only)
 
-.. option:: -ffixed-x5
+.. option:: -fcall-saved-x12
 
-Reserve the x5 register (AArch64 only)
+Make the x12 register call-saved (AArch64 only)
 
-.. option:: -ffixed-x6
+.. option:: -fcall-saved-x13
 
-Reserve the x6 register (AArch64 only)
+Make the x13 register call-saved (AArch64 only)
 
-.. option:: -ffixed-x7
+.. option:: -fcall-saved-x14
 
-Reserve the x7 register (AArch64 only)
+Make the x14 register call-saved (AArch64 only)
 
-.. option:: -ffixed-x18
+.. option:: -fcall-saved-x15
 
-Reserve the x18 register (AArch64 only)
+Make the x15 register call-saved (AArch64 only)
 
-.. option:: -ffixed-x20
+.. option:: -fcall-saved-x18
 
-Reserve the x20 register (AArch64 only)
+Make the x18 register call-saved (AArch64 only)
 
 .. option:: -fcall-saved-x8
 
@@ -2358,33 +2400,41 @@ Make the x8 register call-saved (AArch64 only)
 
 Make the x9 register call-saved (AArch64 only)
 
-.. option:: -fcall-saved-x10
+.. option:: -ffixed-x1
 
-Make the x10 register call-saved (AArch64 only)
+Reserve the x1 register (AArch64 only)
 
-.. option:: -fcall-saved-x11
+.. option:: -ffixed-x18
 
-Make the x11 register call-saved (AArch64 only)
+Reserve the x18 register (AArch64 only)
 
-.. option:: -fcall-saved-x12
+.. option:: -ffixed-x2
 
-Make the x12 register call-saved (AArch64 only)
+Reserve the x2 register (AArch64 only)
 
-.. option:: -fcall-saved-x13
+.. option:: -ffixed-x20
 
-Make the x13 register call-saved (AArch64 only)
+Reserve the x20 register (AArch64 only)
 
-.. option:: -fcall-saved-x14
+.. option:: -ffixed-x3
 
-Make the x14 register call-saved (AArch64 only)
+Reserve the x3 register (AArch64 only)
 
-.. option:: -fcall-saved-x15
+.. option:: -ffixed-x4
 
-Make the x15 register call-saved (AArch64 only)
+Reserve the x4 register (AArch64 only)
 
-.. option:: -fcall-saved-x18
+.. option:: -ffixed-x5
 
-Make the x18 register call-saved (AArch64 only)
+Reserve the x5 register (AArch64 only)
+
+.. option:: -ffixed-x6
+
+Reserve the x6 register (AArch64 only)
+
+.. option:: -ffixed-x7
+
+Reserve the x7 register (AArch64 only)
 
 .. option:: -mfix-cortex-a53-835769, -mno-fix-cortex-a53-835769
 
@@ -2396,6 +2446,14 @@ Generate code which only uses the general purpose registers (AArch64 only)
 
 AMDGPU
 ------
+.. option:: -mcode-object-v3, -mno-code-object-v3
+
+Enable code object v3 (AMDGPU only)
+
+.. option:: -msram-ecc, -mno-sram-ecc
+
+Enable SRAM ECC (AMDGPU only)
+
 .. option:: -mxnack, -mno-xnack
 
 Enable XNACK (AMDGPU only)
@@ -2594,6 +2652,8 @@ WebAssembly
 
 .. option:: -msimd128, -mno-simd128
 
+.. option:: -munimplemented-simd128, -mno-unimplemented-simd128
+
 X86
 ---
 .. option:: -m3dnow, -mno-3dnow
@@ -2811,6 +2871,10 @@ ___________
 
 .. option:: -ggdb3
 
+.. option:: -gline-directives-only
+
+Emit debug line info directives only
+
 .. option:: -gline-tables-only, -g1, -gmlt
 
 Emit debug line number tables only
@@ -2841,10 +2905,16 @@ Embed source text in DWARF debug sections
 
 .. option:: -gpubnames, -gno-pubnames
 
-.. option:: -grecord-command-line, -grecord-gcc-switches, -gno-record-command-line, -gno-record-gcc-switches
+.. option:: -grecord-command-line, -gno-record-command-line, -grecord-gcc-switches
 
 .. option:: -gsplit-dwarf
 
+.. program:: clang1
+.. option:: -gsplit-dwarf=<arg>
+.. program:: clang
+
+Set DWARF fission mode to either 'split' or 'single'
+
 .. option:: -gstrict-dwarf, -gno-strict-dwarf
 
 .. option:: -gz
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index e155cefb7890d8..5782edd353701b 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -474,44 +474,58 @@ Half-Precision Floating Point
 =============================
 
 Clang supports two half-precision (16-bit) floating point types: ``__fp16`` and
-``_Float16``. ``__fp16`` is defined in the ARM C Language Extensions (`ACLE
-<http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053d/IHI0053D_acle_2_1.pdf>`_)
-and ``_Float16`` in ISO/IEC TS 18661-3:2015.
-
-``__fp16`` is a storage and interchange format only. This means that values of
-``__fp16`` promote to (at least) float when used in arithmetic operations.
-There are two ``__fp16`` formats. Clang supports the IEEE 754-2008 format and
-not the ARM alternative format.
-
-ISO/IEC TS 18661-3:2015 defines C support for additional floating point types.
-``_FloatN`` is defined as a binary floating type, where the N suffix denotes
-the number of bits and is 16, 32, 64, or greater and equal to 128 and a
-multiple of 32. Clang supports ``_Float16``. The difference from ``__fp16`` is
-that arithmetic on ``_Float16`` is performed in half-precision, thus it is not
-a storage-only format. ``_Float16`` is available as a source language type in
-both C and C++ mode.
-
-It is recommended that portable code use the ``_Float16`` type because
-``__fp16`` is an ARM C-Language Extension (ACLE), whereas ``_Float16`` is
-defined by the C standards committee, so using ``_Float16`` will not prevent
-code from being ported to architectures other than Arm.  Also, ``_Float16``
-arithmetic and operations will directly map on half-precision instructions when
-they are available (e.g. Armv8.2-A), avoiding conversions to/from
-single-precision, and thus will result in more performant code. If
-half-precision instructions are unavailable, values will be promoted to
-single-precision, similar to the semantics of ``__fp16`` except that the
-results will be stored in single-precision.
-
-In an arithmetic operation where one operand is of ``__fp16`` type and the
-other is of ``_Float16`` type, the ``_Float16`` type is first converted to
-``__fp16`` type and then the operation is completed as if both operands were of
-``__fp16`` type.
-
-To define a ``_Float16`` literal, suffix ``f16`` can be appended to the compile-time
-constant declaration. There is no default argument promotion for ``_Float16``; this
-applies to the standard floating types only. As a consequence, for example, an
-explicit cast is required for printing a ``_Float16`` value (there is no string
-format specifier for ``_Float16``).
+``_Float16``.  These types are supported in all language modes.
+
+``__fp16`` is supported on every target, as it is purely a storage format; see below.
+``_Float16`` is currently only supported on the following targets, with further
+targets pending ABI standardization:
+
+- 32-bit ARM
+- 64-bit ARM (AArch64)
+- SPIR
+
+``_Float16`` will be supported on more targets as they define ABIs for it.
+
+``__fp16`` is a storage and interchange format only.  This means that values of
+``__fp16`` are immediately promoted to (at least) ``float`` when used in arithmetic
+operations, so that e.g. the result of adding two ``__fp16`` values has type ``float``.
+The behavior of ``__fp16`` is specified by the ARM C Language Extensions (`ACLE <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053d/IHI0053D_acle_2_1.pdf>`_).
+Clang uses the ``binary16`` format from IEEE 754-2008 for ``__fp16``, not the ARM
+alternative format.
+
+``_Float16`` is an extended floating-point type.  This means that, just like arithmetic on
+``float`` or ``double``, arithmetic on ``_Float16`` operands is formally performed in the
+``_Float16`` type, so that e.g. the result of adding two ``_Float16`` values has type
+``_Float16``.  The behavior of ``_Float16`` is specified by ISO/IEC TS 18661-3:2015
+("Floating-point extensions for C").  As with ``__fp16``, Clang uses the ``binary16``
+format from IEEE 754-2008 for ``_Float16``.
+
+``_Float16`` arithmetic will be performed using native half-precision support
+when available on the target (e.g. on ARMv8.2a); otherwise it will be performed
+at a higher precision (currently always ``float``) and then truncated down to
+``_Float16``.  Note that C and C++ allow intermediate floating-point operands
+of an expression to be computed with greater precision than is expressible in
+their type, so Clang may avoid intermediate truncations in certain cases; this may
+lead to results that are inconsistent with native arithmetic.
+
+It is recommended that portable code use ``_Float16`` instead of ``__fp16``,
+as it has been defined by the C standards committee and has behavior that is
+more familiar to most programmers.
+
+Because ``__fp16`` operands are always immediately promoted to ``float``, the
+common real type of ``__fp16`` and ``_Float16`` for the purposes of the usual
+arithmetic conversions is ``float``.
+
+A literal can be given ``_Float16`` type using the suffix ``f16``; for example::
+
+  3.14f16
+
+
+Because default argument promotion only applies to the standard floating-point
+types, ``_Float16`` values are not promoted to ``double`` when passed as variadic
+or untyped arguments.  As a consequence, some caution must be taken when using
+certain library facilities with ``_Float16``; for example, there is no ``printf`` format
+specifier for ``_Float16``, and (unlike ``float``) it will not be implicitly promoted to
+``double`` when passed to ``printf``, so the programmer must explicitly cast it to
+``double`` before using it with an ``%f`` or similar specifier.
 
 Messages on ``deprecated`` and ``unavailable`` Attributes
 =========================================================
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 04a9648ca29420..7b567c966ee53a 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -17,60 +17,50 @@
 OpenMP Support
 ==================
 
-Clang fully supports OpenMP 4.5. Clang supports offloading to X86_64, AArch64,
-PPC64[LE] and has `basic support for Cuda devices`_.
-
-Standalone directives
-=====================
-
-* #pragma omp [for] simd: :good:`Complete`.
-
-* #pragma omp declare simd: :partial:`Partial`.  We support parsing/semantic
-  analysis + generation of special attributes for X86 target, but still
-  missing the LLVM pass for vectorization.
-
-* #pragma omp taskloop [simd]: :good:`Complete`.
-
-* #pragma omp target [enter|exit] data: :good:`Complete`.
-
-* #pragma omp target update: :good:`Complete`.
-
-* #pragma omp target: :good:`Complete`.
+Clang supports the following OpenMP 5.0 features:
 
-* #pragma omp declare target: :good:`Complete`.
+* The `reduction`-based clauses in the `task` and `target`-based directives.
 
-* #pragma omp teams: :good:`Complete`.
+* Support relational-op != (not-equal) as one of the canonical forms of random
+  access iterator.
 
-* #pragma omp distribute [simd]: :good:`Complete`.
+* Support for mapping of the lambdas in target regions.
 
-* #pragma omp distribute parallel for [simd]: :good:`Complete`.
+* Parsing/sema analysis for the requires directive.
 
-Combined directives
-===================
+* Nested declare target directives.
 
-* #pragma omp parallel for simd: :good:`Complete`.
+* Make the `this` pointer implicitly mapped as `map(this[:1])`.
 
-* #pragma omp target parallel: :good:`Complete`.
+* The `close` *map-type-modifier*.
 
-* #pragma omp target parallel for [simd]: :good:`Complete`.
-
-* #pragma omp target simd: :good:`Complete`.
-
-* #pragma omp target teams: :good:`Complete`.
-
-* #pragma omp teams distribute [simd]: :good:`Complete`.
-
-* #pragma omp target teams distribute [simd]: :good:`Complete`.
-
-* #pragma omp teams distribute parallel for [simd]: :good:`Complete`.
-
-* #pragma omp target teams distribute parallel for [simd]: :good:`Complete`.
+Clang fully supports OpenMP 4.5. Clang supports offloading to X86_64, AArch64,
+PPC64[LE] and has `basic support for Cuda devices`_.
 
-Clang does not support any constructs/updates from OpenMP 5.0 except
-for `reduction`-based clauses in the `task` and `target`-based directives.
+* #pragma omp declare simd: :partial:`Partial`.  We support parsing/semantic
+  analysis + generation of special attributes for X86 target, but still
+  missing the LLVM pass for vectorization.
 
 In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools
-Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and mac OS.
+Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS.
+
+General improvements
+--------------------
+- New collapse clause scheme to avoid expensive remainder operations.
+  Compute loop index variables after collapsing a loop nest via the
+  collapse clause by replacing the expensive remainder operation with
+  multiplications and additions.
+
+- The default schedules for the `distribute` and `for` constructs in a
+  parallel region and in SPMD mode have changed to ensure coalesced
+  accesses. For the `distribute` construct, a static schedule is used
+  with a chunk size equal to the number of threads per team (default
+  value of threads or as specified by the `thread_limit` clause if
+  present). For the `for` construct, the schedule is static with chunk
+  size of one.
+  
+- Simplified SPMD code generation for `distribute parallel for` when
+  the new default schedules are applicable.
 
 .. _basic support for Cuda devices:
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b6a405dbc78b2e..50bf636a51f437 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -127,6 +127,10 @@ Non-comprehensive list of changes in this release
   manually and rely on the old behaviour you will need to add appropriate
   compiler flags for finding the corresponding libc++ include directory.
 
+- The integrated assembler is now used by default for all MIPS targets.
+
+- Improved support for MIPS N32 ABI and MIPS R6 target triples.
+
 New Compiler Flags
 ------------------
 
@@ -136,6 +140,13 @@ New Compiler Flags
   instrumenting for gcov-based profiling.
   See the :doc:`UsersManual` for details.
 
+- When using a custom stack alignment, the ``stackrealign`` attribute is now
+  implicitly set on the main function.
+
+- Emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR`` relocations can now
+  be controlled by the ``-mrelax-pic-calls`` and ``-mno-relax-pic-calls``
+  options.
+
 - ...
 
 Deprecated Compiler Flags
@@ -179,6 +190,15 @@ Windows Support
   `dllexport` and `dllimport` attributes not apply to inline member functions.
   This can significantly reduce compile and link times. See the `User's Manual
   <UsersManual.html#the-zc-dllexportinlines-option>`_ for more info.
+
+- For MinGW, ``-municode`` now correctly defines ``UNICODE`` during
+  preprocessing.
+
+- For MinGW, clang now produces vtables and RTTI for dllexported classes
+  without key functions. This fixes building Qt in debug mode.
+
+- Allow using Address Sanitizer and Undefined Behaviour Sanitizer on MinGW.
+
 - ...
 
 
@@ -233,12 +253,15 @@ ABI Changes in Clang
 OpenMP Support in Clang
 ----------------------------------
 
-- Support relational-op != (not-equal) as one of the canonical forms of random
-  access iterator.
-
-- Added support for mapping of the lambdas in target regions.
+- OpenMP 5.0 features
 
-- Added parsing/sema analysis for OpenMP 5.0 requires directive.
+  - Support relational-op != (not-equal) as one of the canonical forms of random
+    access iterator.
+  - Added support for mapping of the lambdas in target regions.
+  - Added parsing/sema analysis for the requires directive.
+  - Support nested declare target directives.
+  - Make the `this` pointer implicitly mapped as `map(this[:1])`.
+  - Added the `close` *map-type-modifier*.
 
 - Various bugfixes and improvements.
 
@@ -250,6 +273,15 @@ New features supported for Cuda devices:
 
 - Fixed support for lastprivate/reduction variables in SPMD constructs.
 
+- New collapse clause scheme to avoid expensive remainder operations.
+
+- New default schedule for distribute and parallel constructs.
+
+- Simplified code generation for distribute and parallel in SPMD mode.
+
+- Flag (``-fopenmp-optimistic-collapse``) for user to limit collapsed
+  loop counter width when safe to do so.
+
 - General performance improvement.
 
 CUDA Support in Clang
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 1fe1dd39948a1b..bf1068019b77bd 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -329,6 +329,7 @@ def TargetMSP430 : TargetArch<["msp430"]>;
 def TargetRISCV : TargetArch<["riscv32", "riscv64"]>;
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
+def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
 def TargetWindows : TargetArch<["x86", "x86_64", "arm", "thumb", "aarch64"]> {
   let OSes = ["Win32"];
 }
@@ -1500,6 +1501,22 @@ def AMDGPUNumVGPR : InheritableAttr {
   let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
+def WebAssemblyImportModule : InheritableAttr,
+                              TargetSpecificAttr<TargetWebAssembly> {
+  let Spellings = [Clang<"import_module">];
+  let Args = [StringArgument<"ImportModule">];
+  let Documentation = [WebAssemblyImportModuleDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+}
+
+def WebAssemblyImportName : InheritableAttr,
+                            TargetSpecificAttr<TargetWebAssembly> {
+  let Spellings = [Clang<"import_name">];
+  let Args = [StringArgument<"ImportName">];
+  let Documentation = [WebAssemblyImportNameDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+}
+
 def NoSplitStack : InheritableAttr {
   let Spellings = [GCC<"no_split_stack">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 5773a92c9c15dd..94c8343d2368f5 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3652,7 +3652,40 @@ definition (
 For more information see
 `gcc documentation <https://gcc.gnu.org/onlinedocs/gcc-7.2.0/gcc/Microsoft-Windows-Variable-Attributes.html>`_
 or `msvc documentation <https://docs.microsoft.com/pl-pl/cpp/cpp/selectany>`_.
-}];
+}]; }
+
+def WebAssemblyImportModuleDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the ``__attribute__((import_module(<module_name>)))`` 
+attribute for the WebAssembly target. This attribute may be attached to a
+function declaration, where it modifies how the symbol is to be imported
+within the WebAssembly linking environment.
+
+WebAssembly imports use a two-level namespace scheme, consisting of a module
+name, which typically identifies a module from which to import, and a field
+name, which typically identifies a field from that module to import. By
+default, module names for C/C++ symbols are assigned automatically by the
+linker. This attribute can be used to override the default behavior, and
+request a specific module name be used instead.
+  }];
+}
+
+def WebAssemblyImportNameDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the ``__attribute__((import_name(<name>)))`` 
+attribute for the WebAssembly target. This attribute may be attached to a
+function declaration, where it modifies how the symbol is to be imported
+within the WebAssembly linking environment.
+
+WebAssembly imports use a two-level namespace scheme, consisting of a module
+name, which typically identifies a module from which to import, and a field
+name, which typically identifies a field from that module to import. By
+default, field names for C/C++ symbols are the same as their C/C++ symbol
+names. This attribute can be used to override the default behavior, and
+request a specific field name be used instead.
+  }];
 }
 
 def ArtificialDocs : Documentation {
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 1892ff11a31dc8..054662e6883174 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -33,7 +33,7 @@ BUILTIN(__builtin_arm_clrex, "v", "")
 
 // Bit manipulation
 BUILTIN(__builtin_arm_rbit, "UiUi", "nc")
-BUILTIN(__builtin_arm_rbit64, "LUiLUi", "nc")
+BUILTIN(__builtin_arm_rbit64, "WUiWUi", "nc")
 
 // HINT
 BUILTIN(__builtin_arm_nop, "v", "")
@@ -50,8 +50,8 @@ BUILTIN(__builtin_arm_crc32h, "UiUiUs", "nc")
 BUILTIN(__builtin_arm_crc32ch, "UiUiUs", "nc")
 BUILTIN(__builtin_arm_crc32w, "UiUiUi", "nc")
 BUILTIN(__builtin_arm_crc32cw, "UiUiUi", "nc")
-BUILTIN(__builtin_arm_crc32d, "UiUiLUi", "nc")
-BUILTIN(__builtin_arm_crc32cd, "UiUiLUi", "nc")
+BUILTIN(__builtin_arm_crc32d, "UiUiWUi", "nc")
+BUILTIN(__builtin_arm_crc32cd, "UiUiWUi", "nc")
 
 // Memory barrier
 BUILTIN(__builtin_arm_dmb, "vUi", "nc")
@@ -63,10 +63,10 @@ BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
 
 // System Registers
 BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
-BUILTIN(__builtin_arm_rsr64, "LUicC*", "nc")
+BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")
 BUILTIN(__builtin_arm_rsrp, "v*cC*", "nc")
 BUILTIN(__builtin_arm_wsr, "vcC*Ui", "nc")
-BUILTIN(__builtin_arm_wsr64, "vcC*LUi", "nc")
+BUILTIN(__builtin_arm_wsr64, "vcC*WUi", "nc")
 BUILTIN(__builtin_arm_wsrp, "vcC*vC*", "nc")
 
 // MSVC
@@ -204,8 +204,8 @@ TARGET_HEADER_BUILTIN(_InterlockedDecrement64_rel, "LLiLLiD*", "nh", "intrin.h",
 
 TARGET_HEADER_BUILTIN(_ReadWriteBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(__getReg, "ULLii", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_ReadStatusReg,  "ii",  "nh", "intrin.h", ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_WriteStatusReg, "vii", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_ReadStatusReg,  "LLii",  "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
 #undef BUILTIN
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 5feb877e46c51d..b71f65d146cab5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -274,6 +274,10 @@ def warn_riscv_interrupt_attribute : Warning<
    "RISC-V 'interrupt' attribute only applies to functions that have "
    "%select{no parameters|a 'void' return type}0">,
    InGroup<IgnoredAttributes>;
+def warn_msp430_interrupt_attribute : Warning<
+   "MSP430 'interrupt' attribute only applies to functions that have "
+   "%select{no parameters|a 'void' return type}0">,
+   InGroup<IgnoredAttributes>;
 def warn_unused_parameter : Warning<"unused parameter %0">,
   InGroup<UnusedParameter>, DefaultIgnore;
 def warn_unused_variable : Warning<"unused variable %0">,
diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
index 05464ed85f1360..8b3b59b51c5424 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -96,7 +96,7 @@ FEATURE(objc_arc, LangOpts.ObjCAutoRefCount)
 FEATURE(objc_arc_fields, true)
 FEATURE(objc_arc_weak, LangOpts.ObjCWeak)
 FEATURE(objc_default_synthesize_properties, LangOpts.ObjC)
-FEATURE(objc_fixed_enum, true)
+FEATURE(objc_fixed_enum, LangOpts.ObjC)
 FEATURE(objc_instancetype, LangOpts.ObjC)
 FEATURE(objc_kindof, LangOpts.ObjC)
 FEATURE(objc_modules, LangOpts.ObjC && LangOpts.Modules)
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index e7891baf530479..6a71289f74c1de 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -70,15 +70,14 @@ class FileEntry {
   bool IsNamedPipe;
   bool InPCH;
   bool IsValid;               // Is this \c FileEntry initialized and valid?
-  bool DeferredOpen;          // Created by getFile(OpenFile=0); may open later.
 
   /// The open file, if it is owned by the \p FileEntry.
   mutable std::unique_ptr<llvm::vfs::File> File;
 
 public:
   FileEntry()
-      : UniqueID(0, 0), IsNamedPipe(false), InPCH(false), IsValid(false),
-        DeferredOpen(false) {}
+      : UniqueID(0, 0), IsNamedPipe(false), InPCH(false), IsValid(false)
+  {}
 
   FileEntry(const FileEntry &) = delete;
   FileEntry &operator=(const FileEntry &) = delete;
diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h
index cc4e9922dca03c..c76fa88092b9be 100644
--- a/clang/include/clang/Basic/OpenCLOptions.h
+++ b/clang/include/clang/Basic/OpenCLOptions.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CLANG_BASIC_OPENCLOPTIONS_H
 #define LLVM_CLANG_BASIC_OPENCLOPTIONS_H
 
+#include "clang/Basic/LangOptions.h"
 #include "llvm/ADT/StringMap.h"
 
 namespace clang {
@@ -42,25 +43,29 @@ class OpenCLOptions {
 
   // Is supported as either an extension or an (optional) core feature for
   // OpenCL version \p CLVer.
-  bool isSupported(llvm::StringRef Ext, unsigned CLVer) const {
+  bool isSupported(llvm::StringRef Ext, LangOptions LO) const {
+    // In C++ mode all extensions should work at least as in v2.0.
+    auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
     auto I = OptMap.find(Ext)->getValue();
     return I.Supported && I.Avail <= CLVer;
   }
 
   // Is supported (optional) OpenCL core features for OpenCL version \p CLVer.
   // For supported extension, return false.
-  bool isSupportedCore(llvm::StringRef Ext, unsigned CLVer) const {
+  bool isSupportedCore(llvm::StringRef Ext, LangOptions LO) const {
+    // In C++ mode all extensions should work at least as in v2.0.
+    auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
     auto I = OptMap.find(Ext)->getValue();
-    return I.Supported && I.Avail <= CLVer &&
-      I.Core != ~0U && CLVer >= I.Core;
+    return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core;
   }
 
   // Is supported OpenCL extension for OpenCL version \p CLVer.
   // For supported (optional) core feature, return false.
- bool isSupportedExtension(llvm::StringRef Ext, unsigned CLVer) const {
+  bool isSupportedExtension(llvm::StringRef Ext, LangOptions LO) const {
+    // In C++ mode all extensions should work at least as in v2.0.
+    auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
     auto I = OptMap.find(Ext)->getValue();
-    return I.Supported && I.Avail <= CLVer &&
-      (I.Core == ~0U || CLVer < I.Core);
+    return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core);
   }
 
   void enable(llvm::StringRef Ext, bool V = true) {
@@ -122,10 +127,10 @@ class OpenCLOptions {
       I->second.Enabled = false;
   }
 
-  void enableSupportedCore(unsigned CLVer) {
-    for (llvm::StringMap<Info>::iterator I = OptMap.begin(),
-         E = OptMap.end(); I != E; ++I)
-      if (isSupportedCore(I->getKey(), CLVer))
+  void enableSupportedCore(LangOptions LO) {
+    for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end();
+         I != E; ++I)
+      if (isSupportedCore(I->getKey(), LO))
         I->second.Enabled = true;
   }
 
@@ -133,6 +138,6 @@ class OpenCLOptions {
   friend class ASTReader;
 };
 
-}  // end namespace clang
+} // end namespace clang
 
 #endif
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 786b1c251ca822..1e835d992bbeb8 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -64,6 +64,7 @@ class TargetInfo : public RefCountedBase<TargetInfo> {
   bool HasLegalHalfType; // True if the backend supports operations on the half
                          // LLVM IR type.
   bool HasFloat128;
+  bool HasFloat16;
   unsigned char PointerWidth, PointerAlign;
   unsigned char BoolWidth, BoolAlign;
   unsigned char IntWidth, IntAlign;
@@ -517,6 +518,9 @@ class TargetInfo : public RefCountedBase<TargetInfo> {
   /// Determine whether the __float128 type is supported on this target.
   virtual bool hasFloat128Type() const { return HasFloat128; }
 
+  /// Determine whether the _Float16 type is supported on this target.
+  virtual bool hasFloat16Type() const { return HasFloat16; }
+
   /// Return the alignment that is suitable for storing any
   /// object with a fundamental alignment requirement.
   unsigned getSuitableAlign() const { return SuitableAlign; }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f02a7190f5a7a9..d02d9744d78d43 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -552,9 +552,9 @@ def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">,
   HelpText<"Compile CUDA code for both host and device (default).  Has no "
            "effect on non-CUDA compilations.">;
 def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[DriverOption]>,
-  HelpText<"Include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
+  HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
 def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>,
-  HelpText<"Do not include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
+  HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
 def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
   HelpText<"CUDA GPU architecture (e.g. sm_35).  May be specified more than once.">;
 def hip_link : Flag<["--"], "hip-link">,
@@ -2418,6 +2418,14 @@ def modd_spreg : Flag<["-"], "modd-spreg">, Group<m_mips_Features_Group>,
 def mno_odd_spreg : Flag<["-"], "mno-odd-spreg">, Group<m_mips_Features_Group>,
   HelpText<"Disable odd single-precision floating point registers">,
   Flags<[HelpHidden]>;
+def mrelax_pic_calls : Flag<["-"], "mrelax-pic-calls">,
+  Group<m_mips_Features_Group>,
+  HelpText<"Try turning PIC calls (j{al}r{c} $25) into direct calls "
+  "(MIPS only)">, Flags<[HelpHidden]>;
+def mno_relax_pic_calls : Flag<["-"], "mno-relax-pic-calls">,
+  Group<m_mips_Features_Group>,
+  HelpText<"Do not try turning PIC calls (j{al}r{c} $25) into direct calls "
+  "(MIPS only)">, Flags<[HelpHidden]>;
 def mglibc : Flag<["-"], "mglibc">, Group<m_libc_Group>, Flags<[HelpHidden]>;
 def muclibc : Flag<["-"], "muclibc">, Group<m_libc_Group>, Flags<[HelpHidden]>;
 def module_file_info : Flag<["-"], "module-file-info">, Flags<[DriverOption,CC1Option]>, Group<Action_Group>,
diff --git a/clang/include/clang/Tooling/ArgumentsAdjusters.h b/clang/include/clang/Tooling/ArgumentsAdjusters.h
index 94ccf1f34e5760..e31839b9a8f733 100644
--- a/clang/include/clang/Tooling/ArgumentsAdjusters.h
+++ b/clang/include/clang/Tooling/ArgumentsAdjusters.h
@@ -61,6 +61,10 @@ ArgumentsAdjuster getInsertArgumentAdjuster(
     const char *Extra,
     ArgumentInsertPosition Pos = ArgumentInsertPosition::END);
 
+/// Gets an argument adjuster which strips plugin related command line
+/// arguments.
+ArgumentsAdjuster getStripPluginsAdjuster();
+
 /// Gets an argument adjuster which adjusts the arguments in sequence
 /// with the \p First adjuster and then with the \p Second one.
 ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First,
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index f5a2d4894c13e5..01ec1d69cde53f 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -189,21 +189,15 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
       *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first;
 
   // See if there is already an entry in the map.
-  if (NamedFileEnt.second) {
-    if (NamedFileEnt.second == NON_EXISTENT_FILE)
-      return nullptr;
-    // Entry exists: return it *unless* it wasn't opened and open is requested.
-    if (!(NamedFileEnt.second->DeferredOpen && openFile))
-      return NamedFileEnt.second;
-    // We previously stat()ed the file, but didn't open it: do that below.
-    // FIXME: the below does other redundant work too (stats the dir and file).
-  } else {
-    // By default, initialize it to invalid.
-    NamedFileEnt.second = NON_EXISTENT_FILE;
-  }
+  if (NamedFileEnt.second)
+    return NamedFileEnt.second == NON_EXISTENT_FILE ? nullptr
+                                                    : NamedFileEnt.second;
 
   ++NumFileCacheMisses;
 
+  // By default, initialize it to invalid.
+  NamedFileEnt.second = NON_EXISTENT_FILE;
+
   // Get the null-terminated file name as stored as the key of the
   // SeenFileEntries map.
   StringRef InterndFileName = NamedFileEnt.first();
@@ -241,7 +235,6 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
   // It exists.  See if we have already opened a file with the same inode.
   // This occurs when one dir is symlinked to another, for example.
   FileEntry &UFE = UniqueRealFiles[Data.UniqueID];
-  UFE.DeferredOpen = !openFile;
 
   NamedFileEnt.second = &UFE;
 
@@ -258,15 +251,6 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
     InterndFileName = NamedFileEnt.first().data();
   }
 
-  // If we opened the file for the first time, record the resulting info.
-  // Do this even if the cache entry was valid, maybe we didn't previously open.
-  if (F && !UFE.File) {
-    if (auto PathName = F->getName())
-      fillRealPathName(&UFE, *PathName);
-    UFE.File = std::move(F);
-    assert(!UFE.DeferredOpen && "we just opened it!");
-  }
-
   if (UFE.isValid()) { // Already have an entry with this inode, return it.
 
     // FIXME: this hack ensures that if we look up a file by a virtual path in
@@ -297,9 +281,13 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
   UFE.UniqueID = Data.UniqueID;
   UFE.IsNamedPipe = Data.IsNamedPipe;
   UFE.InPCH = Data.InPCH;
+  UFE.File = std::move(F);
   UFE.IsValid = true;
-  // Note File and DeferredOpen were initialized above.
 
+  if (UFE.File) {
+    if (auto PathName = UFE.File->getName())
+      fillRealPathName(&UFE, *PathName);
+  }
   return &UFE;
 }
 
@@ -371,7 +359,6 @@ FileManager::getVirtualFile(StringRef Filename, off_t Size,
   UFE->UID     = NextFileUID++;
   UFE->IsValid = true;
   UFE->File.reset();
-  UFE->DeferredOpen = false;
   return UFE;
 }
 
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 269fad38b8d57f..8b7621d7962e12 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -35,6 +35,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
   NoAsmVariants = false;
   HasLegalHalfType = false;
   HasFloat128 = false;
+  HasFloat16 = false;
   PointerWidth = PointerAlign = 32;
   BoolWidth = BoolAlign = 8;
   IntWidth = IntAlign = 32;
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index cf87bc484621cd..3c139d724796ee 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -570,19 +570,27 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
         Triple.getVendor() != llvm::Triple::UnknownVendor ||
         !Triple.isOSBinFormatWasm())
       return nullptr;
-    if (Triple.getOS() != llvm::Triple::UnknownOS &&
-        Triple.getOS() != llvm::Triple::WASI)
-      return nullptr;
-    return new WebAssemblyOSTargetInfo<WebAssembly32TargetInfo>(Triple, Opts);
+    switch (Triple.getOS()) {
+      case llvm::Triple::WASI:
+        return new WASITargetInfo<WebAssembly32TargetInfo>(Triple, Opts);
+      case llvm::Triple::UnknownOS:
+        return new WebAssemblyOSTargetInfo<WebAssembly32TargetInfo>(Triple, Opts);
+      default:
+        return nullptr;
+    }
   case llvm::Triple::wasm64:
     if (Triple.getSubArch() != llvm::Triple::NoSubArch ||
         Triple.getVendor() != llvm::Triple::UnknownVendor ||
         !Triple.isOSBinFormatWasm())
       return nullptr;
-    if (Triple.getOS() != llvm::Triple::UnknownOS &&
-        Triple.getOS() != llvm::Triple::WASI)
-      return nullptr;
-    return new WebAssemblyOSTargetInfo<WebAssembly64TargetInfo>(Triple, Opts);
+    switch (Triple.getOS()) {
+      case llvm::Triple::WASI:
+        return new WASITargetInfo<WebAssembly64TargetInfo>(Triple, Opts);
+      case llvm::Triple::UnknownOS:
+        return new WebAssemblyOSTargetInfo<WebAssembly64TargetInfo>(Triple, Opts);
+      default:
+        return nullptr;
+    }
 
   case llvm::Triple::renderscript32:
     return new LinuxTargetInfo<RenderScript32TargetInfo>(Triple, Opts);
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 62919a02dcb9fe..6297f23c5aa471 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -50,6 +50,7 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
 
   // All AArch64 implementations support ARMv8 FP, which makes half a legal type.
   HasLegalHalfType = true;
+  HasFloat16 = true;
 
   LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
   MaxVectorAlign = 128;
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 16644ace108b79..23eee34eaa93de 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -397,6 +397,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   SoftFloat = SoftFloatABI = false;
   HWDiv = 0;
   DotProd = 0;
+  HasFloat16 = true;
 
   // This does not diagnose illegal cases like having both
   // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp".
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 085efa02cc5f17..09867d82c382e9 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -764,8 +764,9 @@ class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo<Target> {
 template <typename Target>
 class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo
     : public OSTargetInfo<Target> {
+protected:
   void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
-                    MacroBuilder &Builder) const final {
+                    MacroBuilder &Builder) const {
     // A common platform macro.
     if (Opts.POSIXThreads)
       Builder.defineMacro("_REENTRANT");
@@ -783,6 +784,21 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo
   }
 };
 
+// WASI target
+template <typename Target>
+class LLVM_LIBRARY_VISIBILITY WASITargetInfo
+    : public WebAssemblyOSTargetInfo<Target> {
+  void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
+                    MacroBuilder &Builder) const final {
+    WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder);
+    Builder.defineMacro("__wasi__");
+  }
+
+public:
+  explicit WASITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : WebAssemblyOSTargetInfo<Target>(Triple, Opts) {}
+};
+
 } // namespace targets
 } // namespace clang
 #endif // LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 058970a0e098b5..cbe7a9a2fa85ec 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -331,9 +331,15 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo {
       break;
     }
 
-    if (getTriple().isOSFreeBSD()) {
+    switch (getTriple().getOS()) {
+    case llvm::Triple::FreeBSD:
+    case llvm::Triple::NetBSD:
+    case llvm::Triple::OpenBSD:
       LongDoubleWidth = LongDoubleAlign = 64;
       LongDoubleFormat = &llvm::APFloat::IEEEdouble();
+      break;
+    default:
+      break;
     }
 
     // PPC32 supports atomics up to 4 bytes.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 9815292fc276da..e8d92f11a1224c 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -48,6 +48,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo {
     AddrSpaceMap = &SPIRAddrSpaceMap;
     UseAddrSpaceMapMangling = true;
     HasLegalHalfType = true;
+    HasFloat16 = true;
     // Define available target features
     // These must be defined in sorted order!
     NoAsmVariants = true;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a718f2f19aa65c..ccc657493b28b1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7052,19 +7052,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
 
     llvm::Type *RegisterType = Int64Ty;
-    llvm::Type *ValueType = Int32Ty;
     llvm::Type *Types[] = { RegisterType };
 
     if (BuiltinID == AArch64::BI_ReadStatusReg) {
       llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
-      llvm::Value *Call = Builder.CreateCall(F, Metadata);
 
-      return Builder.CreateTrunc(Call, ValueType);
+      return Builder.CreateCall(F, Metadata);
     }
 
     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
     llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
-    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
 
     return Builder.CreateCall(F, { Metadata, ArgValue });
   }
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 5959d889b45516..b98657ffd8006c 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1631,11 +1631,15 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
                   ? LangOptions::TrivialAutoVarInitKind::Uninitialized
                   : getContext().getLangOpts().getTrivialAutoVarInit()));
 
-  auto initializeWhatIsTechnicallyUninitialized = [&]() {
+  auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) {
     if (trivialAutoVarInit ==
         LangOptions::TrivialAutoVarInitKind::Uninitialized)
       return;
 
+    // Only initialize a __block's storage: we always initialize the header.
+    if (emission.IsEscapingByRef)
+      Loc = emitBlockByrefAddress(Loc, &D, /*follow=*/false);
+
     CharUnits Size = getContext().getTypeSizeInChars(type);
     if (!Size.isZero()) {
       switch (trivialAutoVarInit) {
@@ -1713,7 +1717,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
   };
 
   if (isTrivialInitializer(Init)) {
-    initializeWhatIsTechnicallyUninitialized();
+    initializeWhatIsTechnicallyUninitialized(Loc);
     return;
   }
 
@@ -1727,7 +1731,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
   }
 
   if (!constant) {
-    initializeWhatIsTechnicallyUninitialized();
+    initializeWhatIsTechnicallyUninitialized(Loc);
     LValue lv = MakeAddrLValue(Loc, type);
     lv.setNonGC(true);
     return EmitExprAsInit(Init, &D, lv, capturedByInit);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 244738042cef09..2ac59fb4de25f0 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3762,13 +3762,15 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
     }
   }
 
-  // Microsoft's link.exe doesn't support alignments greater than 32 for common
-  // symbols, so symbols with greater alignment requirements cannot be common.
+  // Microsoft's link.exe doesn't support alignments greater than 32 bytes for
+  // common symbols, so symbols with greater alignment requirements cannot be
+  // common.
   // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two
   // alignments for common symbols via the aligncomm directive, so this
   // restriction only applies to MSVC environments.
   if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() &&
-      Context.getTypeAlignIfKnown(D->getType()) > 32)
+      Context.getTypeAlignIfKnown(D->getType()) >
+          Context.toBits(CharUnits::fromQuantity(32)))
     return true;
 
   return false;
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index b53304528c3d82..c56875a0368099 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2463,10 +2463,12 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
     CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
 
   // Always resolve references to the wrapper at link time.
-  if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) &&
-      !llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) &&
-      !llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage())))
-    Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility);
+  if (!Wrapper->hasLocalLinkage())
+    if (!isThreadWrapperReplaceable(VD, CGM) ||
+        llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) ||
+        llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()) ||
+        VD->getVisibility() == HiddenVisibility)
+      Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility);
 
   if (isThreadWrapperReplaceable(VD, CGM)) {
     Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 89ec73670a7350..94fccb15ff6ec8 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -761,6 +761,22 @@ class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
+    TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+      if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) {
+        llvm::Function *Fn = cast<llvm::Function>(GV);
+        llvm::AttrBuilder B;
+        B.addAttribute("wasm-import-module", Attr->getImportModule());
+        Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+      }
+      if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) {
+        llvm::Function *Fn = cast<llvm::Function>(GV);
+        llvm::AttrBuilder B;
+        B.addAttribute("wasm-import-name", Attr->getImportName());
+        Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+      }
+    }
+
     if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
       llvm::Function *Fn = cast<llvm::Function>(GV);
       if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype())
@@ -6774,21 +6790,19 @@ void MSP430TargetCodeGenInfo::setTargetAttributes(
   if (GV->isDeclaration())
     return;
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
-    if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) {
-      // Handle 'interrupt' attribute:
-      llvm::Function *F = cast<llvm::Function>(GV);
+    const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>();
+    if (!InterruptAttr)
+      return;
 
-      // Step 1: Set ISR calling convention.
-      F->setCallingConv(llvm::CallingConv::MSP430_INTR);
+    // Handle 'interrupt' attribute:
+    llvm::Function *F = cast<llvm::Function>(GV);
 
-      // Step 2: Add attributes goodness.
-      F->addFnAttr(llvm::Attribute::NoInline);
+    // Step 1: Set ISR calling convention.
+    F->setCallingConv(llvm::CallingConv::MSP430_INTR);
 
-      // Step 3: Emit ISR vector alias.
-      unsigned Num = attr->getNumber() / 2;
-      llvm::GlobalAlias::create(llvm::Function::ExternalLinkage,
-                                "__isr_" + Twine(Num), F);
-    }
+    // Step 2: Add attributes goodness.
+    F->addFnAttr(llvm::Attribute::NoInline);
+    F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber()));
   }
 }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 75f16898dfaf66..589f53b119217f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1716,6 +1716,14 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args,
     } else
       D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName;
   }
+
+  if (Arg *A = Args.getLastArg(options::OPT_mrelax_pic_calls,
+                               options::OPT_mno_relax_pic_calls)) {
+    if (A->getOption().matches(options::OPT_mno_relax_pic_calls)) {
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back("-mips-jalr-reloc=0");
+    }
+  }
 }
 
 void Clang::AddPPCTargetArgs(const ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 7e34b0df5c8cfa..a164fd68e22e7c 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -1408,10 +1408,10 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL,
           DAL.AddFlagArg(
               A, Opts.getOption(options::OPT_fno_omit_frame_pointer));
       } else {
-        // Don't warn about /Oy- in 64-bit builds (where
+        // Don't warn about /Oy- in x86-64 builds (where
         // SupportsForcingFramePointer is false).  The flag having no effect
         // there is a compiler-internal optimization, and people shouldn't have
-        // to special-case their build files for 64-bit clang-cl.
+        // to special-case their build files for x86-64 clang-cl.
         A->claim();
       }
       break;
@@ -1442,8 +1442,8 @@ MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
   DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
   const OptTable &Opts = getDriver().getOpts();
 
-  // /Oy and /Oy- only has an effect under X86-32.
-  bool SupportsForcingFramePointer = getArch() == llvm::Triple::x86;
+  // /Oy and /Oy- don't have an effect on X86-64
+  bool SupportsForcingFramePointer = getArch() != llvm::Triple::x86_64;
 
   // The -O[12xd] flag actually expands to several flags.  We must desugar the
   // flags so that options embedded can be negated.  For example, the '-O2' flag
diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp
index b1321cacaf7a3a..c1eae5b05acec6 100644
--- a/clang/lib/Driver/ToolChains/NetBSD.cpp
+++ b/clang/lib/Driver/ToolChains/NetBSD.cpp
@@ -256,6 +256,13 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
 
+  const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs();
+  if (SanArgs.needsSharedRt()) {
+    CmdArgs.push_back("-rpath");
+    CmdArgs.push_back(Args.MakeArgString(
+        ToolChain.getCompilerRTPath().c_str()));
+  }
+
   unsigned Major, Minor, Micro;
   ToolChain.getTriple().getOSVersion(Major, Minor, Micro);
   bool useLibgcc = true;
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 66807b097d4075..4cde22ce9aa445 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1059,10 +1059,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
 
   // OpenCL definitions.
   if (LangOpts.OpenCL) {
-#define OPENCLEXT(Ext) \
-    if (TI.getSupportedOpenCLOpts().isSupported(#Ext, \
-        LangOpts.OpenCLVersion)) \
-      Builder.defineMacro(#Ext);
+#define OPENCLEXT(Ext)                                                         \
+  if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts))                 \
+    Builder.defineMacro(#Ext);
 #include "clang/Basic/OpenCLExtensions.def"
 
     auto Arch = TI.getTriple().getArch();
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index c86f41faeb88a9..966258bab4b329 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -564,8 +564,8 @@ __nop(void) {
 #if defined(__aarch64__)
 unsigned __int64 __getReg(int);
 long _InterlockedAdd(long volatile *Addend, long Value);
-int _ReadStatusReg(int);
-void _WriteStatusReg(int, int);
+__int64 _ReadStatusReg(int);
+void _WriteStatusReg(int, __int64);
 
 static inline unsigned short _byteswap_ushort (unsigned short val) {
   return __builtin_bswap16(val);
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 160bae807174c3..3d3dfb74905f65 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -14470,7 +14470,7 @@ half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);
 #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf
 
-int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
+int printf(__constant const char* st, ...);
 #endif
 
 // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index fa0815eb9c6c5c..3c1da075b0e268 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -617,10 +617,11 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       if (isHalf || isFloat || isLong || isFloat128)
         break; // HF, FF, LF, QF invalid.
 
-      if (s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
-          s += 2; // success, eat up 2 characters.
-          isFloat16 = true;
-          continue;
+      if (PP.getTargetInfo().hasFloat16Type() && s + 2 < ThisTokEnd &&
+          s[1] == '1' && s[2] == '6') {
+        s += 2; // success, eat up 2 characters.
+        isFloat16 = true;
+        continue;
       }
 
       isFloat = true;
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index 380eb64997a711..7e9b1011e81af2 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -693,13 +693,12 @@ void Parser::HandlePragmaOpenCLExtension() {
   if (Name == "all") {
     if (State == Disable) {
       Opt.disableAll();
-      Opt.enableSupportedCore(getLangOpts().OpenCLVersion);
+      Opt.enableSupportedCore(getLangOpts());
     } else {
       PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1;
     }
   } else if (State == Begin) {
-    if (!Opt.isKnown(Name) ||
-        !Opt.isSupported(Name, getLangOpts().OpenCLVersion)) {
+    if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) {
       Opt.support(Name);
     }
     Actions.setCurrentOpenCLExtension(Name);
@@ -709,9 +708,9 @@ void Parser::HandlePragmaOpenCLExtension() {
     Actions.setCurrentOpenCLExtension("");
   } else if (!Opt.isKnown(Name))
     PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident;
-  else if (Opt.isSupportedExtension(Name, getLangOpts().OpenCLVersion))
+  else if (Opt.isSupportedExtension(Name, getLangOpts()))
     Opt.enable(Name, State == Enable);
-  else if (Opt.isSupportedCore(Name, getLangOpts().OpenCLVersion))
+  else if (Opt.isSupportedCore(Name, getLangOpts()))
     PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident;
   else
     PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident;
diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index 9b96c5150e5695..3f5af7d44f36ce 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -637,7 +637,7 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
-  // Filter out "fpsw" and "mxcsr". They aren't valid GCC asm clobber
+  // Filter out "fpsr" and "mxcsr". They aren't valid GCC asm clobber
   // constraints. Clang always adds fpsr to the clobber list anyway.
   llvm::erase_if(Clobbers, [](const std::string &C) {
-    return C == "fpsw" || C == "mxcsr";
+    return C == "fpsr" || C == "mxcsr";
   });
 
   // Build the vector of clobber StringRefs.
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 9fa39968625a60..9d33ec51909240 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -256,11 +256,12 @@ void Sema::Initialize() {
   // Initialize predefined OpenCL types and supported extensions and (optional)
   // core features.
   if (getLangOpts().OpenCL) {
-    getOpenCLOptions().addSupport(Context.getTargetInfo().getSupportedOpenCLOpts());
-    getOpenCLOptions().enableSupportedCore(getLangOpts().OpenCLVersion);
+    getOpenCLOptions().addSupport(
+        Context.getTargetInfo().getSupportedOpenCLOpts());
+    getOpenCLOptions().enableSupportedCore(getLangOpts());
     addImplicitTypedef("sampler_t", Context.OCLSamplerTy);
     addImplicitTypedef("event_t", Context.OCLEventTy);
-    if (getLangOpts().OpenCLVersion >= 200) {
+    if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) {
       addImplicitTypedef("clk_event_t", Context.OCLClkEventTy);
       addImplicitTypedef("queue_t", Context.OCLQueueTy);
       addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy);
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index d9f007a46da5e0..980d7b455ecec8 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -681,7 +681,8 @@ QualType clang::getDeclUsageType(ASTContext &C, const NamedDecl *ND) {
     T = Property->getType();
   else if (const auto *Value = dyn_cast<ValueDecl>(ND))
     T = Value->getType();
-  else
+
+  if (T.isNull())
     return QualType();
 
   // Dig through references, function pointers, and block pointers to
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 0e10804a2ec761..8819f0396a270d 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5377,6 +5377,27 @@ static void handleARMInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
 }
 
 static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // MSP430 'interrupt' attribute is applied to
+  // a function with no parameters and void return type.
+  if (!isFunctionOrMethod(D)) {
+    S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
+        << "'interrupt'" << ExpectedFunctionOrMethod;
+    return;
+  }
+
+  if (hasFunctionProto(D) && getFunctionOrMethodNumParams(D) != 0) {
+    S.Diag(D->getLocation(), diag::warn_msp430_interrupt_attribute)
+        << 0;
+    return;
+  }
+
+  if (!getFunctionOrMethodResultType(D)->isVoidType()) {
+    S.Diag(D->getLocation(), diag::warn_msp430_interrupt_attribute)
+        << 1;
+    return;
+  }
+
+  // The attribute takes one integer argument.
   if (!checkAttributeNumArgs(S, AL, 1))
     return;
 
@@ -5386,8 +5407,6 @@ static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  // FIXME: Check for decl - it should be void ()(void).
-
   Expr *NumParamsExpr = static_cast<Expr *>(AL.getArgAsExpr(0));
   llvm::APSInt NumParams(32);
   if (!NumParamsExpr->isIntegerConstantExpr(NumParams, S.Context)) {
@@ -5396,9 +5415,9 @@ static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
         << NumParamsExpr->getSourceRange();
     return;
   }
-
+  // The argument should be in range 0..63.
   unsigned Num = NumParams.getLimitedValue(255);
-  if ((Num & 1) || Num > 30) {
+  if (Num > 63) {
     S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds)
         << AL << (int)NumParams.getSExtValue()
         << NumParamsExpr->getSourceRange();
@@ -5558,6 +5577,51 @@ static void handleAVRSignalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   handleSimpleAttribute<AVRSignalAttr>(S, D, AL);
 }
 
+// Handles WebAssembly 'import_module': for a function declaration that is not
+// a definition, attaches an attribute carrying the string given as argument 0.
+static void handleWebAssemblyImportModuleAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // dyn_cast, not cast: a declaration that is not a FunctionDecl must reach
+  // the diagnostic below rather than cast<>'s assertion.
+  auto *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD) {
+    S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
+        << "'import_module'" << ExpectedFunction;
+    return;
+  }
+  if (FD->isThisDeclarationADefinition()) {
+    S.Diag(D->getLocation(), diag::err_alias_is_definition) << FD << 0;
+    return;
+  }
+  StringRef Str;
+  SourceLocation ArgLoc;
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc))
+    return;
+  FD->addAttr(::new (S.Context) WebAssemblyImportModuleAttr(
+      AL.getRange(), S.Context, Str, AL.getAttributeSpellingListIndex()));
+}
+
+// Handles WebAssembly 'import_name': for a function declaration that is not
+// a definition, attaches an attribute carrying the string given as argument 0.
+static void handleWebAssemblyImportNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // dyn_cast, not cast: a declaration that is not a FunctionDecl must reach
+  // the diagnostic below rather than cast<>'s assertion.
+  auto *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD) {
+    S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
+        << "'import_name'" << ExpectedFunction;
+    return;
+  }
+  if (FD->isThisDeclarationADefinition()) {
+    S.Diag(D->getLocation(), diag::err_alias_is_definition) << FD << 0;
+    return;
+  }
+  StringRef Str;
+  SourceLocation ArgLoc;
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &ArgLoc))
+    return;
+  FD->addAttr(::new (S.Context) WebAssemblyImportNameAttr(
+      AL.getRange(), S.Context, Str, AL.getAttributeSpellingListIndex()));
+}
 
 static void handleRISCVInterruptAttr(Sema &S, Decl *D,
                                      const ParsedAttr &AL) {
@@ -6311,6 +6375,12 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
   case ParsedAttr::AT_AVRSignal:
     handleAVRSignalAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_WebAssemblyImportModule:
+    handleWebAssemblyImportModuleAttr(S, D, AL);
+    break;
+  case ParsedAttr::AT_WebAssemblyImportName:
+    handleWebAssemblyImportNameAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_IBAction:
     handleSimpleAttribute<IBActionAttr>(S, D, AL);
     break;
@@ -7346,13 +7416,11 @@ ShouldDiagnoseAvailabilityInContext(Sema &S, AvailabilityResult K,
         return true;
     } else if (K == AR_Unavailable) {
       // It is perfectly fine to refer to an 'unavailable' Objective-C method
-      // when it's actually defined and is referenced from within the
-      // @implementation itself. In this context, we interpret unavailable as a
-      // form of access control.
+      // when it is referenced from within the @implementation itself. In this
+      // context, we interpret unavailable as a form of access control.
       if (const auto *MD = dyn_cast<ObjCMethodDecl>(OffendingDecl)) {
         if (const auto *Impl = dyn_cast<ObjCImplDecl>(C)) {
-          if (MD->getClassInterface() == Impl->getClassInterface() &&
-              MD->isDefined())
+          if (MD->getClassInterface() == Impl->getClassInterface())
             return true;
         }
       }
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 43b289d8d0de13..8b3556f715bfdc 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -1301,6 +1301,10 @@ static DeclAccessPair findDecomposableBaseClass(Sema &S, SourceLocation Loc,
 static bool checkMemberDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
                                      ValueDecl *Src, QualType DecompType,
                                      const CXXRecordDecl *OrigRD) {
+  if (S.RequireCompleteType(Src->getLocation(), DecompType,
+                            diag::err_incomplete_type))
+    return true;
+
   CXXCastPath BasePath;
   DeclAccessPair BasePair =
       findDecomposableBaseClass(S, Src->getLocation(), OrigRD, BasePath);
@@ -5886,9 +5890,6 @@ static bool canPassInRegisters(Sema &S, CXXRecordDecl *D,
   if (D->isDependentType() || D->isInvalidDecl())
     return false;
 
-  if (D->hasAttr<TrivialABIAttr>())
-    return true;
-
   // Clang <= 4 used the pre-C++11 rule, which ignores move operations.
   // The PS4 platform ABI follows the behavior of Clang 3.2.
   if (CCK == TargetInfo::CCK_ClangABI4OrPS4)
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index d5416d4d057c73..2bcd47abe35699 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -738,33 +738,20 @@ ExprResult Sema::DefaultArgumentPromotion(Expr *E) {
     return ExprError();
   E = Res.get();
 
-  QualType ScalarTy = Ty;
-  unsigned NumElts = 0;
-  if (const ExtVectorType *VecTy = Ty->getAs<ExtVectorType>()) {
-    NumElts = VecTy->getNumElements();
-    ScalarTy = VecTy->getElementType();
-  }
-
   // If this is a 'float'  or '__fp16' (CVR qualified or typedef)
   // promote to double.
   // Note that default argument promotion applies only to float (and
   // half/fp16); it does not apply to _Float16.
-  const BuiltinType *BTy = ScalarTy->getAs<BuiltinType>();
+  const BuiltinType *BTy = Ty->getAs<BuiltinType>();
   if (BTy && (BTy->getKind() == BuiltinType::Half ||
               BTy->getKind() == BuiltinType::Float)) {
     if (getLangOpts().OpenCL &&
         !getOpenCLOptions().isEnabled("cl_khr_fp64")) {
-      if (BTy->getKind() == BuiltinType::Half) {
-        QualType Ty = Context.FloatTy;
-        if (NumElts != 0)
-          Ty = Context.getExtVectorType(Ty, NumElts);
-        E = ImpCastExprToType(E, Ty, CK_FloatingCast).get();
-      }
+        if (BTy->getKind() == BuiltinType::Half) {
+            E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get();
+        }
     } else {
-      QualType Ty = Context.DoubleTy;
-      if (NumElts != 0)
-        Ty = Context.getExtVectorType(Ty, NumElts);
-      E = ImpCastExprToType(E, Ty, CK_FloatingCast).get();
+      E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get();
     }
   }
 
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 3f9dc989103faa..f974bedffe0057 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -6309,7 +6309,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
       // -- a predefined __func__ variable
       if (auto *E = Value.getLValueBase().dyn_cast<const Expr*>()) {
         if (isa<CXXUuidofExpr>(E)) {
-          Converted = TemplateArgument(ArgResult.get());
+          Converted = TemplateArgument(ArgResult.get()->IgnoreImpCasts());
           break;
         }
         Diag(Arg->getBeginLoc(), diag::err_template_arg_not_decl_ref)
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index b4c075e9c46d42..1ae94c8aec9991 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1442,7 +1442,12 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
     else
       Result = Context.Int128Ty;
     break;
-  case DeclSpec::TST_float16: Result = Context.Float16Ty; break;
+  case DeclSpec::TST_float16:
+    if (!S.Context.getTargetInfo().hasFloat16Type())
+      S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
+        << "_Float16";
+    Result = Context.Float16Ty;
+    break;
   case DeclSpec::TST_half:    Result = Context.HalfTy; break;
   case DeclSpec::TST_float:   Result = Context.FloatTy; break;
   case DeclSpec::TST_double:
diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp
index c8e9c167422e80..f5040b8a09d571 100644
--- a/clang/lib/Tooling/ArgumentsAdjusters.cpp
+++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp
@@ -108,5 +108,27 @@ ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First,
   };
 }
 
+// Returns an adjuster that drops clang plugin options, which come in groups of
+// four: -Xclang {-load,-plugin,-plugin-arg-<name>,-add-plugin} -Xclang <arg>
+// (see https://clang.llvm.org/docs/ClangPlugins.html).
+ArgumentsAdjuster getStripPluginsAdjuster() {
+  return [](const CommandLineArguments &Args, StringRef /*unused*/) {
+    CommandLineArguments AdjustedArgs;
+    for (size_t I = 0, E = Args.size(); I != E; I++) {
+      // A group spans I..I+3, so I + 3 < E suffices, even at the very end.
+      if (I + 3 < E && Args[I] == "-Xclang" &&
+          (Args[I + 1] == "-load" || Args[I + 1] == "-plugin" ||
+           llvm::StringRef(Args[I + 1]).startswith("-plugin-arg-") ||
+           Args[I + 1] == "-add-plugin") &&
+          Args[I + 2] == "-Xclang") {
+        I += 3; // The loop increment then steps past the fourth element.
+        continue;
+      }
+      AdjustedArgs.push_back(Args[I]);
+    }
+    return AdjustedArgs;
+  };
+}
+
 } // end namespace tooling
 } // end namespace clang
diff --git a/clang/test/AST/float16.cpp b/clang/test/AST/float16.cpp
index aa65270c75d4c6..2f428e7085ff1e 100644
--- a/clang/test/AST/float16.cpp
+++ b/clang/test/AST/float16.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -ast-dump %s | FileCheck %s --strict-whitespace
-// RUN: %clang_cc1 -std=c++11 -ast-dump -fnative-half-type %s | FileCheck %s --check-prefix=CHECK-NATIVE --strict-whitespace
+// RUN: %clang_cc1 -std=c++11 -ast-dump -triple aarch64-linux-gnu %s | FileCheck %s --strict-whitespace
+// RUN: %clang_cc1 -std=c++11 -ast-dump -triple aarch64-linux-gnu -fnative-half-type %s | FileCheck %s --check-prefix=CHECK-NATIVE --strict-whitespace
 
 /*  Various contexts where type _Float16 can appear. */
 
diff --git a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp
index 686aac2802adaf..e435bee2c88ee1 100644
--- a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp
+++ b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.align/p8.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++11 -verify %s
+// RUN: %clang_cc1 -std=c++11 -verify %s -triple x86_64-linux-gnu
 
 alignas(double) void f(); // expected-error {{'alignas' attribute only applies to variables, data members and tag types}}
 alignas(double) unsigned char c[sizeof(double)]; // expected-note {{previous}}
diff --git a/clang/test/CodeCompletion/crash-null-type.cpp b/clang/test/CodeCompletion/crash-null-type.cpp
new file mode 100644
index 00000000000000..c5b3d1e79390c3
--- /dev/null
+++ b/clang/test/CodeCompletion/crash-null-type.cpp
@@ -0,0 +1,8 @@
+void test() {
+  for (auto [loopVar] : y) { // y has to be unresolved
+    loopVa
+  }
+}
+// RUN: not %clang_cc1 -fsyntax-only -code-completion-at=%s:3:11 %s -o - \
+// RUN:            | FileCheck %s
+// CHECK: COMPLETION: loopVar
diff --git a/clang/test/CodeGen/arm64-crc32.c b/clang/test/CodeGen/arm64-crc32.c
index 2d913fb123b7c8..26d69a23b6a1a6 100644
--- a/clang/test/CodeGen/arm64-crc32.c
+++ b/clang/test/CodeGen/arm64-crc32.c
@@ -1,54 +1,57 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu \
 // RUN:  -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-windows \
+// RUN:  -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+#include <stdint.h>
 
-int crc32b(int a, char b)
+uint32_t crc32b(uint32_t a, uint8_t b)
 {
         return __builtin_arm_crc32b(a,b);
 // CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32
 // CHECK: call i32 @llvm.aarch64.crc32b(i32 %a, i32 [[T0]])
 }
 
-int crc32cb(int a, char b)
+uint32_t crc32cb(uint32_t a, uint8_t b)
 {
         return __builtin_arm_crc32cb(a,b);
 // CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32
 // CHECK: call i32 @llvm.aarch64.crc32cb(i32 %a, i32 [[T0]])
 }
 
-int crc32h(int a, short b)
+uint32_t crc32h(uint32_t a, uint16_t b)
 {
         return __builtin_arm_crc32h(a,b);
 // CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32
 // CHECK: call i32 @llvm.aarch64.crc32h(i32 %a, i32 [[T0]])
 }
 
-int crc32ch(int a, short b)
+uint32_t crc32ch(uint32_t a, uint16_t b)
 {
         return __builtin_arm_crc32ch(a,b);
 // CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32
 // CHECK: call i32 @llvm.aarch64.crc32ch(i32 %a, i32 [[T0]])
 }
 
-int crc32w(int a, int b)
+uint32_t crc32w(uint32_t a, uint32_t b)
 {
         return __builtin_arm_crc32w(a,b);
 // CHECK: call i32 @llvm.aarch64.crc32w(i32 %a, i32 %b)
 }
 
-int crc32cw(int a, int b)
+uint32_t crc32cw(uint32_t a, uint32_t b)
 {
         return __builtin_arm_crc32cw(a,b);
 // CHECK: call i32 @llvm.aarch64.crc32cw(i32 %a, i32 %b)
 }
 
-int crc32d(int a, long b)
+uint32_t crc32d(uint32_t a, uint64_t b)
 {
         return __builtin_arm_crc32d(a,b);
 // CHECK: call i32 @llvm.aarch64.crc32x(i32 %a, i64 %b)
 }
 
-int crc32cd(int a, long b)
+uint32_t crc32cd(uint32_t a, uint64_t b)
 {
         return __builtin_arm_crc32cd(a,b);
 // CHECK: call i32 @llvm.aarch64.crc32cx(i32 %a, i64 %b)
diff --git a/clang/test/CodeGen/arm64-microsoft-status-reg.cpp b/clang/test/CodeGen/arm64-microsoft-status-reg.cpp
index eb59bae50f0ae3..524b5af120c52a 100644
--- a/clang/test/CodeGen/arm64-microsoft-status-reg.cpp
+++ b/clang/test/CodeGen/arm64-microsoft-status-reg.cpp
@@ -23,88 +23,112 @@
 #define ARM64_TPIDRRO_EL0       ARM64_SYSREG(3,3,13, 0,3)  // Thread ID Register, User Read Only [CP15_TPIDRURO]
 #define ARM64_TPIDR_EL1         ARM64_SYSREG(3,0,13, 0,4)  // Thread ID Register, Privileged Only [CP15_TPIDRPRW]
 
-void check_ReadWriteStatusReg(int v) {
-  int ret;
+// From intrin.h
+__int64 _ReadStatusReg(int);
+void _WriteStatusReg(int, __int64);
+
+void check_ReadWriteStatusReg(__int64 v) {
+  __int64 ret;
   ret = _ReadStatusReg(ARM64_CNTVCT);
-// CHECK-ASM: mrs     x8, CNTVCT_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD2:.*]])
+// CHECK-ASM: mrs     x0, CNTVCT_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD2:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_PMCCNTR_EL0);
-// CHECK-ASM: mrs     x8, PMCCNTR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD3:.*]])
+// CHECK-ASM: mrs     x0, PMCCNTR_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD3:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_PMSELR_EL0);
-// CHECK-ASM: mrs     x8, PMSELR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD4:.*]])
+// CHECK-ASM: mrs     x0, PMSELR_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD4:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_PMXEVCNTR_EL0);
-// CHECK-ASM: mrs     x8, PMXEVCNTR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD5:.*]])
+// CHECK-ASM: mrs     x0, PMXEVCNTR_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD5:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(0));
-// CHECK-ASM: mrs     x8, PMEVCNTR0_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD6:.*]])
+// CHECK-ASM: mrs     x0, PMEVCNTR0_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD6:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(1));
-// CHECK-ASM: mrs     x8, PMEVCNTR1_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD7:.*]])
+// CHECK-ASM: mrs     x0, PMEVCNTR1_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD7:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_PMXEVCNTRn_EL0(30));
-// CHECK-ASM: mrs     x8, PMEVCNTR30_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD8:.*]])
+// CHECK-ASM: mrs     x0, PMEVCNTR30_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD8:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_TPIDR_EL0);
-// CHECK-ASM: mrs     x8, TPIDR_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD9:.*]])
+// CHECK-ASM: mrs     x0, TPIDR_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD9:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_TPIDRRO_EL0);
-// CHECK-ASM: mrs     x8, TPIDRRO_EL0
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD10:.*]])
+// CHECK-ASM: mrs     x0, TPIDRRO_EL0
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD10:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
   ret = _ReadStatusReg(ARM64_TPIDR_EL1);
-// CHECK-ASM: mrs     x8, TPIDR_EL1
-// CHECK-IR: call i64 @llvm.read_register.i64(metadata ![[MD11:.*]])
+// CHECK-ASM: mrs     x0, TPIDR_EL1
+// CHECK-IR: %[[VAR:.*]] = call i64 @llvm.read_register.i64(metadata ![[MD11:.*]])
+// CHECK-IR-NEXT: store i64 %[[VAR]]
 
 
   _WriteStatusReg(ARM64_CNTVCT, v);
-// CHECK-ASM: msr     S3_3_C14_C0_2, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD2:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     S3_3_C14_C0_2, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD2:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_PMCCNTR_EL0, v);
-// CHECK-ASM: msr     PMCCNTR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD3:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     PMCCNTR_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD3:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_PMSELR_EL0, v);
-// CHECK-ASM: msr     PMSELR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD4:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     PMSELR_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD4:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_PMXEVCNTR_EL0, v);
-// CHECK-ASM: msr     PMXEVCNTR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD5:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     PMXEVCNTR_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD5:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_PMXEVCNTRn_EL0(0), v);
-// CHECK-ASM: msr     PMEVCNTR0_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD6:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     PMEVCNTR0_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD6:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_PMXEVCNTRn_EL0(1), v);
-// CHECK-ASM: msr     PMEVCNTR1_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD7:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     PMEVCNTR1_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD7:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_PMXEVCNTRn_EL0(30), v);
-// CHECK-ASM: msr     PMEVCNTR30_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD8:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     PMEVCNTR30_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD8:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_TPIDR_EL0, v);
-// CHECK-ASM: msr     TPIDR_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD9:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     TPIDR_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD9:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_TPIDRRO_EL0, v);
-// CHECK-ASM: msr     TPIDRRO_EL0, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD10:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     TPIDRRO_EL0, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD10:.*]], i64 %[[VAR]])
 
   _WriteStatusReg(ARM64_TPIDR_EL1, v);
-// CHECK-ASM: msr     TPIDR_EL1, x8
-// CHECK-IR: call void @llvm.write_register.i64(metadata ![[MD11:.*]], i64 {{%.*}})
+// CHECK-ASM: msr     TPIDR_EL1, x0
+// CHECK-IR: %[[VAR:.*]] = load i64,
+// CHECK-IR-NEXT: call void @llvm.write_register.i64(metadata ![[MD11:.*]], i64 %[[VAR]])
 }
 
 // CHECK-IR: ![[MD2]] = !{!"3:3:14:0:2"}
diff --git a/clang/test/CodeGen/attr-msp430.c b/clang/test/CodeGen/attr-msp430.c
new file mode 100644
index 00000000000000..e8b6d0d0fa3ea7
--- /dev/null
+++ b/clang/test/CodeGen/attr-msp430.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple msp430-unknown-unknown -emit-llvm < %s| FileCheck %s
+
+__attribute__((interrupt(1))) void foo(void) {} // Empty handler; the attribute argument 1 is the value the "interrupt"="1" expectation below refers to.
+// CHECK: @llvm.used
+// CHECK-SAME: @foo
+
+// CHECK: define msp430_intrcc void @foo() #0
+// CHECK: attributes #0
+// CHECK-SAME: noinline
+// CHECK-SAME: "interrupt"="1"
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 7027a6e220f3d8..f164c2f6f3647c 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LINUX
+// RUN: %clang_cc1 -triple aarch64-windows -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-WIN
+#include <stdint.h>
 
 void f0(void *a, void *b) {
 	__clear_cache(a,b);
@@ -15,8 +17,15 @@ unsigned rbit(unsigned a) {
   return __builtin_arm_rbit(a);
 }
 
+// CHECK-WIN: [[A64:%[^ ]+]] = zext i32 %a to i64
+// CHECK-WIN: call i64 @llvm.bitreverse.i64(i64 [[A64]])
+// CHECK-LINUX: call i64 @llvm.bitreverse.i64(i64 %a)
+unsigned long rbitl(unsigned long a) { // 'unsigned long' argument: the Windows run expects a zext from i32 first, the Linux run expects %a passed as i64 directly.
+  return __builtin_arm_rbit64(a);
+}
+
 // CHECK: call {{.*}} @llvm.bitreverse.i64(i64 %a)
-unsigned long long rbit64(unsigned long long a) {
+uint64_t rbit64(uint64_t a) {
   return __builtin_arm_rbit64(a);
 }
 
@@ -49,13 +58,17 @@ void prefetch() {
 // CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0)
 }
 
-unsigned rsr() {
+__typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void); // Prototype whose return type is taken from the builtin itself, so the uint32_t definition below must be compatible with it.
+
+uint32_t rsr() {
   // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
   // CHECK-NEXT: trunc i64 [[V0]] to i32
   return __builtin_arm_rsr("1:2:3:4:5");
 }
 
-unsigned long rsr64() {
+__typeof__(__builtin_arm_rsr64("1:2:3:4:5")) rsr64(void); // Prototype whose return type is taken from the builtin itself, so the uint64_t definition below must be compatible with it.
+
+uint64_t rsr64(void) {
   // CHECK: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
   return __builtin_arm_rsr64("1:2:3:4:5");
 }
@@ -66,13 +79,17 @@ void *rsrp() {
   return __builtin_arm_rsrp("1:2:3:4:5");
 }
 
+__typeof__(__builtin_arm_wsr("1:2:3:4:5", 0)) wsr(unsigned); // Prototype whose return type is taken from the builtin call; the definition below declares it void.
+
 void wsr(unsigned v) {
   // CHECK: [[V0:[%A-Za-z0-9.]+]] = zext i32 %v to i64
   // CHECK-NEXT: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 [[V0]])
   __builtin_arm_wsr("1:2:3:4:5", v);
 }
 
-void wsr64(unsigned long v) {
+__typeof__(__builtin_arm_wsr64("1:2:3:4:5", 0)) wsr64(uint64_t); // Prototype whose return type is taken from the builtin call; the definition below declares it void with a uint64_t parameter.
+
+void wsr64(uint64_t v) {
   // CHECK: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 %v)
   __builtin_arm_wsr64("1:2:3:4:5", v);
 }
diff --git a/clang/test/CodeGen/microsoft-no-common-align.c b/clang/test/CodeGen/microsoft-no-common-align.c
index fc46946c00ed23..a7a27a062704fa 100644
--- a/clang/test/CodeGen/microsoft-no-common-align.c
+++ b/clang/test/CodeGen/microsoft-no-common-align.c
@@ -6,3 +6,6 @@ TooLargeAlignment TooBig;
 // CHECK: @TooBig = dso_local global <16 x float>  zeroinitializer, align 64
 NormalAlignment JustRight;
 // CHECK: @JustRight = common dso_local global <1 x float>  zeroinitializer, align 4
+
+TooLargeAlignment *IsAPointer;
+// CHECK: @IsAPointer = common dso_local global <16 x float>* null, align 8
diff --git a/clang/test/CodeGen/powerpc_types.c b/clang/test/CodeGen/powerpc_types.c
index b7d0f5de49859d..86eb7f8356801d 100644
--- a/clang/test/CodeGen/powerpc_types.c
+++ b/clang/test/CodeGen/powerpc_types.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -triple powerpc-unknown-freebsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s
+// RUN: %clang_cc1 -triple powerpc-unknown-netbsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s
+// RUN: %clang_cc1 -triple powerpc-unknown-openbsd -emit-llvm -o - %s| FileCheck -check-prefix=SVR4-CHECK %s
 
 #include <stdarg.h>
 
diff --git a/clang/test/CodeGen/wasm-import-module.c b/clang/test/CodeGen/wasm-import-module.c
new file mode 100644
index 00000000000000..866a3a459949bc
--- /dev/null
+++ b/clang/test/CodeGen/wasm-import-module.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-llvm -o - %s | FileCheck %s
+
+void __attribute__((import_module("bar"))) foo(void); // Declaration only; "bar" is the module name the attribute expectation at the end of this file looks for.
+
+void call(void) {
+  foo(); // Uses foo; presumably needed so that its declaration appears in the emitted IR.
+}
+
+// CHECK: declare void @foo() [[A:#[0-9]+]]
+
+// CHECK: attributes [[A]] = {{{.*}} "wasm-import-module"="bar" {{.*}}}
diff --git a/clang/test/CodeGen/wasm-import-name.c b/clang/test/CodeGen/wasm-import-name.c
new file mode 100644
index 00000000000000..7c3b094b9e435c
--- /dev/null
+++ b/clang/test/CodeGen/wasm-import-name.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-llvm -o - %s | FileCheck %s
+
+void __attribute__((import_name("bar"))) foo(void); // Declaration only; "bar" is the import name the attribute expectation at the end of this file looks for.
+
+void call(void) {
+  foo(); // Uses foo; presumably needed so that its declaration appears in the emitted IR.
+}
+
+// CHECK: declare void @foo() [[A:#[0-9]+]]
+
+// CHECK: attributes [[A]] = {{{.*}} "wasm-import-name"="bar" {{.*}}}
diff --git a/clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp b/clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp
new file mode 100644
index 00000000000000..b46d41d7c96049
--- /dev/null
+++ b/clang/test/CodeGenCXX/cxx11-thread-local-visibility.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -std=c++11 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck --check-prefix=LINUX %s
+// RUN: %clang_cc1 -std=c++11 -emit-llvm %s -o - -triple x86_64-apple-darwin12 | FileCheck --check-prefix=DARWIN %s
+
+// Regression test for PR40327
+
+// LINUX: @default_tls = thread_local global i32
+// LINUX: @hidden_tls = hidden thread_local global i32
+// LINUX: define weak_odr hidden i32* @_ZTW11default_tls()
+// LINUX: define weak_odr hidden i32* @_ZTW10hidden_tls()
+//
+// DARWIN: @default_tls = internal thread_local global i32
+// DARWIN: @hidden_tls = internal thread_local global i32
+// DARWIN: define cxx_fast_tlscc i32* @_ZTW11default_tls()
+// DARWIN: define hidden cxx_fast_tlscc i32* @_ZTW10hidden_tls()
+
+__attribute__((visibility("default"))) thread_local int default_tls; // Default visibility; the Linux run above still expects its wrapper _ZTW11default_tls to be hidden.
+__attribute__((visibility("hidden"))) thread_local int hidden_tls; // Hidden visibility; both runs above expect a hidden wrapper _ZTW10hidden_tls.
diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp
index 156c4f591908b1..de941af1afb87a 100644
--- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp
+++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp
@@ -318,7 +318,7 @@ void set_anon_i() {
 // CHECK-NOT: call void @[[V_M_INIT]]()
 
 
-// LIUNX: define weak_odr hidden i32* @_ZTW1a() {
+// LINUX: define weak_odr hidden i32* @_ZTW1a()
 // DARWIN: define cxx_fast_tlscc i32* @_ZTW1a()
 // LINUX:   call void @_ZTH1a()
 // DARWIN: call cxx_fast_tlscc void @_ZTH1a()
diff --git a/clang/test/CodeGenCXX/float16-declarations.cpp b/clang/test/CodeGenCXX/float16-declarations.cpp
index 7e1c1e8db93caf..7d07eac48111f5 100644
--- a/clang/test/CodeGenCXX/float16-declarations.cpp
+++ b/clang/test/CodeGenCXX/float16-declarations.cpp
@@ -1,5 +1,4 @@
 // RUN: %clang -std=c++11 --target=aarch64-arm--eabi -S -emit-llvm %s -o - | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-AARCH64
-// RUN: %clang -std=c++11 --target=x86_64 -S -emit-llvm %s -o - | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-X86
 
 /*  Various contexts where type _Float16 can appear. */
 
@@ -15,7 +14,6 @@ namespace {
 
   _Float16 arr1n[10];
 // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16
 
   _Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
 // CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
@@ -30,14 +28,12 @@ namespace {
 
 _Float16 f1f;
 // CHECK-AARCH64-DAG: @f1f = dso_local global half 0xH0000, align 2
-// CHECK-X86-DAG: @f1f = dso_local global half 0xH0000, align 2
 
 _Float16 f2f = 32.4;
 // CHECK-DAG: @f2f = dso_local global half 0xH500D, align 2
 
 _Float16 arr1f[10];
 // CHECK-AARCH64-DAG: @arr1f = dso_local global [10 x half] zeroinitializer, align 2
-// CHECK-X86-DAG: @arr1f = dso_local global [10 x half] zeroinitializer, align 16
 
 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
 // CHECK-DAG: @arr2f = dso_local global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
@@ -137,8 +133,6 @@ int main(void) {
   long double cvtld = f2n;
 //CHECK-AARCh64-DAG: [[H2LD:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to fp128
 //CHECK-AARCh64-DAG: store fp128 [[H2LD]], fp128* %{{.*}}, align 16
-//CHECK-X86-DAG:     [[H2LD:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to x86_fp80
-//CHECK-X86-DAG:     store x86_fp80 [[H2LD]], x86_fp80* %{{.*}}, align 16
 
   _Float16 f2h = 42.0f;
 //CHECK-DAG: store half 0xH5140, half* %{{.*}}, align 2
diff --git a/clang/test/CodeGenCXX/mangle-ms.cpp b/clang/test/CodeGenCXX/mangle-ms.cpp
index e128c944315320..0175b961e5e90a 100644
--- a/clang/test/CodeGenCXX/mangle-ms.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fblocks -emit-llvm %s -o - -triple=i386-pc-win32 -std=c++98 | FileCheck %s
 // RUN: %clang_cc1 -fblocks -emit-llvm %s -o - -triple=x86_64-pc-win32 -std=c++98| FileCheck -check-prefix X64 %s
+// RUN: %clang_cc1 -fblocks -emit-llvm %s -o - -triple=aarch64-pc-win32 -std=c++98 -DARM | FileCheck -check-prefixes=X64,ARM %s
 
 int a;
 // CHECK-DAG: @"?a@@3HA"
@@ -466,10 +467,12 @@ namespace Complex {
 // CHECK-DAG: define dso_local void @"?f@Complex@@YAXU?$_Complex@H@__clang@@@Z"(
 void f(_Complex int) {}
 }
+#ifdef ARM
 namespace Float16 {
-// CHECK-DAG: define dso_local void @"?f@Float16@@YAXU_Float16@__clang@@@Z"(
+// ARM-DAG: define dso_local void @"?f@Float16@@YAXU_Float16@__clang@@@Z"(
 void f(_Float16) {}
 }
+#endif // ARM
 
 namespace PR26029 {
 template <class>
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index b795c0755bd41d..37ff770abf57eb 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -30,6 +30,32 @@ void test_block() {
   used(block);
 }
 
+// Using the variable being initialized is typically UB in C, but for blocks we
+// can be nice: they imply extra book-keeping and we can do the auto-init before
+// any of said book-keeping.
+//
+// UNINIT-LABEL:  test_block_self_init(
+// ZERO-LABEL:    test_block_self_init(
+// ZERO:          %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+// ZERO:          %captured1 = getelementptr inbounds %struct.__block_byref_captured, %struct.__block_byref_captured* %captured, i32 0, i32 4
+// ZERO-NEXT:     store %struct.XYZ* null, %struct.XYZ** %captured1, align 8
+// ZERO:          %call = call %struct.XYZ* @create(
+// PATTERN-LABEL: test_block_self_init(
+// PATTERN:       %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+// PATTERN:       %captured1 = getelementptr inbounds %struct.__block_byref_captured, %struct.__block_byref_captured* %captured, i32 0, i32 4
+// PATTERN-NEXT:  store %struct.XYZ* inttoptr (i64 -6148914691236517206 to %struct.XYZ*), %struct.XYZ** %captured1, align 8
+// PATTERN:       %call = call %struct.XYZ* @create(
+void test_block_self_init() {
+  using Block = void (^)(); // Block type with no parameters and no result.
+  typedef struct XYZ {
+    Block block;
+  } * xyz_t; // xyz_t is a pointer to struct XYZ.
+  extern xyz_t create(Block block); // Function-local extern declaration of the callee.
+  __block xyz_t captured = create(^() { // The initializer of 'captured' passes a block that itself refers to 'captured'.
+    (void)captured;
+  });
+}
+
 // This type of code is currently not handled by zero / pattern initialization.
 // The test will break when that is fixed.
 // UNINIT-LABEL:  test_goto_unreachable_value(
diff --git a/clang/test/CodeGenCXX/trivial_abi.cpp b/clang/test/CodeGenCXX/trivial_abi.cpp
index e37c8ff615a268..2cf07b22581a2c 100644
--- a/clang/test/CodeGenCXX/trivial_abi.cpp
+++ b/clang/test/CodeGenCXX/trivial_abi.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++17 -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++17 -fcxx-exceptions -fexceptions -fclang-abi-compat=4.0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -std=c++11 -fcxx-exceptions -fexceptions -fclang-abi-compat=4.0 -emit-llvm -o - %s | FileCheck %s
 
 // CHECK: %[[STRUCT_SMALL:.*]] = type { i32* }
 // CHECK: %[[STRUCT_LARGE:.*]] = type { i32*, [128 x i32] }
@@ -43,13 +43,6 @@ struct HasNonTrivial {
   NonTrivial m;
 };
 
-struct __attribute__((trivial_abi)) CopyMoveDeleted {
-  CopyMoveDeleted(int);
-  CopyMoveDeleted(const CopyMoveDeleted &) = delete;
-  CopyMoveDeleted(CopyMoveDeleted &&) = delete;
-  int a;
-};
-
 // CHECK: define void @_Z14testParamSmall5Small(i64 %[[A_COERCE:.*]])
 // CHECK: %[[A:.*]] = alloca %[[STRUCT_SMALL]], align 8
 // CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_SMALL]], %[[STRUCT_SMALL]]* %[[A]], i32 0, i32 0
@@ -244,11 +237,3 @@ void calleeExceptionLarge(Large, Large);
 void testExceptionLarge() {
   calleeExceptionLarge(Large(), Large());
 }
-
-// A class with deleted copy and move constructors can still be passed or
-// returned in registers if the class is annotated with trivial_abi.
-
-// CHECK: define i64 @_Z19testCopyMoveDeletedi(i32 %
-CopyMoveDeleted testCopyMoveDeleted(int a) {
-  return a;
-}
diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl
index 346f6c35bae469..fc139d776db6ef 100644
--- a/clang/test/CodeGenOpenCL/printf.cl
+++ b/clang/test/CodeGenOpenCL/printf.cl
@@ -12,28 +12,26 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)))
 
 
 // ALL-LABEL: @test_printf_float2(
-// FP64: %conv = fpext <2 x float> %0 to <2 x double>
-// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv)
+// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0)
 
-// NOFP64:  call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0)
+
+// NOFP64:  call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0)
 kernel void test_printf_float2(float2 arg) {
-  printf("%v2f", arg);
+  printf("%v2hlf", arg); // 'hl' length modifier on the v2 conversion; the expectations above pass the <2 x float> argument through unconverted.
 }
 
 // ALL-LABEL: @test_printf_half2(
-// FP64: %conv = fpext <2 x half> %0 to <2 x double>
-// FP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2
+// FP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0)
 
-// NOFP64: %conv = fpext <2 x half> %0 to <2 x float>
-// NOFP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %conv) #2
+// NOFP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0)
 kernel void test_printf_half2(half2 arg) {
-  printf("%v2f", arg);
+  printf("%v2hf", arg); // 'h' length modifier on the v2 conversion; the expectations above pass the <2 x half> argument through unconverted.
 }
 
 #ifdef cl_khr_fp64
 // FP64-LABEL: @test_printf_double2(
-// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %0) #2
+// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.2, i32 0, i32 0), <2 x double> %0)
 kernel void test_printf_double2(double2 arg) {
-  printf("%v2f", arg);
+  printf("%v2lf", arg); // 'l' length modifier on the v2 conversion; the expectation above passes the <2 x double> argument as is.
 }
 #endif
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index f5171d5c040c5a..d8db081ac8a684 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -178,6 +178,10 @@
 // Oy_2: -momit-leaf-frame-pointer
 // Oy_2: -O2
 
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc -Werror /Oy- /O2 -### -- %s 2>&1 | FileCheck -check-prefix=Oy_aarch64 %s
+// Oy_aarch64: -mdisable-fp-elim
+// Oy_aarch64: -O2
+
 // RUN: %clang_cl --target=i686-pc-win32 -Werror /O2 /O2 -### -- %s 2>&1 | FileCheck -check-prefix=O2O2 %s
 // O2O2: "-O2"
 
diff --git a/clang/test/Driver/mips-features.c b/clang/test/Driver/mips-features.c
index f63fb8de55d6c9..19725bc096b5d6 100644
--- a/clang/test/Driver/mips-features.c
+++ b/clang/test/Driver/mips-features.c
@@ -444,3 +444,15 @@
 // RUN:     -mginv -mno-ginv 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-GINV %s
 // CHECK-NO-GINV: "-target-feature" "-ginv"
+//
+// -mrelax-pic-calls
+// RUN: %clang -target mips-unknown-linux-gnu -### -c %s \
+// RUN:     -mno-relax-pic-calls -mrelax-pic-calls 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-RELAX-PIC-CALLS %s
+// CHECK-RELAX-PIC-CALLS-NOT: "-mllvm" "-mips-jalr-reloc=0"
+//
+// -mno-relax-pic-calls
+// RUN: %clang -target mips-unknown-linux-gnu -### -c %s \
+// RUN:     -mrelax-pic-calls -mno-relax-pic-calls 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-RELAX-PIC-CALLS %s
+// CHECK-NO-RELAX-PIC-CALLS: "-mllvm" "-mips-jalr-reloc=0"
diff --git a/clang/test/Driver/msp430-toolchain.c b/clang/test/Driver/msp430-toolchain.c
index ae5ed9189c8281..62ef1c0c1f150f 100644
--- a/clang/test/Driver/msp430-toolchain.c
+++ b/clang/test/Driver/msp430-toolchain.c
@@ -8,44 +8,44 @@
 // RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430 %s
 
-// MSP430: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld"
+// MSP430: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
 // MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crt0.o"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtbegin.o"
+// MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtbegin.o"
 // MSP430: "--start-group" "-lmul_none" "-lgcc" "-lc" "-lcrt" "-lnosys" "--end-group"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtend.o"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crtn.o"
+// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtend.o"
+// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crtn.o"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nodefaultlibs \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430-NO-DFT-LIB %s
 
-// MSP430-NO-DFT-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld"
+// MSP430-NO-DFT-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
 // MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crt0.o"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtbegin.o"
+// MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtbegin.o"
 // MSP430-NO-DFT-LIB: "--start-group" "-lmul_none" "-lgcc" "--end-group"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtend.o"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430/crtn.o"
+// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtend.o"
+// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crtn.o"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nostartfiles \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430-NO-START %s
 
-// MSP430-NO-START: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld"
+// MSP430-NO-START: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
 // MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430"
+// MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
 // MSP430-NO-START: "--start-group" "-lmul_none" "-lgcc" "-lc" "-lcrt" "-lnosys" "--end-group"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nostdlib \
 // RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree 2>&1 \
 // RUN:   | FileCheck -check-prefix=MSP430-NO-STD-LIB %s
 
-// MSP430-NO-STD-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../bin/msp430-elf-ld"
+// MSP430-NO-STD-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
 // MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../../../msp430-elf/lib/430"
+// MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
 // MSP430-NO-STD-LIB: "--start-group" "-lmul_none" "-lgcc" "--end-group"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430f147 2>&1 \
diff --git a/clang/test/Lexer/half-literal.cpp b/clang/test/Lexer/half-literal.cpp
index 8e0034d491dd72..2f1cf9589fab0a 100644
--- a/clang/test/Lexer/half-literal.cpp
+++ b/clang/test/Lexer/half-literal.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -pedantic %s
+// RUN: %clang_cc1 -fsyntax-only -verify -pedantic -triple aarch64-linux-gnu %s
 float a = 1.0h; // expected-error{{no matching literal operator for call to 'operator""h' with argument of type 'long double' or 'const char *', and no matching literal operator template}}
 float b = 1.0H; // expected-error{{invalid suffix 'H' on floating constant}}
 
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 9a6bcca1bd36bf..98935fc21355b9 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -136,6 +136,8 @@
 // CHECK-NEXT: WarnUnusedResult (SubjectMatchRule_objc_method, SubjectMatchRule_enum, SubjectMatchRule_record, SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: Weak (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record)
 // CHECK-NEXT: WeakRef (SubjectMatchRule_variable, SubjectMatchRule_function)
+// CHECK-NEXT: WebAssemblyImportModule (SubjectMatchRule_function)
+// CHECK-NEXT: WebAssemblyImportName (SubjectMatchRule_function)
 // CHECK-NEXT: WorkGroupSizeHint (SubjectMatchRule_function)
 // CHECK-NEXT: XRayInstrument (SubjectMatchRule_function, SubjectMatchRule_objc_method)
 // CHECK-NEXT: XRayLogArgs (SubjectMatchRule_function, SubjectMatchRule_objc_method)
diff --git a/clang/test/PCH/leakfiles b/clang/test/PCH/leakfiles
new file mode 100644
index 00000000000000..90b279026bc1ab
--- /dev/null
+++ b/clang/test/PCH/leakfiles
@@ -0,0 +1,29 @@
+// Test that compiling using a PCH doesn't leak file descriptors.
+// https://bugs.chromium.org/p/chromium/issues/detail?id=924225
+//
+// This test requires bash loops and ulimit.
+// REQUIRES: shell
+// UNSUPPORTED: win32
+//
+// Set up source files. lib/lib.h includes lots of lib*.h files in that dir.
+// client.c includes lib/lib.h, and also the individual files directly.
+//
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: cd %t
+// RUN: mkdir lib
+// RUN: for i in {1..300}; do touch lib/lib$i.h; done
+// RUN: for i in {1..300}; do echo "#include \"lib$i.h\"" >> lib/lib.h; done
+// RUN: echo "#include \"lib/lib.h\"" > client.c
+// RUN: for i in {1..300}; do echo "#include \"lib/lib$i.h\"" >> client.c; done
+//
+// We want to verify that we don't hold all the files open at the same time.
+// This is important, e.g., on macOS, which has a low default FD limit.
+// RUN: ulimit -n 100
+//
+// Test without PCH.
+// RUN: %clang_cc1 -fsyntax-only -Ilib/ client.c
+//
+// Test with PCH.
+// RUN: %clang_cc1 -emit-pch -o pch -Ilib/ client.c
+// RUN: %clang_cc1 -include-pch pch -Ilib/ client.c -fsyntax-only
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 940dddade33786..770e52cc784355 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -9110,667 +9110,383 @@
 //
 // RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm32-unknown-unknown \
 // RUN:   < /dev/null \
-// RUN:   | FileCheck -match-full-lines -check-prefix=WEBASSEMBLY32 %s
+// RUN:   | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY32 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-unknown \
+// RUN:   < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm32-unknown-wasi \
+// RUN:   < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY32,WEBASSEMBLY-WASI %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-wasi \
+// RUN:   < /dev/null \
+// RUN:   | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64,WEBASSEMBLY-WASI %s
 //
 // WEBASSEMBLY32:#define _ILP32 1
 // WEBASSEMBLY32-NOT:#define _LP64
-// WEBASSEMBLY32-NEXT:#define __ATOMIC_ACQUIRE 2
-// WEBASSEMBLY32-NEXT:#define __ATOMIC_ACQ_REL 4
-// WEBASSEMBLY32-NEXT:#define __ATOMIC_CONSUME 1
-// WEBASSEMBLY32-NEXT:#define __ATOMIC_RELAXED 0
-// WEBASSEMBLY32-NEXT:#define __ATOMIC_RELEASE 3
-// WEBASSEMBLY32-NEXT:#define __ATOMIC_SEQ_CST 5
-// WEBASSEMBLY32-NEXT:#define __BIGGEST_ALIGNMENT__ 16
-// WEBASSEMBLY32-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
-// WEBASSEMBLY32-NEXT:#define __CHAR16_TYPE__ unsigned short
-// WEBASSEMBLY32-NEXT:#define __CHAR32_TYPE__ unsigned int
-// WEBASSEMBLY32-NEXT:#define __CHAR_BIT__ 8
-// WEBASSEMBLY32-NOT:#define __CHAR_UNSIGNED__
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_CHAR_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_INT_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_LLONG_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_LONG_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __CONSTANT_CFSTRINGS__ 1
-// WEBASSEMBLY32-NEXT:#define __DBL_DECIMAL_DIG__ 17
-// WEBASSEMBLY32-NEXT:#define __DBL_DENORM_MIN__ 4.9406564584124654e-324
-// WEBASSEMBLY32-NEXT:#define __DBL_DIG__ 15
-// WEBASSEMBLY32-NEXT:#define __DBL_EPSILON__ 2.2204460492503131e-16
-// WEBASSEMBLY32-NEXT:#define __DBL_HAS_DENORM__ 1
-// WEBASSEMBLY32-NEXT:#define __DBL_HAS_INFINITY__ 1
-// WEBASSEMBLY32-NEXT:#define __DBL_HAS_QUIET_NAN__ 1
-// WEBASSEMBLY32-NEXT:#define __DBL_MANT_DIG__ 53
-// WEBASSEMBLY32-NEXT:#define __DBL_MAX_10_EXP__ 308
-// WEBASSEMBLY32-NEXT:#define __DBL_MAX_EXP__ 1024
-// WEBASSEMBLY32-NEXT:#define __DBL_MAX__ 1.7976931348623157e+308
-// WEBASSEMBLY32-NEXT:#define __DBL_MIN_10_EXP__ (-307)
-// WEBASSEMBLY32-NEXT:#define __DBL_MIN_EXP__ (-1021)
-// WEBASSEMBLY32-NEXT:#define __DBL_MIN__ 2.2250738585072014e-308
-// WEBASSEMBLY32-NEXT:#define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__
-// WEBASSEMBLY32-NOT:#define __ELF__
-// WEBASSEMBLY32-NEXT:#define __FINITE_MATH_ONLY__ 0
-// WEBASSEMBLY32:#define __FLT_DECIMAL_DIG__ 9
-// WEBASSEMBLY32-NEXT:#define __FLT_DENORM_MIN__ 1.40129846e-45F
-// WEBASSEMBLY32-NEXT:#define __FLT_DIG__ 6
-// WEBASSEMBLY32-NEXT:#define __FLT_EPSILON__ 1.19209290e-7F
-// WEBASSEMBLY32-NEXT:#define __FLT_EVAL_METHOD__ 0
-// WEBASSEMBLY32-NEXT:#define __FLT_HAS_DENORM__ 1
-// WEBASSEMBLY32-NEXT:#define __FLT_HAS_INFINITY__ 1
-// WEBASSEMBLY32-NEXT:#define __FLT_HAS_QUIET_NAN__ 1
-// WEBASSEMBLY32-NEXT:#define __FLT_MANT_DIG__ 24
-// WEBASSEMBLY32-NEXT:#define __FLT_MAX_10_EXP__ 38
-// WEBASSEMBLY32-NEXT:#define __FLT_MAX_EXP__ 128
-// WEBASSEMBLY32-NEXT:#define __FLT_MAX__ 3.40282347e+38F
-// WEBASSEMBLY32-NEXT:#define __FLT_MIN_10_EXP__ (-37)
-// WEBASSEMBLY32-NEXT:#define __FLT_MIN_EXP__ (-125)
-// WEBASSEMBLY32-NEXT:#define __FLT_MIN__ 1.17549435e-38F
-// WEBASSEMBLY32-NEXT:#define __FLT_RADIX__ 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_INT_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
-// WEBASSEMBLY32-NEXT:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
-// WEBASSEMBLY32-NEXT:#define __GNUC_MINOR__ {{.*}}
-// WEBASSEMBLY32-NEXT:#define __GNUC_PATCHLEVEL__ {{.*}}
-// WEBASSEMBLY32-NEXT:#define __GNUC_STDC_INLINE__ 1
-// WEBASSEMBLY32-NEXT:#define __GNUC__ {{.*}}
-// WEBASSEMBLY32-NEXT:#define __GXX_ABI_VERSION 1002
+// WEBASSEMBLY64-NOT:#define _ILP32
+// WEBASSEMBLY64:#define _LP64 1
+// WEBASSEMBLY-NEXT:#define __ATOMIC_ACQUIRE 2
+// WEBASSEMBLY-NEXT:#define __ATOMIC_ACQ_REL 4
+// WEBASSEMBLY-NEXT:#define __ATOMIC_CONSUME 1
+// WEBASSEMBLY-NEXT:#define __ATOMIC_RELAXED 0
+// WEBASSEMBLY-NEXT:#define __ATOMIC_RELEASE 3
+// WEBASSEMBLY-NEXT:#define __ATOMIC_SEQ_CST 5
+// WEBASSEMBLY-NEXT:#define __BIGGEST_ALIGNMENT__ 16
+// WEBASSEMBLY-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
+// WEBASSEMBLY-NEXT:#define __CHAR16_TYPE__ unsigned short
+// WEBASSEMBLY-NEXT:#define __CHAR32_TYPE__ unsigned int
+// WEBASSEMBLY-NEXT:#define __CHAR_BIT__ 8
+// WEBASSEMBLY-NOT:#define __CHAR_UNSIGNED__
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_CHAR_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_INT_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_LLONG_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_LONG_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __CONSTANT_CFSTRINGS__ 1
+// WEBASSEMBLY-NEXT:#define __DBL_DECIMAL_DIG__ 17
+// WEBASSEMBLY-NEXT:#define __DBL_DENORM_MIN__ 4.9406564584124654e-324
+// WEBASSEMBLY-NEXT:#define __DBL_DIG__ 15
+// WEBASSEMBLY-NEXT:#define __DBL_EPSILON__ 2.2204460492503131e-16
+// WEBASSEMBLY-NEXT:#define __DBL_HAS_DENORM__ 1
+// WEBASSEMBLY-NEXT:#define __DBL_HAS_INFINITY__ 1
+// WEBASSEMBLY-NEXT:#define __DBL_HAS_QUIET_NAN__ 1
+// WEBASSEMBLY-NEXT:#define __DBL_MANT_DIG__ 53
+// WEBASSEMBLY-NEXT:#define __DBL_MAX_10_EXP__ 308
+// WEBASSEMBLY-NEXT:#define __DBL_MAX_EXP__ 1024
+// WEBASSEMBLY-NEXT:#define __DBL_MAX__ 1.7976931348623157e+308
+// WEBASSEMBLY-NEXT:#define __DBL_MIN_10_EXP__ (-307)
+// WEBASSEMBLY-NEXT:#define __DBL_MIN_EXP__ (-1021)
+// WEBASSEMBLY-NEXT:#define __DBL_MIN__ 2.2250738585072014e-308
+// WEBASSEMBLY-NEXT:#define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__
+// WEBASSEMBLY-NOT:#define __ELF__
+// WEBASSEMBLY-NEXT:#define __FINITE_MATH_ONLY__ 0
+// WEBASSEMBLY-NEXT:#define __FLT16_DECIMAL_DIG__ 5
+// WEBASSEMBLY-NEXT:#define __FLT16_DENORM_MIN__ 5.9604644775390625e-8F16
+// WEBASSEMBLY-NEXT:#define __FLT16_DIG__ 3
+// WEBASSEMBLY-NEXT:#define __FLT16_EPSILON__ 9.765625e-4F16
+// WEBASSEMBLY-NEXT:#define __FLT16_HAS_DENORM__ 1
+// WEBASSEMBLY-NEXT:#define __FLT16_HAS_INFINITY__ 1
+// WEBASSEMBLY-NEXT:#define __FLT16_HAS_QUIET_NAN__ 1
+// WEBASSEMBLY-NEXT:#define __FLT16_MANT_DIG__ 11
+// WEBASSEMBLY-NEXT:#define __FLT16_MAX_10_EXP__ 4
+// WEBASSEMBLY-NEXT:#define __FLT16_MAX_EXP__ 15
+// WEBASSEMBLY-NEXT:#define __FLT16_MAX__ 6.5504e+4F16
+// WEBASSEMBLY-NEXT:#define __FLT16_MIN_10_EXP__ (-13)
+// WEBASSEMBLY-NEXT:#define __FLT16_MIN_EXP__ (-14)
+// WEBASSEMBLY-NEXT:#define __FLT16_MIN__ 6.103515625e-5F16
+// WEBASSEMBLY-NEXT:#define __FLT_DECIMAL_DIG__ 9
+// WEBASSEMBLY-NEXT:#define __FLT_DENORM_MIN__ 1.40129846e-45F
+// WEBASSEMBLY-NEXT:#define __FLT_DIG__ 6
+// WEBASSEMBLY-NEXT:#define __FLT_EPSILON__ 1.19209290e-7F
+// WEBASSEMBLY-NEXT:#define __FLT_EVAL_METHOD__ 0
+// WEBASSEMBLY-NEXT:#define __FLT_HAS_DENORM__ 1
+// WEBASSEMBLY-NEXT:#define __FLT_HAS_INFINITY__ 1
+// WEBASSEMBLY-NEXT:#define __FLT_HAS_QUIET_NAN__ 1
+// WEBASSEMBLY-NEXT:#define __FLT_MANT_DIG__ 24
+// WEBASSEMBLY-NEXT:#define __FLT_MAX_10_EXP__ 38
+// WEBASSEMBLY-NEXT:#define __FLT_MAX_EXP__ 128
+// WEBASSEMBLY-NEXT:#define __FLT_MAX__ 3.40282347e+38F
+// WEBASSEMBLY-NEXT:#define __FLT_MIN_10_EXP__ (-37)
+// WEBASSEMBLY-NEXT:#define __FLT_MIN_EXP__ (-125)
+// WEBASSEMBLY-NEXT:#define __FLT_MIN__ 1.17549435e-38F
+// WEBASSEMBLY-NEXT:#define __FLT_RADIX__ 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_INT_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// WEBASSEMBLY-NEXT:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
+// WEBASSEMBLY-NEXT:#define __GNUC_MINOR__ {{.*}}
+// WEBASSEMBLY-NEXT:#define __GNUC_PATCHLEVEL__ {{.*}}
+// WEBASSEMBLY-NEXT:#define __GNUC_STDC_INLINE__ 1
+// WEBASSEMBLY-NEXT:#define __GNUC__ {{.*}}
+// WEBASSEMBLY-NEXT:#define __GXX_ABI_VERSION 1002
 // WEBASSEMBLY32-NEXT:#define __ILP32__ 1
-// WEBASSEMBLY32-NEXT:#define __INT16_C_SUFFIX__
-// WEBASSEMBLY32-NEXT:#define __INT16_FMTd__ "hd"
-// WEBASSEMBLY32-NEXT:#define __INT16_FMTi__ "hi"
-// WEBASSEMBLY32-NEXT:#define __INT16_MAX__ 32767
-// WEBASSEMBLY32-NEXT:#define __INT16_TYPE__ short
-// WEBASSEMBLY32-NEXT:#define __INT32_C_SUFFIX__
-// WEBASSEMBLY32-NEXT:#define __INT32_FMTd__ "d"
-// WEBASSEMBLY32-NEXT:#define __INT32_FMTi__ "i"
-// WEBASSEMBLY32-NEXT:#define __INT32_MAX__ 2147483647
-// WEBASSEMBLY32-NEXT:#define __INT32_TYPE__ int
-// WEBASSEMBLY32-NEXT:#define __INT64_C_SUFFIX__ LL
-// WEBASSEMBLY32-NEXT:#define __INT64_FMTd__ "lld"
-// WEBASSEMBLY32-NEXT:#define __INT64_FMTi__ "lli"
-// WEBASSEMBLY32-NEXT:#define __INT64_MAX__ 9223372036854775807LL
-// WEBASSEMBLY32-NEXT:#define __INT64_TYPE__ long long int
-// WEBASSEMBLY32-NEXT:#define __INT8_C_SUFFIX__
-// WEBASSEMBLY32-NEXT:#define __INT8_FMTd__ "hhd"
-// WEBASSEMBLY32-NEXT:#define __INT8_FMTi__ "hhi"
-// WEBASSEMBLY32-NEXT:#define __INT8_MAX__ 127
-// WEBASSEMBLY32-NEXT:#define __INT8_TYPE__ signed char
-// WEBASSEMBLY32-NEXT:#define __INTMAX_C_SUFFIX__ LL
-// WEBASSEMBLY32-NEXT:#define __INTMAX_FMTd__ "lld"
-// WEBASSEMBLY32-NEXT:#define __INTMAX_FMTi__ "lli"
-// WEBASSEMBLY32-NEXT:#define __INTMAX_MAX__ 9223372036854775807LL
-// WEBASSEMBLY32-NEXT:#define __INTMAX_TYPE__ long long int
-// WEBASSEMBLY32-NEXT:#define __INTMAX_WIDTH__ 64
-// WEBASSEMBLY32-NEXT:#define __INTPTR_FMTd__ "ld"
-// WEBASSEMBLY32-NEXT:#define __INTPTR_FMTi__ "li"
+// WEBASSEMBLY64-NOT:#define __ILP32__
+// WEBASSEMBLY-NEXT:#define __INT16_C_SUFFIX__
+// WEBASSEMBLY-NEXT:#define __INT16_FMTd__ "hd"
+// WEBASSEMBLY-NEXT:#define __INT16_FMTi__ "hi"
+// WEBASSEMBLY-NEXT:#define __INT16_MAX__ 32767
+// WEBASSEMBLY-NEXT:#define __INT16_TYPE__ short
+// WEBASSEMBLY-NEXT:#define __INT32_C_SUFFIX__
+// WEBASSEMBLY-NEXT:#define __INT32_FMTd__ "d"
+// WEBASSEMBLY-NEXT:#define __INT32_FMTi__ "i"
+// WEBASSEMBLY-NEXT:#define __INT32_MAX__ 2147483647
+// WEBASSEMBLY-NEXT:#define __INT32_TYPE__ int
+// WEBASSEMBLY-NEXT:#define __INT64_C_SUFFIX__ LL
+// WEBASSEMBLY-NEXT:#define __INT64_FMTd__ "lld"
+// WEBASSEMBLY-NEXT:#define __INT64_FMTi__ "lli"
+// WEBASSEMBLY-NEXT:#define __INT64_MAX__ 9223372036854775807LL
+// WEBASSEMBLY-NEXT:#define __INT64_TYPE__ long long int
+// WEBASSEMBLY-NEXT:#define __INT8_C_SUFFIX__
+// WEBASSEMBLY-NEXT:#define __INT8_FMTd__ "hhd"
+// WEBASSEMBLY-NEXT:#define __INT8_FMTi__ "hhi"
+// WEBASSEMBLY-NEXT:#define __INT8_MAX__ 127
+// WEBASSEMBLY-NEXT:#define __INT8_TYPE__ signed char
+// WEBASSEMBLY-NEXT:#define __INTMAX_C_SUFFIX__ LL
+// WEBASSEMBLY-NEXT:#define __INTMAX_FMTd__ "lld"
+// WEBASSEMBLY-NEXT:#define __INTMAX_FMTi__ "lli"
+// WEBASSEMBLY-NEXT:#define __INTMAX_MAX__ 9223372036854775807LL
+// WEBASSEMBLY-NEXT:#define __INTMAX_TYPE__ long long int
+// WEBASSEMBLY-NEXT:#define __INTMAX_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __INTPTR_FMTd__ "ld"
+// WEBASSEMBLY-NEXT:#define __INTPTR_FMTi__ "li"
 // WEBASSEMBLY32-NEXT:#define __INTPTR_MAX__ 2147483647L
-// WEBASSEMBLY32-NEXT:#define __INTPTR_TYPE__ long int
+// WEBASSEMBLY64-NEXT:#define __INTPTR_MAX__ 9223372036854775807L
+// WEBASSEMBLY-NEXT:#define __INTPTR_TYPE__ long int
 // WEBASSEMBLY32-NEXT:#define __INTPTR_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __INT_FAST16_FMTd__ "hd"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST16_FMTi__ "hi"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST16_MAX__ 32767
-// WEBASSEMBLY32-NEXT:#define __INT_FAST16_TYPE__ short
-// WEBASSEMBLY32-NEXT:#define __INT_FAST32_FMTd__ "d"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST32_FMTi__ "i"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST32_MAX__ 2147483647
-// WEBASSEMBLY32-NEXT:#define __INT_FAST32_TYPE__ int
-// WEBASSEMBLY32-NEXT:#define __INT_FAST64_FMTd__ "lld"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST64_FMTi__ "lli"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST64_MAX__ 9223372036854775807LL
-// WEBASSEMBLY32-NEXT:#define __INT_FAST64_TYPE__ long long int
-// WEBASSEMBLY32-NEXT:#define __INT_FAST8_FMTd__ "hhd"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST8_FMTi__ "hhi"
-// WEBASSEMBLY32-NEXT:#define __INT_FAST8_MAX__ 127
-// WEBASSEMBLY32-NEXT:#define __INT_FAST8_TYPE__ signed char
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_FMTd__ "hd"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_FMTi__ "hi"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_MAX__ 32767
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST16_TYPE__ short
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_FMTd__ "d"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_FMTi__ "i"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_MAX__ 2147483647
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST32_TYPE__ int
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_FMTd__ "lld"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_FMTi__ "lli"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_MAX__ 9223372036854775807LL
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST64_TYPE__ long long int
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_FMTd__ "hhd"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_FMTi__ "hhi"
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_MAX__ 127
-// WEBASSEMBLY32-NEXT:#define __INT_LEAST8_TYPE__ signed char
-// WEBASSEMBLY32-NEXT:#define __INT_MAX__ 2147483647
-// WEBASSEMBLY32-NEXT:#define __LDBL_DECIMAL_DIG__ 36
-// WEBASSEMBLY32-NEXT:#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L
-// WEBASSEMBLY32-NEXT:#define __LDBL_DIG__ 33
-// WEBASSEMBLY32-NEXT:#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L
-// WEBASSEMBLY32-NEXT:#define __LDBL_HAS_DENORM__ 1
-// WEBASSEMBLY32-NEXT:#define __LDBL_HAS_INFINITY__ 1
-// WEBASSEMBLY32-NEXT:#define __LDBL_HAS_QUIET_NAN__ 1
-// WEBASSEMBLY32-NEXT:#define __LDBL_MANT_DIG__ 113
-// WEBASSEMBLY32-NEXT:#define __LDBL_MAX_10_EXP__ 4932
-// WEBASSEMBLY32-NEXT:#define __LDBL_MAX_EXP__ 16384
-// WEBASSEMBLY32-NEXT:#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L
-// WEBASSEMBLY32-NEXT:#define __LDBL_MIN_10_EXP__ (-4931)
-// WEBASSEMBLY32-NEXT:#define __LDBL_MIN_EXP__ (-16381)
-// WEBASSEMBLY32-NEXT:#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L
-// WEBASSEMBLY32-NEXT:#define __LITTLE_ENDIAN__ 1
-// WEBASSEMBLY32-NEXT:#define __LONG_LONG_MAX__ 9223372036854775807LL
+// WEBASSEMBLY64-NEXT:#define __INTPTR_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __INT_FAST16_FMTd__ "hd"
+// WEBASSEMBLY-NEXT:#define __INT_FAST16_FMTi__ "hi"
+// WEBASSEMBLY-NEXT:#define __INT_FAST16_MAX__ 32767
+// WEBASSEMBLY-NEXT:#define __INT_FAST16_TYPE__ short
+// WEBASSEMBLY-NEXT:#define __INT_FAST32_FMTd__ "d"
+// WEBASSEMBLY-NEXT:#define __INT_FAST32_FMTi__ "i"
+// WEBASSEMBLY-NEXT:#define __INT_FAST32_MAX__ 2147483647
+// WEBASSEMBLY-NEXT:#define __INT_FAST32_TYPE__ int
+// WEBASSEMBLY-NEXT:#define __INT_FAST64_FMTd__ "lld"
+// WEBASSEMBLY-NEXT:#define __INT_FAST64_FMTi__ "lli"
+// WEBASSEMBLY-NEXT:#define __INT_FAST64_MAX__ 9223372036854775807LL
+// WEBASSEMBLY-NEXT:#define __INT_FAST64_TYPE__ long long int
+// WEBASSEMBLY-NEXT:#define __INT_FAST8_FMTd__ "hhd"
+// WEBASSEMBLY-NEXT:#define __INT_FAST8_FMTi__ "hhi"
+// WEBASSEMBLY-NEXT:#define __INT_FAST8_MAX__ 127
+// WEBASSEMBLY-NEXT:#define __INT_FAST8_TYPE__ signed char
+// WEBASSEMBLY-NEXT:#define __INT_LEAST16_FMTd__ "hd"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST16_FMTi__ "hi"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST16_MAX__ 32767
+// WEBASSEMBLY-NEXT:#define __INT_LEAST16_TYPE__ short
+// WEBASSEMBLY-NEXT:#define __INT_LEAST32_FMTd__ "d"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST32_FMTi__ "i"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST32_MAX__ 2147483647
+// WEBASSEMBLY-NEXT:#define __INT_LEAST32_TYPE__ int
+// WEBASSEMBLY-NEXT:#define __INT_LEAST64_FMTd__ "lld"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST64_FMTi__ "lli"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST64_MAX__ 9223372036854775807LL
+// WEBASSEMBLY-NEXT:#define __INT_LEAST64_TYPE__ long long int
+// WEBASSEMBLY-NEXT:#define __INT_LEAST8_FMTd__ "hhd"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST8_FMTi__ "hhi"
+// WEBASSEMBLY-NEXT:#define __INT_LEAST8_MAX__ 127
+// WEBASSEMBLY-NEXT:#define __INT_LEAST8_TYPE__ signed char
+// WEBASSEMBLY-NEXT:#define __INT_MAX__ 2147483647
+// WEBASSEMBLY-NEXT:#define __LDBL_DECIMAL_DIG__ 36
+// WEBASSEMBLY-NEXT:#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L
+// WEBASSEMBLY-NEXT:#define __LDBL_DIG__ 33
+// WEBASSEMBLY-NEXT:#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L
+// WEBASSEMBLY-NEXT:#define __LDBL_HAS_DENORM__ 1
+// WEBASSEMBLY-NEXT:#define __LDBL_HAS_INFINITY__ 1
+// WEBASSEMBLY-NEXT:#define __LDBL_HAS_QUIET_NAN__ 1
+// WEBASSEMBLY-NEXT:#define __LDBL_MANT_DIG__ 113
+// WEBASSEMBLY-NEXT:#define __LDBL_MAX_10_EXP__ 4932
+// WEBASSEMBLY-NEXT:#define __LDBL_MAX_EXP__ 16384
+// WEBASSEMBLY-NEXT:#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L
+// WEBASSEMBLY-NEXT:#define __LDBL_MIN_10_EXP__ (-4931)
+// WEBASSEMBLY-NEXT:#define __LDBL_MIN_EXP__ (-16381)
+// WEBASSEMBLY-NEXT:#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L
+// WEBASSEMBLY-NEXT:#define __LITTLE_ENDIAN__ 1
+// WEBASSEMBLY-NEXT:#define __LONG_LONG_MAX__ 9223372036854775807LL
 // WEBASSEMBLY32-NEXT:#define __LONG_MAX__ 2147483647L
 // WEBASSEMBLY32-NOT:#define __LP64__
-// WEBASSEMBLY32-NEXT:#define __NO_INLINE__ 1
-// WEBASSEMBLY32-NEXT:#define __OBJC_BOOL_IS_BOOL 0
-// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
-// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
-// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
-// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
-// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
-// WEBASSEMBLY32-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
-// WEBASSEMBLY32-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
-// WEBASSEMBLY32-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
+// WEBASSEMBLY64-NEXT:#define __LONG_MAX__ 9223372036854775807L
+// WEBASSEMBLY64-NEXT:#define __LP64__ 1
+// WEBASSEMBLY-NEXT:#define __NO_INLINE__ 1
+// WEBASSEMBLY-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
+// WEBASSEMBLY-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
+// WEBASSEMBLY-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
+// WEBASSEMBLY-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
 // WEBASSEMBLY32-NEXT:#define __POINTER_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __PRAGMA_REDEFINE_EXTNAME 1
-// WEBASSEMBLY32-NEXT:#define __PTRDIFF_FMTd__ "ld"
-// WEBASSEMBLY32-NEXT:#define __PTRDIFF_FMTi__ "li"
+// WEBASSEMBLY64-NEXT:#define __POINTER_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __PRAGMA_REDEFINE_EXTNAME 1
+// WEBASSEMBLY-NEXT:#define __PTRDIFF_FMTd__ "ld"
+// WEBASSEMBLY-NEXT:#define __PTRDIFF_FMTi__ "li"
 // WEBASSEMBLY32-NEXT:#define __PTRDIFF_MAX__ 2147483647L
-// WEBASSEMBLY32-NEXT:#define __PTRDIFF_TYPE__ long int
+// WEBASSEMBLY64-NEXT:#define __PTRDIFF_MAX__ 9223372036854775807L
+// WEBASSEMBLY-NEXT:#define __PTRDIFF_TYPE__ long int
 // WEBASSEMBLY32-NEXT:#define __PTRDIFF_WIDTH__ 32
-// WEBASSEMBLY32-NOT:#define __REGISTER_PREFIX__
-// WEBASSEMBLY32-NEXT:#define __SCHAR_MAX__ 127
-// WEBASSEMBLY32-NEXT:#define __SHRT_MAX__ 32767
+// WEBASSEMBLY64-NEXT:#define __PTRDIFF_WIDTH__ 64
+// WEBASSEMBLY-NOT:#define __REGISTER_PREFIX__
+// WEBASSEMBLY-NEXT:#define __SCHAR_MAX__ 127
+// WEBASSEMBLY-NEXT:#define __SHRT_MAX__ 32767
 // WEBASSEMBLY32-NEXT:#define __SIG_ATOMIC_MAX__ 2147483647L
 // WEBASSEMBLY32-NEXT:#define __SIG_ATOMIC_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_DOUBLE__ 8
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_FLOAT__ 4
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_INT128__ 16
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_INT__ 4
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_LONG_DOUBLE__ 16
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_LONG_LONG__ 8
+// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_MAX__ 9223372036854775807L
+// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __SIZEOF_DOUBLE__ 8
+// WEBASSEMBLY-NEXT:#define __SIZEOF_FLOAT__ 4
+// WEBASSEMBLY-NEXT:#define __SIZEOF_INT128__ 16
+// WEBASSEMBLY-NEXT:#define __SIZEOF_INT__ 4
+// WEBASSEMBLY-NEXT:#define __SIZEOF_LONG_DOUBLE__ 16
+// WEBASSEMBLY-NEXT:#define __SIZEOF_LONG_LONG__ 8
 // WEBASSEMBLY32-NEXT:#define __SIZEOF_LONG__ 4
 // WEBASSEMBLY32-NEXT:#define __SIZEOF_POINTER__ 4
 // WEBASSEMBLY32-NEXT:#define __SIZEOF_PTRDIFF_T__ 4
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_SHORT__ 2
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_SIZE_T__ 4
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_WCHAR_T__ 4
-// WEBASSEMBLY32-NEXT:#define __SIZEOF_WINT_T__ 4
-// WEBASSEMBLY32-NEXT:#define __SIZE_FMTX__ "lX"
-// WEBASSEMBLY32-NEXT:#define __SIZE_FMTo__ "lo"
-// WEBASSEMBLY32-NEXT:#define __SIZE_FMTu__ "lu"
-// WEBASSEMBLY32-NEXT:#define __SIZE_FMTx__ "lx"
-// WEBASSEMBLY32-NEXT:#define __SIZE_MAX__ 4294967295UL
-// WEBASSEMBLY32-NEXT:#define __SIZE_TYPE__ long unsigned int
-// WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __STDC_HOSTED__ 0
-// WEBASSEMBLY32-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
-// WEBASSEMBLY32-NOT:#define __STDC_NO_ATOMICS__
-// WEBASSEMBLY32-NOT:#define __STDC_NO_COMPLEX__
-// WEBASSEMBLY32-NOT:#define __STDC_NO_VLA__
-// WEBASSEMBLY32-NOT:#define __STDC_NO_THREADS__
-// WEBASSEMBLY32-NEXT:#define __STDC_UTF_16__ 1
-// WEBASSEMBLY32-NEXT:#define __STDC_UTF_32__ 1
-// WEBASSEMBLY32-NEXT:#define __STDC_VERSION__ 201112L
-// WEBASSEMBLY32-NEXT:#define __STDC__ 1
-// WEBASSEMBLY32-NEXT:#define __UINT16_C_SUFFIX__
-// WEBASSEMBLY32-NEXT:#define __UINT16_FMTX__ "hX"
-// WEBASSEMBLY32-NEXT:#define __UINT16_FMTo__ "ho"
-// WEBASSEMBLY32-NEXT:#define __UINT16_FMTu__ "hu"
-// WEBASSEMBLY32-NEXT:#define __UINT16_FMTx__ "hx"
-// WEBASSEMBLY32-NEXT:#define __UINT16_MAX__ 65535
-// WEBASSEMBLY32-NEXT:#define __UINT16_TYPE__ unsigned short
-// WEBASSEMBLY32-NEXT:#define __UINT32_C_SUFFIX__ U
-// WEBASSEMBLY32-NEXT:#define __UINT32_FMTX__ "X"
-// WEBASSEMBLY32-NEXT:#define __UINT32_FMTo__ "o"
-// WEBASSEMBLY32-NEXT:#define __UINT32_FMTu__ "u"
-// WEBASSEMBLY32-NEXT:#define __UINT32_FMTx__ "x"
-// WEBASSEMBLY32-NEXT:#define __UINT32_MAX__ 4294967295U
-// WEBASSEMBLY32-NEXT:#define __UINT32_TYPE__ unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINT64_C_SUFFIX__ ULL
-// WEBASSEMBLY32-NEXT:#define __UINT64_FMTX__ "llX"
-// WEBASSEMBLY32-NEXT:#define __UINT64_FMTo__ "llo"
-// WEBASSEMBLY32-NEXT:#define __UINT64_FMTu__ "llu"
-// WEBASSEMBLY32-NEXT:#define __UINT64_FMTx__ "llx"
-// WEBASSEMBLY32-NEXT:#define __UINT64_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY32-NEXT:#define __UINT64_TYPE__ long long unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINT8_C_SUFFIX__
-// WEBASSEMBLY32-NEXT:#define __UINT8_FMTX__ "hhX"
-// WEBASSEMBLY32-NEXT:#define __UINT8_FMTo__ "hho"
-// WEBASSEMBLY32-NEXT:#define __UINT8_FMTu__ "hhu"
-// WEBASSEMBLY32-NEXT:#define __UINT8_FMTx__ "hhx"
-// WEBASSEMBLY32-NEXT:#define __UINT8_MAX__ 255
-// WEBASSEMBLY32-NEXT:#define __UINT8_TYPE__ unsigned char
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_C_SUFFIX__ ULL
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTX__ "llX"
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTo__ "llo"
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTu__ "llu"
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_FMTx__ "llx"
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_TYPE__ long long unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINTMAX_WIDTH__ 64
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTX__ "lX"
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTo__ "lo"
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTu__ "lu"
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_FMTx__ "lx"
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_MAX__ 4294967295UL
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_TYPE__ long unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINTPTR_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTX__ "hX"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTo__ "ho"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTu__ "hu"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_FMTx__ "hx"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_MAX__ 65535
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST16_TYPE__ unsigned short
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTX__ "X"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTo__ "o"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTu__ "u"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_FMTx__ "x"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_MAX__ 4294967295U
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST32_TYPE__ unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTX__ "llX"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTo__ "llo"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTu__ "llu"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_FMTx__ "llx"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST64_TYPE__ long long unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTX__ "hhX"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTo__ "hho"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTu__ "hhu"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_FMTx__ "hhx"
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_MAX__ 255
-// WEBASSEMBLY32-NEXT:#define __UINT_FAST8_TYPE__ unsigned char
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTX__ "hX"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTo__ "ho"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTu__ "hu"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_FMTx__ "hx"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_MAX__ 65535
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST16_TYPE__ unsigned short
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTX__ "X"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTo__ "o"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTu__ "u"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_FMTx__ "x"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_MAX__ 4294967295U
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST32_TYPE__ unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTX__ "llX"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTo__ "llo"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTu__ "llu"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_FMTx__ "llx"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST64_TYPE__ long long unsigned int
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTX__ "hhX"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTo__ "hho"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTu__ "hhu"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_FMTx__ "hhx"
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_MAX__ 255
-// WEBASSEMBLY32-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char
-// WEBASSEMBLY32-NEXT:#define __USER_LABEL_PREFIX__
-// WEBASSEMBLY32-NEXT:#define __VERSION__ "{{.*}}"
-// WEBASSEMBLY32-NEXT:#define __WCHAR_MAX__ 2147483647
-// WEBASSEMBLY32-NEXT:#define __WCHAR_TYPE__ int
-// WEBASSEMBLY32-NOT:#define __WCHAR_UNSIGNED__
-// WEBASSEMBLY32-NEXT:#define __WCHAR_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __WINT_MAX__ 2147483647
-// WEBASSEMBLY32-NEXT:#define __WINT_TYPE__ int
-// WEBASSEMBLY32-NOT:#define __WINT_UNSIGNED__
-// WEBASSEMBLY32-NEXT:#define __WINT_WIDTH__ 32
-// WEBASSEMBLY32-NEXT:#define __clang__ 1
-// WEBASSEMBLY32-NEXT:#define __clang_major__ {{.*}}
-// WEBASSEMBLY32-NEXT:#define __clang_minor__ {{.*}}
-// WEBASSEMBLY32-NEXT:#define __clang_patchlevel__ {{.*}}
-// WEBASSEMBLY32-NEXT:#define __clang_version__ "{{.*}}"
-// WEBASSEMBLY32-NEXT:#define __llvm__ 1
-// WEBASSEMBLY32-NOT:#define __wasm_simd128__
-// WEBASSEMBLY32-NOT:#define __wasm_simd256__
-// WEBASSEMBLY32-NOT:#define __wasm_simd512__
-// WEBASSEMBLY32-NOT:#define __unix
-// WEBASSEMBLY32-NOT:#define __unix__
-// WEBASSEMBLY32-NEXT:#define __wasm 1
-// WEBASSEMBLY32-NEXT:#define __wasm32 1
-// WEBASSEMBLY32-NEXT:#define __wasm32__ 1
-// WEBASSEMBLY32-NOT:#define __wasm64
-// WEBASSEMBLY32-NOT:#define __wasm64__
-// WEBASSEMBLY32-NEXT:#define __wasm__ 1
-//
-// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-unknown \
-// RUN:   < /dev/null \
-// RUN:   | FileCheck -match-full-lines -check-prefix=WEBASSEMBLY64 %s
-//
-// WEBASSEMBLY64-NOT:#define _ILP32
-// WEBASSEMBLY64:#define _LP64 1
-// WEBASSEMBLY64-NEXT:#define __ATOMIC_ACQUIRE 2
-// WEBASSEMBLY64-NEXT:#define __ATOMIC_ACQ_REL 4
-// WEBASSEMBLY64-NEXT:#define __ATOMIC_CONSUME 1
-// WEBASSEMBLY64-NEXT:#define __ATOMIC_RELAXED 0
-// WEBASSEMBLY64-NEXT:#define __ATOMIC_RELEASE 3
-// WEBASSEMBLY64-NEXT:#define __ATOMIC_SEQ_CST 5
-// WEBASSEMBLY64-NEXT:#define __BIGGEST_ALIGNMENT__ 16
-// WEBASSEMBLY64-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
-// WEBASSEMBLY64-NEXT:#define __CHAR16_TYPE__ unsigned short
-// WEBASSEMBLY64-NEXT:#define __CHAR32_TYPE__ unsigned int
-// WEBASSEMBLY64-NEXT:#define __CHAR_BIT__ 8
-// WEBASSEMBLY64-NOT:#define __CHAR_UNSIGNED__
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_BOOL_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_CHAR_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_INT_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_LLONG_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_LONG_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_POINTER_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_SHORT_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __CONSTANT_CFSTRINGS__ 1
-// WEBASSEMBLY64-NEXT:#define __DBL_DECIMAL_DIG__ 17
-// WEBASSEMBLY64-NEXT:#define __DBL_DENORM_MIN__ 4.9406564584124654e-324
-// WEBASSEMBLY64-NEXT:#define __DBL_DIG__ 15
-// WEBASSEMBLY64-NEXT:#define __DBL_EPSILON__ 2.2204460492503131e-16
-// WEBASSEMBLY64-NEXT:#define __DBL_HAS_DENORM__ 1
-// WEBASSEMBLY64-NEXT:#define __DBL_HAS_INFINITY__ 1
-// WEBASSEMBLY64-NEXT:#define __DBL_HAS_QUIET_NAN__ 1
-// WEBASSEMBLY64-NEXT:#define __DBL_MANT_DIG__ 53
-// WEBASSEMBLY64-NEXT:#define __DBL_MAX_10_EXP__ 308
-// WEBASSEMBLY64-NEXT:#define __DBL_MAX_EXP__ 1024
-// WEBASSEMBLY64-NEXT:#define __DBL_MAX__ 1.7976931348623157e+308
-// WEBASSEMBLY64-NEXT:#define __DBL_MIN_10_EXP__ (-307)
-// WEBASSEMBLY64-NEXT:#define __DBL_MIN_EXP__ (-1021)
-// WEBASSEMBLY64-NEXT:#define __DBL_MIN__ 2.2250738585072014e-308
-// WEBASSEMBLY64-NEXT:#define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__
-// WEBASSEMBLY64-NOT:#define __ELF__
-// WEBASSEMBLY64-NEXT:#define __FINITE_MATH_ONLY__ 0
-// WEBASSEMBLY64:#define __FLT_DECIMAL_DIG__ 9
-// WEBASSEMBLY64-NEXT:#define __FLT_DENORM_MIN__ 1.40129846e-45F
-// WEBASSEMBLY64-NEXT:#define __FLT_DIG__ 6
-// WEBASSEMBLY64-NEXT:#define __FLT_EPSILON__ 1.19209290e-7F
-// WEBASSEMBLY64-NEXT:#define __FLT_EVAL_METHOD__ 0
-// WEBASSEMBLY64-NEXT:#define __FLT_HAS_DENORM__ 1
-// WEBASSEMBLY64-NEXT:#define __FLT_HAS_INFINITY__ 1
-// WEBASSEMBLY64-NEXT:#define __FLT_HAS_QUIET_NAN__ 1
-// WEBASSEMBLY64-NEXT:#define __FLT_MANT_DIG__ 24
-// WEBASSEMBLY64-NEXT:#define __FLT_MAX_10_EXP__ 38
-// WEBASSEMBLY64-NEXT:#define __FLT_MAX_EXP__ 128
-// WEBASSEMBLY64-NEXT:#define __FLT_MAX__ 3.40282347e+38F
-// WEBASSEMBLY64-NEXT:#define __FLT_MIN_10_EXP__ (-37)
-// WEBASSEMBLY64-NEXT:#define __FLT_MIN_EXP__ (-125)
-// WEBASSEMBLY64-NEXT:#define __FLT_MIN__ 1.17549435e-38F
-// WEBASSEMBLY64-NEXT:#define __FLT_RADIX__ 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_INT_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
-// WEBASSEMBLY64-NEXT:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
-// WEBASSEMBLY64-NEXT:#define __GNUC_MINOR__ {{.*}}
-// WEBASSEMBLY64-NEXT:#define __GNUC_PATCHLEVEL__ {{.*}}
-// WEBASSEMBLY64-NEXT:#define __GNUC_STDC_INLINE__ 1
-// WEBASSEMBLY64-NEXT:#define __GNUC__ {{.}}
-// WEBASSEMBLY64-NEXT:#define __GXX_ABI_VERSION 1002
-// WEBASSEMBLY64-NOT:#define __ILP32__
-// WEBASSEMBLY64-NEXT:#define __INT16_C_SUFFIX__
-// WEBASSEMBLY64-NEXT:#define __INT16_FMTd__ "hd"
-// WEBASSEMBLY64-NEXT:#define __INT16_FMTi__ "hi"
-// WEBASSEMBLY64-NEXT:#define __INT16_MAX__ 32767
-// WEBASSEMBLY64-NEXT:#define __INT16_TYPE__ short
-// WEBASSEMBLY64-NEXT:#define __INT32_C_SUFFIX__
-// WEBASSEMBLY64-NEXT:#define __INT32_FMTd__ "d"
-// WEBASSEMBLY64-NEXT:#define __INT32_FMTi__ "i"
-// WEBASSEMBLY64-NEXT:#define __INT32_MAX__ 2147483647
-// WEBASSEMBLY64-NEXT:#define __INT32_TYPE__ int
-// WEBASSEMBLY64-NEXT:#define __INT64_C_SUFFIX__ LL
-// WEBASSEMBLY64-NEXT:#define __INT64_FMTd__ "lld"
-// WEBASSEMBLY64-NEXT:#define __INT64_FMTi__ "lli"
-// WEBASSEMBLY64-NEXT:#define __INT64_MAX__ 9223372036854775807LL
-// WEBASSEMBLY64-NEXT:#define __INT64_TYPE__ long long int
-// WEBASSEMBLY64-NEXT:#define __INT8_C_SUFFIX__
-// WEBASSEMBLY64-NEXT:#define __INT8_FMTd__ "hhd"
-// WEBASSEMBLY64-NEXT:#define __INT8_FMTi__ "hhi"
-// WEBASSEMBLY64-NEXT:#define __INT8_MAX__ 127
-// WEBASSEMBLY64-NEXT:#define __INT8_TYPE__ signed char
-// WEBASSEMBLY64-NEXT:#define __INTMAX_C_SUFFIX__ LL
-// WEBASSEMBLY64-NEXT:#define __INTMAX_FMTd__ "lld"
-// WEBASSEMBLY64-NEXT:#define __INTMAX_FMTi__ "lli"
-// WEBASSEMBLY64-NEXT:#define __INTMAX_MAX__ 9223372036854775807LL
-// WEBASSEMBLY64-NEXT:#define __INTMAX_TYPE__ long long int
-// WEBASSEMBLY64-NEXT:#define __INTMAX_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __INTPTR_FMTd__ "ld"
-// WEBASSEMBLY64-NEXT:#define __INTPTR_FMTi__ "li"
-// WEBASSEMBLY64-NEXT:#define __INTPTR_MAX__ 9223372036854775807L
-// WEBASSEMBLY64-NEXT:#define __INTPTR_TYPE__ long int
-// WEBASSEMBLY64-NEXT:#define __INTPTR_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __INT_FAST16_FMTd__ "hd"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST16_FMTi__ "hi"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST16_MAX__ 32767
-// WEBASSEMBLY64-NEXT:#define __INT_FAST16_TYPE__ short
-// WEBASSEMBLY64-NEXT:#define __INT_FAST32_FMTd__ "d"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST32_FMTi__ "i"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST32_MAX__ 2147483647
-// WEBASSEMBLY64-NEXT:#define __INT_FAST32_TYPE__ int
-// WEBASSEMBLY64-NEXT:#define __INT_FAST64_FMTd__ "lld"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST64_FMTi__ "lli"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST64_MAX__ 9223372036854775807LL
-// WEBASSEMBLY64-NEXT:#define __INT_FAST64_TYPE__ long long int
-// WEBASSEMBLY64-NEXT:#define __INT_FAST8_FMTd__ "hhd"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST8_FMTi__ "hhi"
-// WEBASSEMBLY64-NEXT:#define __INT_FAST8_MAX__ 127
-// WEBASSEMBLY64-NEXT:#define __INT_FAST8_TYPE__ signed char
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_FMTd__ "hd"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_FMTi__ "hi"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_MAX__ 32767
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST16_TYPE__ short
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_FMTd__ "d"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_FMTi__ "i"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_MAX__ 2147483647
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST32_TYPE__ int
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_FMTd__ "lld"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_FMTi__ "lli"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_MAX__ 9223372036854775807LL
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST64_TYPE__ long long int
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_FMTd__ "hhd"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_FMTi__ "hhi"
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_MAX__ 127
-// WEBASSEMBLY64-NEXT:#define __INT_LEAST8_TYPE__ signed char
-// WEBASSEMBLY64-NEXT:#define __INT_MAX__ 2147483647
-// WEBASSEMBLY64-NEXT:#define __LDBL_DECIMAL_DIG__ 36
-// WEBASSEMBLY64-NEXT:#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L
-// WEBASSEMBLY64-NEXT:#define __LDBL_DIG__ 33
-// WEBASSEMBLY64-NEXT:#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L
-// WEBASSEMBLY64-NEXT:#define __LDBL_HAS_DENORM__ 1
-// WEBASSEMBLY64-NEXT:#define __LDBL_HAS_INFINITY__ 1
-// WEBASSEMBLY64-NEXT:#define __LDBL_HAS_QUIET_NAN__ 1
-// WEBASSEMBLY64-NEXT:#define __LDBL_MANT_DIG__ 113
-// WEBASSEMBLY64-NEXT:#define __LDBL_MAX_10_EXP__ 4932
-// WEBASSEMBLY64-NEXT:#define __LDBL_MAX_EXP__ 16384
-// WEBASSEMBLY64-NEXT:#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L
-// WEBASSEMBLY64-NEXT:#define __LDBL_MIN_10_EXP__ (-4931)
-// WEBASSEMBLY64-NEXT:#define __LDBL_MIN_EXP__ (-16381)
-// WEBASSEMBLY64-NEXT:#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L
-// WEBASSEMBLY64-NEXT:#define __LITTLE_ENDIAN__ 1
-// WEBASSEMBLY64-NEXT:#define __LONG_LONG_MAX__ 9223372036854775807LL
-// WEBASSEMBLY64-NEXT:#define __LONG_MAX__ 9223372036854775807L
-// WEBASSEMBLY64-NEXT:#define __LP64__ 1
-// WEBASSEMBLY64-NEXT:#define __NO_INLINE__ 1
-// WEBASSEMBLY64-NEXT:#define __OBJC_BOOL_IS_BOOL 0
-// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
-// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
-// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
-// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
-// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
-// WEBASSEMBLY64-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
-// WEBASSEMBLY64-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
-// WEBASSEMBLY64-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
-// WEBASSEMBLY64-NEXT:#define __POINTER_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __PRAGMA_REDEFINE_EXTNAME 1
-// WEBASSEMBLY64-NEXT:#define __PTRDIFF_FMTd__ "ld"
-// WEBASSEMBLY64-NEXT:#define __PTRDIFF_FMTi__ "li"
-// WEBASSEMBLY64-NEXT:#define __PTRDIFF_MAX__ 9223372036854775807L
-// WEBASSEMBLY64-NEXT:#define __PTRDIFF_TYPE__ long int
-// WEBASSEMBLY64-NEXT:#define __PTRDIFF_WIDTH__ 64
-// WEBASSEMBLY64-NOT:#define __REGISTER_PREFIX__
-// WEBASSEMBLY64-NEXT:#define __SCHAR_MAX__ 127
-// WEBASSEMBLY64-NEXT:#define __SHRT_MAX__ 32767
-// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_MAX__ 9223372036854775807L
-// WEBASSEMBLY64-NEXT:#define __SIG_ATOMIC_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_DOUBLE__ 8
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_FLOAT__ 4
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_INT128__ 16
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_INT__ 4
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_LONG_DOUBLE__ 16
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_LONG_LONG__ 8
 // WEBASSEMBLY64-NEXT:#define __SIZEOF_LONG__ 8
 // WEBASSEMBLY64-NEXT:#define __SIZEOF_POINTER__ 8
 // WEBASSEMBLY64-NEXT:#define __SIZEOF_PTRDIFF_T__ 8
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_SHORT__ 2
+// WEBASSEMBLY-NEXT:#define __SIZEOF_SHORT__ 2
+// WEBASSEMBLY32-NEXT:#define __SIZEOF_SIZE_T__ 4
 // WEBASSEMBLY64-NEXT:#define __SIZEOF_SIZE_T__ 8
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_WCHAR_T__ 4
-// WEBASSEMBLY64-NEXT:#define __SIZEOF_WINT_T__ 4
-// WEBASSEMBLY64-NEXT:#define __SIZE_FMTX__ "lX"
-// WEBASSEMBLY64-NEXT:#define __SIZE_FMTo__ "lo"
-// WEBASSEMBLY64-NEXT:#define __SIZE_FMTu__ "lu"
-// WEBASSEMBLY64-NEXT:#define __SIZE_FMTx__ "lx"
+// WEBASSEMBLY-NEXT:#define __SIZEOF_WCHAR_T__ 4
+// WEBASSEMBLY-NEXT:#define __SIZEOF_WINT_T__ 4
+// WEBASSEMBLY-NEXT:#define __SIZE_FMTX__ "lX"
+// WEBASSEMBLY-NEXT:#define __SIZE_FMTo__ "lo"
+// WEBASSEMBLY-NEXT:#define __SIZE_FMTu__ "lu"
+// WEBASSEMBLY-NEXT:#define __SIZE_FMTx__ "lx"
+// WEBASSEMBLY32-NEXT:#define __SIZE_MAX__ 4294967295UL
 // WEBASSEMBLY64-NEXT:#define __SIZE_MAX__ 18446744073709551615UL
-// WEBASSEMBLY64-NEXT:#define __SIZE_TYPE__ long unsigned int
+// WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
+// WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __STDC_HOSTED__ 0
-// WEBASSEMBLY64-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
-// WEBASSEMBLY64-NOT:#define __STDC_NO_ATOMICS__
-// WEBASSEMBLY64-NOT:#define __STDC_NO_COMPLEX__
-// WEBASSEMBLY64-NOT:#define __STDC_NO_VLA__
-// WEBASSEMBLY64-NOT:#define __STDC_NO_THREADS__
-// WEBASSEMBLY64-NEXT:#define __STDC_UTF_16__ 1
-// WEBASSEMBLY64-NEXT:#define __STDC_UTF_32__ 1
-// WEBASSEMBLY64-NEXT:#define __STDC_VERSION__ 201112L
-// WEBASSEMBLY64-NEXT:#define __STDC__ 1
-// WEBASSEMBLY64-NEXT:#define __UINT16_C_SUFFIX__
-// WEBASSEMBLY64-NEXT:#define __UINT16_FMTX__ "hX"
-// WEBASSEMBLY64-NEXT:#define __UINT16_FMTo__ "ho"
-// WEBASSEMBLY64-NEXT:#define __UINT16_FMTu__ "hu"
-// WEBASSEMBLY64-NEXT:#define __UINT16_FMTx__ "hx"
-// WEBASSEMBLY64-NEXT:#define __UINT16_MAX__ 65535
-// WEBASSEMBLY64-NEXT:#define __UINT16_TYPE__ unsigned short
-// WEBASSEMBLY64-NEXT:#define __UINT32_C_SUFFIX__ U
-// WEBASSEMBLY64-NEXT:#define __UINT32_FMTX__ "X"
-// WEBASSEMBLY64-NEXT:#define __UINT32_FMTo__ "o"
-// WEBASSEMBLY64-NEXT:#define __UINT32_FMTu__ "u"
-// WEBASSEMBLY64-NEXT:#define __UINT32_FMTx__ "x"
-// WEBASSEMBLY64-NEXT:#define __UINT32_MAX__ 4294967295U
-// WEBASSEMBLY64-NEXT:#define __UINT32_TYPE__ unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINT64_C_SUFFIX__ ULL
-// WEBASSEMBLY64-NEXT:#define __UINT64_FMTX__ "llX"
-// WEBASSEMBLY64-NEXT:#define __UINT64_FMTo__ "llo"
-// WEBASSEMBLY64-NEXT:#define __UINT64_FMTu__ "llu"
-// WEBASSEMBLY64-NEXT:#define __UINT64_FMTx__ "llx"
-// WEBASSEMBLY64-NEXT:#define __UINT64_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY64-NEXT:#define __UINT64_TYPE__ long long unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINT8_C_SUFFIX__
-// WEBASSEMBLY64-NEXT:#define __UINT8_FMTX__ "hhX"
-// WEBASSEMBLY64-NEXT:#define __UINT8_FMTo__ "hho"
-// WEBASSEMBLY64-NEXT:#define __UINT8_FMTu__ "hhu"
-// WEBASSEMBLY64-NEXT:#define __UINT8_FMTx__ "hhx"
-// WEBASSEMBLY64-NEXT:#define __UINT8_MAX__ 255
-// WEBASSEMBLY64-NEXT:#define __UINT8_TYPE__ unsigned char
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_C_SUFFIX__ ULL
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTX__ "llX"
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTo__ "llo"
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTu__ "llu"
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_FMTx__ "llx"
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_TYPE__ long long unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINTMAX_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTX__ "lX"
-// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTo__ "lo"
-// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTu__ "lu"
-// WEBASSEMBLY64-NEXT:#define __UINTPTR_FMTx__ "lx"
+// WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
+// WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
+// WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__
+// WEBASSEMBLY-NOT:#define __STDC_NO_COMPLEX__
+// WEBASSEMBLY-NOT:#define __STDC_NO_VLA__
+// WEBASSEMBLY-NOT:#define __STDC_NO_THREADS__
+// WEBASSEMBLY-NEXT:#define __STDC_UTF_16__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_UTF_32__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_VERSION__ 201112L
+// WEBASSEMBLY-NEXT:#define __STDC__ 1
+// WEBASSEMBLY-NEXT:#define __UINT16_C_SUFFIX__
+// WEBASSEMBLY-NEXT:#define __UINT16_FMTX__ "hX"
+// WEBASSEMBLY-NEXT:#define __UINT16_FMTo__ "ho"
+// WEBASSEMBLY-NEXT:#define __UINT16_FMTu__ "hu"
+// WEBASSEMBLY-NEXT:#define __UINT16_FMTx__ "hx"
+// WEBASSEMBLY-NEXT:#define __UINT16_MAX__ 65535
+// WEBASSEMBLY-NEXT:#define __UINT16_TYPE__ unsigned short
+// WEBASSEMBLY-NEXT:#define __UINT32_C_SUFFIX__ U
+// WEBASSEMBLY-NEXT:#define __UINT32_FMTX__ "X"
+// WEBASSEMBLY-NEXT:#define __UINT32_FMTo__ "o"
+// WEBASSEMBLY-NEXT:#define __UINT32_FMTu__ "u"
+// WEBASSEMBLY-NEXT:#define __UINT32_FMTx__ "x"
+// WEBASSEMBLY-NEXT:#define __UINT32_MAX__ 4294967295U
+// WEBASSEMBLY-NEXT:#define __UINT32_TYPE__ unsigned int
+// WEBASSEMBLY-NEXT:#define __UINT64_C_SUFFIX__ ULL
+// WEBASSEMBLY-NEXT:#define __UINT64_FMTX__ "llX"
+// WEBASSEMBLY-NEXT:#define __UINT64_FMTo__ "llo"
+// WEBASSEMBLY-NEXT:#define __UINT64_FMTu__ "llu"
+// WEBASSEMBLY-NEXT:#define __UINT64_FMTx__ "llx"
+// WEBASSEMBLY-NEXT:#define __UINT64_MAX__ 18446744073709551615ULL
+// WEBASSEMBLY-NEXT:#define __UINT64_TYPE__ long long unsigned int
+// WEBASSEMBLY-NEXT:#define __UINT8_C_SUFFIX__
+// WEBASSEMBLY-NEXT:#define __UINT8_FMTX__ "hhX"
+// WEBASSEMBLY-NEXT:#define __UINT8_FMTo__ "hho"
+// WEBASSEMBLY-NEXT:#define __UINT8_FMTu__ "hhu"
+// WEBASSEMBLY-NEXT:#define __UINT8_FMTx__ "hhx"
+// WEBASSEMBLY-NEXT:#define __UINT8_MAX__ 255
+// WEBASSEMBLY-NEXT:#define __UINT8_TYPE__ unsigned char
+// WEBASSEMBLY-NEXT:#define __UINTMAX_C_SUFFIX__ ULL
+// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTX__ "llX"
+// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTo__ "llo"
+// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTu__ "llu"
+// WEBASSEMBLY-NEXT:#define __UINTMAX_FMTx__ "llx"
+// WEBASSEMBLY-NEXT:#define __UINTMAX_MAX__ 18446744073709551615ULL
+// WEBASSEMBLY-NEXT:#define __UINTMAX_TYPE__ long long unsigned int
+// WEBASSEMBLY-NEXT:#define __UINTMAX_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTX__ "lX"
+// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTo__ "lo"
+// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTu__ "lu"
+// WEBASSEMBLY-NEXT:#define __UINTPTR_FMTx__ "lx"
+// WEBASSEMBLY32-NEXT:#define __UINTPTR_MAX__ 4294967295UL
 // WEBASSEMBLY64-NEXT:#define __UINTPTR_MAX__ 18446744073709551615UL
-// WEBASSEMBLY64-NEXT:#define __UINTPTR_TYPE__ long unsigned int
+// WEBASSEMBLY-NEXT:#define __UINTPTR_TYPE__ long unsigned int
+// WEBASSEMBLY32-NEXT:#define __UINTPTR_WIDTH__ 32
 // WEBASSEMBLY64-NEXT:#define __UINTPTR_WIDTH__ 64
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTX__ "hX"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTo__ "ho"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTu__ "hu"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_FMTx__ "hx"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_MAX__ 65535
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST16_TYPE__ unsigned short
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTX__ "X"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTo__ "o"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTu__ "u"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_FMTx__ "x"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_MAX__ 4294967295U
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST32_TYPE__ unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTX__ "llX"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTo__ "llo"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTu__ "llu"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_FMTx__ "llx"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST64_TYPE__ long long unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTX__ "hhX"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTo__ "hho"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTu__ "hhu"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_FMTx__ "hhx"
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_MAX__ 255
-// WEBASSEMBLY64-NEXT:#define __UINT_FAST8_TYPE__ unsigned char
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTX__ "hX"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTo__ "ho"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTu__ "hu"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_FMTx__ "hx"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_MAX__ 65535
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST16_TYPE__ unsigned short
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTX__ "X"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTo__ "o"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTu__ "u"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_FMTx__ "x"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_MAX__ 4294967295U
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST32_TYPE__ unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTX__ "llX"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTo__ "llo"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTu__ "llu"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_FMTx__ "llx"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST64_TYPE__ long long unsigned int
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTX__ "hhX"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTo__ "hho"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTu__ "hhu"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_FMTx__ "hhx"
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_MAX__ 255
-// WEBASSEMBLY64-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char
-// WEBASSEMBLY64-NEXT:#define __USER_LABEL_PREFIX__
-// WEBASSEMBLY64-NEXT:#define __VERSION__ "{{.*}}"
-// WEBASSEMBLY64-NEXT:#define __WCHAR_MAX__ 2147483647
-// WEBASSEMBLY64-NEXT:#define __WCHAR_TYPE__ int
-// WEBASSEMBLY64-NOT:#define __WCHAR_UNSIGNED__
-// WEBASSEMBLY64-NEXT:#define __WCHAR_WIDTH__ 32
-// WEBASSEMBLY64-NEXT:#define __WINT_MAX__ 2147483647
-// WEBASSEMBLY64-NEXT:#define __WINT_TYPE__ int
-// WEBASSEMBLY64-NOT:#define __WINT_UNSIGNED__
-// WEBASSEMBLY64-NEXT:#define __WINT_WIDTH__ 32
-// WEBASSEMBLY64-NEXT:#define __clang__ 1
-// WEBASSEMBLY64-NEXT:#define __clang_major__ {{.*}}
-// WEBASSEMBLY64-NEXT:#define __clang_minor__ {{.*}}
-// WEBASSEMBLY64-NEXT:#define __clang_patchlevel__ {{.*}}
-// WEBASSEMBLY64-NEXT:#define __clang_version__ "{{.*}}"
-// WEBASSEMBLY64-NEXT:#define __llvm__ 1
-// WEBASSEMBLY64-NOT:#define __wasm_simd128__
-// WEBASSEMBLY64-NOT:#define __wasm_simd256__
-// WEBASSEMBLY64-NOT:#define __wasm_simd512__
-// WEBASSEMBLY64-NOT:#define __unix
-// WEBASSEMBLY64-NOT:#define __unix__
-// WEBASSEMBLY64-NEXT:#define __wasm 1
+// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTX__ "hX"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTo__ "ho"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTu__ "hu"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST16_FMTx__ "hx"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST16_MAX__ 65535
+// WEBASSEMBLY-NEXT:#define __UINT_FAST16_TYPE__ unsigned short
+// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTX__ "X"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTo__ "o"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTu__ "u"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST32_FMTx__ "x"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST32_MAX__ 4294967295U
+// WEBASSEMBLY-NEXT:#define __UINT_FAST32_TYPE__ unsigned int
+// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTX__ "llX"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTo__ "llo"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTu__ "llu"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST64_FMTx__ "llx"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST64_MAX__ 18446744073709551615ULL
+// WEBASSEMBLY-NEXT:#define __UINT_FAST64_TYPE__ long long unsigned int
+// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTX__ "hhX"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTo__ "hho"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTu__ "hhu"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST8_FMTx__ "hhx"
+// WEBASSEMBLY-NEXT:#define __UINT_FAST8_MAX__ 255
+// WEBASSEMBLY-NEXT:#define __UINT_FAST8_TYPE__ unsigned char
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTX__ "hX"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTo__ "ho"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTu__ "hu"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_FMTx__ "hx"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_MAX__ 65535
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST16_TYPE__ unsigned short
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTX__ "X"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTo__ "o"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTu__ "u"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_FMTx__ "x"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_MAX__ 4294967295U
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST32_TYPE__ unsigned int
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTX__ "llX"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTo__ "llo"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTu__ "llu"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_FMTx__ "llx"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST64_TYPE__ long long unsigned int
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTX__ "hhX"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTo__ "hho"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTu__ "hhu"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_FMTx__ "hhx"
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_MAX__ 255
+// WEBASSEMBLY-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char
+// WEBASSEMBLY-NEXT:#define __USER_LABEL_PREFIX__
+// WEBASSEMBLY-NEXT:#define __VERSION__ "{{.*}}"
+// WEBASSEMBLY-NEXT:#define __WCHAR_MAX__ 2147483647
+// WEBASSEMBLY-NEXT:#define __WCHAR_TYPE__ int
+// WEBASSEMBLY-NOT:#define __WCHAR_UNSIGNED__
+// WEBASSEMBLY-NEXT:#define __WCHAR_WIDTH__ 32
+// WEBASSEMBLY-NEXT:#define __WINT_MAX__ 2147483647
+// WEBASSEMBLY-NEXT:#define __WINT_TYPE__ int
+// WEBASSEMBLY-NOT:#define __WINT_UNSIGNED__
+// WEBASSEMBLY-NEXT:#define __WINT_WIDTH__ 32
+// WEBASSEMBLY-NEXT:#define __clang__ 1
+// WEBASSEMBLY-NEXT:#define __clang_major__ {{.*}}
+// WEBASSEMBLY-NEXT:#define __clang_minor__ {{.*}}
+// WEBASSEMBLY-NEXT:#define __clang_patchlevel__ {{.*}}
+// WEBASSEMBLY-NEXT:#define __clang_version__ "{{.*}}"
+// WEBASSEMBLY-NEXT:#define __llvm__ 1
+// WEBASSEMBLY-NOT:#define __unix
+// WEBASSEMBLY-NOT:#define __unix__
+// WEBASSEMBLY-WASI-NEXT:#define __wasi__ 1
+// WEBASSEMBLY-NOT:#define __wasm_simd128__
+// WEBASSEMBLY-NOT:#define __wasm_simd256__
+// WEBASSEMBLY-NOT:#define __wasm_simd512__
+// WEBASSEMBLY-NEXT:#define __wasm 1
+// WEBASSEMBLY32-NEXT:#define __wasm32 1
 // WEBASSEMBLY64-NOT:#define __wasm32
+// WEBASSEMBLY32-NEXT:#define __wasm32__ 1
 // WEBASSEMBLY64-NOT:#define __wasm32__
+// WEBASSEMBLY32-NOT:#define __wasm64__
+// WEBASSEMBLY32-NOT:#define __wasm64
 // WEBASSEMBLY64-NEXT:#define __wasm64 1
 // WEBASSEMBLY64-NEXT:#define __wasm64__ 1
-// WEBASSEMBLY64-NEXT:#define __wasm__ 1
+// WEBASSEMBLY-NEXT:#define __wasm__ 1
 
 // RUN: %clang_cc1 -E -dM -ffreestanding -triple i686-windows-cygnus < /dev/null | FileCheck -match-full-lines -check-prefix CYGWIN-X32 %s
 // CYGWIN-X32: #define __USER_LABEL_PREFIX__ _
diff --git a/clang/test/Sema/Float16.c b/clang/test/Sema/Float16.c
new file mode 100644
index 00000000000000..bdfb01702c3715
--- /dev/null
+++ b/clang/test/Sema/Float16.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+
+#ifdef HAVE
+// expected-no-diagnostics
+#else
+// expected-error@+2{{_Float16 is not supported on this target}}
+#endif // HAVE
+_Float16 f;
diff --git a/clang/test/Sema/attr-msp430.c b/clang/test/Sema/attr-msp430.c
index 26b2d8fcfd6533..4b38d09b86757c 100644
--- a/clang/test/Sema/attr-msp430.c
+++ b/clang/test/Sema/attr-msp430.c
@@ -1,6 +1,13 @@
 // RUN: %clang_cc1 -triple msp430-unknown-unknown -fsyntax-only -verify %s
 
+__attribute__((interrupt(1))) int t; // expected-warning {{'interrupt' attribute only applies to functions}}
+
 int i;
-void f(void) __attribute__((interrupt(i))); /* expected-error {{'interrupt' attribute requires an integer constant}} */
+__attribute__((interrupt(i))) void f(void); // expected-error {{'interrupt' attribute requires an integer constant}}
+__attribute__((interrupt(1, 2))) void f2(void); // expected-error {{'interrupt' attribute takes one argument}}
+__attribute__((interrupt(1))) int f3(void); // expected-warning {{MSP430 'interrupt' attribute only applies to functions that have a 'void' return type}}
+__attribute__((interrupt(1))) void f4(int a); // expected-warning {{MSP430 'interrupt' attribute only applies to functions that have no parameters}}
+__attribute__((interrupt(64))) void f5(void); // expected-error {{'interrupt' attribute parameter 64 is out of bounds}}
 
-void f2(void) __attribute__((interrupt(12)));
+__attribute__((interrupt(0))) void f6(void);
+__attribute__((interrupt(63))) void f7(void);
diff --git a/clang/test/SemaCXX/Float16.cpp b/clang/test/SemaCXX/Float16.cpp
new file mode 100644
index 00000000000000..f27c3839854e10
--- /dev/null
+++ b/clang/test/SemaCXX/Float16.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+
+#ifdef HAVE
+// expected-no-diagnostics
+#endif // HAVE
+
+#ifndef HAVE
+// expected-error@+2{{_Float16 is not supported on this target}}
+#endif // !HAVE
+_Float16 f;
+
+#ifndef HAVE
+// expected-error@+2{{invalid suffix 'F16' on floating constant}}
+#endif // !HAVE
+const auto g = 1.1F16;
diff --git a/clang/test/SemaCXX/PR40395.cpp b/clang/test/SemaCXX/PR40395.cpp
new file mode 100644
index 00000000000000..469c86d56209ca
--- /dev/null
+++ b/clang/test/SemaCXX/PR40395.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -std=c++17 -fms-extensions -triple=x86_64-pc-win32 -verify %s
+// expected-no-diagnostics
+
+// PR40395 - ConstantExpr shouldn't cause the template object to infinitely
+// expand.
+struct _GUID {};
+struct __declspec(uuid("{AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA}")) B {};
+
+template <const _GUID* piid>
+struct A {
+  virtual void baz() { A<piid>(); }
+};
+
+void f() {
+  A<&__uuidof(B)>();
+}
diff --git a/clang/test/SemaCXX/cxx1z-decomposition.cpp b/clang/test/SemaCXX/cxx1z-decomposition.cpp
index 3c9b181f1c8f0e..8b5fd6809bb4ce 100644
--- a/clang/test/SemaCXX/cxx1z-decomposition.cpp
+++ b/clang/test/SemaCXX/cxx1z-decomposition.cpp
@@ -81,4 +81,21 @@ struct PR37352 {
   void f() { static auto [a] = *this; } // expected-error {{cannot be declared 'static'}}
 };
 
+namespace instantiate_template {
+
+template <typename T1, typename T2>
+struct pair {
+  T1 a;
+  T2 b;
+};
+
+const pair<int, int> &f1();
+
+int f2() {
+  const auto &[a, b] = f1();
+  return a + b;
+}
+
+} // namespace instantiate_template
+
 // FIXME: by-value array copies
diff --git a/clang/test/SemaObjC/call-unavailable-init-in-self.m b/clang/test/SemaObjC/call-unavailable-init-in-self.m
index fa6f670cc997f4..48fc2326af457e 100644
--- a/clang/test/SemaObjC/call-unavailable-init-in-self.m
+++ b/clang/test/SemaObjC/call-unavailable-init-in-self.m
@@ -5,13 +5,24 @@ @interface NSObject
 + (instancetype)new;
 + (instancetype)alloc;
 
+- (void)declaredInSuper;
+
+@end
+
+@interface NSObject (Category)
+
+- (void)declaredInSuperCategory;
+
 @end
 
 @interface Sub: NSObject
 
 - (instancetype)init __attribute__((unavailable)); // expected-note 4 {{'init' has been explicitly marked unavailable here}}
 
-- (void)notImplemented __attribute__((unavailable)); // expected-note {{'notImplemented' has been explicitly marked unavailable here}}
+- (void)notImplemented __attribute__((unavailable));
+
+- (void)declaredInSuper __attribute__((unavailable));
+- (void)declaredInSuperCategory __attribute__((unavailable));
 
 @end
 
@@ -34,7 +45,14 @@ - (instancetype) init {
 }
 
 - (void)reportUseOfUnimplemented {
-  [self notImplemented]; // expected-error {{'notImplemented' is unavailable}}
+  [self notImplemented];
+}
+
+- (void)allowSuperCallUsingSelf {
+  [self declaredInSuper];
+  [[Sub alloc] declaredInSuper];
+  [self declaredInSuperCategory];
+  [[Sub alloc] declaredInSuperCategory];
 }
 
 @end
diff --git a/clang/test/SemaObjC/enum-fixed-type.m b/clang/test/SemaObjC/enum-fixed-type.m
index 88c895a33982d2..b4135a555a23ec 100644
--- a/clang/test/SemaObjC/enum-fixed-type.m
+++ b/clang/test/SemaObjC/enum-fixed-type.m
@@ -1,9 +1,11 @@
 // RUN: %clang_cc1 -fsyntax-only -pedantic -verify %s
 // RUN: %clang_cc1 -fsyntax-only -verify -xc %s
 
+#ifdef __OBJC__
 #if !__has_feature(objc_fixed_enum)
 #  error Enumerations with a fixed underlying type are not supported
 #endif
+#endif
 
 #if !__has_extension(cxx_fixed_enum)
 #  error Enumerations with a fixed underlying type are not supported
diff --git a/clang/test/SemaObjC/infer-availability-from-init.m b/clang/test/SemaObjC/infer-availability-from-init.m
index f9996ec70877eb..7aa1e53c09109e 100644
--- a/clang/test/SemaObjC/infer-availability-from-init.m
+++ b/clang/test/SemaObjC/infer-availability-from-init.m
@@ -47,12 +47,12 @@ void usenotmyobject() {
 }
 
 @interface FromSelf : NSObject
--(instancetype)init __attribute__((unavailable)); // expected-note {{'init' has been explicitly marked unavailable here}}
+-(instancetype)init __attribute__((unavailable));
 +(FromSelf*)another_one;
 @end
 
 @implementation FromSelf
 +(FromSelf*)another_one {
-  [self new]; // expected-error{{'new' is unavailable}}
+  [self new];
 }
 @end
diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl
index a587f1db99af20..d976cfb3a4354c 100644
--- a/clang/test/SemaOpenCL/extension-version.cl
+++ b/clang/test/SemaOpenCL/extension-version.cl
@@ -2,12 +2,14 @@
 // RUN: %clang_cc1 -x cl -cl-std=CL1.1 %s -verify -triple spir-unknown-unknown
 // RUN: %clang_cc1 -x cl -cl-std=CL1.2 %s -verify -triple spir-unknown-unknown
 // RUN: %clang_cc1 -x cl -cl-std=CL2.0 %s -verify -triple spir-unknown-unknown
+// RUN: %clang_cc1 -x cl -cl-std=c++ %s -verify -triple spir-unknown-unknown
 // RUN: %clang_cc1 -x cl -cl-std=CL %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
 // RUN: %clang_cc1 -x cl -cl-std=CL1.1 %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
 // RUN: %clang_cc1 -x cl -cl-std=CL1.2 %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
 // RUN: %clang_cc1 -x cl -cl-std=CL2.0 %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
+// RUN: %clang_cc1 -x cl -cl-std=c++ %s -verify -triple spir-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
 
-#if __OPENCL_C_VERSION__ >= 200 && ! defined TEST_CORE_FEATURES
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) && !defined(TEST_CORE_FEATURES)
 // expected-no-diagnostics
 #endif
 
@@ -47,44 +49,44 @@
 #ifndef cl_khr_byte_addressable_store
 #error "Missing cl_khr_byte_addressable_store define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_byte_addressable_store: enable
-#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_byte_addressable_store' is core feature or supported optional core feature - ignoring}}
 #endif
 
 #ifndef cl_khr_global_int32_base_atomics
 #error "Missing cl_khr_global_int32_base_atomics define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics: enable
-#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_global_int32_base_atomics' is core feature or supported optional core feature - ignoring}}
 #endif
 
 #ifndef cl_khr_global_int32_extended_atomics
 #error "Missing cl_khr_global_int32_extended_atomics define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics: enable
-#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_global_int32_extended_atomics' is core feature or supported optional core feature - ignoring}}
 #endif
 
 #ifndef cl_khr_local_int32_base_atomics
 #error "Missing cl_khr_local_int32_base_atomics define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics: enable
-#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_local_int32_base_atomics' is core feature or supported optional core feature - ignoring}}
 #endif
 
 #ifndef cl_khr_local_int32_extended_atomics
 #error "Missing cl_khr_local_int32_extended_atomics define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics: enable
-#if (__OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_local_int32_extended_atomics' is core feature or supported optional core feature - ignoring}}
 #endif
 
-#if (__OPENCL_C_VERSION__ < 110)
+#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 110)
 // Deprecated abvoe 1.0
 #ifndef cl_khr_select_fprounding_mode
 #error "Missing cl_khr_select_fp_rounding_mode define"
@@ -97,8 +99,8 @@
 #ifndef cl_khr_fp64
 #error "Missing cl_khr_fp64 define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_fp64: enable
-#if (__OPENCL_C_VERSION__ >= 120) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_fp64' is core feature or supported optional core feature - ignoring}}
 #endif
 
@@ -106,131 +108,129 @@
 #ifndef cl_khr_3d_image_writes
 #error "Missing cl_khr_3d_image_writes define"
 #endif
-#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable
-#if (__OPENCL_C_VERSION__ >= 200) && defined TEST_CORE_FEATURES
+#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) && defined TEST_CORE_FEATURES
 // expected-warning@-2{{OpenCL extension 'cl_khr_3d_image_writes' is core feature or supported optional core feature - ignoring}}
 #endif
 
-
-
-#if (__OPENCL_C_VERSION__ >= 110)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110)
 #ifndef cl_khr_gl_event
 #error "Missing cl_khr_gl_event define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_gl_event' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_gl_event: enable
+#pragma OPENCL EXTENSION cl_khr_gl_event : enable
 
-#if (__OPENCL_C_VERSION__ >= 110)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110)
 #ifndef cl_khr_d3d10_sharing
 #error "Missing cl_khr_d3d10_sharing define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_d3d10_sharing' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_d3d10_sharing: enable
+#pragma OPENCL EXTENSION cl_khr_d3d10_sharing : enable
 
-#if (__OPENCL_C_VERSION__ >= 110)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 110)
 #ifndef cles_khr_int64
 #error "Missing cles_khr_int64 define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cles_khr_int64' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cles_khr_int64: enable
+#pragma OPENCL EXTENSION cles_khr_int64 : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_context_abort
 #error "Missing cl_context_abort define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_context_abort' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_context_abort: enable
+#pragma OPENCL EXTENSION cl_khr_context_abort : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_d3d11_sharing
 #error "Missing cl_khr_d3d11_sharing define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_d3d11_sharing' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_d3d11_sharing: enable
+#pragma OPENCL EXTENSION cl_khr_d3d11_sharing : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_dx9_media_sharing
 #error "Missing cl_khr_dx9_media_sharing define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_dx9_media_sharing' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_dx9_media_sharing: enable
+#pragma OPENCL EXTENSION cl_khr_dx9_media_sharing : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_image2d_from_buffer
 #error "Missing cl_khr_image2d_from_buffer define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_image2d_from_buffer' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_image2d_from_buffer: enable
+#pragma OPENCL EXTENSION cl_khr_image2d_from_buffer : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_initialize_memory
 #error "Missing cl_khr_initialize_memory define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_initialize_memory' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_initialize_memory: enable
+#pragma OPENCL EXTENSION cl_khr_initialize_memory : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_gl_depth_images
 #error "Missing cl_khr_gl_depth_images define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_gl_depth_images' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_gl_depth_images: enable
+#pragma OPENCL EXTENSION cl_khr_gl_depth_images : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_gl_msaa_sharing
 #error "Missing cl_khr_gl_msaa_sharing define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_gl_msaa_sharing' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing: enable
+#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_spir
 #error "Missing cl_khr_spir define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_spir' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_spir: enable
+#pragma OPENCL EXTENSION cl_khr_spir : enable
 
-#if (__OPENCL_C_VERSION__ >= 200)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 #ifndef cl_khr_egl_event
 #error "Missing cl_khr_egl_event define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_egl_event' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_egl_event: enable
+#pragma OPENCL EXTENSION cl_khr_egl_event : enable
 
-#if (__OPENCL_C_VERSION__ >= 200)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 #ifndef cl_khr_egl_image
 #error "Missing cl_khr_egl_image define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_egl_image' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_egl_image: enable
+#pragma OPENCL EXTENSION cl_khr_egl_image : enable
 
-#if (__OPENCL_C_VERSION__ >= 200)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 #ifndef cl_khr_mipmap_image
 #error "Missing cl_khr_mipmap_image define"
 #endif
@@ -240,18 +240,18 @@
 #endif
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_mipmap_image' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable
+#pragma OPENCL EXTENSION cl_khr_mipmap_image : enable
 
-#if (__OPENCL_C_VERSION__ >= 200)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 #ifndef cl_khr_srgb_image_writes
 #error "Missing cl_khr_srgb_image_writes define"
 #endif
 #else
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_srgb_image_writes' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_srgb_image_writes: enable
+#pragma OPENCL EXTENSION cl_khr_srgb_image_writes : enable
 
-#if (__OPENCL_C_VERSION__ >= 200)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 #ifndef cl_khr_subgroups
 #error "Missing cl_khr_subgroups define"
 #endif
@@ -261,9 +261,9 @@
 #endif
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_subgroups: enable
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
 
-#if (__OPENCL_C_VERSION__ >= 200)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 #ifndef cl_khr_terminate_context
 #error "Missing cl_khr_terminate_context define"
 #endif
@@ -280,9 +280,9 @@
 #ifndef cl_amd_media_ops2
 #error "Missing cl_amd_media_ops2 define"
 #endif
-#pragma OPENCL EXTENSION cl_amd_media_ops2: enable
+#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_khr_depth_images
 #error "Missing cl_khr_depth_images define"
 #endif
@@ -292,9 +292,9 @@
 #endif
 // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_depth_images' - ignoring}}
 #endif
-#pragma OPENCL EXTENSION cl_khr_depth_images: enable
+#pragma OPENCL EXTENSION cl_khr_depth_images : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_intel_subgroups
 #error "Missing cl_intel_subgroups define"
 #endif
@@ -303,7 +303,7 @@
 #endif
 #pragma OPENCL EXTENSION cl_intel_subgroups : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_intel_subgroups_short
 #error "Missing cl_intel_subgroups_short define"
 #endif
@@ -312,7 +312,7 @@
 #endif
 #pragma OPENCL EXTENSION cl_intel_subgroups_short : enable
 
-#if (__OPENCL_C_VERSION__ >= 120)
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 120)
 #ifndef cl_intel_device_side_avc_motion_estimation
 #error "Missing cl_intel_device_side_avc_motion_estimation define"
 #endif
diff --git a/clang/test/SemaOpenCL/extensions.cl b/clang/test/SemaOpenCL/extensions.cl
index 5f95e32d4a549b..e9dba69ecd7c94 100644
--- a/clang/test/SemaOpenCL/extensions.cl
+++ b/clang/test/SemaOpenCL/extensions.cl
@@ -28,6 +28,7 @@
 // enabled by default with -cl-std=CL2.0).
 //
 // RUN: %clang_cc1 %s -triple amdgcn-unknown-unknown -verify -pedantic -fsyntax-only -cl-std=CL2.0 -finclude-default-header
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -verify -pedantic -fsyntax-only -cl-std=c++
 
 #ifdef _OPENCL_H_
 // expected-no-diagnostics
@@ -37,7 +38,11 @@
 // expected-no-diagnostics
 #endif
 
-#if __OPENCL_C_VERSION__ < 120
+#ifdef __OPENCL_CPP_VERSION__
+// expected-no-diagnostics
+#endif
+
+#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 120)
 void f1(double da) { // expected-error {{type 'double' requires cl_khr_fp64 extension}}
   double d; // expected-error {{type 'double' requires cl_khr_fp64 extension}}
   (void) 1.0; // expected-warning {{double precision constant requires cl_khr_fp64}}
@@ -89,7 +94,7 @@ void f2(void) {
 // expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}}
 #endif
 
-#if __OPENCL_C_VERSION__ < 120
+#if (defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ < 120)
 void f3(void) {
   double d; // expected-error {{type 'double' requires cl_khr_fp64 extension}}
 }
diff --git a/clang/test/SemaOpenCL/printf-format-string-warnings.cl b/clang/test/SemaOpenCL/printf-format-string-warnings.cl
index 2b9c5cc3f319f2..39b859402702f3 100644
--- a/clang/test/SemaOpenCL/printf-format-string-warnings.cl
+++ b/clang/test/SemaOpenCL/printf-format-string-warnings.cl
@@ -1,13 +1,14 @@
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -finclude-default-header
 
-// Make sure warnings are produced based on printf format strings.
+// FIXME: Make sure warnings are produced based on printf format strings.
 
+// expected-no-diagnostics
 
 kernel void format_string_warnings(__constant char* arg) {
 
-  printf("%d", arg); // expected-warning {{format specifies type 'int' but the argument has type '__constant char *'}}
+  printf("%d", arg);
 
-  printf("not enough arguments %d %d", 4); // expected-warning {{more '%' conversions than data arguments}}
+  printf("not enough arguments %d %d", 4);
 
-  printf("too many arguments", 4); // expected-warning {{data argument not used by format string}}
+  printf("too many arguments", 4);
 }
diff --git a/clang/test/SemaOpenCL/printf-format-strings.cl b/clang/test/SemaOpenCL/printf-format-strings.cl
index 079a83495685e1..212e1f8981cbef 100644
--- a/clang/test/SemaOpenCL/printf-format-strings.cl
+++ b/clang/test/SemaOpenCL/printf-format-strings.cl
@@ -13,10 +13,10 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)))
 kernel void format_v4f32(float4 arg)
 {
 #ifdef cl_khr_fp64
-    printf("%v4f\n", arg);
+    printf("%v4f\n", arg); // expected-warning{{format specifies type 'double __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}}
 
     // Precision modifier
-    printf("%.2v4f\n", arg);
+    printf("%.2v4f\n", arg); // expected-warning{{format specifies type 'double __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}}
 #else
     // FIXME: These should not warn, and the type should be expected to be float.
     printf("%v4f\n", arg);  // expected-warning {{double __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}}
diff --git a/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp b/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp
index fd0003e9b3828e..8c59a2f07f6984 100644
--- a/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp
+++ b/clang/test/Tooling/clang-check-mac-libcxx-fixed-compilation-db.cpp
@@ -11,6 +11,6 @@
 // RUN: cp $(which clang-check) %t/mock-libcxx/bin/
 // RUN: cp "%s" "%t/test.cpp"
 // RUN: %t/mock-libcxx/bin/clang-check -p "%t" "%t/test.cpp" -- -stdlib=libc++ -target x86_64-apple-darwin
-
+// REQUIRES: system-darwin
 #include <mock_vector>
 vector v;
diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp
index 746d9ad5e89bb1..8e98f44fa46d8e 100644
--- a/clang/unittests/Basic/FileManagerTest.cpp
+++ b/clang/unittests/Basic/FileManagerTest.cpp
@@ -222,33 +222,6 @@ TEST_F(FileManagerTest, getFileReturnsNULLForNonexistentFile) {
   EXPECT_EQ(nullptr, file);
 }
 
-// When calling getFile(OpenFile=false); getFile(OpenFile=true) the file is
-// opened for the second call.
-TEST_F(FileManagerTest, getFileDefersOpen) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem());
-  FS->addFile("/tmp/test", 0, llvm::MemoryBuffer::getMemBufferCopy("test"));
-  FS->addFile("/tmp/testv", 0, llvm::MemoryBuffer::getMemBufferCopy("testv"));
-  FileManager manager(options, FS);
-
-  const FileEntry *file = manager.getFile("/tmp/test", /*OpenFile=*/false);
-  ASSERT_TRUE(file != nullptr);
-  ASSERT_TRUE(file->isValid());
-  // "real path name" reveals whether the file was actually opened.
-  EXPECT_FALSE(file->isOpenForTests());
-
-  file = manager.getFile("/tmp/test", /*OpenFile=*/true);
-  ASSERT_TRUE(file != nullptr);
-  ASSERT_TRUE(file->isValid());
-  EXPECT_TRUE(file->isOpenForTests());
-
-  // However we should never try to open a file previously opened as virtual.
-  ASSERT_TRUE(manager.getVirtualFile("/tmp/testv", 5, 0));
-  ASSERT_TRUE(manager.getFile("/tmp/testv", /*OpenFile=*/false));
-  file = manager.getFile("/tmp/testv", /*OpenFile=*/true);
-  EXPECT_FALSE(file->isOpenForTests());
-}
-
 // The following tests apply to Unix-like system only.
 
 #ifndef _WIN32
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index 186463f80af733..5813552a6cd323 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -450,6 +450,37 @@ TEST(ClangToolTest, StripDependencyFileAdjuster) {
   EXPECT_TRUE(HasFlag("-w"));
 }
 
+// Check that getStripPluginsAdjuster strips plugin-related arguments.
+TEST(ClangToolTest, StripPluginsAdjuster) {
+  FixedCompilationDatabase Compilations(
+      "/", {"-Xclang", "-add-plugin", "-Xclang", "random-plugin"});
+
+  ClangTool Tool(Compilations, std::vector<std::string>(1, "/a.cc"));
+  Tool.mapVirtualFile("/a.cc", "void a() {}");
+
+  std::unique_ptr<FrontendActionFactory> Action(
+      newFrontendActionFactory<SyntaxOnlyAction>());
+
+  CommandLineArguments FinalArgs; // Arguments seen after the strip adjuster.
+  ArgumentsAdjuster CheckFlagsAdjuster =
+      [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) {
+        FinalArgs = Args; // Record only; the arguments pass through unchanged.
+        return Args;
+      };
+  Tool.clearArgumentsAdjusters(); // Only the two adjusters below are active.
+  Tool.appendArgumentsAdjuster(getStripPluginsAdjuster());
+  Tool.appendArgumentsAdjuster(CheckFlagsAdjuster);
+  Tool.run(Action.get());
+
+  auto HasFlag = [&FinalArgs](const std::string &Flag) {
+    return std::find(FinalArgs.begin(), FinalArgs.end(), Flag) !=
+           FinalArgs.end();
+  };
+  EXPECT_FALSE(HasFlag("-Xclang"));
+  EXPECT_FALSE(HasFlag("-add-plugin"));
+  EXPECT_FALSE(HasFlag("random-plugin")); // Spelled as passed in: no dash.
+}
+
 namespace {
 /// Find a target name such that looking for it in TargetRegistry by that name
 /// returns the same target. We expect that there is at least one target
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index db5c4645dc0a58..5b94338771780e 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -118,6 +118,7 @@ check_library_exists(dl dlopen "" COMPILER_RT_HAS_LIBDL)
 check_library_exists(rt shm_open "" COMPILER_RT_HAS_LIBRT)
 check_library_exists(m pow "" COMPILER_RT_HAS_LIBM)
 check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD)
+check_library_exists(execinfo backtrace "" COMPILER_RT_HAS_LIBEXECINFO)
 
 # Look for terminfo library, used in unittests that depend on LLVMSupport.
 if(LLVM_ENABLE_TERMINFO)
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc b/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc
index 86cb440476069f..bddc26d20019d0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc
@@ -25,7 +25,7 @@ struct ioctl_desc {
   const char *name;
 };
 
-const unsigned ioctl_table_max = 1202;
+const unsigned ioctl_table_max = 1200;
 static ioctl_desc ioctl_table[ioctl_table_max];
 static unsigned ioctl_table_size = 0;
 
@@ -298,9 +298,6 @@ static void ioctl_table_fill() {
   _(IRFRAMETTY_GET_DEVICE, WRITE, sizeof(unsigned int));
   _(IRFRAMETTY_GET_DONGLE, WRITE, sizeof(unsigned int));
   _(IRFRAMETTY_SET_DONGLE, READ, sizeof(unsigned int));
-  /* Entries from file: dev/isa/satlinkio.h */
-  _(SATIORESET, NONE, 0);
-  _(SATIOGID, WRITE, struct_satlink_id_sz);
   /* Entries from file: dev/isa/isvio.h */
   _(ISV_CMD, READWRITE, struct_isv_cmd_sz);
   /* Entries from file: dev/isa/wtreg.h */
@@ -649,8 +646,8 @@ static void ioctl_table_fill() {
   _(SPKRTUNE, NONE, 0);
   _(SPKRGETVOL, WRITE, sizeof(unsigned int));
   _(SPKRSETVOL, READ, sizeof(unsigned int));
-  /* Entries from file: dev/nvmm/nvmm_ioctl.h */
 #if 0 /* WIP */
+  /* Entries from file: dev/nvmm/nvmm_ioctl.h */
   _(NVMM_IOC_CAPABILITY, WRITE, struct_nvmm_ioc_capability_sz);
   _(NVMM_IOC_MACHINE_CREATE, READWRITE, struct_nvmm_ioc_machine_create_sz);
   _(NVMM_IOC_MACHINE_DESTROY, READ, struct_nvmm_ioc_machine_destroy_sz);
@@ -659,7 +656,7 @@ static void ioctl_table_fill() {
   _(NVMM_IOC_VCPU_DESTROY, READ, struct_nvmm_ioc_vcpu_destroy_sz);
   _(NVMM_IOC_VCPU_SETSTATE, READ, struct_nvmm_ioc_vcpu_setstate_sz);
   _(NVMM_IOC_VCPU_GETSTATE, READ, struct_nvmm_ioc_vcpu_getstate_sz);
-  _(NVMM_IOC_VCPU_INJECT, READWRITE, struct_nvmm_ioc_vcpu_inject_sz);
+  _(NVMM_IOC_VCPU_INJECT, READ, struct_nvmm_ioc_vcpu_inject_sz);
   _(NVMM_IOC_VCPU_RUN, READWRITE, struct_nvmm_ioc_vcpu_run_sz);
   _(NVMM_IOC_GPA_MAP, READ, struct_nvmm_ioc_gpa_map_sz);
   _(NVMM_IOC_GPA_UNMAP, READ, struct_nvmm_ioc_gpa_unmap_sz);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
index b23b430d9e5bc4..c112e044b1d82c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
@@ -122,7 +122,6 @@
 #include <dev/ic/nvmeio.h>
 #include <dev/ir/irdaio.h>
 #include <dev/isa/isvio.h>
-#include <dev/isa/satlinkio.h>
 #include <dev/isa/wtreg.h>
 #include <dev/iscsi/iscsi_ioctl.h>
 #include <dev/nvmm/nvmm_ioctl.h>
@@ -639,7 +638,6 @@ unsigned struct_rf_recon_req_sz = sizeof(rf_recon_req);
 unsigned struct_rio_conf_sz = sizeof(rio_conf);
 unsigned struct_rio_interface_sz = sizeof(rio_interface);
 unsigned struct_rio_stats_sz = sizeof(rio_stats);
-unsigned struct_satlink_id_sz = sizeof(satlink_id);
 unsigned struct_scan_io_sz = sizeof(scan_io);
 unsigned struct_scbusaccel_args_sz = sizeof(scbusaccel_args);
 unsigned struct_scbusiodetach_args_sz = sizeof(scbusiodetach_args);
@@ -1105,9 +1103,6 @@ unsigned IOCTL_IRDA_GET_TURNAROUNDMASK = IRDA_GET_TURNAROUNDMASK;
 unsigned IOCTL_IRFRAMETTY_GET_DEVICE = IRFRAMETTY_GET_DEVICE;
 unsigned IOCTL_IRFRAMETTY_GET_DONGLE = IRFRAMETTY_GET_DONGLE;
 unsigned IOCTL_IRFRAMETTY_SET_DONGLE = IRFRAMETTY_SET_DONGLE;
-unsigned IOCTL_SATIORESET = SATIORESET;
-unsigned IOCTL_SATIOGID = SATIOGID;
-unsigned IOCTL_SATIOSBUFSIZE = SATIOSBUFSIZE;
 unsigned IOCTL_ISV_CMD = ISV_CMD;
 unsigned IOCTL_WTQICMD = WTQICMD;
 unsigned IOCTL_ISCSI_GET_VERSION = ISCSI_GET_VERSION;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
index 0c0c8a837b8b3d..594cfa6c0d47b9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
@@ -803,7 +803,6 @@ extern unsigned struct_rf_recon_req_sz;
 extern unsigned struct_rio_conf_sz;
 extern unsigned struct_rio_interface_sz;
 extern unsigned struct_rio_stats_sz;
-extern unsigned struct_satlink_id_sz;
 extern unsigned struct_scan_io_sz;
 extern unsigned struct_scbusaccel_args_sz;
 extern unsigned struct_scbusiodetach_args_sz;
@@ -1266,9 +1265,6 @@ extern unsigned IOCTL_IRDA_GET_TURNAROUNDMASK;
 extern unsigned IOCTL_IRFRAMETTY_GET_DEVICE;
 extern unsigned IOCTL_IRFRAMETTY_GET_DONGLE;
 extern unsigned IOCTL_IRFRAMETTY_SET_DONGLE;
-extern unsigned IOCTL_SATIORESET;
-extern unsigned IOCTL_SATIOGID;
-extern unsigned IOCTL_SATIOSBUFSIZE;
 extern unsigned IOCTL_ISV_CMD;
 extern unsigned IOCTL_WTQICMD;
 extern unsigned IOCTL_ISCSI_GET_VERSION;
diff --git a/compiler-rt/lib/xray/tests/CMakeLists.txt b/compiler-rt/lib/xray/tests/CMakeLists.txt
index 89a2b3b01ed8ae..deddc5101e76b2 100644
--- a/compiler-rt/lib/xray/tests/CMakeLists.txt
+++ b/compiler-rt/lib/xray/tests/CMakeLists.txt
@@ -71,13 +71,14 @@ if (NOT APPLE)
     endforeach()
 
     # We also add the actual libraries to link as dependencies.
-    list(APPEND XRAY_UNITTEST_LINK_FLAGS -lLLVMXRay -lLLVMSupport -lLLVMTestingSupport)
+    list(APPEND XRAY_UNITTEST_LINK_FLAGS -lLLVMXRay -lLLVMSupport -lLLVMDemangle -lLLVMTestingSupport)
   endif()
 
   append_list_if(COMPILER_RT_HAS_LIBM -lm XRAY_UNITTEST_LINK_FLAGS)
   append_list_if(COMPILER_RT_HAS_LIBRT -lrt XRAY_UNITTEST_LINK_FLAGS)
   append_list_if(COMPILER_RT_HAS_LIBDL -ldl XRAY_UNITTEST_LINK_FLAGS)
   append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread XRAY_UNITTEST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBEXECINFO -lexecinfo XRAY_UNITTEST_LINK_FLAGS)
 endif()
 
 macro(add_xray_unittest testname)
diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt
index 2e239d54e29c8b..7070fb789520c6 100644
--- a/compiler-rt/test/CMakeLists.txt
+++ b/compiler-rt/test/CMakeLists.txt
@@ -14,10 +14,6 @@ if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE)
   list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS profile)
 endif()
 
-if(COMPILER_RT_STANDALONE_BUILD)
-  list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS FileCheck)
-endif()
-
 # When ANDROID, we build tests with the host compiler (i.e. CMAKE_C_COMPILER),
 # and run tests with tools from the host toolchain.
 if(NOT ANDROID)
diff --git a/compiler-rt/utils/generate_netbsd_ioctls.awk b/compiler-rt/utils/generate_netbsd_ioctls.awk
index 82b1992143772e..38fe88fb0cde4f 100755
--- a/compiler-rt/utils/generate_netbsd_ioctls.awk
+++ b/compiler-rt/utils/generate_netbsd_ioctls.awk
@@ -152,7 +152,6 @@ FNR == 1 {
       $0 ~ /JOY_GET_X_OFFSET/ ||
       $0 ~ /CHIOGPICKER/ ||
       $0 ~ /SLIOCGUNIT/ ||
-      $0 ~ /SATIOSBUFSIZE/ ||
       $0 ~ /TUNSLMODE/ ||
       $0 ~ /CBQ_IF_ATTACH/ ||
       $0 ~ /CDNR_IF_ATTACH/ ||
diff --git a/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake b/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake
index 70eed1d70ba1b6..11c13315585bfc 100644
--- a/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake
+++ b/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake
@@ -116,7 +116,7 @@ macro(configure_out_of_tree_llvm)
     # Required LIT Configuration ------------------------------------------------
     # Define the default arguments to use with 'lit', and an option for the user
     # to override.
-    set(LLVM_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py")
+    set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py")
     set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported")
     if (MSVC OR XCODE)
       set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp
index 4dd9390c4fdcfa..bd20e090637d0c 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp
@@ -17,9 +17,11 @@
 // None of the current GCC compilers support this.
 // UNSUPPORTED: gcc-5, gcc-6
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp
index d6194b00aa0255..6b4e1c1924b4a2 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp
@@ -15,9 +15,11 @@
 // FIXME change this to XFAIL.
 // UNSUPPORTED: no-aligned-allocation && !gcc
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp
index 59878aefd18a20..3188cc587dde92 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp
@@ -15,9 +15,11 @@
 // FIXME turn this into an XFAIL
 // UNSUPPORTED: no-aligned-allocation && !gcc
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp
index fc713dbf8ed833..29d8fd06a701e0 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp
@@ -10,9 +10,11 @@
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // UNSUPPORTED: sanitizer-new-delete
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp
index 19cabcce1edd98..c01e39915ec013 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp
@@ -16,9 +16,11 @@
 // None of the current GCC compilers support this.
 // UNSUPPORTED: gcc-5, gcc-6
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp
index 7cf1aca3b9f8ea..8cb40885c466f3 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp
@@ -9,9 +9,11 @@
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp
index dd2666e00aad4e..9d7f13bee32889 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp
@@ -9,9 +9,11 @@
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp
index 514a2b8afc8c36..82367d7de093b1 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp
@@ -10,9 +10,11 @@
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // UNSUPPORTED: sanitizer-new-delete
 
-// Aligned allocation was not provided before macosx10.12 and as a result we
-// get availability errors when the deployment target is older than macosx10.13.
-// However, AppleClang 10 (and older) don't trigger availability errors.
+// Aligned allocation was not provided before macosx10.14 and as a result we
+// get availability errors when the deployment target is older than macosx10.14.
+// However, AppleClang 10 (and older) don't trigger availability errors, and
+// Clang < 8.0 doesn't warn for 10.13.
+// XFAIL: !(apple-clang-9 || apple-clang-10 || clang-7) && availability=macosx10.13
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.12
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.11
 // XFAIL: !(apple-clang-9 || apple-clang-10) && availability=macosx10.10
diff --git a/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake b/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake
index e50d0262f80418..0283a59ac1db2c 100644
--- a/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake
+++ b/libcxxabi/cmake/Modules/HandleOutOfTreeLLVM.cmake
@@ -117,7 +117,7 @@ macro(configure_out_of_tree_llvm)
     # Required LIT Configuration ------------------------------------------------
     # Define the default arguments to use with 'lit', and an option for the user
     # to override.
-    set(LLVM_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py")
+    set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py")
     set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported")
     if (MSVC OR XCODE)
       set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index 30ad35995aece2..49bb360d794111 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -534,11 +534,11 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
 #endif
             cbdata->sects->dwarf_index_section = eh_frame_hdr_start;
             cbdata->sects->dwarf_index_section_length = phdr->p_memsz;
-            EHHeaderParser<LocalAddressSpace>::decodeEHHdr(
+            found_hdr = EHHeaderParser<LocalAddressSpace>::decodeEHHdr(
                 *cbdata->addressSpace, eh_frame_hdr_start, phdr->p_memsz,
                 hdrInfo);
-            cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr;
-            found_hdr = true;
+            if (found_hdr)
+              cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr;
           }
         }
 
diff --git a/libunwind/src/EHHeaderParser.hpp b/libunwind/src/EHHeaderParser.hpp
index 9bdaf5505ff04d..6b3e7dead86643 100644
--- a/libunwind/src/EHHeaderParser.hpp
+++ b/libunwind/src/EHHeaderParser.hpp
@@ -36,7 +36,7 @@ template <typename A> class EHHeaderParser {
     uint8_t table_enc;
   };
 
-  static void decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd,
+  static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd,
                           EHHeaderInfo &ehHdrInfo);
   static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
                       uint32_t sectionLength,
@@ -53,12 +53,14 @@ template <typename A> class EHHeaderParser {
 };
 
 template <typename A>
-void EHHeaderParser<A>::decodeEHHdr(A &addressSpace, pint_t ehHdrStart,
+bool EHHeaderParser<A>::decodeEHHdr(A &addressSpace, pint_t ehHdrStart,
                                     pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) {
   pint_t p = ehHdrStart;
   uint8_t version = addressSpace.get8(p++);
-  if (version != 1)
-    _LIBUNWIND_ABORT("Unsupported .eh_frame_hdr version");
+  if (version != 1) {
+    _LIBUNWIND_LOG0("Unsupported .eh_frame_hdr version");
+    return false;
+  }
 
   uint8_t eh_frame_ptr_enc = addressSpace.get8(p++);
   uint8_t fde_count_enc = addressSpace.get8(p++);
@@ -71,6 +73,8 @@ void EHHeaderParser<A>::decodeEHHdr(A &addressSpace, pint_t ehHdrStart,
           ? 0
           : addressSpace.getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart);
   ehHdrInfo.table = p;
+
+  return true;
 }
 
 template <typename A>
@@ -102,7 +106,9 @@ bool EHHeaderParser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
   pint_t ehHdrEnd = ehHdrStart + sectionLength;
 
   EHHeaderParser<A>::EHHeaderInfo hdrInfo;
-  EHHeaderParser<A>::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd, hdrInfo);
+  if (!EHHeaderParser<A>::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd,
+                                      hdrInfo))
+    return false;
 
   size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc);
   pint_t tableEntry;
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 29131d7eb8db4b..2bb9aa01e53992 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -669,18 +669,38 @@ const uint8_t ArmThunk[] = {
     0xe7, 0x44,             // L1: add  pc, ip
 };
 
-size_t RangeExtensionThunk::getSize() const {
+size_t RangeExtensionThunkARM::getSize() const {
   assert(Config->Machine == ARMNT);
   return sizeof(ArmThunk);
 }
 
-void RangeExtensionThunk::writeTo(uint8_t *Buf) const {
+void RangeExtensionThunkARM::writeTo(uint8_t *Buf) const {
   assert(Config->Machine == ARMNT);
   uint64_t Offset = Target->getRVA() - RVA - 12;
   memcpy(Buf + OutputSectionOff, ArmThunk, sizeof(ArmThunk));
   applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset));
 }
 
+// A position independent ARM64 adrp+add thunk, with a maximum range of
+// +/- 4 GB, which is enough for any PE-COFF.
+const uint8_t Arm64Thunk[] = {
+    0x10, 0x00, 0x00, 0x90, // adrp x16, Dest
+    0x10, 0x02, 0x00, 0x91, // add  x16, x16, :lo12:Dest
+    0x00, 0x02, 0x1f, 0xd6, // br   x16
+};
+
+size_t RangeExtensionThunkARM64::getSize() const {
+  assert(Config->Machine == ARM64);
+  return sizeof(Arm64Thunk);
+}
+
+void RangeExtensionThunkARM64::writeTo(uint8_t *Buf) const {
+  assert(Config->Machine == ARM64);
+  memcpy(Buf + OutputSectionOff, Arm64Thunk, sizeof(Arm64Thunk));
+  applyArm64Addr(Buf + OutputSectionOff + 0, Target->getRVA(), RVA, 12);
+  applyArm64Imm(Buf + OutputSectionOff + 4, Target->getRVA() & 0xfff, 0);
+}
+
 void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) {
   Res->emplace_back(getRVA());
 }
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index f8a0ddd8ef3b2c..e132fdf8adfafa 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -355,9 +355,18 @@ class ImportThunkChunkARM64 : public Chunk {
   Defined *ImpSymbol;
 };
 
-class RangeExtensionThunk : public Chunk {
+class RangeExtensionThunkARM : public Chunk {
 public:
-  explicit RangeExtensionThunk(Defined *T) : Target(T) {}
+  explicit RangeExtensionThunkARM(Defined *T) : Target(T) {}
+  size_t getSize() const override;
+  void writeTo(uint8_t *Buf) const override;
+
+  Defined *Target;
+};
+
+class RangeExtensionThunkARM64 : public Chunk {
+public:
+  explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {}
   size_t getSize() const override;
   void writeTo(uint8_t *Buf) const override;
 
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 599cc5892a16f5..c06027d3e5c340 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -47,6 +47,7 @@ class HintNameChunk : public Chunk {
   }
 
   void writeTo(uint8_t *Buf) const override {
+    memset(Buf + OutputSectionOff, 0, getSize());
     write16le(Buf + OutputSectionOff, Hint);
     memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size());
   }
@@ -63,7 +64,10 @@ class LookupChunk : public Chunk {
   size_t getSize() const override { return Config->Wordsize; }
 
   void writeTo(uint8_t *Buf) const override {
-    write32le(Buf + OutputSectionOff, HintName->getRVA());
+    if (Config->is64())
+      write64le(Buf + OutputSectionOff, HintName->getRVA());
+    else
+      write32le(Buf + OutputSectionOff, HintName->getRVA());
   }
 
   Chunk *HintName;
@@ -99,6 +103,8 @@ class ImportDirectoryChunk : public Chunk {
   size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
 
   void writeTo(uint8_t *Buf) const override {
+    memset(Buf + OutputSectionOff, 0, getSize());
+
     auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff);
     E->ImportLookupTableRVA = LookupTab->getRVA();
     E->NameRVA = DLLName->getRVA();
@@ -118,6 +124,10 @@ class NullChunk : public Chunk {
   bool hasData() const override { return false; }
   size_t getSize() const override { return Size; }
 
+  void writeTo(uint8_t *Buf) const override {
+    memset(Buf + OutputSectionOff, 0, Size);
+  }
+
 private:
   size_t Size;
 };
@@ -160,6 +170,8 @@ class DelayDirectoryChunk : public Chunk {
   }
 
   void writeTo(uint8_t *Buf) const override {
+    memset(Buf + OutputSectionOff, 0, getSize());
+
     auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff);
     E->Attributes = 1;
     E->Name = DLLName->getRVA();
@@ -392,6 +404,8 @@ class ExportDirectoryChunk : public Chunk {
   }
 
   void writeTo(uint8_t *Buf) const override {
+    memset(Buf + OutputSectionOff, 0, getSize());
+
     auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff);
     E->NameRVA = DLLName->getRVA();
     E->OrdinalBase = 0;
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 34ea360fa92584..f6904eb7d24f26 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -263,19 +263,21 @@ void ICF::run(ArrayRef<Chunk *> Vec) {
 
   // Initially, we use hash values to partition sections.
   parallelForEach(Chunks, [&](SectionChunk *SC) {
-    SC->Class[1] = xxHash64(SC->getContents());
+    SC->Class[0] = xxHash64(SC->getContents());
   });
 
   // Combine the hashes of the sections referenced by each section into its
   // hash.
-  parallelForEach(Chunks, [&](SectionChunk *SC) {
-    uint32_t Hash = SC->Class[1];
-    for (Symbol *B : SC->symbols())
-      if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
-        Hash ^= Sym->getChunk()->Class[1];
-    // Set MSB to 1 to avoid collisions with non-hash classs.
-    SC->Class[0] = Hash | (1U << 31);
-  });
+  for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
+    parallelForEach(Chunks, [&](SectionChunk *SC) {
+      uint32_t Hash = SC->Class[Cnt % 2];
+      for (Symbol *B : SC->symbols())
+        if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
+          Hash += Sym->getChunk()->Class[Cnt % 2];
+      // Set MSB to 1 to avoid collisions with non-hash classes.
+      SC->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
+    });
+  }
 
   // From now on, sections in Chunks are ordered so that sections in
   // the same group are consecutive in the vector.
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 7862b6ce4cc5af..7757b89e2b36d5 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -288,18 +288,24 @@ static void pdbMakeAbsolute(SmallVectorImpl<char> &FileName) {
   // It's not absolute in any path syntax.  Relative paths necessarily refer to
   // the local file system, so we can make it native without ending up with a
   // nonsensical path.
-  sys::path::native(FileName);
   if (Config->PDBSourcePath.empty()) {
+    sys::path::native(FileName);
     sys::fs::make_absolute(FileName);
     return;
   }
-  // Only apply native and dot removal to the relative file path.  We want to
-  // leave the path the user specified untouched since we assume they specified
-  // it for a reason.
-  sys::path::remove_dots(FileName, /*remove_dot_dots=*/true);
 
+  // Try to guess whether /PDBSOURCEPATH is a unix path or a windows path.
+  // Since PDB's are more of a Windows thing, we make this conservative and only
+  // decide that it's a unix path if we're fairly certain.  Specifically, if
+  // it starts with a forward slash.
   SmallString<128> AbsoluteFileName = Config->PDBSourcePath;
-  sys::path::append(AbsoluteFileName, FileName);
+  sys::path::Style GuessedStyle = AbsoluteFileName.startswith("/")
+                                      ? sys::path::Style::posix
+                                      : sys::path::Style::windows;
+  sys::path::append(AbsoluteFileName, GuessedStyle, FileName);
+  sys::path::native(AbsoluteFileName, GuessedStyle);
+  sys::path::remove_dots(AbsoluteFileName, true, GuessedStyle);
+
   FileName = std::move(AbsoluteFileName);
 }
 
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 258796ea6057b1..6acfaf9a44545f 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -306,16 +306,31 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) {
 // Check whether the target address S is in range from a relocation
 // of type RelType at address P.
 static bool isInRange(uint16_t RelType, uint64_t S, uint64_t P, int Margin) {
-  assert(Config->Machine == ARMNT);
-  int64_t Diff = AbsoluteDifference(S, P + 4) + Margin;
-  switch (RelType) {
-  case IMAGE_REL_ARM_BRANCH20T:
-    return isInt<21>(Diff);
-  case IMAGE_REL_ARM_BRANCH24T:
-  case IMAGE_REL_ARM_BLX23T:
-    return isInt<25>(Diff);
-  default:
-    return true;
+  if (Config->Machine == ARMNT) {
+    int64_t Diff = AbsoluteDifference(S, P + 4) + Margin;
+    switch (RelType) {
+    case IMAGE_REL_ARM_BRANCH20T:
+      return isInt<21>(Diff);
+    case IMAGE_REL_ARM_BRANCH24T:
+    case IMAGE_REL_ARM_BLX23T:
+      return isInt<25>(Diff);
+    default:
+      return true;
+    }
+  } else if (Config->Machine == ARM64) {
+    int64_t Diff = AbsoluteDifference(S, P) + Margin;
+    switch (RelType) {
+    case IMAGE_REL_ARM64_BRANCH26:
+      return isInt<28>(Diff);
+    case IMAGE_REL_ARM64_BRANCH19:
+      return isInt<21>(Diff);
+    case IMAGE_REL_ARM64_BRANCH14:
+      return isInt<16>(Diff);
+    default:
+      return true;
+    }
+  } else {
+    llvm_unreachable("Unexpected architecture");
   }
 }
 
@@ -327,7 +342,17 @@ getThunk(DenseMap<uint64_t, Defined *> &LastThunks, Defined *Target, uint64_t P,
   Defined *&LastThunk = LastThunks[Target->getRVA()];
   if (LastThunk && isInRange(Type, LastThunk->getRVA(), P, Margin))
     return {LastThunk, false};
-  RangeExtensionThunk *C = make<RangeExtensionThunk>(Target);
+  Chunk *C;
+  switch (Config->Machine) {
+  case ARMNT:
+    C = make<RangeExtensionThunkARM>(Target);
+    break;
+  case ARM64:
+    C = make<RangeExtensionThunkARM64>(Target);
+    break;
+  default:
+    llvm_unreachable("Unexpected architecture");
+  }
   Defined *D = make<DefinedSynthetic>("", C);
   LastThunk = D;
   return {D, true};
@@ -344,14 +369,14 @@ getThunk(DenseMap<uint64_t, Defined *> &LastThunks, Defined *Target, uint64_t P,
 // After adding thunks, we verify that all relocations are in range (with
 // no extra margin requirements). If this failed, we restart (throwing away
 // the previously created thunks) and retry with a wider margin.
-static bool createThunks(std::vector<Chunk *> &Chunks, int Margin) {
+static bool createThunks(OutputSection *OS, int Margin) {
   bool AddressesChanged = false;
   DenseMap<uint64_t, Defined *> LastThunks;
   size_t ThunksSize = 0;
   // Recheck Chunks.size() each iteration, since we can insert more
   // elements into it.
-  for (size_t I = 0; I != Chunks.size(); ++I) {
-    SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Chunks[I]);
+  for (size_t I = 0; I != OS->Chunks.size(); ++I) {
+    SectionChunk *SC = dyn_cast_or_null<SectionChunk>(OS->Chunks[I]);
     if (!SC)
       continue;
     size_t ThunkInsertionSpot = I + 1;
@@ -388,7 +413,8 @@ static bool createThunks(std::vector<Chunk *> &Chunks, int Margin) {
         Chunk *ThunkChunk = Thunk->getChunk();
         ThunkChunk->setRVA(
             ThunkInsertionRVA); // Estimate of where it will be located.
-        Chunks.insert(Chunks.begin() + ThunkInsertionSpot, ThunkChunk);
+        ThunkChunk->setOutputSection(OS);
+        OS->Chunks.insert(OS->Chunks.begin() + ThunkInsertionSpot, ThunkChunk);
         ThunkInsertionSpot++;
         ThunksSize += ThunkChunk->getSize();
         ThunkInsertionRVA += ThunkChunk->getSize();
@@ -428,7 +454,7 @@ static bool verifyRanges(const std::vector<Chunk *> Chunks) {
 // Assign addresses and add thunks if necessary.
 void Writer::finalizeAddresses() {
   assignAddresses();
-  if (Config->Machine != ARMNT)
+  if (Config->Machine != ARMNT && Config->Machine != ARM64)
     return;
 
   size_t OrigNumChunks = 0;
@@ -477,7 +503,7 @@ void Writer::finalizeAddresses() {
     // to avoid things going out of range due to the added thunks.
     bool AddressesChanged = false;
     for (OutputSection *Sec : OutputSections)
-      AddressesChanged |= createThunks(Sec->Chunks, Margin);
+      AddressesChanged |= createThunks(Sec, Margin);
     // If the verification above thought we needed thunks, we should have
     // added some.
     assert(AddressesChanged);
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 06314155dcc92f..a000eeb079d9ef 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -264,15 +264,6 @@ void X86_64<ELFT>::relaxTlsIeToLe(uint8_t *Loc, RelType Type,
 template <class ELFT>
 void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
                                   uint64_t Val) const {
-  // Convert
-  //   leaq bar@tlsld(%rip), %rdi
-  //   callq __tls_get_addr@PLT
-  //   leaq bar@dtpoff(%rax), %rcx
-  // to
-  //   .word 0x6666
-  //   .byte 0x66
-  //   mov %fs:0,%rax
-  //   leaq bar@tpoff(%rax), %rcx
   if (Type == R_X86_64_DTPOFF64) {
     write64le(Loc, Val);
     return;
@@ -287,7 +278,37 @@ void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
       0x66,                                                 // .byte 0x66
       0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
   };
-  memcpy(Loc - 3, Inst, sizeof(Inst));
+
+  if (Loc[4] == 0xe8) {
+    // Convert
+    //   leaq bar@tlsld(%rip), %rdi           # 48 8d 3d <Loc>
+    //   callq __tls_get_addr@PLT             # e8 <disp32>
+    //   leaq bar@dtpoff(%rax), %rcx
+    // to
+    //   .word 0x6666
+    //   .byte 0x66
+    //   mov %fs:0,%rax
+    //   leaq bar@tpoff(%rax), %rcx
+    memcpy(Loc - 3, Inst, sizeof(Inst));
+    return;
+  }
+
+  if (Loc[4] == 0xff && Loc[5] == 0x15) {
+    // Convert
+    //   leaq  x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
+    //   call *__tls_get_addr@GOTPCREL(%rip)    # ff 15 <disp32>
+    // to
+    //   .long  0x66666666
+    //   movq   %fs:0,%rax
+    // See "Table 11.9: LD -> LE Code Transition (LP64)" in
+    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
+    Loc[-3] = 0x66;
+    memcpy(Loc - 2, Inst, sizeof(Inst));
+    return;
+  }
+
+  error(getErrorLocation(Loc - 3) +
+        "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
 }
 
 template <class ELFT>
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 8fb760e592eb18..60555f188fed3e 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -159,6 +159,7 @@ struct Configuration {
   bool OFormatBinary;
   bool Omagic;
   bool OptRemarksWithHotness;
+  bool PicThunk;
   bool Pie;
   bool PrintGcSections;
   bool PrintIcfSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 13b6119e2dc91c..2e2036310fb21b 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1006,6 +1006,7 @@ static void setConfigs(opt::InputArgList &Args) {
   Config->Endianness = Config->IsLE ? endianness::little : endianness::big;
   Config->IsMips64EL = (K == ELF64LEKind && M == EM_MIPS);
   Config->Pic = Config->Pie || Config->Shared;
+  Config->PicThunk = Args.hasArg(OPT_pic_veneer, Config->Pic);
   Config->Wordsize = Config->Is64 ? 8 : 4;
 
   // ELF defines two different ways to store relocation addends as shown below:
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index e917ae76a689e0..d08ac73ded80e8 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -426,16 +426,17 @@ void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) {
 // Combine the hashes of the sections referenced by the given section into its
 // hash.
 template <class ELFT, class RelTy>
-static void combineRelocHashes(InputSection *IS, ArrayRef<RelTy> Rels) {
-  uint32_t Hash = IS->Class[1];
+static void combineRelocHashes(unsigned Cnt, InputSection *IS,
+                               ArrayRef<RelTy> Rels) {
+  uint32_t Hash = IS->Class[Cnt % 2];
   for (RelTy Rel : Rels) {
     Symbol &S = IS->template getFile<ELFT>()->getRelocTargetSym(Rel);
     if (auto *D = dyn_cast<Defined>(&S))
       if (auto *RelSec = dyn_cast_or_null<InputSection>(D->Section))
-        Hash ^= RelSec->Class[1];
+        Hash += RelSec->Class[Cnt % 2];
   }
   // Set MSB to 1 to avoid collisions with non-hash IDs.
-  IS->Class[0] = Hash | (1U << 31);
+  IS->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
 }
 
 static void print(const Twine &S) {
@@ -453,15 +454,17 @@ template <class ELFT> void ICF<ELFT>::run() {
 
   // Initially, we use hash values to partition sections.
   parallelForEach(Sections, [&](InputSection *S) {
-    S->Class[1] = xxHash64(S->data());
+    S->Class[0] = xxHash64(S->data());
   });
 
-  parallelForEach(Sections, [&](InputSection *S) {
-    if (S->AreRelocsRela)
-      combineRelocHashes<ELFT>(S, S->template relas<ELFT>());
-    else
-      combineRelocHashes<ELFT>(S, S->template rels<ELFT>());
-  });
+  for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
+    parallelForEach(Sections, [&](InputSection *S) {
+      if (S->AreRelocsRela)
+        combineRelocHashes<ELFT>(Cnt, S, S->template relas<ELFT>());
+      else
+        combineRelocHashes<ELFT>(Cnt, S, S->template rels<ELFT>());
+    });
+  }
 
   // From now on, sections in Sections vector are ordered so that sections
   // in the same equivalence class are consecutive in the vector.
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index e4d1dec7cbcbd6..bc7e61072e6422 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -320,17 +320,6 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
   return Signature;
 }
 
-template <class ELFT>
-ArrayRef<typename ObjFile<ELFT>::Elf_Word>
-ObjFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
-  const ELFFile<ELFT> &Obj = this->getObj();
-  ArrayRef<Elf_Word> Entries =
-      CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
-  if (Entries.empty() || Entries[0] != GRP_COMDAT)
-    fatal(toString(this) + ": unsupported SHT_GROUP format");
-  return Entries.slice(1);
-}
-
 template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
   // On a regular link we don't merge sections if -O0 (default is -O1). This
   // sometimes makes the linker significantly faster, although the output will
@@ -440,26 +429,34 @@ void ObjFile<ELFT>::initializeSections(
     case SHT_GROUP: {
       // De-duplicate section groups by their signatures.
       StringRef Signature = getShtGroupSignature(ObjSections, Sec);
-      bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
       this->Sections[I] = &InputSection::Discarded;
 
-      // We only support GRP_COMDAT type of group. Get the all entries of the
-      // section here to let getShtGroupEntries to check the type early for us.
-      ArrayRef<Elf_Word> Entries = getShtGroupEntries(Sec);
 
-      // If it is a new section group, we want to keep group members.
-      // Group leader sections, which contain indices of group members, are
-      // discarded because they are useless beyond this point. The only
-      // exception is the -r option because in order to produce re-linkable
-      // object files, we want to pass through basically everything.
+      ArrayRef<Elf_Word> Entries =
+          CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
+      if (Entries.empty())
+        fatal(toString(this) + ": empty SHT_GROUP");
+
+      // The first word of a SHT_GROUP section contains flags. Currently,
+      // the standard defines only "GRP_COMDAT" flag for the COMDAT group.
+      // A group with an empty flag doesn't define anything; such sections
+      // are just skipped.
+      if (Entries[0] == 0)
+        continue;
+
+      if (Entries[0] != GRP_COMDAT)
+        fatal(toString(this) + ": unsupported SHT_GROUP format");
+
+      bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
       if (IsNew) {
         if (Config->Relocatable)
           this->Sections[I] = createInputSection(Sec);
-        continue;
+	continue;
       }
 
+
       // Otherwise, discard group members.
-      for (uint32_t SecIndex : Entries) {
+      for (uint32_t SecIndex : Entries.slice(1)) {
         if (SecIndex >= Size)
           fatal(toString(this) +
                 ": invalid section index in group: " + Twine(SecIndex));
@@ -739,7 +736,8 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
   // sections. Drop those sections to avoid duplicate symbol errors.
   // FIXME: This is glibc PR20543, we should remove this hack once that has been
   // fixed for a while.
-  if (Name.startswith(".gnu.linkonce."))
+  if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" ||
+      Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
     return &InputSection::Discarded;
 
   // If we are creating a new .build-id section, strip existing .build-id
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 5094ddd804a5fe..d7cbbc67a36562 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -175,7 +175,6 @@ template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
 
   StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
                                  const Elf_Shdr &Sec);
-  ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec);
 
 public:
   static bool classof(const InputFile *F) { return F->kind() == Base::ObjKind; }
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index e43a21b923d331..bc203193661b44 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -255,6 +255,9 @@ defm use_android_relr_tags: B<"use-android-relr-tags",
     "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*",
     "Use SHT_RELR / DT_RELR* tags (default)">;
 
+def pic_veneer: F<"pic-veneer">,
+  HelpText<"Always generate position independent thunks (veneers)">;
+
 defm pie: B<"pie",
     "Create a position independent executable",
     "Do not create a position independent executable (default)">;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 812468896f0d01..9ffe8a9cc72e77 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -356,7 +356,7 @@ static bool needsGot(RelExpr Expr) {
 static bool isRelExpr(RelExpr Expr) {
   return isRelExprOneOf<R_PC, R_GOTREL, R_GOTREL_FROM_END, R_MIPS_GOTREL,
                         R_PPC_CALL, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC,
-                        R_RELAX_GOT_PC>(Expr);
+                        R_AARCH64_PLT_PAGE_PC, R_RELAX_GOT_PC>(Expr);
 }
 
 // Returns true if a given relocation can be computed at link-time.
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index eee3f0e330cc3d..7cce94659c9e9d 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -94,7 +94,6 @@ class ScriptParser final : ScriptLexer {
   SortSectionPolicy readSortKind();
   SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
   SymbolAssignment *readAssignment(StringRef Tok);
-  std::tuple<ELFKind, uint16_t, bool> readBfdName();
   void readSort();
   Expr readAssert();
   Expr readConstant();
@@ -385,39 +384,24 @@ void ScriptParser::readOutputArch() {
     skip();
 }
 
-std::tuple<ELFKind, uint16_t, bool> ScriptParser::readBfdName() {
-  StringRef S = unquote(next());
-  if (S == "elf32-i386")
-    return std::make_tuple(ELF32LEKind, EM_386, false);
-  if (S == "elf32-iamcu")
-    return std::make_tuple(ELF32LEKind, EM_IAMCU, false);
-  if (S == "elf32-littlearm")
-    return std::make_tuple(ELF32LEKind, EM_ARM, false);
-  if (S == "elf32-x86-64")
-    return std::make_tuple(ELF32LEKind, EM_X86_64, false);
-  if (S == "elf64-littleaarch64")
-    return std::make_tuple(ELF64LEKind, EM_AARCH64, false);
-  if (S == "elf64-powerpc")
-    return std::make_tuple(ELF64BEKind, EM_PPC64, false);
-  if (S == "elf64-powerpcle")
-    return std::make_tuple(ELF64LEKind, EM_PPC64, false);
-  if (S == "elf64-x86-64")
-    return std::make_tuple(ELF64LEKind, EM_X86_64, false);
-  if (S == "elf32-tradbigmips")
-    return std::make_tuple(ELF32BEKind, EM_MIPS, false);
-  if (S == "elf32-ntradbigmips")
-    return std::make_tuple(ELF32BEKind, EM_MIPS, true);
-  if (S == "elf32-tradlittlemips")
-    return std::make_tuple(ELF32LEKind, EM_MIPS, false);
-  if (S == "elf32-ntradlittlemips")
-    return std::make_tuple(ELF32LEKind, EM_MIPS, true);
-  if (S == "elf64-tradbigmips")
-    return std::make_tuple(ELF64BEKind, EM_MIPS, false);
-  if (S == "elf64-tradlittlemips")
-    return std::make_tuple(ELF64LEKind, EM_MIPS, false);
-
-  setError("unknown output format name: " + S);
-  return std::make_tuple(ELFNoneKind, EM_NONE, false);
+static std::pair<ELFKind, uint16_t> parseBfdName(StringRef S) {
+  return StringSwitch<std::pair<ELFKind, uint16_t>>(S)
+      .Case("elf32-i386", {ELF32LEKind, EM_386})
+      .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU})
+      .Case("elf32-littlearm", {ELF32LEKind, EM_ARM})
+      .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64})
+      .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64})
+      .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64})
+      .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64})
+      .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64})
+      .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64})
+      .Case("elf32-tradbigmips", {ELF32BEKind, EM_MIPS})
+      .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS})
+      .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS})
+      .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS})
+      .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS})
+      .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS})
+      .Default({ELFNoneKind, EM_NONE});
 }
 
 // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(bfdname, big, little).
@@ -425,9 +409,16 @@ std::tuple<ELFKind, uint16_t, bool> ScriptParser::readBfdName() {
 void ScriptParser::readOutputFormat() {
   expect("(");
 
-  std::tuple<ELFKind, uint16_t, bool> BfdTuple = readBfdName();
-  if (Config->EKind == ELFNoneKind)
-    std::tie(Config->EKind, Config->EMachine, Config->MipsN32Abi) = BfdTuple;
+  StringRef Name = unquote(next());
+  StringRef S = Name;
+  if (S.consume_back("-freebsd"))
+    Config->OSABI = ELFOSABI_FREEBSD;
+
+  std::tie(Config->EKind, Config->EMachine) = parseBfdName(S);
+  if (Config->EMachine == EM_NONE)
+    setError("unknown output format name: " + Name);
+  if (S == "elf32-ntradlittlemips" || S == "elf32-ntradbigmips")
+    Config->MipsN32Abi = true;
 
   if (consume(")"))
     return;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f459c1b6b47920..b1a3f8bc70aec7 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1513,8 +1513,10 @@ void RelocationBaseSection::finalizeContents() {
   else
     getParent()->Link = 0;
 
-  if (In.RelaIplt == this || In.RelaPlt == this)
+  if (In.RelaPlt == this)
     getParent()->Info = In.GotPlt->getParent()->SectionIndex;
+  if (In.RelaIplt == this)
+    getParent()->Info = In.IgotPlt->getParent()->SectionIndex;
 }
 
 RelrBaseSection::RelrBaseSection()
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 95b57dc0db4260..7a31d36b0e9025 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -722,7 +722,7 @@ Thunk::~Thunk() = default;
 static Thunk *addThunkAArch64(RelType Type, Symbol &S) {
   if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
     fatal("unrecognized relocation type");
-  if (Config->Pic)
+  if (Config->PicThunk)
     return make<AArch64ADRPThunk>(S);
   return make<AArch64ABSLongThunk>(S);
 }
@@ -739,7 +739,7 @@ static Thunk *addThunkPreArmv7(RelType Reloc, Symbol &S) {
   case R_ARM_JUMP24:
   case R_ARM_CALL:
   case R_ARM_THM_CALL:
-    if (Config->Pic)
+    if (Config->PicThunk)
       return make<ARMV5PILongThunk>(S);
     return make<ARMV5ABSLongThunk>(S);
   }
@@ -794,13 +794,13 @@ static Thunk *addThunkArm(RelType Reloc, Symbol &S) {
   case R_ARM_PLT32:
   case R_ARM_JUMP24:
   case R_ARM_CALL:
-    if (Config->Pic)
+    if (Config->PicThunk)
       return make<ARMV7PILongThunk>(S);
     return make<ARMV7ABSLongThunk>(S);
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
   case R_ARM_THM_CALL:
-    if (Config->Pic)
+    if (Config->PicThunk)
       return make<ThumbV7PILongThunk>(S);
     return make<ThumbV7ABSLongThunk>(S);
   }
@@ -820,7 +820,7 @@ static Thunk *addThunkPPC64(RelType Type, Symbol &S) {
   if (S.isInPlt())
     return make<PPC64PltCallStub>(S);
 
-  if (Config->Pic)
+  if (Config->PicThunk)
     return make<PPC64PILongBranchThunk>(S);
 
   return make<PPC64PDLongBranchThunk>(S);
diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td
index 948faa6875211e..0cda2447e5223b 100644
--- a/lld/MinGW/Options.td
+++ b/lld/MinGW/Options.td
@@ -78,3 +78,9 @@ def version: F<"version">, HelpText<"Display the version number and exit">;
 def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>;
 def alias_strip_s: Flag<["-"], "s">, Alias<strip_all>;
 def alias_strip_S: Flag<["-"], "S">, Alias<strip_debug>;
+
+// Ignored options
+def: S<"plugin">;
+def: J<"plugin=">;
+def: S<"plugin-opt">;
+def: J<"plugin-opt=">;
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index dc5df6795d9936..0bebfb3fb1cec8 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -13,10 +13,12 @@ lld 8.0.0 Release Notes
 Introduction
 ============
 
-This document contains the release notes for the lld linker, release 8.0.0.
-Here we describe the status of lld, including major improvements
-from the previous release. All lld releases may be downloaded
-from the `LLVM releases web site <https://llvm.org/releases/>`_.
+lld is a high-performance linker that supports ELF (Unix), COFF (Windows),
+Mach-O (macOS), MinGW and WebAssembly. lld is command-line-compatible with
+GNU linkers and Microsoft link.exe and is significantly faster than the
+system default linkers.
+
+lld 8.0.0 has lots of feature improvements and bug fixes.
 
 Non-comprehensive list of changes in this release
 =================================================
@@ -33,12 +35,35 @@ ELF Improvements
   non-superpages to a superpage if they are aligned to the superpage
   size. (`r342746 <https://reviews.llvm.org/rL342746>`_)
 
+* lld now attempts to place a ``.note`` segment in the first page of a
+  generated file, so that you can find some important information
+  (``.note.gnu.build-id`` in particular) in a core file even if a core
+  file is truncated by ulimit.
+  (`r349524 <https://reviews.llvm.org/rL349524>`_)
+
+* lld now reports an error if ``_GLOBAL_OFFSET_TABLE_`` symbol is
+  defined by an input object file, as the symbol is supposed to be
+  synthesized by the linker.
+  (`r347854 <https://reviews.llvm.org/rL347854>`_)
+
 * lld/Hexagon can now link Linux kernel and musl libc for Qualcomm
   Hexagon ISA.
 
 * Initial MSP430 ISA support has landed.
 
-* The following flags have been added: ``-z interpose``, ``-z global``
+* lld now uses the ``sigrie`` instruction as a trap instruction for
+  MIPS targets.
+
+* lld now creates a TLS segment for AArch64 with a slightly larger
+  alignment requirement, so that the loader makes a few bytes of room
+  before each TLS segment at runtime. The aim of this change is to
+  make room to accommodate nonstandard Android TLS slots while keeping
+  the compatibility with the standard AArch64 ABI.
+  (`r350681 <https://reviews.llvm.org/rL350681>`_)
+
+* The following flags have been added: ``--call-graph-profile``,
+  ``--no-call-graph-profile``, ``--warn-ifunc-textrel``,
+  ``-z interpose``, ``-z global``, ``-z nodefaultlib``
 
 COFF Improvements
 -----------------
@@ -46,11 +71,30 @@ COFF Improvements
 * PDB GUID is set to hash of PDB contents instead to a random byte
   sequence for build reproducibility.
 
+* ``/pdbsourcepath:`` is now also used to make ``"cwd"``, ``"exe"``, ``"pdb"``
+  in the env block of PDB outputs absolute if they are relative, and to make
+  paths to obj files referenced in PDB outputs absolute if they are relative.
+  Together with the previous item, this makes it possible to generate
+  executables and PDBs that are fully deterministic and independent of the
+  absolute path to the build directory, so that different machines building
+  the same code in different directories can produce exactly the same output.
+
 * The following flags have been added: ``/force:multiple``
 
 * lld now can link against import libraries produced by GNU tools.
 
-* lld can create thunks for ARM, to allow linking images over 16 MB.
+* lld can create thunks for ARM and ARM64, to allow linking larger images
+  (over 16 MB for ARM and over 128 MB for ARM64)
+
+* Several speed and memory usage improvements.
+
+* lld now creates debug info for typedefs.
+
+* lld can now link obj files produced by ``cl.exe /Z7 /Yc``.
+
+* lld now understands ``%_PDB%`` and ``%_EXT%`` in ``/pdbaltpath:``.
+
+* Undefined symbols are now printed in demangled form in addition to raw form.
 
 MinGW Improvements
 ------------------
@@ -66,10 +110,12 @@ MinGW Improvements
   linked in a different order than with GNU ld, inserting a DWARF exception
   table terminator too early.)
 
-MachO Improvements
-------------------
+* lld now supports COFF embedded directives for linking to nondefault
+  libraries, just like for the normal COFF target.
 
-* Item 1.
+* Actually generate a codeview build id signature, even if not creating a PDB.
+  Previously, the ``--build-id`` option did not actually generate a build id
+  unless ``--pdb`` was specified.
 
 WebAssembly Improvements
 ------------------------
diff --git a/lld/docs/index.rst b/lld/docs/index.rst
index da1c894f3d8339..2564e9b6310fd7 100644
--- a/lld/docs/index.rst
+++ b/lld/docs/index.rst
@@ -173,4 +173,5 @@ document soon.
    AtomLLD
    WebAssembly
    windows_support
+   missingkeyfunction
    ReleaseNotes
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index d1ce4a3517f4d4..889d5feabe4d1e 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -311,6 +311,8 @@ Write optimization remarks in YAML format to
 .Ar file .
 .It Fl -opt-remarks-with-hotness
 Include hotness information in the optimization remarks file.
+.It Fl -pic-veneer
+Always generate position independent thunks.
 .It Fl -pie
 Create a position independent executable.
 .It Fl -print-gc-sections
diff --git a/lld/docs/missingkeyfunction.rst b/lld/docs/missingkeyfunction.rst
index 410c749c3b036d..54ad3251f794e7 100644
--- a/lld/docs/missingkeyfunction.rst
+++ b/lld/docs/missingkeyfunction.rst
@@ -1,5 +1,5 @@
-Missing Key Method
-==================
+Missing Key Function
+====================
 
 If your build failed with a linker error something like this::
 
diff --git a/lld/test/COFF/arm-thumb-thunks-pdb.s b/lld/test/COFF/arm-thumb-thunks-pdb.s
new file mode 100644
index 00000000000000..9e972a78d37fff
--- /dev/null
+++ b/lld/test/COFF/arm-thumb-thunks-pdb.s
@@ -0,0 +1,18 @@
+// REQUIRES: arm
+// RUN: llvm-mc -filetype=obj -triple=thumbv7-windows %s -o %t.obj
+// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe -debug -pdb:%t.pdb -verbose 2>&1 | FileCheck %s --check-prefix=VERBOSE
+
+// VERBOSE: Added 1 thunks with margin {{.*}} in {{.*}} passes
+
+    .syntax unified
+    .globl main
+    .globl func1
+    .text
+main:
+    bne func1
+    bx lr
+    .section .text$a, "xr"
+    .space 0x100000
+    .section .text$b, "xr"
+func1:
+    bx lr
diff --git a/lld/test/COFF/arm64-branch-range.test b/lld/test/COFF/arm64-branch-range.test
deleted file mode 100644
index 0b581e9c464dc8..00000000000000
--- a/lld/test/COFF/arm64-branch-range.test
+++ /dev/null
@@ -1,16 +0,0 @@
-// REQUIRES: aarch64
-
-// RUN: echo -e '.globl _start\n _start:\n bl too_far26\n' > %t.main26.s
-// RUN: echo -e '.globl _start\n _start:\n b.ne too_far19\n' > %t.main19.s
-// RUN: echo -e '.globl _start\n _start:\n tbz x0, #0, too_far14\n' > %t.main14.s
-
-// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.main26.s -o %t.main26.obj
-// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.main19.s -o %t.main19.obj
-// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.main14.s -o %t.main14.obj
-// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/far-arm64-abs.s -o %t.far.obj
-
-// RUN: not lld-link -base:0x10000 -entry:_start -subsystem:console %t.main26.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s
-// RUN: not lld-link -base:0x10000 -entry:_start -subsystem:console %t.main19.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s
-// RUN: not lld-link -base:0x10000 -entry:_start -subsystem:console %t.main14.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s
-
-// CHECK: relocation out of range
diff --git a/lld/test/COFF/arm64-thunks.s b/lld/test/COFF/arm64-thunks.s
new file mode 100644
index 00000000000000..49004544c43893
--- /dev/null
+++ b/lld/test/COFF/arm64-thunks.s
@@ -0,0 +1,27 @@
+// REQUIRES: aarch64
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %s -o %t.obj
+// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+// RUN: llvm-objdump -d %t.exe | FileCheck -check-prefix=DISASM %s
+
+// VERBOSE: Added 1 thunks with margin {{.*}} in 1 passes
+
+    .globl main
+    .globl func1
+    .text
+main:
+    tbz w0, #0, func1
+    ret
+    .section .text$a, "xr"
+    .space 0x8000
+    .section .text$b, "xr"
+func1:
+    ret
+
+// DISASM: 0000000140001000 .text:
+// DISASM: 140001000:      40 00 00 36     tbz     w0, #0, #8 <.text+0x8>
+// DISASM: 140001004:      c0 03 5f d6     ret
+// DISASM: 140001008:      50 00 00 90     adrp    x16, #32768
+// DISASM: 14000100c:      10 52 00 91     add     x16, x16, #20
+// DISASM: 140001010:      00 02 1f d6     br      x16
+
+// DISASM: 140009014:      c0 03 5f d6     ret
diff --git a/lld/test/COFF/imports.test b/lld/test/COFF/imports.test
index 64f3900a1c2f3e..f54bdfd88dfab1 100644
--- a/lld/test/COFF/imports.test
+++ b/lld/test/COFF/imports.test
@@ -34,3 +34,16 @@ IMPORT-NEXT:   Symbol: ExitProcess (0)
 IMPORT-NEXT:   Symbol:  (50)
 IMPORT-NEXT:   Symbol: MessageBoxA (1)
 IMPORT-NEXT: }
+
+# RUN: lld-link /out:%t.exe /entry:main /subsystem:console /merge:.rdata=.text \
+# RUN:   %p/Inputs/hello64.obj %p/Inputs/std64.lib /include:ExitProcess
+# RUN: llvm-readobj -coff-imports %t.exe | FileCheck -check-prefix=MERGE %s
+
+MERGE:      Import {
+MERGE-NEXT:   Name: std64.dll
+MERGE-NEXT:   ImportLookupTableRVA: 0x1090
+MERGE-NEXT:   ImportAddressTableRVA: 0x10B0
+MERGE-NEXT:   Symbol: ExitProcess (0)
+MERGE-NEXT:   Symbol:  (50)
+MERGE-NEXT:   Symbol: MessageBoxA (1)
+MERGE-NEXT: }
diff --git a/lld/test/COFF/pdb-relative-source-lines.test b/lld/test/COFF/pdb-relative-source-lines.test
index 865d7a6d8a0a4f..547056785962ad 100644
--- a/lld/test/COFF/pdb-relative-source-lines.test
+++ b/lld/test/COFF/pdb-relative-source-lines.test
@@ -37,26 +37,26 @@ RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=line
 RUN: ./lld-link -debug "-pdbsourcepath:/usr/src" -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj
 RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=lines,fc %t/out.pdb | FileCheck --check-prefix=POSIX %s
 
-CHECK-LABEL:  - Module:          'c:\src{{[\\/]}}pdb_lines_1_relative.obj'
-CHECK-NEXT:     ObjFile:         'c:\src{{[\\/]}}pdb_lines_1_relative.obj'
+CHECK-LABEL:  - Module:          'c:\src\pdb_lines_1_relative.obj'
+CHECK-NEXT:     ObjFile:         'c:\src\pdb_lines_1_relative.obj'
 CHECK:          SourceFiles:
-CHECK-NEXT:       - 'c:\src{{[\\/]}}pdb_lines_1.c'
-CHECK-NEXT:       - 'c:\src{{[\\/]}}foo.h'
+CHECK-NEXT:       - 'c:\src\pdb_lines_1.c'
+CHECK-NEXT:       - 'c:\src\foo.h'
 CHECK:          Subsections:
-CHECK:                - FileName:        'c:\src{{[\\/]}}pdb_lines_1.c'
-CHECK:                - FileName:        'c:\src{{[\\/]}}foo.h'
+CHECK:                - FileName:        'c:\src\pdb_lines_1.c'
+CHECK:                - FileName:        'c:\src\foo.h'
 CHECK:            - !FileChecksums
-CHECK:                - FileName:        'c:\src{{[\\/]}}pdb_lines_1.c'
-CHECK:                - FileName:        'c:\src{{[\\/]}}foo.h'
+CHECK:                - FileName:        'c:\src\pdb_lines_1.c'
+CHECK:                - FileName:        'c:\src\foo.h'
 
-CHECK-LABEL:  - Module:          'c:\src{{[\\/]}}pdb_lines_2_relative.obj'
-CHECK-NEXT:     ObjFile:         'c:\src{{[\\/]}}pdb_lines_2_relative.obj'
+CHECK-LABEL:  - Module:          'c:\src\pdb_lines_2_relative.obj'
+CHECK-NEXT:     ObjFile:         'c:\src\pdb_lines_2_relative.obj'
 CHECK:          SourceFiles:
-CHECK-NEXT:       - 'c:\src{{[\\/]}}pdb_lines_2.c'
+CHECK-NEXT:       - 'c:\src\pdb_lines_2.c'
 CHECK:          Subsections:
-CHECK:                - FileName:        'c:\src{{[\\/]}}pdb_lines_2.c'
+CHECK:                - FileName:        'c:\src\pdb_lines_2.c'
 CHECK:            - !FileChecksums
-CHECK:                - FileName:        'c:\src{{[\\/]}}pdb_lines_2.c'
+CHECK:                - FileName:        'c:\src\pdb_lines_2.c'
 
 CHECK-LABEL:  - Kind:            S_ENVBLOCK
 CHECK-NEXT:     EnvBlockSym:     
@@ -64,33 +64,33 @@ CHECK-NEXT:       Entries:
 CHECK-NEXT:         - cwd
 CHECK-NEXT:         - 'c:\src'
 CHECK-NEXT:         - exe
-CHECK-NEXT:         - 'c:\src{{[\\/]}}lld-link'
+CHECK-NEXT:         - 'c:\src\lld-link'
 CHECK-NEXT:         - pdb 
-CHECK-NEXT:         - 'c:\src{{[\\/]}}out.pdb'
+CHECK-NEXT:         - 'c:\src\out.pdb'
 CHECK-NEXT:         - cmd
 CHECK-NEXT:         - '-debug -pdbsourcepath:c:\src -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj'
 
 
-POSIX-LABEL:  - Module:          '/usr/src{{[\\/]}}pdb_lines_1_relative.obj'
-POSIX-NEXT:     ObjFile:         '/usr/src{{[\\/]}}pdb_lines_1_relative.obj'
+POSIX-LABEL:  - Module:          '/usr/src/pdb_lines_1_relative.obj'
+POSIX-NEXT:     ObjFile:         '/usr/src/pdb_lines_1_relative.obj'
 POSIX:          SourceFiles:
-POSIX-NEXT:       - '/usr/src{{[\\/]}}pdb_lines_1.c'
-POSIX-NEXT:       - '/usr/src{{[\\/]}}foo.h'
+POSIX-NEXT:       - '/usr/src/pdb_lines_1.c'
+POSIX-NEXT:       - '/usr/src/foo.h'
 POSIX:          Subsections:
-POSIX:                - FileName:        '/usr/src{{[\\/]}}pdb_lines_1.c'
-POSIX:                - FileName:        '/usr/src{{[\\/]}}foo.h'
+POSIX:                - FileName:        '/usr/src/pdb_lines_1.c'
+POSIX:                - FileName:        '/usr/src/foo.h'
 POSIX:            - !FileChecksums
-POSIX:                - FileName:        '/usr/src{{[\\/]}}pdb_lines_1.c'
-POSIX:                - FileName:        '/usr/src{{[\\/]}}foo.h'
+POSIX:                - FileName:        '/usr/src/pdb_lines_1.c'
+POSIX:                - FileName:        '/usr/src/foo.h'
 
-POSIX-LABEL:  - Module:          '/usr/src{{[\\/]}}pdb_lines_2_relative.obj'
-POSIX-NEXT:     ObjFile:         '/usr/src{{[\\/]}}pdb_lines_2_relative.obj'
+POSIX-LABEL:  - Module:          '/usr/src/pdb_lines_2_relative.obj'
+POSIX-NEXT:     ObjFile:         '/usr/src/pdb_lines_2_relative.obj'
 POSIX:          SourceFiles:
-POSIX-NEXT:       - '/usr/src{{[\\/]}}pdb_lines_2.c'
+POSIX-NEXT:       - '/usr/src/pdb_lines_2.c'
 POSIX:          Subsections:
-POSIX:                - FileName:        '/usr/src{{[\\/]}}pdb_lines_2.c'
+POSIX:                - FileName:        '/usr/src/pdb_lines_2.c'
 POSIX:            - !FileChecksums
-POSIX:                - FileName:        '/usr/src{{[\\/]}}pdb_lines_2.c'
+POSIX:                - FileName:        '/usr/src/pdb_lines_2.c'
 
 POSIX-LABEL:  - Kind:            S_ENVBLOCK
 POSIX-NEXT:     EnvBlockSym:     
@@ -98,8 +98,8 @@ POSIX-NEXT:       Entries:
 POSIX-NEXT:         - cwd
 POSIX-NEXT:         - '/usr/src'
 POSIX-NEXT:         - exe
-POSIX-NEXT:         - '/usr/src{{[\\/]}}lld-link'
+POSIX-NEXT:         - '/usr/src/lld-link'
 POSIX-NEXT:         - pdb 
-POSIX-NEXT:         - '/usr/src{{[\\/]}}out.pdb'
+POSIX-NEXT:         - '/usr/src/out.pdb'
 POSIX-NEXT:         - cmd
 POSIX-NEXT:         - '-debug -pdbsourcepath:/usr/src -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj'
diff --git a/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s b/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s
new file mode 100644
index 00000000000000..3db9070dbd07bb
--- /dev/null
+++ b/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s
@@ -0,0 +1,44 @@
+# REQUIRES: aarch64
+# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
+# RUN: ld.lld -pie %t.o -o %tout
+# RUN: llvm-objdump -D %tout | FileCheck %s
+# RUN: llvm-readobj -r %tout | FileCheck %s -check-prefix=CHECK-RELOCS
+
+# Test that when we take the address of a preemptible ifunc using -fpie, we can
+# handle the case when the ifunc is in the same translation unit as the address
+# taker. In this case the compiler knows that ifunc is not defined in a shared
+# library so it can use a non got generating relative reference.
+.text
+.globl myfunc
+.type myfunc,@gnu_indirect_function
+myfunc:
+ ret
+
+.text
+.globl main
+.type main,@function
+main:
+ adrp x8, myfunc
+ add  x8, x8, :lo12: myfunc
+ ret
+
+# CHECK: 0000000000010000 myfunc:
+# CHECK-NEXT:    10000:	c0 03 5f d6 	ret
+# CHECK: 0000000000010004 main:
+# CHECK-NEXT:    10004:	08 00 00 90 	adrp	x8, #0
+# x8 = 0x10000
+# CHECK-NEXT:    10008:	08 41 00 91 	add	x8, x8, #16
+# x8 = 0x10010 = .plt for myfunc
+# CHECK-NEXT:    1000c:	c0 03 5f d6 	ret
+# CHECK-NEXT: Disassembly of section .plt:
+# CHECK-NEXT: 0000000000010010 .plt:
+# CHECK-NEXT:    10010:	90 00 00 90 	adrp	x16, #65536
+# CHECK-NEXT:    10014:	11 02 40 f9 	ldr	x17, [x16]
+# CHECK-NEXT:    10018:	10 02 00 91 	add	x16, x16, #0
+# CHECK-NEXT:    1001c:	20 02 1f d6 	br	x17
+
+# CHECK-RELOCS: Relocations [
+# CHECK-RELOCS-NEXT:   Section {{.*}} .rela.plt {
+# CHECK-RELOCS-NEXT:     0x20000 R_AARCH64_IRELATIVE - 0x10000
+# CHECK-RELOCS-NEXT:   }
+# CHECK-RELOCS-NEXT: ]
diff --git a/lld/test/ELF/arm-force-pi-thunk.s b/lld/test/ELF/arm-force-pi-thunk.s
new file mode 100644
index 00000000000000..2c88de0424ee5c
--- /dev/null
+++ b/lld/test/ELF/arm-force-pi-thunk.s
@@ -0,0 +1,87 @@
+// REQUIRES: arm
+// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t
+// RUN: echo "SECTIONS { \
+// RUN:       . = SIZEOF_HEADERS; \
+// RUN:       .text_low : { *(.text_low) *(.text_low2) } \
+// RUN:       .text_high 0x2000000 : { *(.text_high) *(.text_high2) } \
+// RUN:       } " > %t.script
+// RUN: ld.lld --pic-veneer --script %t.script %t -o %t2 2>&1
+// RUN: llvm-objdump -d -triple=thumbv7a-none-linux-gnueabi %t2 | FileCheck %s
+
+// Test that we can force generation of position independent thunks even when
+// inputs are not pic.
+
+ .syntax unified
+ .section .text_low, "ax", %progbits
+ .thumb
+ .globl _start
+_start: bx lr
+ .globl low_target
+ .type low_target, %function
+low_target:
+ bl high_target
+ bl high_target2
+
+ .section .text_low2, "ax", %progbits
+ .thumb
+ .globl low_target2
+ .type low_target2, %function
+low_target2:
+ bl high_target
+ bl high_target2
+
+// CHECK: Disassembly of section .text_low:
+// CHECK-NEXT: _start:
+// CHECK-NEXT:       94:        70 47   bx      lr
+// CHECK: low_target:
+// CHECK-NEXT:       96:        00 f0 03 f8     bl      #6
+// CHECK-NEXT:       9a:        00 f0 07 f8     bl      #14
+// CHECK-NEXT:       9e:        d4 d4   bmi     #-88
+// CHECK: __ThumbV7PILongThunk_high_target:
+// CHECK-NEXT:       a0:        4f f6 55 7c     movw    r12, #65365
+// CHECK-NEXT:       a4:        c0 f2 ff 1c     movt    r12, #511
+// CHECK-NEXT:       a8:        fc 44   add     r12, pc
+// CHECK-NEXT:       aa:        60 47   bx      r12
+// CHECK: __ThumbV7PILongThunk_high_target2:
+// CHECK-NEXT:       ac:        4f f6 69 7c     movw    r12, #65385
+// CHECK-NEXT:       b0:        c0 f2 ff 1c     movt    r12, #511
+// CHECK-NEXT:       b4:        fc 44   add     r12, pc
+// CHECK-NEXT:       b6:        60 47   bx      r12
+// CHECK: low_target2:
+// CHECK-NEXT:       b8:        ff f7 f2 ff     bl      #-28
+// CHECK-NEXT:       bc:        ff f7 f6 ff     bl      #-20
+
+
+ .section .text_high, "ax", %progbits
+ .thumb
+ .globl high_target
+ .type high_target, %function
+high_target:
+ bl low_target
+ bl low_target2
+
+ .section .text_high2, "ax", %progbits
+ .thumb
+ .globl high_target2
+ .type high_target2, %function
+high_target2:
+ bl low_target
+ bl low_target2
+
+// CHECK: Disassembly of section .text_high:
+// CHECK-NEXT: high_target:
+// CHECK-NEXT:  2000000:        00 f0 02 f8     bl      #4
+// CHECK-NEXT:  2000004:        00 f0 06 f8     bl      #12
+// CHECK: __ThumbV7PILongThunk_low_target:
+// CHECK-NEXT:  2000008:        40 f2 83 0c     movw    r12, #131
+// CHECK-NEXT:  200000c:        cf f6 00 6c     movt    r12, #65024
+// CHECK-NEXT:  2000010:        fc 44   add     r12, pc
+// CHECK-NEXT:  2000012:        60 47   bx      r12
+// CHECK: __ThumbV7PILongThunk_low_target2:
+// CHECK-NEXT:  2000014:        40 f2 99 0c     movw    r12, #153
+// CHECK-NEXT:  2000018:        cf f6 00 6c     movt    r12, #65024
+// CHECK-NEXT:  200001c:        fc 44   add     r12, pc
+// CHECK-NEXT:  200001e:        60 47   bx      r12
+// CHECK: high_target2:
+// CHECK-NEXT:  2000020:        ff f7 f2 ff     bl      #-28
+// CHECK-NEXT:  2000024:        ff f7 f6 ff     bl      #-20
diff --git a/lld/test/ELF/arm-gnu-ifunc.s b/lld/test/ELF/arm-gnu-ifunc.s
index 8a7cb0ae237a79..92f87b5d5faefd 100644
--- a/lld/test/ELF/arm-gnu-ifunc.s
+++ b/lld/test/ELF/arm-gnu-ifunc.s
@@ -35,6 +35,8 @@ _start:
 // CHECK-NEXT:     Address: 0x100F4
 // CHECK-NEXT:     Offset: 0xF4
 // CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info: 4
 // CHECK:          Name: .plt
 // CHECK-NEXT:     Type: SHT_PROGBITS
 // CHECK-NEXT:     Flags [
@@ -44,7 +46,8 @@ _start:
 // CHECK-NEXT:     Address: 0x11020
 // CHECK-NEXT:     Offset: 0x1020
 // CHECK-NEXT:     Size: 32
-// CHECK:          Name: .got
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .got
 // CHECK-NEXT:     Type: SHT_PROGBITS
 // CHECK-NEXT:     Flags [
 // CHECK-NEXT:       SHF_ALLOC
diff --git a/lld/test/ELF/comdat-linkonce.s b/lld/test/ELF/comdat-linkonce.s
index 8721f58bb20ce1..8b1d4b362e86c3 100644
--- a/lld/test/ELF/comdat-linkonce.s
+++ b/lld/test/ELF/comdat-linkonce.s
@@ -4,7 +4,12 @@
 // RUN: ld.lld -shared %t.o %t2.o -o %t
 // RUN: ld.lld -shared %t2.o %t.o -o %t
 
-.section .gnu.linkonce.t.zed
+.section .gnu.linkonce.t.__x86.get_pc_thunk.bx
 .globl abc
 abc:
 nop
+
+.section .gnu.linkonce.t.__i686.get_pc_thunk.bx
+.globl def
+def:
+nop
diff --git a/lld/test/ELF/emulation-aarch64.s b/lld/test/ELF/emulation-aarch64.s
index b9a6428fa953e0..c0edc9a69d36de 100644
--- a/lld/test/ELF/emulation-aarch64.s
+++ b/lld/test/ELF/emulation-aarch64.s
@@ -30,5 +30,28 @@
 # AARCH64-NEXT:   Flags [ (0x0)
 # AARCH64-NEXT:   ]
 
+# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %taarch64fbsd
+# RUN: echo 'OUTPUT_FORMAT(elf64-aarch64-freebsd)' > %taarch64fbsd.script
+# RUN: ld.lld %taarch64fbsd.script %taarch64fbsd -o %t2aarch64fbsd
+# RUN: llvm-readobj -file-headers %t2aarch64fbsd | FileCheck --check-prefix=AARCH64-FBSD %s
+# AARCH64-FBSD:      ElfHeader {
+# AARCH64-FBSD-NEXT:   Ident {
+# AARCH64-FBSD-NEXT:     Magic: (7F 45 4C 46)
+# AARCH64-FBSD-NEXT:     Class: 64-bit (0x2)
+# AARCH64-FBSD-NEXT:     DataEncoding: LittleEndian (0x1)
+# AARCH64-FBSD-NEXT:     FileVersion: 1
+# AARCH64-FBSD-NEXT:     OS/ABI: FreeBSD (0x9)
+# AARCH64-FBSD-NEXT:     ABIVersion: 0
+# AARCH64-FBSD-NEXT:     Unused: (00 00 00 00 00 00 00)
+# AARCH64-FBSD-NEXT:   }
+# AARCH64-FBSD-NEXT:   Type: Executable (0x2)
+# AARCH64-FBSD-NEXT:   Machine: EM_AARCH64 (0xB7)
+# AARCH64-FBSD-NEXT:   Version: 1
+# AARCH64-FBSD-NEXT:   Entry:
+# AARCH64-FBSD-NEXT:   ProgramHeaderOffset: 0x40
+# AARCH64-FBSD-NEXT:   SectionHeaderOffset:
+# AARCH64-FBSD-NEXT:   Flags [ (0x0)
+# AARCH64-FBSD-NEXT:   ]
+
 .globl _start
 _start:
diff --git a/lld/test/ELF/emulation-ppc.s b/lld/test/ELF/emulation-ppc.s
index 12e84782252fdf..843e77604779b1 100644
--- a/lld/test/ELF/emulation-ppc.s
+++ b/lld/test/ELF/emulation-ppc.s
@@ -35,6 +35,38 @@
 # PPC64-NEXT:   StringTableSectionIndex:
 # PPC64-NEXT: }
 
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-freebsd %s -o %tppc64fbsd
+# RUN: echo 'OUTPUT_FORMAT(elf64-powerpc-freebsd)' > %tppc64fbsd.script
+# RUN: ld.lld %tppc64fbsd.script  %tppc64fbsd -o %t2ppc64fbsd
+# RUN: llvm-readobj -file-headers %t2ppc64fbsd | FileCheck --check-prefix=PPC64-FBSD %s
+
+# PPC64-FBSD:      ElfHeader {
+# PPC64-FBSD-NEXT:   Ident {
+# PPC64-FBSD-NEXT:     Magic: (7F 45 4C 46)
+# PPC64-FBSD-NEXT:     Class: 64-bit (0x2)
+# PPC64-FBSD-NEXT:     DataEncoding: BigEndian (0x2)
+# PPC64-FBSD-NEXT:     FileVersion: 1
+# PPC64-FBSD-NEXT:     OS/ABI: FreeBSD (0x9)
+# PPC64-FBSD-NEXT:     ABIVersion: 0
+# PPC64-FBSD-NEXT:     Unused: (00 00 00 00 00 00 00)
+# PPC64-FBSD-NEXT:   }
+# PPC64-FBSD-NEXT:   Type: Executable (0x2)
+# PPC64-FBSD-NEXT:   Machine: EM_PPC64 (0x15)
+# PPC64-FBSD-NEXT:   Version: 1
+# PPC64-FBSD-NEXT:   Entry:
+# PPC64-FBSD-NEXT:   ProgramHeaderOffset: 0x40
+# PPC64-FBSD-NEXT:   SectionHeaderOffset:
+# PPC64-FBSD-NEXT:   Flags [ (0x2)
+# PPC64-FBSD-NEXT:     0x2
+# PPC64-FBSD-NEXT:   ]
+# PPC64-FBSD-NEXT:   HeaderSize: 64
+# PPC64-FBSD-NEXT:   ProgramHeaderEntrySize: 56
+# PPC64-FBSD-NEXT:   ProgramHeaderCount:
+# PPC64-FBSD-NEXT:   SectionHeaderEntrySize: 64
+# PPC64-FBSD-NEXT:   SectionHeaderCount:
+# PPC64-FBSD-NEXT:   StringTableSectionIndex:
+# PPC64-FBSD-NEXT: }
+
 # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %tppc64le
 # RUN: ld.lld -m elf64lppc %tppc64le -o %t2ppc64le
 # RUN: llvm-readobj -file-headers %t2ppc64le | FileCheck --check-prefix=PPC64LE %s
diff --git a/lld/test/ELF/emulation-x86.s b/lld/test/ELF/emulation-x86.s
index 65d807c67f2f0c..02b89435669287 100644
--- a/lld/test/ELF/emulation-x86.s
+++ b/lld/test/ELF/emulation-x86.s
@@ -7,6 +7,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.sysv
 # RUN: ld.lld -m elf_amd64_fbsd %t.sysv -o %t.freebsd
 # RUN: llvm-readobj -file-headers %t.freebsd | FileCheck --check-prefix=AMD64 %s
+# RUN: echo 'OUTPUT_FORMAT(elf64-x86-64-freebsd)' > %t4x64.script
+# RUN: ld.lld %t4x64.script %tx64 -o %t4x64
+# RUN: llvm-readobj -file-headers %t4x64 | FileCheck --check-prefix=AMD64 %s
 # AMD64:      ElfHeader {
 # AMD64-NEXT:   Ident {
 # AMD64-NEXT:     Magic: (7F 45 4C 46)
@@ -137,10 +140,13 @@
 # X86-NEXT: }
 
 # RUN: llvm-mc -filetype=obj -triple=i686-unknown-freebsd %s -o %tx86fbsd
-# RUN: ld.lld -m elf_i386_fbsd %tx86fbsd -o %t2x86_fbsd
-# RUN: llvm-readobj -file-headers %t2x86_fbsd | FileCheck --check-prefix=X86FBSD %s
+# RUN: ld.lld -m elf_i386_fbsd %tx86fbsd -o %t2x86fbsd
+# RUN: llvm-readobj -file-headers %t2x86fbsd | FileCheck --check-prefix=X86FBSD %s
 # RUN: ld.lld %tx86fbsd -o %t3x86fbsd
 # RUN: llvm-readobj -file-headers %t3x86fbsd | FileCheck --check-prefix=X86FBSD %s
+# RUN: echo 'OUTPUT_FORMAT(elf32-i386-freebsd)' > %t4x86fbsd.script
+# RUN: ld.lld %t4x86fbsd.script %tx86fbsd -o %t4x86fbsd
+# RUN: llvm-readobj -file-headers %t4x86fbsd | FileCheck --check-prefix=X86FBSD %s
 # X86FBSD:      ElfHeader {
 # X86FBSD-NEXT:   Ident {
 # X86FBSD-NEXT:     Magic: (7F 45 4C 46)
diff --git a/lld/test/ELF/no-discard-this_module.s b/lld/test/ELF/no-discard-this_module.s
new file mode 100644
index 00000000000000..3ce56d165fc1a6
--- /dev/null
+++ b/lld/test/ELF/no-discard-this_module.s
@@ -0,0 +1,41 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-linux-gnu -save-temp-labels %s -o %t
+// RUN: ld.lld %t -o %t2
+// RUN: llvm-readobj -s -sd -t %t2 | FileCheck %s
+
+.global _start
+_start:
+
+// This section and symbol are used by Linux kernel modules. Ensure they are
+// not accidentally discarded.
+.section .gnu.linkonce.this_module:
+__this_module:
+.byte 0x00
+
+// CHECK: Section {
+// CHECK:    Index:
+// CHECK:    Name: .gnu.linkonce.this_module
+// CHECK-NEXT:    Type: SHT_PROGBITS
+// CHECK-NEXT:    Flags [
+// CHECK-NEXT:    ]
+// CHECK-NEXT:    Address:
+// CHECK-NEXT:    Offset:
+// CHECK-NEXT:    Size:
+// CHECK-NEXT:    Link:
+// CHECK-NEXT:    Info:
+// CHECK-NEXT:    AddressAlignment:
+// CHECK-NEXT:    EntrySize:
+// CHECK-NEXT:    SectionData (
+// CHECK-NEXT:      0000: 00                                   |.|
+// CHECK-NEXT:    )
+// CHECK-NEXT:  }
+
+// CHECK:  Symbol {
+// CHECK:    Name: __this_module
+// CHECK-NEXT:    Value:
+// CHECK-NEXT:    Size:
+// CHECK-NEXT:    Binding: Local
+// CHECK-NEXT:    Type: None
+// CHECK-NEXT:    Other:
+// CHECK-NEXT:    Section: .gnu.linkonce.this_module:
+// CHECK-NEXT:  }
diff --git a/lld/test/ELF/sht-group-empty.test b/lld/test/ELF/sht-group-empty.test
new file mode 100644
index 00000000000000..46c77f332e7e70
--- /dev/null
+++ b/lld/test/ELF/sht-group-empty.test
@@ -0,0 +1,55 @@
+# RUN: yaml2obj %s -o %t.o
+# RUN: ld.lld %t.o %t.o -o %t -r
+# RUN: llvm-readobj -s %t | FileCheck %s
+
+# CHECK:     Name: .text.foo
+# CHECK:     Name: .rela.text.foo
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+Sections:
+  - Name:            .group
+    Type:            SHT_GROUP
+    Link:            .symtab
+    Info:            foo
+    Members:
+      - SectionOrType:    GRP_COMDAT
+      - SectionOrType:    .text.foo
+      - SectionOrType:    .text.bar
+      - SectionOrType:    .note
+  - Name:            .note
+    Type:            SHT_NOTE
+    Flags:           [ SHF_GROUP ]
+  - Name:            .text.foo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR, SHF_GROUP ]
+  - Name:            .text.bar
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR, SHF_GROUP ]
+  - Name:            .rela.text.foo
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK, SHF_GROUP ]
+    Link:            .symtab
+    Info:            .text.foo
+    Relocations:
+      - Offset:          0x0000000000000000
+        Symbol:          foo
+        Type:            R_X86_64_64
+  - Name:            .rela.text.bar
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK, SHF_GROUP ]
+    Link:            .symtab
+    Info:            .text.bar
+    Relocations:
+      - Offset:          0x0000000000000000
+        Symbol:          bar
+        Type:            R_X86_64_64
+Symbols:
+  Global:
+    - Name:            foo
+    - Name:            bar
+
diff --git a/lld/test/ELF/tls-opt-x86_64-noplt.s b/lld/test/ELF/tls-opt-x86_64-noplt.s
new file mode 100644
index 00000000000000..69ec49871210e2
--- /dev/null
+++ b/lld/test/ELF/tls-opt-x86_64-noplt.s
@@ -0,0 +1,88 @@
+// REQUIRES: x86
+
+// Checks whether the TLS optimizations match the cases in Chapter 11 of
+// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
+
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/tls-opt-gdie.s -o %tso.o
+// RUN: ld.lld -shared %tso.o -o %t.so
+// RUN: ld.lld %t.o %t.so -o %t1
+// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=RELOC %s
+// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
+
+// RELOC:      Relocations [
+// RELOC-NEXT:  Section {{.*}} .rela.dyn {
+// RELOC-NEXT:    0x2020C0 R_X86_64_TPOFF64 tlsshared0 0x0
+// RELOC-NEXT:    0x2020C8 R_X86_64_TPOFF64 tlsshared1 0x0
+// RELOC-NEXT:  }
+// RELOC-NEXT: ]
+
+// DISASM:      _start:
+
+// Table 11.5: GD -> IE Code Transition (LP64)
+// DISASM-NEXT: 201000: 64 48 8b 04 25 00 00 00 00      movq %fs:0, %rax
+// DISASM-NEXT: 201009: 48 03 05 b0 10 00 00            addq 4272(%rip), %rax
+// DISASM-NEXT: 201010: 64 48 8b 04 25 00 00 00 00      movq %fs:0, %rax
+// DISASM-NEXT: 201019: 48 03 05 a8 10 00 00            addq 4264(%rip), %rax
+
+// Table 11.7: GD -> LE Code Transition (LP64)
+// DISASM-NEXT: 201020: 64 48 8b 04 25 00 00 00 00      movq %fs:0, %rax
+// DISASM-NEXT: 201029: 48 8d 80 f8 ff ff ff            leaq -8(%rax), %rax
+// DISASM-NEXT: 201030: 64 48 8b 04 25 00 00 00 00      movq %fs:0, %rax
+// DISASM-NEXT: 201039: 48 8d 80 fc ff ff ff            leaq -4(%rax), %rax
+
+
+// Table 11.9: LD -> LE Code Transition (LP64)
+// DISASM-NEXT: 201040: 66 66 66 66 64 48 8b 04 25 00 00 00 00  movq %fs:0, %rax
+// DISASM-NEXT: 20104d: 66 66 66 66 64 48 8b 04 25 00 00 00 00  movq %fs:0, %rax
+
+.type tls0,@object
+.section .tbss,"awT",@nobits
+.globl tls0
+.align 4
+tls0:
+ .long 0
+ .size tls0, 4
+
+.type  tls1,@object
+.globl tls1
+.align 4
+tls1:
+ .long 0
+ .size tls1, 4
+
+.section .text
+.globl _start
+_start:
+ // Table 11.5: GD -> IE Code Transition (LP64)
+ .byte  0x66
+ leaq   tlsshared0@tlsgd(%rip),%rdi
+ .byte  0x66
+ rex64
+ call   *__tls_get_addr@GOTPCREL(%rip)
+
+ .byte  0x66
+ leaq   tlsshared1@tlsgd(%rip),%rdi
+ .byte  0x66
+ rex64
+ call   *__tls_get_addr@GOTPCREL(%rip)
+
+ // Table 11.7: GD -> LE Code Transition (LP64)
+ .byte  0x66
+ leaq   tls0@tlsgd(%rip),%rdi
+ .byte  0x66
+ rex64
+ call   *__tls_get_addr@GOTPCREL(%rip)
+
+ .byte  0x66
+ leaq   tls1@tlsgd(%rip),%rdi
+ .byte  0x66
+ rex64
+ call   *__tls_get_addr@GOTPCREL(%rip)
+
+ // Table 11.9: LD -> LE Code Transition (LP64)
+ leaq   tls0@tlsld(%rip),%rdi
+ call   *__tls_get_addr@GOTPCREL(%rip)
+
+ leaq   tls1@tlsld(%rip),%rdi
+ call   *__tls_get_addr@GOTPCREL(%rip)
diff --git a/lld/test/wasm/data-layout.ll b/lld/test/wasm/data-layout.ll
index b01c13ac9b82a0..7c215efb0d8f4c 100644
--- a/lld/test/wasm/data-layout.ll
+++ b/lld/test/wasm/data-layout.ll
@@ -85,10 +85,10 @@ target triple = "wasm32-unknown-unknown"
 ; RELOC:       - Type:            DATA
 ; RELOC-NEXT:     Relocations:
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_I32
-; RELOC-NEXT:         Index:           6
+; RELOC-NEXT:         Index:           3
 ; RELOC-NEXT:         Offset:          0x00000018
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_I32
-; RELOC-NEXT:         Index:           3
+; RELOC-NEXT:         Index:           4
 ; RELOC-NEXT:         Offset:          0x0000002E
 ; RELOC-NEXT:         Addend:          4
 ; RELOC-NEXT:     Segments:
@@ -148,7 +148,7 @@ target triple = "wasm32-unknown-unknown"
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Segment:         2
 ; RELOC-NEXT:         Size:            4
-; RELOC:            - Index:           6
+; RELOC-NEXT:       - Index:           3
 ; RELOC-NEXT:         Kind:            DATA
 ; RELOC-NEXT:         Name:            hello_str
 ; RELOC-NEXT:         Flags:           [  ]
diff --git a/lld/test/wasm/import-module.ll b/lld/test/wasm/import-module.ll
new file mode 100644
index 00000000000000..9a473194ce2c37
--- /dev/null
+++ b/lld/test/wasm/import-module.ll
@@ -0,0 +1,21 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: wasm-ld --allow-undefined -o %t.wasm %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown-wasm"
+
+define void @_start() {
+  call void @foo();
+  ret void
+}
+
+declare void @foo() #0
+
+attributes #0 = { "wasm-import-module"="bar" }
+
+; CHECK:        - Type:            IMPORT
+; CHECK-NEXT:     Imports:
+; CHECK-NEXT:       - Module:          bar
+; CHECK-NEXT:         Field:           foo
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         SigIndex:        0
diff --git a/lld/test/wasm/import-names.ll b/lld/test/wasm/import-names.ll
new file mode 100644
index 00000000000000..a3953d3356198b
--- /dev/null
+++ b/lld/test/wasm/import-names.ll
@@ -0,0 +1,27 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: wasm-ld --allow-undefined -o %t.wasm %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare void @f0() #0
+
+define void @_start() {
+    call void @f0()
+    ret void
+}
+
+attributes #0 = { "wasm-import-module"="somewhere" "wasm-import-name"="something" }
+
+; CHECK:        - Type:            IMPORT
+; CHECK-NEXT:     Imports:
+; CHECK-NEXT:       - Module:          somewhere
+; CHECK-NEXT:         Field:           something
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         SigIndex:        0
+
+; CHECK:        - Type:            CUSTOM
+; CHECK-NEXT:     Name:            name
+; CHECK-NEXT:     FunctionNames:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Name:            f0
diff --git a/lld/test/wasm/init-fini.ll b/lld/test/wasm/init-fini.ll
index 9a7f5357ef0157..b17020b177c71a 100644
--- a/lld/test/wasm/init-fini.ll
+++ b/lld/test/wasm/init-fini.ll
@@ -163,64 +163,64 @@ entry:
 ; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN ]
 ; RELOC-NEXT:         Function:        7
 ; RELOC-NEXT:       - Index:           6
+; RELOC-NEXT:         Kind:            DATA
+; RELOC-NEXT:         Name:            __dso_handle
+; RELOC-NEXT:         Flags:           [ BINDING_WEAK, VISIBILITY_HIDDEN, UNDEFINED ]
+; RELOC-NEXT:       - Index:           7
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            externDtor
+; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN, UNDEFINED ]
+; RELOC-NEXT:         Function:        0
+; RELOC-NEXT:       - Index:           8
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            externCtor
+; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN, UNDEFINED ]
+; RELOC-NEXT:         Function:        1
+; RELOC-NEXT:       - Index:           9
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            myctor
+; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN ]
+; RELOC-NEXT:         Function:        14
+; RELOC-NEXT:       - Index:           10
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            mydtor
+; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN ]
+; RELOC-NEXT:         Function:        15
+; RELOC-NEXT:       - Index:           11
+; RELOC-NEXT:         Kind:            GLOBAL
+; RELOC-NEXT:         Name:            __stack_pointer
+; RELOC-NEXT:         Flags:           [ UNDEFINED ]
+; RELOC-NEXT:         Global:          0
+; RELOC-NEXT:       - Index:           12
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lcall_dtors.101
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        8
-; RELOC-NEXT:       - Index:           7
+; RELOC-NEXT:       - Index:           13
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lregister_call_dtors.101
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        9
-; RELOC-NEXT:       - Index:           8
-; RELOC-NEXT:         Kind:            DATA
-; RELOC-NEXT:         Name:            __dso_handle
-; RELOC-NEXT:         Flags:           [ BINDING_WEAK, VISIBILITY_HIDDEN, UNDEFINED ]
-; RELOC-NEXT:       - Index:           9
+; RELOC-NEXT:       - Index:           14
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lcall_dtors.1001
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        10
-; RELOC-NEXT:       - Index:           10
+; RELOC-NEXT:       - Index:           15
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lregister_call_dtors.1001
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        11
-; RELOC-NEXT:       - Index:           11
+; RELOC-NEXT:       - Index:           16
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lcall_dtors.4000
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        12
-; RELOC-NEXT:       - Index:           12
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            externDtor
-; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN, UNDEFINED ]
-; RELOC-NEXT:         Function:        0
-; RELOC-NEXT:       - Index:           13
+; RELOC-NEXT:       - Index:           17
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lregister_call_dtors.4000
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        13
-; RELOC-NEXT:       - Index:           14
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            externCtor
-; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN, UNDEFINED ]
-; RELOC-NEXT:         Function:        1
-; RELOC-NEXT:       - Index:           15
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            myctor
-; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN ]
-; RELOC-NEXT:         Function:        14
-; RELOC-NEXT:       - Index:           16
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            mydtor
-; RELOC-NEXT:         Flags:           [ VISIBILITY_HIDDEN ]
-; RELOC-NEXT:         Function:        15
-; RELOC-NEXT:       - Index:           17
-; RELOC-NEXT:         Kind:            GLOBAL
-; RELOC-NEXT:         Name:            __stack_pointer
-; RELOC-NEXT:         Flags:           [ UNDEFINED ]
-; RELOC-NEXT:         Global:          0
 ; RELOC-NEXT:       - Index:           18
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            .Lcall_dtors.101
@@ -251,36 +251,36 @@ entry:
 ; RELOC-NEXT:         Name:            .Lregister_call_dtors.2002
 ; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
 ; RELOC-NEXT:         Function:        21
-; RELOC-NEXT:     InitFunctions:
+; RELOC-NEXT:     InitFunctions:   
 ; RELOC-NEXT:       - Priority:        101
 ; RELOC-NEXT:         Symbol:          0
 ; RELOC-NEXT:       - Priority:        101
 ; RELOC-NEXT:         Symbol:          1
 ; RELOC-NEXT:       - Priority:        101
-; RELOC-NEXT:         Symbol:          7
+; RELOC-NEXT:         Symbol:          13
 ; RELOC-NEXT:       - Priority:        101
-; RELOC-NEXT:         Symbol:          15
+; RELOC-NEXT:         Symbol:          9
 ; RELOC-NEXT:       - Priority:        101
 ; RELOC-NEXT:         Symbol:          19
 ; RELOC-NEXT:       - Priority:        202
-; RELOC-NEXT:         Symbol:          15
+; RELOC-NEXT:         Symbol:          9
 ; RELOC-NEXT:       - Priority:        202
 ; RELOC-NEXT:         Symbol:          21
 ; RELOC-NEXT:       - Priority:        1001
 ; RELOC-NEXT:         Symbol:          0
 ; RELOC-NEXT:       - Priority:        1001
-; RELOC-NEXT:         Symbol:          10
-; RELOC-NEXT:       - Priority:        2002
 ; RELOC-NEXT:         Symbol:          15
 ; RELOC-NEXT:       - Priority:        2002
+; RELOC-NEXT:         Symbol:          9
+; RELOC-NEXT:       - Priority:        2002
 ; RELOC-NEXT:         Symbol:          23
 ; RELOC-NEXT:       - Priority:        4000
-; RELOC-NEXT:         Symbol:          14
+; RELOC-NEXT:         Symbol:          8
 ; RELOC-NEXT:       - Priority:        4000
-; RELOC-NEXT:         Symbol:          13
+; RELOC-NEXT:         Symbol:          17
 ; RELOC-NEXT:   - Type:            CUSTOM
 ; RELOC-NEXT:     Name:            name
-; RELOC-NEXT:     FunctionNames:
+; RELOC-NEXT:     FunctionNames:   
 ; RELOC-NEXT:       - Index:           0
 ; RELOC-NEXT:         Name:            externDtor
 ; RELOC-NEXT:       - Index:           1
diff --git a/lld/test/wasm/locals-duplicate.test b/lld/test/wasm/locals-duplicate.test
index 2d6bd0df5314cd..74383bf429f69b 100644
--- a/lld/test/wasm/locals-duplicate.test
+++ b/lld/test/wasm/locals-duplicate.test
@@ -270,40 +270,40 @@
 ; RELOC-NEXT:   - Type:            CODE
 ; RELOC-NEXT:     Relocations:
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_SLEB
-; RELOC-NEXT:         Index:           4
+; RELOC-NEXT:         Index:           18
 ; RELOC-NEXT:         Offset:          0x00000013
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_SLEB
-; RELOC-NEXT:         Index:           6
+; RELOC-NEXT:         Index:           3
 ; RELOC-NEXT:         Offset:          0x0000001C
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_SLEB
-; RELOC-NEXT:         Index:           8
+; RELOC-NEXT:         Index:           19
 ; RELOC-NEXT:         Offset:          0x00000025
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           0
+; RELOC-NEXT:         Index:           16
 ; RELOC-NEXT:         Offset:          0x0000002E
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           1
+; RELOC-NEXT:         Index:           0
 ; RELOC-NEXT:         Offset:          0x00000037
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           2
+; RELOC-NEXT:         Index:           17
 ; RELOC-NEXT:         Offset:          0x00000040
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_SLEB
-; RELOC-NEXT:         Index:           16
+; RELOC-NEXT:         Index:           10
 ; RELOC-NEXT:         Offset:          0x00000058
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_SLEB
-; RELOC-NEXT:         Index:           18
+; RELOC-NEXT:         Index:           22
 ; RELOC-NEXT:         Offset:          0x00000061
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_MEMORY_ADDR_SLEB
-; RELOC-NEXT:         Index:           20
+; RELOC-NEXT:         Index:           23
 ; RELOC-NEXT:         Offset:          0x0000006A
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           12
+; RELOC-NEXT:         Index:           8
 ; RELOC-NEXT:         Offset:          0x00000073
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           13
+; RELOC-NEXT:         Index:           20
 ; RELOC-NEXT:         Offset:          0x0000007C
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           14
+; RELOC-NEXT:         Index:           21
 ; RELOC-NEXT:         Offset:          0x00000085
 ; RELOC-NEXT:     Functions:
 ; RELOC-NEXT:       - Index:           0
@@ -386,133 +386,133 @@
 ; RELOC-NEXT:     SymbolTable:
 ; RELOC-NEXT:       - Index:           0
 ; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            colliding_func1
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Function:        0
-; RELOC-NEXT:       - Index:           1
-; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            colliding_func2
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        1
-; RELOC-NEXT:       - Index:           2
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            colliding_func3
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Function:        2
-; RELOC-NEXT:       - Index:           3
+; RELOC-NEXT:       - Index:           1
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_global1A
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        3
-; RELOC-NEXT:       - Index:           4
-; RELOC-NEXT:         Kind:            DATA
-; RELOC-NEXT:         Name:            colliding_global1
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Segment:         0
-; RELOC-NEXT:         Size:            4
-; RELOC-NEXT:       - Index:           5
+; RELOC-NEXT:       - Index:           2
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_global2A
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        4
-; RELOC-NEXT:       - Index:           6
+; RELOC-NEXT:       - Index:           3
 ; RELOC-NEXT:         Kind:            DATA
 ; RELOC-NEXT:         Name:            colliding_global2
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Segment:         1
 ; RELOC-NEXT:         Size:            4
-; RELOC-NEXT:       - Index:           7
+; RELOC-NEXT:       - Index:           4
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_global3A
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        5
-; RELOC-NEXT:       - Index:           8
-; RELOC-NEXT:         Kind:            DATA
-; RELOC-NEXT:         Name:            colliding_global3
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Segment:         2
-; RELOC-NEXT:         Size:            4
-; RELOC-NEXT:       - Index:           9
+; RELOC-NEXT:       - Index:           5
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_func1A
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        6
-; RELOC-NEXT:       - Index:           10
+; RELOC-NEXT:       - Index:           6
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_func2A
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        7
-; RELOC-NEXT:       - Index:           11
+; RELOC-NEXT:       - Index:           7
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_func3A
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        8
-; RELOC-NEXT:       - Index:           12
+; RELOC-NEXT:       - Index:           8
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            colliding_func1
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        9
-; RELOC-NEXT:       - Index:           13
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            colliding_func2
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Function:        10
-; RELOC-NEXT:       - Index:           14
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            colliding_func3
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Function:        11
-; RELOC-NEXT:       - Index:           15
+; RELOC-NEXT:       - Index:           9
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_global1B
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        12
-; RELOC-NEXT:       - Index:           16
+; RELOC-NEXT:       - Index:           10
 ; RELOC-NEXT:         Kind:            DATA
 ; RELOC-NEXT:         Name:            colliding_global1
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Segment:         0
 ; RELOC-NEXT:         Offset:          4
 ; RELOC-NEXT:         Size:            4
-; RELOC-NEXT:       - Index:           17
+; RELOC-NEXT:       - Index:           11
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_global2B
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        13
-; RELOC-NEXT:       - Index:           18
-; RELOC-NEXT:         Kind:            DATA
-; RELOC-NEXT:         Name:            colliding_global2
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Segment:         1
-; RELOC-NEXT:         Offset:          4
-; RELOC-NEXT:         Size:            4
-; RELOC-NEXT:       - Index:           19
+; RELOC-NEXT:       - Index:           12
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_global3B
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        14
-; RELOC-NEXT:       - Index:           20
-; RELOC-NEXT:         Kind:            DATA
-; RELOC-NEXT:         Name:            colliding_global3
-; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
-; RELOC-NEXT:         Segment:         2
-; RELOC-NEXT:         Offset:          4
-; RELOC-NEXT:         Size:            4
-; RELOC-NEXT:       - Index:           21
+; RELOC-NEXT:       - Index:           13
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_func1B
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        15
-; RELOC-NEXT:       - Index:           22
+; RELOC-NEXT:       - Index:           14
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_func2B
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        16
-; RELOC-NEXT:       - Index:           23
+; RELOC-NEXT:       - Index:           15
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            get_func3B
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        17
+; RELOC-NEXT:       - Index:           16
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            colliding_func1
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Function:        0
+; RELOC-NEXT:       - Index:           17
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            colliding_func3
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Function:        2
+; RELOC-NEXT:       - Index:           18
+; RELOC-NEXT:         Kind:            DATA
+; RELOC-NEXT:         Name:            colliding_global1
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Segment:         0
+; RELOC-NEXT:         Size:            4
+; RELOC-NEXT:       - Index:           19
+; RELOC-NEXT:         Kind:            DATA
+; RELOC-NEXT:         Name:            colliding_global3
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Segment:         2
+; RELOC-NEXT:         Size:            4
+; RELOC-NEXT:       - Index:           20
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            colliding_func2
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Function:        10
+; RELOC-NEXT:       - Index:           21
+; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            colliding_func3
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Function:        11
+; RELOC-NEXT:       - Index:           22
+; RELOC-NEXT:         Kind:            DATA
+; RELOC-NEXT:         Name:            colliding_global2
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Segment:         1
+; RELOC-NEXT:         Offset:          4
+; RELOC-NEXT:         Size:            4
+; RELOC-NEXT:       - Index:           23
+; RELOC-NEXT:         Kind:            DATA
+; RELOC-NEXT:         Name:            colliding_global3
+; RELOC-NEXT:         Flags:           [ BINDING_LOCAL ]
+; RELOC-NEXT:         Segment:         2
+; RELOC-NEXT:         Offset:          4
+; RELOC-NEXT:         Size:            4
 ; RELOC-NEXT:     SegmentInfo:
 ; RELOC-NEXT:       - Index:           0
 ; RELOC-NEXT:         Name:            .bss.colliding_global1
diff --git a/lld/test/wasm/lto/relocatable-undefined.ll b/lld/test/wasm/lto/relocatable-undefined.ll
new file mode 100644
index 00000000000000..b9780ee0309b26
--- /dev/null
+++ b/lld/test/wasm/lto/relocatable-undefined.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: wasm-ld -r -o %t.wasm %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+@missing_data = external global i32
+declare i32 @missing_func() local_unnamed_addr
+
+define i32 @foo() {
+entry:
+  %0 = call i32 @missing_func()
+  %1 = load i32, i32* @missing_data, align 4
+  ret i32 %1
+}
+
+
+; CHECK:        - Type:            CUSTOM
+; CHECK-NEXT:     Name:            linking
+; CHECK-NEXT:     Version:         2
+; CHECK-NEXT:     SymbolTable:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Name:            missing_func
+; CHECK-NEXT:         Flags:           [ UNDEFINED ]
+; CHECK-NEXT:         Function:        0
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Name:            foo
+; CHECK-NEXT:         Flags:           [  ]
+; CHECK-NEXT:         Function:        1
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Kind:            DATA
+; CHECK-NEXT:         Name:            missing_data
+; CHECK-NEXT:         Flags:           [ UNDEFINED ]
diff --git a/lld/test/wasm/weak-alias.ll b/lld/test/wasm/weak-alias.ll
index 0c856e1eafa090..a925c10ccda413 100644
--- a/lld/test/wasm/weak-alias.ll
+++ b/lld/test/wasm/weak-alias.ll
@@ -187,13 +187,13 @@ entry:
 ; RELOC-NEXT:   - Type:            CODE
 ; RELOC-NEXT:     Relocations:
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_FUNCTION_INDEX_LEB
-; RELOC-NEXT:         Index:           4
+; RELOC-NEXT:         Index:           1
 ; RELOC-NEXT:         Offset:          0x00000004
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_FUNCTION_INDEX_LEB
-; RELOC-NEXT:         Index:           1
+; RELOC-NEXT:         Index:           2
 ; RELOC-NEXT:         Offset:          0x00000013
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_FUNCTION_INDEX_LEB
-; RELOC-NEXT:         Index:           4
+; RELOC-NEXT:         Index:           1
 ; RELOC-NEXT:         Offset:          0x0000001C
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_GLOBAL_INDEX_LEB
 ; RELOC-NEXT:         Index:           6
@@ -202,10 +202,10 @@ entry:
 ; RELOC-NEXT:         Index:           6
 ; RELOC-NEXT:         Offset:          0x00000032
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           4
+; RELOC-NEXT:         Index:           1
 ; RELOC-NEXT:         Offset:          0x0000003A
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_FUNCTION_INDEX_LEB
-; RELOC-NEXT:         Index:           4
+; RELOC-NEXT:         Index:           1
 ; RELOC-NEXT:         Offset:          0x00000043
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_GLOBAL_INDEX_LEB
 ; RELOC-NEXT:         Index:           6
@@ -217,10 +217,10 @@ entry:
 ; RELOC-NEXT:         Index:           6
 ; RELOC-NEXT:         Offset:          0x00000068
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_TABLE_INDEX_SLEB
-; RELOC-NEXT:         Index:           1
+; RELOC-NEXT:         Index:           2
 ; RELOC-NEXT:         Offset:          0x00000070
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_FUNCTION_INDEX_LEB
-; RELOC-NEXT:         Index:           1
+; RELOC-NEXT:         Index:           2
 ; RELOC-NEXT:         Offset:          0x00000079
 ; RELOC-NEXT:       - Type:            R_WEBASSEMBLY_GLOBAL_INDEX_LEB
 ; RELOC-NEXT:         Index:           6
@@ -259,24 +259,24 @@ entry:
 ; RELOC-NEXT:         Function:        0
 ; RELOC-NEXT:       - Index:           1
 ; RELOC-NEXT:         Kind:            FUNCTION
+; RELOC-NEXT:         Name:            alias_fn
+; RELOC-NEXT:         Flags:           [ BINDING_WEAK ]
+; RELOC-NEXT:         Function:        1
+; RELOC-NEXT:       - Index:           2
+; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            direct_fn
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        1
-; RELOC-NEXT:       - Index:           2
+; RELOC-NEXT:       - Index:           3
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            call_direct
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        2
-; RELOC-NEXT:       - Index:           3
+; RELOC-NEXT:       - Index:           4
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            call_alias
 ; RELOC-NEXT:         Flags:           [  ]
 ; RELOC-NEXT:         Function:        3
-; RELOC-NEXT:       - Index:           4
-; RELOC-NEXT:         Kind:            FUNCTION
-; RELOC-NEXT:         Name:            alias_fn
-; RELOC-NEXT:         Flags:           [ BINDING_WEAK ]
-; RELOC-NEXT:         Function:        1
 ; RELOC-NEXT:       - Index:           5
 ; RELOC-NEXT:         Kind:            FUNCTION
 ; RELOC-NEXT:         Name:            call_alias_ptr
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index fab4c0c4ed8bb7..ade15a19f66e0f 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -434,7 +434,9 @@ static Symbol *handleUndefined(StringRef Name) {
 static UndefinedGlobal *
 createUndefinedGlobal(StringRef Name, llvm::wasm::WasmGlobalType *Type) {
   auto *Sym =
-      cast<UndefinedGlobal>(Symtab->addUndefinedGlobal(Name, 0, nullptr, Type));
+      cast<UndefinedGlobal>(Symtab->addUndefinedGlobal(Name, Name,
+                                                       DefaultModule, 0,
+                                                       nullptr, Type));
   Config->AllowUndefinedSymbols.insert(Sym->getName());
   Sym->IsUsedInRegularObj = true;
   return Sym;
diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
index 1145c670253c81..f5884a1beea4b8 100644
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -23,7 +23,7 @@ using namespace llvm::support::endian;
 using namespace lld;
 using namespace lld::wasm;
 
-static StringRef ReloctTypeToString(uint8_t RelocType) {
+static StringRef reloctTypeToString(uint8_t RelocType) {
   switch (RelocType) {
 #define WASM_RELOC(NAME, REL)                                                  \
   case REL:                                                                    \
@@ -77,7 +77,7 @@ void InputChunk::verifyRelocTargets() const {
       warn("expected LEB at relocation site be 5-byte padded");
     uint32_t ExpectedValue = File->calcExpectedValue(Rel);
     if (ExpectedValue != ExistingValue)
-      warn("unexpected existing value for " + ReloctTypeToString(Rel.Type) +
+      warn("unexpected existing value for " + reloctTypeToString(Rel.Type) +
            ": existing=" + Twine(ExistingValue) +
            " expected=" + Twine(ExpectedValue));
   }
@@ -103,7 +103,7 @@ void InputChunk::writeTo(uint8_t *Buf) const {
   for (const WasmRelocation &Rel : Relocations) {
     uint8_t *Loc = Buf + Rel.Offset + Off;
     uint32_t Value = File->calcNewValue(Rel);
-    LLVM_DEBUG(dbgs() << "apply reloc: type=" << ReloctTypeToString(Rel.Type)
+    LLVM_DEBUG(dbgs() << "apply reloc: type=" << reloctTypeToString(Rel.Type)
                       << " addend=" << Rel.Addend << " index=" << Rel.Index
                       << " value=" << Value << " offset=" << Rel.Offset
                       << "\n");
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index e5da23db37739c..1e54272163545d 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -377,11 +377,15 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
 
   switch (Sym.Info.Kind) {
   case WASM_SYMBOL_TYPE_FUNCTION:
-    return Symtab->addUndefinedFunction(Name, Flags, this, Sym.Signature);
+    return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName,
+                                        Sym.Info.ImportModule, Flags, this,
+                                        Sym.Signature);
   case WASM_SYMBOL_TYPE_DATA:
     return Symtab->addUndefinedData(Name, Flags, this);
   case WASM_SYMBOL_TYPE_GLOBAL:
-    return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
+    return Symtab->addUndefinedGlobal(Name, Sym.Info.ImportName,
+                                      Sym.Info.ImportModule, Flags, this,
+                                      Sym.GlobalType);
   case WASM_SYMBOL_TYPE_SECTION:
     llvm_unreachable("section symbols cannot be undefined");
   }
@@ -445,7 +449,8 @@ static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
 
   if (ObjSym.isUndefined()) {
     if (ObjSym.isExecutable())
-      return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr);
+      return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
+                                          nullptr);
     return Symtab->addUndefinedData(Name, Flags, &F);
   }
 
diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp
index 96a947e29d4124..e994691cceb2e0 100644
--- a/lld/wasm/LTO.cpp
+++ b/lld/wasm/LTO.cpp
@@ -79,8 +79,9 @@ BitcodeCompiler::~BitcodeCompiler() = default;
 
 static void undefine(Symbol *S) {
   if (auto F = dyn_cast<DefinedFunction>(S))
-    replaceSymbol<UndefinedFunction>(F, F->getName(), 0, F->getFile(),
-                                     F->Signature);
+    replaceSymbol<UndefinedFunction>(F, F->getName(), F->getName(),
+                                     DefaultModule, 0,
+                                     F->getFile(), F->Signature);
   else if (isa<DefinedData>(S))
     replaceSymbol<UndefinedData>(S, S->getName(), 0, S->getFile());
   else
diff --git a/lld/wasm/LTO.h b/lld/wasm/LTO.h
index cf726de5643ae9..d771301f224dca 100644
--- a/lld/wasm/LTO.h
+++ b/lld/wasm/LTO.h
@@ -23,6 +23,7 @@
 
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/SmallString.h"
+#include "Writer.h"
 #include <memory>
 #include <vector>
 
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index 3bbd1148f6ad35..723ac4e3c6baa4 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -85,7 +85,7 @@ void lld::wasm::markLive() {
       // equal to null pointer, only reachable via direct call).
       if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB ||
           Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32) {
-        FunctionSymbol *FuncSym = cast<FunctionSymbol>(Sym);
+        auto *FuncSym = cast<FunctionSymbol>(Sym);
         if (FuncSym->hasTableIndex() && FuncSym->getTableIndex() == 0)
           continue;
       }
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index c7983196db36c8..65441d293b50be 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -314,8 +314,9 @@ Symbol *SymbolTable::addDefinedEvent(StringRef Name, uint32_t Flags,
   return S;
 }
 
-Symbol *SymbolTable::addUndefinedFunction(StringRef Name, uint32_t Flags,
-                                          InputFile *File,
+Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
+                                          StringRef ImportModule,
+                                          uint32_t Flags, InputFile *File,
                                           const WasmSignature *Sig) {
   LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name <<
              " [" << (Sig ? toString(*Sig) : "none") << "]\n");
@@ -325,7 +326,8 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, uint32_t Flags,
   std::tie(S, WasInserted) = insert(Name, File);
 
   if (WasInserted)
-    replaceSymbol<UndefinedFunction>(S, Name, Flags, File, Sig);
+    replaceSymbol<UndefinedFunction>(S, Name, ImportName, ImportModule, Flags,
+                                     File, Sig);
   else if (auto *Lazy = dyn_cast<LazySymbol>(S))
     Lazy->fetch();
   else
@@ -351,7 +353,8 @@ Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags,
   return S;
 }
 
-Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, uint32_t Flags,
+Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, StringRef ImportName,
+                                        StringRef ImportModule, uint32_t Flags,
                                         InputFile *File,
                                         const WasmGlobalType *Type) {
   LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << Name << "\n");
@@ -361,7 +364,8 @@ Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, uint32_t Flags,
   std::tie(S, WasInserted) = insert(Name, File);
 
   if (WasInserted)
-    replaceSymbol<UndefinedGlobal>(S, Name, Flags, File, Type);
+    replaceSymbol<UndefinedGlobal>(S, Name, ImportName, ImportModule, Flags,
+                                   File, Type);
   else if (auto *Lazy = dyn_cast<LazySymbol>(S))
     Lazy->fetch();
   else if (S->isDefined())
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index 5e38e30692abe8..64678aee50055c 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -59,11 +59,13 @@ class SymbolTable {
   Symbol *addDefinedEvent(StringRef Name, uint32_t Flags, InputFile *File,
                           InputEvent *E);
 
-  Symbol *addUndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File,
-                               const WasmSignature *Signature);
+  Symbol *addUndefinedFunction(StringRef Name, StringRef ImportName,
+                               StringRef ImportModule, uint32_t Flags,
+                               InputFile *File, const WasmSignature *Signature);
   Symbol *addUndefinedData(StringRef Name, uint32_t Flags, InputFile *File);
-  Symbol *addUndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File,
-                             const WasmGlobalType *Type);
+  Symbol *addUndefinedGlobal(StringRef Name, StringRef ImportName,
+                             StringRef ImportModule,  uint32_t Flags,
+                             InputFile *File, const WasmGlobalType *Type);
 
   void addLazy(ArchiveFile *F, const llvm::object::Archive::Symbol *Sym);
 
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 11ee66550cdccb..a065338ac1e42c 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -149,13 +149,19 @@ class DefinedFunction : public FunctionSymbol {
 
 class UndefinedFunction : public FunctionSymbol {
 public:
-  UndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File = nullptr,
+  UndefinedFunction(StringRef Name, StringRef ImportName,
+                    StringRef ImportModule, uint32_t Flags,
+                    InputFile *File = nullptr,
                     const WasmSignature *Type = nullptr)
-      : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type) {}
+      : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type),
+        ImportName(ImportName), ImportModule(ImportModule) {}
 
   static bool classof(const Symbol *S) {
     return S->kind() == UndefinedFunctionKind;
   }
+
+  StringRef ImportName;
+  StringRef ImportModule;
 };
 
 class SectionSymbol : public Symbol {
@@ -261,13 +267,18 @@ class DefinedGlobal : public GlobalSymbol {
 
 class UndefinedGlobal : public GlobalSymbol {
 public:
-  UndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File = nullptr,
+  UndefinedGlobal(StringRef Name, StringRef ImportName, StringRef ImportModule,
+                  uint32_t Flags, InputFile *File = nullptr,
                   const WasmGlobalType *Type = nullptr)
-      : GlobalSymbol(Name, UndefinedGlobalKind, Flags, File, Type) {}
+      : GlobalSymbol(Name, UndefinedGlobalKind, Flags, File, Type),
+        ImportName(ImportName), ImportModule(ImportModule) {}
 
   static bool classof(const Symbol *S) {
     return S->kind() == UndefinedGlobalKind;
   }
+
+  StringRef ImportName;
+  StringRef ImportModule;
 };
 
 // Wasm events are features that suspend the current execution and transfer the
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 819d4298fef290..902ca61ca19b71 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -39,8 +39,9 @@ using namespace llvm::wasm;
 using namespace lld;
 using namespace lld::wasm;
 
-static constexpr int kStackAlignment = 16;
-static constexpr const char *kFunctionTableName = "__indirect_function_table";
+static constexpr int StackAlignment = 16;
+static constexpr const char *FunctionTableName = "__indirect_function_table";
+const char *lld::wasm::DefaultModule = "env";
 
 namespace {
 
@@ -155,7 +156,7 @@ void Writer::createImportSection() {
 
   if (Config->ImportMemory) {
     WasmImport Import;
-    Import.Module = "env";
+    Import.Module = DefaultModule;
     Import.Field = "memory";
     Import.Kind = WASM_EXTERNAL_MEMORY;
     Import.Memory.Flags = 0;
@@ -172,8 +173,8 @@ void Writer::createImportSection() {
   if (Config->ImportTable) {
     uint32_t TableSize = TableBase + IndirectFunctions.size();
     WasmImport Import;
-    Import.Module = "env";
-    Import.Field = kFunctionTableName;
+    Import.Module = DefaultModule;
+    Import.Field = FunctionTableName;
     Import.Kind = WASM_EXTERNAL_TABLE;
     Import.Table.ElemType = WASM_TYPE_FUNCREF;
     Import.Table.Limits = {0, TableSize, 0};
@@ -182,8 +183,17 @@ void Writer::createImportSection() {
 
   for (const Symbol *Sym : ImportedSymbols) {
     WasmImport Import;
-    Import.Module = "env";
-    Import.Field = Sym->getName();
+    if (auto *F = dyn_cast<UndefinedFunction>(Sym)) {
+      Import.Field = F->ImportName;
+      Import.Module = F->ImportModule;
+    } else if (auto *G = dyn_cast<UndefinedGlobal>(Sym)) {
+      Import.Field = G->ImportName;
+      Import.Module = G->ImportModule;
+    } else {
+      Import.Field = Sym->getName();
+      Import.Module = DefaultModule;
+    }
+
     if (auto *FunctionSym = dyn_cast<FunctionSymbol>(Sym)) {
       Import.Kind = WASM_EXTERNAL_FUNCTION;
       Import.SigIndex = lookupType(*FunctionSym->Signature);
@@ -441,6 +451,13 @@ static uint32_t getWasmFlags(const Symbol *Sym) {
     Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN;
   if (Sym->isUndefined())
     Flags |= WASM_SYMBOL_UNDEFINED;
+  if (auto *F = dyn_cast<UndefinedFunction>(Sym)) {
+    if (F->getName() != F->ImportName)
+      Flags |= WASM_SYMBOL_EXPLICIT_NAME;
+  } else if (auto *G = dyn_cast<UndefinedGlobal>(Sym)) {
+    if (G->getName() != G->ImportName)
+      Flags |= WASM_SYMBOL_EXPLICIT_NAME;
+  }
   return Flags;
 }
 
@@ -506,15 +523,18 @@ void Writer::createLinkingSection() {
 
       if (auto *F = dyn_cast<FunctionSymbol>(Sym)) {
         writeUleb128(Sub.OS, F->getFunctionIndex(), "index");
-        if (Sym->isDefined())
+        if (Sym->isDefined() ||
+            (Flags & WASM_SYMBOL_EXPLICIT_NAME) != 0)
           writeStr(Sub.OS, Sym->getName(), "sym name");
       } else if (auto *G = dyn_cast<GlobalSymbol>(Sym)) {
         writeUleb128(Sub.OS, G->getGlobalIndex(), "index");
-        if (Sym->isDefined())
+        if (Sym->isDefined() ||
+            (Flags & WASM_SYMBOL_EXPLICIT_NAME) != 0)
           writeStr(Sub.OS, Sym->getName(), "sym name");
       } else if (auto *E = dyn_cast<EventSymbol>(Sym)) {
         writeUleb128(Sub.OS, E->getEventIndex(), "index");
-        if (Sym->isDefined())
+        if (Sym->isDefined() ||
+            (Flags & WASM_SYMBOL_EXPLICIT_NAME) != 0)
           writeStr(Sub.OS, Sym->getName(), "sym name");
       } else if (isa<DataSymbol>(Sym)) {
         writeStr(Sub.OS, Sym->getName(), "sym name");
@@ -663,9 +683,9 @@ void Writer::layoutMemory() {
   auto PlaceStack = [&]() {
     if (Config->Relocatable || Config->Shared)
       return;
-    MemoryPtr = alignTo(MemoryPtr, kStackAlignment);
-    if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment))
-      error("stack size must be " + Twine(kStackAlignment) + "-byte aligned");
+    MemoryPtr = alignTo(MemoryPtr, StackAlignment);
+    if (Config->ZStackSize != alignTo(Config->ZStackSize, StackAlignment))
+      error("stack size must be " + Twine(StackAlignment) + "-byte aligned");
     log("mem: stack size  = " + Twine(Config->ZStackSize));
     log("mem: stack base  = " + Twine(MemoryPtr));
     MemoryPtr += Config->ZStackSize;
@@ -814,7 +834,7 @@ void Writer::calculateExports() {
     Exports.push_back(WasmExport{"memory", WASM_EXTERNAL_MEMORY, 0});
 
   if (!Config->Relocatable && Config->ExportTable)
-    Exports.push_back(WasmExport{kFunctionTableName, WASM_EXTERNAL_TABLE, 0});
+    Exports.push_back(WasmExport{FunctionTableName, WASM_EXTERNAL_TABLE, 0});
 
   unsigned FakeGlobalIndex = NumImportedGlobals + InputGlobals.size();
 
@@ -858,40 +878,42 @@ void Writer::assignSymtab() {
   StringMap<uint32_t> SectionSymbolIndices;
 
   unsigned SymbolIndex = SymtabEntries.size();
-  for (ObjFile *File : Symtab->ObjectFiles) {
-    LLVM_DEBUG(dbgs() << "Symtab entries: " << File->getName() << "\n");
-    for (Symbol *Sym : File->getSymbols()) {
-      if (Sym->getFile() != File)
-        continue;
-
-      if (auto *S = dyn_cast<SectionSymbol>(Sym)) {
-        StringRef Name = S->getName();
-        if (CustomSectionMapping.count(Name) == 0)
-          continue;
-
-        auto SSI = SectionSymbolIndices.find(Name);
-        if (SSI != SectionSymbolIndices.end()) {
-          Sym->setOutputSymbolIndex(SSI->second);
-          continue;
-        }
 
-        SectionSymbolIndices[Name] = SymbolIndex;
-        CustomSectionSymbols[Name] = cast<SectionSymbol>(Sym);
+  auto AddSymbol = [&](Symbol *Sym) {
+    if (auto *S = dyn_cast<SectionSymbol>(Sym)) {
+      StringRef Name = S->getName();
+      if (CustomSectionMapping.count(Name) == 0)
+        return;
 
-        Sym->markLive();
+      auto SSI = SectionSymbolIndices.find(Name);
+      if (SSI != SectionSymbolIndices.end()) {
+        Sym->setOutputSymbolIndex(SSI->second);
+        return;
       }
 
-      // (Since this is relocatable output, GC is not performed so symbols must
-      // be live.)
-      assert(Sym->isLive());
-      Sym->setOutputSymbolIndex(SymbolIndex++);
-      SymtabEntries.emplace_back(Sym);
+      SectionSymbolIndices[Name] = SymbolIndex;
+      CustomSectionSymbols[Name] = cast<SectionSymbol>(Sym);
+
+      Sym->markLive();
     }
-  }
 
-  // For the moment, relocatable output doesn't contain any synthetic functions,
-  // so no need to look through the Symtab for symbols not referenced by
-  // Symtab->ObjectFiles.
+    // (Since this is relocatable output, GC is not performed so symbols must
+    // be live.)
+    assert(Sym->isLive());
+    Sym->setOutputSymbolIndex(SymbolIndex++);
+    SymtabEntries.emplace_back(Sym);
+  };
+
+  for (Symbol *Sym : Symtab->getSymbols())
+    if (!Sym->isLazy())
+      AddSymbol(Sym);
+
+  for (ObjFile *File : Symtab->ObjectFiles) {
+    LLVM_DEBUG(dbgs() << "Local symtab entries: " << File->getName() << "\n");
+    for (Symbol *Sym : File->getSymbols())
+      if (Sym->isLocal())
+        AddSymbol(Sym);
+  }
 }
 
 uint32_t Writer::lookupType(const WasmSignature &Sig) {
diff --git a/lld/wasm/Writer.h b/lld/wasm/Writer.h
index a931ba9c29a890..e62f470642285d 100644
--- a/lld/wasm/Writer.h
+++ b/lld/wasm/Writer.h
@@ -15,6 +15,8 @@ namespace wasm {
 
 void writeResult();
 
+extern const char *DefaultModule;
+
 } // namespace wasm
 } // namespace lld
 
diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake
index f7cac3dad83758..f82c11d3d317e1 100644
--- a/lldb/cmake/modules/AddLLDB.cmake
+++ b/lldb/cmake/modules/AddLLDB.cmake
@@ -88,7 +88,9 @@ function(add_lldb_library name)
   # Hack: only some LLDB libraries depend on the clang autogenerated headers,
   # but it is simple enough to make all of LLDB depend on some of those
   # headers without negatively impacting much of anything.
-  add_dependencies(${name} clang-tablegen-targets)
+  if(NOT LLDB_BUILT_STANDALONE)
+    add_dependencies(${name} clang-tablegen-targets)
+  endif()
 
   # Add in any extra C++ compilation flags for this library.
   target_compile_options(${name} PRIVATE ${PARAM_EXTRA_CXXFLAGS})
diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index e63b2694e6a4c5..a9059dd5f9eb41 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -58,7 +58,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   set(LLVM_DIR ${LLVM_OBJ_ROOT}/cmake/modules/CMakeFiles CACHE PATH "Path to LLVM build tree CMake files")
   set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree")
   set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
-  set(LLVM_EXTERNAL_LIT ${LLVM_TOOLS_BINARY_DIR}/llvm-lit CACHE PATH "Path to llvm-lit")
+  set(LLVM_DEFAULT_EXTERNAL_LIT ${LLVM_TOOLS_BINARY_DIR}/llvm-lit CACHE PATH "Path to llvm-lit")
 
   find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR}
     NO_DEFAULT_PATH)
diff --git a/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h b/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h
index 5a0388ffdd9770..809a6f4461f51c 100644
--- a/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h
+++ b/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h
@@ -12,12 +12,13 @@
 
 #include "lldb/Host/posix/HostInfoPosix.h"
 #include "lldb/Utility/FileSpec.h"
+#include "llvm/Support/VersionTuple.h"
 
 namespace lldb_private {
 
 class HostInfoOpenBSD : public HostInfoPosix {
 public:
-  static bool GetOSVersion(uint32_t &major, uint32_t &minor, uint32_t &update);
+  static llvm::VersionTuple GetOSVersion();
   static bool GetOSBuildString(std::string &s);
   static bool GetOSKernelDescription(std::string &s);
   static FileSpec GetProgramFileSpec();
diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt
index 1ac013b2eed304..804e950b3bb033 100644
--- a/lldb/lit/CMakeLists.txt
+++ b/lldb/lit/CMakeLists.txt
@@ -26,9 +26,6 @@ list(APPEND LLDB_TEST_DEPS
   llvm-config
   llvm-mc
   llvm-objcopy
-  FileCheck
-  count
-  not
   )
 
 if(TARGET lld)
@@ -55,6 +52,14 @@ configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/Suite/lit.site.cfg.in
   ${CMAKE_CURRENT_BINARY_DIR}/Suite/lit.site.cfg)
 
+if(NOT LLDB_BUILT_STANDALONE)
+  list(APPEND LLDB_TEST_DEPS
+    FileCheck
+    count
+    not
+  )
+endif()
+
 add_lit_testsuite(check-lldb-lit "Running lldb lit test suite"
   ${CMAKE_CURRENT_BINARY_DIR}
   DEPENDS ${LLDB_TEST_DEPS}
diff --git a/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp b/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp
index 548958899322c5..cf7acb79da00ed 100644
--- a/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp
+++ b/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp
@@ -17,16 +17,17 @@
 
 using namespace lldb_private;
 
-bool HostInfoOpenBSD::GetOSVersion(uint32_t &major, uint32_t &minor,
-                                   uint32_t &update) {
+llvm::VersionTuple HostInfoOpenBSD::GetOSVersion() {
   struct utsname un;
 
   ::memset(&un, 0, sizeof(utsname));
   if (uname(&un) < 0)
-    return false;
+    return llvm::VersionTuple();
 
-  int status = sscanf(un.release, "%u.%u", &major, &minor);
-  return status == 2;
+  unsigned major, minor;
+  if (2 == sscanf(un.release, "%u.%u", &major, &minor))
+    return llvm::VersionTuple(major, minor);
+  return llvm::VersionTuple();
 }
 
 bool HostInfoOpenBSD::GetOSBuildString(std::string &s) {
diff --git a/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt b/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt
index e131e6d70468bf..586725bb7a566d 100644
--- a/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt
+++ b/lldb/source/Plugins/Process/NetBSD/CMakeLists.txt
@@ -11,6 +11,7 @@ add_lldb_library(lldbPluginProcessNetBSD PLUGIN
     lldbUtility
     lldbPluginProcessPOSIX
     lldbPluginProcessUtility
+    util
   LINK_COMPONENTS
     Support
   )
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 6e5221ebfd339b..27754f339493bd 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -383,9 +383,12 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF)
 set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING
   "Enable abi-breaking checks.  Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.")
 
-option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN
+option(LLVM_FORCE_USE_OLD_TOOLCHAIN
        "Set to ON to force using an old, unsupported host toolchain." OFF)
 
+option(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN
+       "Set to ON to only warn when using a toolchain which is about to be deprecated, instead of emitting an error." OFF)
+
 option(LLVM_USE_INTEL_JITEVENTS
   "Use Intel JIT API to inform Intel(R) VTune(TM) Amplifier XE 2011 about JIT code"
   OFF)
diff --git a/llvm/bindings/go/llvm/ir_test.go b/llvm/bindings/go/llvm/ir_test.go
index 10f4968ba89f2c..4d559c33671580 100644
--- a/llvm/bindings/go/llvm/ir_test.go
+++ b/llvm/bindings/go/llvm/ir_test.go
@@ -31,7 +31,7 @@ func testAttribute(t *testing.T, name string) {
 	fn.AddFunctionAttr(attr)
 	newattr := fn.GetEnumFunctionAttribute(kind)
 	if attr != newattr {
-		t.Errorf("got attribute mask %d, want %d", newattr, attr)
+		t.Errorf("got attribute %p, want %p", newattr.C, attr.C)
 	}
 
 	text := mod.String()
@@ -42,7 +42,7 @@ func testAttribute(t *testing.T, name string) {
 	fn.RemoveEnumFunctionAttribute(kind)
 	newattr = fn.GetEnumFunctionAttribute(kind)
 	if !newattr.IsNil() {
-		t.Errorf("got attribute mask %d, want 0", newattr)
+		t.Errorf("got attribute %p, want 0", newattr.C)
 	}
 }
 
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 4dbc0ddaf4f018..0df6845aaa71d3 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1280,7 +1280,6 @@ function(get_llvm_lit_path base_dir file_name)
   cmake_parse_arguments(ARG "ALLOW_EXTERNAL" "" "" ${ARGN})
 
   if (ARG_ALLOW_EXTERNAL)
-    set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_EXTERNAL_LIT}")
     set (LLVM_EXTERNAL_LIT "" CACHE STRING "Command used to spawn lit")
     if ("${LLVM_EXTERNAL_LIT}" STREQUAL "")
       set(LLVM_EXTERNAL_LIT "${LLVM_DEFAULT_EXTERNAL_LIT}")
diff --git a/llvm/cmake/modules/CheckCompilerVersion.cmake b/llvm/cmake/modules/CheckCompilerVersion.cmake
index adf500ad53a722..b1cb5527422896 100644
--- a/llvm/cmake/modules/CheckCompilerVersion.cmake
+++ b/llvm/cmake/modules/CheckCompilerVersion.cmake
@@ -1,52 +1,94 @@
-# Check if the host compiler is new enough. LLVM requires at least GCC 4.8,
-# MSVC 2015 (Update 3), or Clang 3.1.
+# Check if the host compiler is new enough.
+# These versions are updated based on the following policy:
+#   llvm.org/docs/DeveloperPolicy.html#toolchain
 
 include(CheckCXXSourceCompiles)
 
-if(NOT DEFINED LLVM_COMPILER_CHECKED)
-  set(LLVM_COMPILER_CHECKED ON)
+set(GCC_MIN 4.8)
+set(GCC_SOFT_ERROR 5.1)
+set(CLANG_MIN 3.1)
+set(CLANG_SOFT_ERROR 3.5)
+set(APPLECLANG_MIN 3.1)
+set(APPLECLANG_SOFT_ERROR 6.0)
+set(MSVC_MIN 19.00.24213.1)
+set(MSVC_SOFT_ERROR 19.1)
 
-  if(NOT LLVM_FORCE_USE_OLD_TOOLCHAIN)
-    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-      if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
-        message(FATAL_ERROR "Host GCC version must be at least 4.8!")
-      endif()
-    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-      if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.1)
-        message(FATAL_ERROR "Host Clang version must be at least 3.1!")
-      endif()
+# Map the above GCC versions to dates: https://gcc.gnu.org/develop.html#timeline
+set(GCC_MIN_DATE 20130322)
+set(GCC_SOFT_ERROR_DATE 20150422)
 
-      if (CMAKE_CXX_SIMULATE_ID MATCHES "MSVC")
-        if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS 19.0)
-          message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=19.0")
-        endif()
-        set(CLANG_CL 1)
-      elseif(NOT LLVM_ENABLE_LIBCXX)
-        # Otherwise, test that we aren't using too old of a version of libstdc++
-        # with the Clang compiler. This is tricky as there is no real way to
-        # check the version of libstdc++ directly. Instead we test for a known
-        # bug in libstdc++4.6 that is fixed in libstdc++4.7.
-        set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
-        set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
-        set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x")
-        check_cxx_source_compiles("
-#include <atomic>
-std::atomic<float> x(0.0f);
-int main() { return (float)x; }"
-          LLVM_NO_OLD_LIBSTDCXX)
-        if(NOT LLVM_NO_OLD_LIBSTDCXX)
-          message(FATAL_ERROR "Host Clang must be able to find libstdc++4.8 or newer!")
-        endif()
-        set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
-        set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES})
-      endif()
-    elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-      if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0)
-        message(FATAL_ERROR "Host Visual Studio must be at least 2015")
-      elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.00.24213.1)
-        message(WARNING "Host Visual Studio should at least be 2015 Update 3 (MSVC 19.00.24213.1)"
-          "  due to miscompiles from earlier versions")
+
+if(DEFINED LLVM_COMPILER_CHECKED)
+  return()
+endif()
+set(LLVM_COMPILER_CHECKED ON)
+
+if(LLVM_FORCE_USE_OLD_TOOLCHAIN)
+  return()
+endif()
+
+function(check_compiler_version NAME NICE_NAME MINIMUM_VERSION SOFT_ERROR_VERSION) # Checks CMAKE_CXX_COMPILER_VERSION when CMAKE_CXX_COMPILER_ID equals NAME; NICE_NAME is only used in messages.
+  if(NOT CMAKE_CXX_COMPILER_ID STREQUAL NAME) # A different compiler is in use: nothing to check here.
+    return()
+  endif()
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS MINIMUM_VERSION) # Below the hard minimum: fatal error.
+    message(FATAL_ERROR "Host ${NICE_NAME} version must be at least ${MINIMUM_VERSION}, your version is ${CMAKE_CXX_COMPILER_VERSION}.")
+  elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS SOFT_ERROR_VERSION) # Between the hard minimum and the soft-error version.
+    if(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN) # Opt-out set: downgrade the error to a warning.
+      message(WARNING "Host ${NICE_NAME} version should be at least ${SOFT_ERROR_VERSION} because LLVM will soon use new C++ features which your toolchain version doesn't support. Your version is ${CMAKE_CXX_COMPILER_VERSION}. Ignoring because you've set LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.")
+    else()
+      message(FATAL_ERROR "Host ${NICE_NAME} version should be at least ${SOFT_ERROR_VERSION} because LLVM will soon use new C++ features which your toolchain version doesn't support. Your version is ${CMAKE_CXX_COMPILER_VERSION}. You can temporarily opt out using LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.")
+    endif()
+  endif()
+endfunction(check_compiler_version)
+
+check_compiler_version("GNU" "GCC" ${GCC_MIN} ${GCC_SOFT_ERROR})
+check_compiler_version("Clang" "Clang" ${CLANG_MIN} ${CLANG_SOFT_ERROR})
+check_compiler_version("AppleClang" "Apple Clang" ${APPLECLANG_MIN} ${APPLECLANG_SOFT_ERROR})
+check_compiler_version("MSVC" "Visual Studio" ${MSVC_MIN} ${MSVC_SOFT_ERROR})
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  if (CMAKE_CXX_SIMULATE_ID MATCHES "MSVC")
+    if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS MSVC_MIN) # Clang simulating MSVC: validate the simulated MSVC version, and report that same value.
+      message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=${MSVC_MIN}, your version is ${CMAKE_CXX_SIMULATE_VERSION}.")
+    endif()
+    set(CLANG_CL 1)
+  elseif(NOT LLVM_ENABLE_LIBCXX)
+    # Test that we aren't using too old of a version of libstdc++.
+    set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+    set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
+    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x")
+    check_cxx_source_compiles("
+#include <iosfwd>
+#if defined(__GLIBCXX__)
+#if __GLIBCXX__ < ${GCC_MIN_DATE}
+#error Unsupported libstdc++ version
+#endif
+#endif
+int main() { return 0; }
+"
+      LLVM_LIBSTDCXX_MIN)
+    if(NOT LLVM_LIBSTDCXX_MIN)
+      message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.")
+    endif()
+    check_cxx_source_compiles("
+#include <iosfwd>
+#if defined(__GLIBCXX__)
+#if __GLIBCXX__ < ${GCC_SOFT_ERROR_DATE}
+#error Unsupported libstdc++ version
+#endif
+#endif
+int main() { return 0; }
+"
+      LLVM_LIBSTDCXX_SOFT_ERROR)
+    if(NOT LLVM_LIBSTDCXX_SOFT_ERROR)
+      if(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN)
+        message(WARNING "libstdc++ version should be at least ${GCC_SOFT_ERROR} because LLVM will soon use new C++ features which your toolchain version doesn't support. Ignoring because you've set LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.")
+      else()
+        message(FATAL_ERROR "libstdc++ version should be at least ${GCC_SOFT_ERROR} because LLVM will soon use new C++ features which your toolchain version doesn't support. You can temporarily opt out using LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN, but very soon your toolchain won't be supported.")
       endif()
     endif()
+    set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+    set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES})
   endif()
 endif()
diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake
index b239816c825392..bc3b210f01859d 100644
--- a/llvm/cmake/modules/CrossCompile.cmake
+++ b/llvm/cmake/modules/CrossCompile.cmake
@@ -52,6 +52,7 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype)
         -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="${experimental_targets_to_build_arg}"
         -DLLVM_DEFAULT_TARGET_TRIPLE="${TARGET_TRIPLE}"
         -DLLVM_TARGET_ARCH="${LLVM_TARGET_ARCH}"
+        -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}"
         ${build_type_flags} ${linker_flag} ${external_clang_dir}
     WORKING_DIRECTORY ${LLVM_${target_name}_BUILD}
     DEPENDS CREATE_LLVM_${target_name}
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index a5a574e5707777..eb219c58560b62 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -573,6 +573,15 @@ LLVM-specific variables
   options, which are passed to the CCACHE_MAXSIZE and CCACHE_DIR environment
   variables, respectively.
 
+**LLVM_FORCE_USE_OLD_TOOLCHAIN**:BOOL
+  If enabled, the compiler and standard library versions won't be checked. LLVM
+  may not compile at all, or might fail at runtime due to known bugs in these
+  toolchains.
+
+**LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN**:BOOL
+  If enabled, the compiler version check will only warn when using a toolchain
+  which is about to be deprecated, instead of emitting an error.
+
 CMake Caches
 ============
 
diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst
index 09912940cde2d2..4ed67cbf1ef1de 100644
--- a/llvm/docs/DeveloperPolicy.rst
+++ b/llvm/docs/DeveloperPolicy.rst
@@ -22,7 +22,7 @@ This policy is also designed to accomplish the following objectives:
 
 #. Make life as simple and easy for contributors as possible.
 
-#. Keep the tip of tree as stable as possible.
+#. Keep the top of tree as stable as possible.
 
 #. Establish awareness of the project's :ref:`copyright, license, and patent
    policies <copyright-license-patents>` with contributors to the project.
@@ -638,6 +638,47 @@ In essences, these rules are necessary for targets to gain and retain their
 status, but also markers to define bit-rot, and will be used to clean up the
 tree from unmaintained targets.
 
+.. _toolchain:
+
+Updating Toolchain Requirements
+-------------------------------
+
+We intend to require newer toolchains as time goes by. This means LLVM's
+codebase can use newer versions of C++ as they get standardized. Requiring newer
+toolchains to build LLVM can be painful for those building LLVM; therefore, it
+will only be done through the following process:
+
+  * Generally, try to support LLVM and GCC versions from the last 3 years at a
+    minimum. This time-based guideline is not strict: we may support much older
+    compilers, or decide to support fewer versions.
+
+  * An RFC is sent to the `llvm-dev mailing list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_
+
+    - Detail upsides of the version increase (e.g. which newer C++ language or
+      library features LLVM should use; avoid miscompiles in particular compiler
+      versions, etc).
+    - Detail downsides on important platforms (e.g. Ubuntu LTS status).
+
+  * Once the RFC reaches consensus, update the CMake toolchain version checks as
+    well as the :doc:`getting started<GettingStarted>` guide. We want to
+    soft-error when developers compile LLVM. We say "soft-error" because the
+    error can be turned into a warning using a CMake flag. This is an important
+    step: LLVM still doesn't have code which requires the new toolchains, but it
+    soon will. If you compile LLVM but don't read the mailing list, we should
+    tell you!
+
+  * Ensure that at least one LLVM release has had this soft-error. Not all
+    developers compile LLVM top-of-tree. These release-bound developers should
+    also be told about upcoming changes.
+
+  * Turn the soft-error into a hard-error after said LLVM release has branched.
+
+  * Update the :doc:`coding standards<CodingStandards>` to allow the new
+    features we've explicitly approved in the RFC.
+
+  * Start using the new features in LLVM's codebase.
+
+
 .. _copyright-license-patents:
 
 Copyright, License, and Patents
diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst
index b714cc6601a046..c22e821e3b7a05 100644
--- a/llvm/docs/GettingStarted.rst
+++ b/llvm/docs/GettingStarted.rst
@@ -170,7 +170,7 @@ uses the package and provides other details.
 Package                                                     Version      Notes
 =========================================================== ============ ==========================================
 `GNU Make <http://savannah.gnu.org/projects/make>`_         3.79, 3.79.1 Makefile/build processor
-`GCC <http://gcc.gnu.org/>`_                                >=4.8.0      C/C++ compiler\ :sup:`1`
+`GCC <http://gcc.gnu.org/>`_                                >=5.1.0      C/C++ compiler\ :sup:`1`
 `python <http://www.python.org/>`_                          >=2.7        Automated test suite\ :sup:`2`
 `zlib <http://zlib.net>`_                                   >=1.2.3.4    Compression library\ :sup:`3`
 =========================================================== ============ ==========================================
@@ -220,15 +220,25 @@ Host C++ Toolchain, both Compiler and Standard Library
 ------------------------------------------------------
 
 LLVM is very demanding of the host C++ compiler, and as such tends to expose
-bugs in the compiler. We are also planning to follow improvements and
-developments in the C++ language and library reasonably closely. As such, we
-require a modern host C++ toolchain, both compiler and standard library, in
-order to build LLVM.
+bugs in the compiler. We also attempt to follow improvements and developments in
+the C++ language and library reasonably closely. As such, we require a modern
+host C++ toolchain, both compiler and standard library, in order to build LLVM.
 
-For the most popular host toolchains we check for specific minimum versions in
-our build systems:
+LLVM is written using the subset of C++ documented in :doc:`coding
+standards<CodingStandards>`. To enforce this language version, we check the most
+popular host toolchains for specific minimum versions in our build systems:
+
+* Clang 3.5
+* Apple Clang 6.0
+* GCC 5.1
+* Visual Studio 2017
+
+The below versions currently soft-error as we transition to the new compiler
+versions listed above. The LLVM codebase is currently known to compile correctly
+with the following compilers, though this will change in the near future:
 
 * Clang 3.1
+* Apple Clang 3.1
 * GCC 4.8
 * Visual Studio 2015 (Update 3)
 
@@ -282,33 +292,36 @@ The first step is to get a recent GCC toolchain installed. The most common
 distribution on which users have struggled with the version requirements is
 Ubuntu Precise, 12.04 LTS. For this distribution, one easy option is to install
 the `toolchain testing PPA`_ and use it to install a modern GCC. There is
-a really nice discussions of this on the `ask ubuntu stack exchange`_. However,
-not all users can use PPAs and there are many other distributions, so it may be
-necessary (or just useful, if you're here you *are* doing compiler development
-after all) to build and install GCC from source. It is also quite easy to do
-these days.
+a really nice discussion of this on the `ask ubuntu stack exchange`_ and a
+`github gist`_ with updated commands. However, not all users can use PPAs and
+there are many other distributions, so it may be necessary (or just useful, if
+you're here you *are* doing compiler development after all) to build and install
+GCC from source. It is also quite easy to do these days.
 
 .. _toolchain testing PPA:
   https://launchpad.net/~ubuntu-toolchain-r/+archive/test
 .. _ask ubuntu stack exchange:
-  http://askubuntu.com/questions/271388/how-to-install-gcc-4-8-in-ubuntu-12-04-from-the-terminal
+  https://askubuntu.com/questions/466651/how-do-i-use-the-latest-gcc-on-ubuntu/581497#581497
+.. _github gist:
+  https://gist.github.com/application2000/73fd6f4bf1be6600a2cf9f56315a2d91
 
-Easy steps for installing GCC 4.8.2:
+Easy steps for installing GCC 5.1.0:
 
 .. code-block:: console
 
-  % wget https://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2
-  % wget https://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2.sig
+  % gcc_version=5.1.0
+  % wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2
+  % wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2.sig
   % wget https://ftp.gnu.org/gnu/gnu-keyring.gpg
-  % signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-4.8.2.tar.bz2.sig`
+  % signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-${gcc_version}.tar.bz2.sig`
   % if [ $signature_invalid ]; then echo "Invalid signature" ; exit 1 ; fi
-  % tar -xvjf gcc-4.8.2.tar.bz2
-  % cd gcc-4.8.2
+  % tar -xvjf gcc-${gcc_version}.tar.bz2
+  % cd gcc-${gcc_version}
   % ./contrib/download_prerequisites
   % cd ..
-  % mkdir gcc-4.8.2-build
-  % cd gcc-4.8.2-build
-  % $PWD/../gcc-4.8.2/configure --prefix=$HOME/toolchains --enable-languages=c,c++
+  % mkdir gcc-${gcc_version}-build
+  % cd gcc-${gcc_version}-build
+  % $PWD/../gcc-${gcc_version}/configure --prefix=$HOME/toolchains --enable-languages=c,c++
   % make -j$(nproc)
   % make install
 
@@ -316,7 +329,7 @@ For more details, check out the excellent `GCC wiki entry`_, where I got most
 of this information from.
 
 .. _GCC wiki entry:
-  http://gcc.gnu.org/wiki/InstallingGCC
+  https://gcc.gnu.org/wiki/InstallingGCC
 
 Once you have a GCC toolchain, configure your build of LLVM to use the new
 toolchain for your host compiler and C++ standard library. Because the new
@@ -336,7 +349,7 @@ If you fail to set rpath, most LLVM binaries will fail on startup with a message
 from the loader similar to ``libstdc++.so.6: version `GLIBCXX_3.4.20' not
 found``. This means you need to tweak the -rpath linker flag.
 
-When you build Clang, you will need to give *it* access to modern C++11
+When you build Clang, you will need to give *it* access to modern C++
 standard library in order to use it as your new host in part of a bootstrap.
 There are two easy ways to do this, either build (and install) libc++ along
 with Clang and then use it with the ``-stdlib=libc++`` compile and link flag,
diff --git a/llvm/docs/LibFuzzer.rst b/llvm/docs/LibFuzzer.rst
index 0737fbbcd93050..37b0833599a945 100644
--- a/llvm/docs/LibFuzzer.rst
+++ b/llvm/docs/LibFuzzer.rst
@@ -645,10 +645,20 @@ coverage set of the process (since the fuzzer is in-process). In other words, by
 using more external dependencies we will slow down the fuzzer while the main
 reason for it to exist is extreme speed.
 
-Q. What about Windows then? The fuzzer contains code that does not build on Windows.
+Q. Does libFuzzer Support Windows?
 ------------------------------------------------------------------------------------
 
-Volunteers are welcome.
+Yes, libFuzzer now supports Windows. Initial support was added in r341082.
+You can download a build of Clang for Windows
+that has libFuzzer from
+`LLVM Snapshot Builds <https://llvm.org/builds/>`_.
+
+Using libFuzzer on Windows without ASAN is unsupported. Building fuzzers with the
+``/MD`` (dynamic runtime library) compile option is unsupported. Support for these
+may be added in the future. Linking fuzzers with the ``/INCREMENTAL`` link option
+(or the ``/DEBUG`` option which implies it) is also unsupported.
+
+Send any questions or comments to the mailing list: libfuzzer(#)googlegroups.com
 
 Q. When libFuzzer is not a good solution for a problem?
 ---------------------------------------------------------
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index db9cf51949520a..9e2ee95c651944 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -40,6 +40,22 @@ Non-comprehensive list of changes in this release
    functionality, or simply have a lot to talk about), see the `NOTE` below
    for adding a new subsection.
 
+* As `discussed on the mailing list
+  <https://lists.llvm.org/pipermail/llvm-dev/2019-January/129452.html>`_,
+  building LLVM will soon require more recent toolchains as follows:
+
+  ============= ====
+  Clang         3.5
+  Apple Clang   6.0
+  GCC           5.1
+  Visual Studio 2017
+  ============= ====
+
+  A new CMake check when configuring LLVM provides a soft-error if your
+  toolchain will become unsupported soon. You can opt out of the soft-error by
+  setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to
+  ``ON``.
+
 * The **llvm-cov** tool can now export lcov trace files using the
   `-format=lcov` option of the `export` command.
 
@@ -48,6 +64,12 @@ Non-comprehensive list of changes in this release
   functionality.  See `Writing an LLVM Pass
   <WritingAnLLVMPass.html#setting-up-the-build-environment>`_.
 
+* For MinGW, references to data variables that might need to be imported
+  from a dll are accessed via a stub, to allow the linker to convert it to
+  a dllimport if needed.
+
+* Added support for labels as offsets in ``.reloc`` directive.
+
 .. NOTE
    If you would like to document a larger change, then you can add a
    subsection about it right here. You can copy the following boilerplate
@@ -62,17 +84,44 @@ Changes to the LLVM IR
 ----------------------
 
 
+Changes to the AArch64 Target
+-----------------------------
+
+* Added support for the ``.arch_extension`` assembler directive, just like
+  on ARM.
+
+
 Changes to the ARM Backend
 --------------------------
 
  During this release ...
 
 
+Changes to the Hexagon Target
+-----------------------------
+
+* Added support for Hexagon/HVX V66 ISA.
+
 Changes to the MIPS Target
 --------------------------
 
- During this release ...
+* Improved support of GlobalISel instruction selection framework.
+
+* Implemented emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR``
+  relocations. These relocations provide hints to a linker for optimization
+  of jumps to protected symbols.
+
+* ORC JIT is now supported for MIPS and MIPS64 architectures.
+
+* Assembler now suggests alternative MIPS instruction mnemonics when
+  an invalid one is specified.
+
+* Improved support for MIPS N32 ABI.
+
+* Added new instructions (``pll.ps``, ``plu.ps``, ``cvt.s.pu``,
+  ``cvt.s.pl``, ``cvt.ps``, ``sigrie``).
 
+* Numerous bug fixes and code cleanups.
 
 Changes to the PowerPC Target
 -----------------------------
@@ -123,7 +172,31 @@ Changes to the DAG infrastructure
 External Open Source Projects Using LLVM 8
 ==========================================
 
-* A project...
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
+and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
+are underway.
+
+Zig Programming Language
+------------------------
+
+`Zig <https://ziglang.org>`_  is a system programming language intended to be
+an alternative to C. It provides high level features such as generics, compile
+time function execution, and partial evaluation, while exposing low level LLVM
+IR features such as aliases and intrinsics. Zig uses Clang to provide automatic
+import of .h symbols, including inline functions and simple macros. Zig uses
+LLD combined with lazily building compiler-rt to provide out-of-the-box
+cross-compiling for all supported targets.
 
 
 Additional Information
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index d9f0f94b298d4e..b02ddb6b7e299d 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -165,7 +165,8 @@ struct WasmSymbolInfo {
   StringRef Name;
   uint8_t Kind;
   uint32_t Flags;
-  StringRef Module; // For undefined symbols the module name of the import
+  StringRef ImportModule; // For undefined symbols the module of the import
+  StringRef ImportName;   // For undefined symbols the name of the import
   union {
     // For function or global symbols, the index in function or global index
     // space.
@@ -284,6 +285,7 @@ const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
 const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
 const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
 const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
+const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
 
 #define WASM_RELOC(name, value) name = value,
 
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 25edf5bcce516a..6dbe1650adabff 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -329,6 +329,7 @@ class MachineFunction {
   bool CallsUnwindInit = false;
   bool HasEHScopes = false;
   bool HasEHFunclets = false;
+  bool HasLocalEscape = false;
 
   /// List of C++ TypeInfo used.
   std::vector<const GlobalValue *> TypeInfos;
@@ -811,6 +812,9 @@ class MachineFunction {
   bool hasEHFunclets() const { return HasEHFunclets; }
   void setHasEHFunclets(bool V) { HasEHFunclets = V; }
 
+  bool hasLocalEscape() const { return HasLocalEscape; }
+  void setHasLocalEscape(bool V) { HasLocalEscape = V; }
+
   /// Find or create an LandingPadInfo for the specified MachineBasicBlock.
   LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
 
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 7913ce828fbc0f..6585cb71769ab0 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
     [IntrArgMemOnly, NoCapture<0>]
 >;
 
+class AMDGPUDSOrderedIntrinsic : Intrinsic<
+  [llvm_i32_ty],
+  // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
+  // the bit packing can be optimized at the IR level.
+  [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
+   llvm_i32_ty, // value to add or swap
+   llvm_i32_ty, // ordering
+   llvm_i32_ty, // scope
+   llvm_i1_ty,  // isVolatile
+   llvm_i32_ty, // ordered count index (OA index), also added to the address
+   llvm_i1_ty,  // wave release, usually set to 1
+   llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
+  [NoCapture<0>]
+>;
+
+def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
+def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
+
 def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
 def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
 def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index 8e66dc881d0fba..34639b6ebb6408 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -19,7 +19,8 @@ class MCSymbolWasm : public MCSymbol {
   bool IsWeak = false;
   bool IsHidden = false;
   bool IsComdat = false;
-  std::string ModuleName;
+  Optional<std::string> ImportModule;
+  Optional<std::string> ImportName;
   wasm::WasmSignature *Signature = nullptr;
   Optional<wasm::WasmGlobalType> GlobalType;
   Optional<wasm::WasmEventType> EventType;
@@ -32,7 +33,7 @@ class MCSymbolWasm : public MCSymbol {
   // Use a module name of "env" for now, for compatibility with existing tools.
   // This is temporary, and may change, as the ABI is not yet stable.
   MCSymbolWasm(const StringMapEntry<bool> *Name, bool isTemporary)
-      : MCSymbol(SymbolKindWasm, Name, isTemporary), ModuleName("env") {}
+      : MCSymbol(SymbolKindWasm, Name, isTemporary) {}
   static bool classof(const MCSymbol *S) { return S->isWasm(); }
 
   const MCExpr *getSize() const { return SymbolSize; }
@@ -55,8 +56,21 @@ class MCSymbolWasm : public MCSymbol {
   bool isComdat() const { return IsComdat; }
   void setComdat(bool isComdat) { IsComdat = isComdat; }
 
-  const StringRef getModuleName() const { return ModuleName; }
-  void setModuleName(StringRef Name) { ModuleName = Name; }
+  const StringRef getImportModule() const {
+      if (ImportModule.hasValue()) {
+          return ImportModule.getValue();
+      }
+      return "env";
+  }
+  void setImportModule(StringRef Name) { ImportModule = Name; }
+
+  const StringRef getImportName() const {
+      if (ImportName.hasValue()) {
+          return ImportName.getValue();
+      }
+      return getName();
+  }
+  void setImportName(StringRef Name) { ImportName = Name; }
 
   const wasm::WasmSignature *getSignature() const { return Signature; }
   void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h
index 7a04fd52bc50e4..2b95a174e018cb 100644
--- a/llvm/include/llvm/Support/JSON.h
+++ b/llvm/include/llvm/Support/JSON.h
@@ -481,6 +481,7 @@ class Value {
   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
                                       std::string, json::Array, json::Object>
       Union;
+  friend bool operator==(const Value &, const Value &);
 };
 
 bool operator==(const Value &, const Value &);
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index e24398b90012b3..fe13559768189e 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -44,6 +44,11 @@ class FunctionImportGlobalProcessing {
   /// to promote any non-renamable values.
   SmallPtrSet<GlobalValue *, 8> Used;
 
+  /// Keep track of any COMDATs that require renaming (because COMDAT
+  /// leader was promoted and renamed). Maps from original COMDAT to one
+  /// with new name.
+  DenseMap<const Comdat *, Comdat *> RenamedComdats;
+
   /// Check if we should promote the given local value to global scope.
   bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1bf551336c9c7d..f97c2f6d674874 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1964,8 +1964,10 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
 void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
   // Emit the size.
   Asm->OutStreamer->AddComment("Loc expr size");
-  Asm->emitInt16(DebugLocs.getBytes(Entry).size());
-
+  if (getDwarfVersion() >= 5)
+    Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+  else
+    Asm->emitInt16(DebugLocs.getBytes(Entry).size());
   // Emit the entry.
   APByteStreamer Streamer(*Asm);
   emitDebugLocEntry(Streamer, Entry);
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index cf8e8c69bc2a61..92df09b7d6a23a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -545,15 +545,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
       OS.AddComment(Comment);
   };
 
-  // Emit a label assignment with the SEH frame offset so we can use it for
-  // llvm.eh.recoverfp.
-  StringRef FLinkageName =
-      GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
-  MCSymbol *ParentFrameOffset =
-      Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
-  const MCExpr *MCOffset =
-      MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
-  Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+  if (!isAArch64) {
+    // Emit a label assignment with the SEH frame offset so we can use it for
+    // llvm.eh.recoverfp.
+    StringRef FLinkageName =
+        GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+    MCSymbol *ParentFrameOffset =
+        Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+    const MCExpr *MCOffset =
+        MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+    Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+  }
 
   // Use the assembler to compute the number of table entries through label
   // difference and division.
@@ -937,6 +939,9 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
   if (FI != INT_MAX) {
     const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
     unsigned UnusedReg;
+    // FIXME: getFrameIndexReference needs to match the behavior of
+    // AArch64RegisterInfo::hasBasePointer in which one of the scenarios where
+    // SP is used is if frame size >= 256.
     Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
   }
 
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 764a84c7e1327e..dc1ad953e71d46 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
   }
 
 #ifndef NDEBUG
-  bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+  bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
+                   Op.getType() == MachineOperand::MO_MCSymbol;
   // OpNo now points as the desired insertion point.  Unless this is a variadic
   // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
   // RegMask operands go between the explicit and implicit operands.
   assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
-          OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+          OpNo < MCID->getNumOperands() || isDebugOp) &&
          "Trying to add an operand to a machine instr that is already done!");
 #endif
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c9772137..6af01423ca1069 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16214,23 +16214,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   // The build vector contains some number of undef elements and exactly
   // one other element. That other element must be a zero-extended scalar
   // extracted from a vector at a constant index to turn this into a shuffle.
+  // Also, require that the build vector does not implicitly truncate/extend
+  // its elements.
   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+  EVT VT = BV->getValueType(0);
   SDValue Zext = BV->getOperand(ZextElt);
   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
     return SDValue();
 
-  // The zero-extend must be a multiple of the source size.
+  // The zero-extend must be a multiple of the source size, and we must be
+  // building a vector of the same size as the source of the extract element.
   SDValue Extract = Zext.getOperand(0);
   unsigned DestSize = Zext.getValueSizeInBits();
   unsigned SrcSize = Extract.getValueSizeInBits();
-  if (DestSize % SrcSize != 0)
+  if (DestSize % SrcSize != 0 ||
+      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
     return SDValue();
 
   // Create a shuffle mask that will combine the extracted element with zeros
   // and undefs.
-  int ZextRatio =  DestSize / SrcSize;
+  int ZextRatio = DestSize / SrcSize;
   int NumMaskElts = NumBVOps * ZextRatio;
   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
   for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +16266,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                       ShufMask);
-  return DAG.getBitcast(BV->getValueType(0), Shuf);
+  return DAG.getBitcast(VT, Shuf);
 }
 
 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 871ab9b29881b2..bfeb3d1bc2b91f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6182,6 +6182,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
           .addFrameIndex(FI);
     }
 
+    MF.setHasLocalEscape(true);
+
     return nullptr;
   }
 
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index f8b5ff6ec8fb60..94df6946f3ae2e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -184,7 +184,8 @@ DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
     }
 
     if (Kind != dwarf::DW_LLE_base_address) {
-      unsigned Bytes = Data.getU16(Offset);
+      unsigned Bytes =
+          Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
       // A single location description describing the location of the object...
       StringRef str = Data.getData().substr(*Offset, Bytes);
       *Offset += Bytes;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b2eb8b09982e89..27064154221f47 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -469,6 +469,11 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
     }
   }
 
+  if (Name == "seh.recoverfp") {
+    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+    return true;
+  }
+
   return false;
 }
 
@@ -544,10 +549,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
       return true;
     }
-    if (Name == "x86.seh.recoverfp") {
-      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
-      return true;
-    }
     break;
   }
 
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 3c022199145fba..0e4174a7a4c947 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -559,6 +559,11 @@ static void AttemptToFoldSymbolOffsetDifference(
   if (Asm->isThumbFunc(&SA))
     Addend |= 1;
 
+  // If symbol is labeled as micromips, we set low-bit to ensure
+  // correct offset in .gcc_except_table
+  if (Asm->getBackend().isMicroMips(&SA))
+    Addend |= 1;
+
   // Clear the symbol expr pointers to indicate we have folded these
   // operands.
   A = B = nullptr;
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index cf42a6f7075b9a..a0506715be3726 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3364,10 +3364,11 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
   }
 
   if (FileNumber == -1) {
-    if (!getContext().getAsmInfo()->hasSingleParameterDotFile())
-      return Error(DirectiveLoc,
-                   "target does not support '.file' without a number");
-    getStreamer().EmitFileDirective(Filename);
+    // Ignore the directive if there is no number and the target doesn't support
+    // numberless .file directives. This allows some portability of assembler
+    // between different object file formats.
+    if (getContext().getAsmInfo()->hasSingleParameterDotFile())
+      getStreamer().EmitFileDirective(Filename);
   } else {
     // In case there is a -g option as well as debug info from directive .file,
     // we turn off the -g option, directly use the existing debug info instead.
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index 0724b109e1a1da..8bc1f08c88750e 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -453,6 +453,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
   }
 }
 
+// Returns the epilog symbol of an epilog with the exact same unwind code
+// sequence, if it exists.  Otherwise, returns nullptr.
+// EpilogInstrs - Unwind codes for the current epilog.
+// Epilogs - Epilogs that potentially match the current epilog.
+static MCSymbol*
+FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
+                   const std::vector<MCSymbol *>& Epilogs,
+                   const WinEH::FrameInfo *info) {
+  for (auto *EpilogStart : Epilogs) {
+    auto InstrsIter = info->EpilogMap.find(EpilogStart);
+    assert(InstrsIter != info->EpilogMap.end() &&
+           "Epilog not found in EpilogMap");
+    const auto &Instrs = InstrsIter->second;
+
+    if (Instrs.size() != EpilogInstrs.size())
+      continue;
+
+    bool Match = true;
+    for (unsigned i = 0; i < Instrs.size(); ++i)
+      if (Instrs[i].Operation != EpilogInstrs[i].Operation ||
+          Instrs[i].Offset != EpilogInstrs[i].Offset ||
+          Instrs[i].Register != EpilogInstrs[i].Register) {
+         Match = false;
+         break;
+      }
+
+    if (Match)
+      return EpilogStart;
+  }
+  return nullptr;
+}
+
 // Populate the .xdata section.  The format of .xdata on ARM64 is documented at
 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
 static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -477,12 +509,28 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
 
   // Process epilogs.
   MapVector<MCSymbol *, uint32_t> EpilogInfo;
+  // Epilogs processed so far.
+  std::vector<MCSymbol *> AddedEpilogs;
+
   for (auto &I : info->EpilogMap) {
     MCSymbol *EpilogStart = I.first;
     auto &EpilogInstrs = I.second;
     uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
-    EpilogInfo[EpilogStart] = TotalCodeBytes;
-    TotalCodeBytes += CodeBytes;
+
+    MCSymbol* MatchingEpilog =
+      FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+    if (MatchingEpilog) {
+      assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+             "Duplicate epilog not found");
+      EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog];
+      // Clear the unwind codes in the EpilogMap, so that they don't get output
+      // in the logic below.
+      EpilogInstrs.clear();
+    } else {
+      EpilogInfo[EpilogStart] = TotalCodeBytes;
+      TotalCodeBytes += CodeBytes;
+      AddedEpilogs.push_back(EpilogStart);
+    }
   }
 
   // Code Words, Epilog count, E, X, Vers, Function Length
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 0cca3757be907f..333748db91904c 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -982,7 +982,8 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
       case wasm::WASM_SYMBOL_TYPE_GLOBAL:
       case wasm::WASM_SYMBOL_TYPE_EVENT:
         encodeULEB128(Sym.ElementIndex, W.OS);
-        if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0)
+        if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
+            (Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
           writeString(Sym.Name);
         break;
       case wasm::WASM_SYMBOL_TYPE_DATA:
@@ -1162,8 +1163,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   MCSymbolWasm *MemorySym =
       cast<MCSymbolWasm>(Ctx.getOrCreateSymbol("__linear_memory"));
   wasm::WasmImport MemImport;
-  MemImport.Module = MemorySym->getModuleName();
-  MemImport.Field = MemorySym->getName();
+  MemImport.Module = MemorySym->getImportModule();
+  MemImport.Field = MemorySym->getImportName();
   MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY;
   Imports.push_back(MemImport);
 
@@ -1173,8 +1174,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   MCSymbolWasm *TableSym =
       cast<MCSymbolWasm>(Ctx.getOrCreateSymbol("__indirect_function_table"));
   wasm::WasmImport TableImport;
-  TableImport.Module = TableSym->getModuleName();
-  TableImport.Field = TableSym->getName();
+  TableImport.Module = TableSym->getImportModule();
+  TableImport.Field = TableSym->getImportName();
   TableImport.Kind = wasm::WASM_EXTERNAL_TABLE;
   TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF;
   Imports.push_back(TableImport);
@@ -1200,8 +1201,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
     if (!WS.isDefined() && !WS.isComdat()) {
       if (WS.isFunction()) {
         wasm::WasmImport Import;
-        Import.Module = WS.getModuleName();
-        Import.Field = WS.getName();
+        Import.Module = WS.getImportModule();
+        Import.Field = WS.getImportName();
         Import.Kind = wasm::WASM_EXTERNAL_FUNCTION;
         Import.SigIndex = getFunctionType(WS);
         Imports.push_back(Import);
@@ -1211,8 +1212,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
           report_fatal_error("undefined global symbol cannot be weak");
 
         wasm::WasmImport Import;
-        Import.Module = WS.getModuleName();
-        Import.Field = WS.getName();
+        Import.Module = WS.getImportModule();
+        Import.Field = WS.getImportName();
         Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
         Import.Global = WS.getGlobalType();
         Imports.push_back(Import);
@@ -1222,8 +1223,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
           report_fatal_error("undefined event symbol cannot be weak");
 
         wasm::WasmImport Import;
-        Import.Module = WS.getModuleName();
-        Import.Field = WS.getName();
+        Import.Module = WS.getImportModule();
+        Import.Field = WS.getImportName();
         Import.Kind = wasm::WASM_EXTERNAL_EVENT;
         Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
         Import.Event.SigIndex = getEventType(WS);
@@ -1448,6 +1449,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
       Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL;
     if (WS.isUndefined())
       Flags |= wasm::WASM_SYMBOL_UNDEFINED;
+    if (WS.getName() != WS.getImportName())
+      Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
 
     wasm::WasmSymbolInfo Info;
     Info.Name = WS.getName();
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index d84cb48c9fbd5d..66a53becbb0561 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -505,9 +505,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
           Function.SymbolName = Info.Name;
       } else {
         wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex];
+        if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+          Info.Name = readString(Ctx);
+        else
+          Info.Name = Import.Field;
         Signature = &Signatures[Import.SigIndex];
-        Info.Name = Import.Field;
-        Info.Module = Import.Module;
+        Info.ImportName = Import.Field;
+        Info.ImportModule = Import.Module;
       }
       break;
 
@@ -530,8 +534,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
           Global.SymbolName = Info.Name;
       } else {
         wasm::WasmImport &Import = *ImportedGlobals[Info.ElementIndex];
-        Info.Name = Import.Field;
+        if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+          Info.Name = readString(Ctx);
+        else
+          Info.Name = Import.Field;
         GlobalType = &Import.Global;
+        Info.ImportName = Import.Field;
+        Info.ImportModule = Import.Module;
       }
       break;
 
@@ -585,9 +594,14 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
 
       } else {
         wasm::WasmImport &Import = *ImportedEvents[Info.ElementIndex];
+        if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+          Info.Name = readString(Ctx);
+        else
+          Info.Name = Import.Field;
         EventType = &Import.Event;
         Signature = &Signatures[EventType->SigIndex];
-        Info.Name = Import.Field;
+        Info.ImportName = Import.Field;
+        Info.ImportModule = Import.Module;
       }
       break;
     }
diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index d468013fb94a53..07a556814915d1 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -182,6 +182,12 @@ bool operator==(const Value &L, const Value &R) {
   case Value::Boolean:
     return *L.getAsBoolean() == *R.getAsBoolean();
   case Value::Number:
+    // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+    // The same integer must convert to the same double, per the standard.
+    // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
+    // So we avoid floating point promotion for exact comparisons.
+    if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
+      return L.getAsInteger() == R.getAsInteger();
     return *L.getAsNumber() == *R.getAsNumber();
   case Value::String:
     return *L.getAsString() == *R.getAsString();
diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc
index 2d49ce1ad747a0..92bec36d6a2dae 100644
--- a/llvm/lib/Support/Unix/Threading.inc
+++ b/llvm/lib/Support/Unix/Threading.inc
@@ -202,6 +202,12 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
   char buf[len];
   ::pthread_getname_np(::pthread_self(), buf, len);
 
+  Name.append(buf, buf + strlen(buf));
+#elif defined(__OpenBSD__)
+  constexpr uint32_t len = get_max_thread_name_length_impl();
+  char buf[len];
+  ::pthread_get_name_np(::pthread_self(), buf, len);
+
   Name.append(buf, buf + strlen(buf));
 #elif defined(__linux__)
 #if HAVE_PTHREAD_GETNAME_NP
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 0442076992e24e..0254a572434f46 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -694,6 +694,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   switch (MI->getOpcode()) {
   default:
     break;
+    case AArch64::MOVMCSym: {
+    unsigned DestReg = MI->getOperand(0).getReg();
+    const MachineOperand &MO_Sym = MI->getOperand(1);
+    MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
+    MCOperand Hi_MCSym, Lo_MCSym;
+
+    Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
+    Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
+
+    MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
+    MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
+
+    MCInst MovZ;
+    MovZ.setOpcode(AArch64::MOVZXi);
+    MovZ.addOperand(MCOperand::createReg(DestReg));
+    MovZ.addOperand(Hi_MCSym);
+    MovZ.addOperand(MCOperand::createImm(16));
+    EmitToStreamer(*OutStreamer, MovZ);
+
+    MCInst MovK;
+    MovK.setOpcode(AArch64::MOVKXi);
+    MovK.addOperand(MCOperand::createReg(DestReg));
+    MovK.addOperand(MCOperand::createReg(DestReg));
+    MovK.addOperand(Lo_MCSym);
+    MovK.addOperand(MCOperand::createImm(0));
+    EmitToStreamer(*OutStreamer, MovK);
+    return;
+  }
   case AArch64::MOVIv2d_ns:
     // If the target has <rdar://problem/16473581>, lower this
     // instruction to movi.16b instead.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 538a8d7e8fbcf5..621aa8bc783a61 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -228,6 +228,10 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
       MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
     return true;
 
+  // Win64 SEH requires frame pointer if funclets are present.
+  if (MF.hasLocalEscape())
+    return true;
+
   return false;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e01ca14d7f63dd..762f4413d72b91 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2743,6 +2743,34 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::aarch64_neon_umin:
     return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
+
+  case Intrinsic::localaddress: {
+    // Returns one of the stack, base, or frame pointer registers, depending on
+    // which is used to reference local variables.
+    MachineFunction &MF = DAG.getMachineFunction();
+    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+    unsigned Reg;
+    if (RegInfo->hasBasePointer(MF))
+      Reg = RegInfo->getBaseRegister();
+    else // This function handles the SP or FP case.
+      Reg = RegInfo->getFrameRegister(MF);
+    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
+                              Op.getSimpleValueType());
+  }
+
+  case Intrinsic::eh_recoverfp: {
+    // FIXME: This needs to be implemented to correctly handle highly aligned
+    // stack objects. For now we simply return the incoming FP. Refer to D53541
+    // for more details.
+    SDValue FnOp = Op.getOperand(1);
+    SDValue IncomingFPOp = Op.getOperand(2);
+    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+    if (!Fn)
+      report_fatal_error(
+          "llvm.eh.recoverfp must take a function as the first argument");
+    return IncomingFPOp;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index ada06788857293..50316ebe218b70 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2292,6 +2292,31 @@ void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
   }
 }
 
+void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       DebugLoc DL, unsigned DestReg,
+                                       unsigned SrcReg, bool KillSrc,
+                                       unsigned Opcode, unsigned ZeroReg,
+                                       llvm::ArrayRef<unsigned> Indices) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned NumRegs = Indices.size();
+
+#ifndef NDEBUG
+  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
+         "GPR reg sequences should not be able to overlap");
+#endif
+
+  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
+    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+    MIB.addReg(ZeroReg);
+    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+    MIB.addImm(0);
+  }
+}
+
 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DestReg,
@@ -2431,6 +2456,22 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
+      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
+    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
+                    AArch64::XZR, Indices);
+    return;
+  }
+
+  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
+      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
+    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
+                    AArch64::WZR, Indices);
+    return;
+  }
+
   if (AArch64::FPR128RegClass.contains(DestReg) &&
       AArch64::FPR128RegClass.contains(SrcReg)) {
     if (Subtarget.hasNEON()) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 9954669d567508..e48c26d4a84a03 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -122,6 +122,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
                         const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                         bool KillSrc, unsigned Opcode,
                         llvm::ArrayRef<unsigned> Indices) const;
+  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                       DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
+                       llvm::ArrayRef<unsigned> Indices) const;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                    bool KillSrc) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c24b8b36441bbb..86a4119c45f580 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -133,6 +133,10 @@ def UseNegativeImmediates
     : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
                                              "NegativeImmediates">;
 
+def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
+                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                                       SDTCisInt<1>]>>;
+
 
 //===----------------------------------------------------------------------===//
 // AArch64-specific DAG Nodes.
@@ -6801,5 +6805,8 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
 def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
           (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
 
+def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
+def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
+
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 96ae45ae3d0dcd..3daac23592de7d 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -466,6 +466,13 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Modify MI as necessary to handle as much of 'Offset' as possible
   Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
+
+  if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
+    MachineOperand &FI = MI.getOperand(FIOperandNum);
+    FI.ChangeToImmediate(Offset);
+    return;
+  }
+
   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
     return;
 
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index d3710cea0687ec..8e6aa69eae854f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -649,10 +649,12 @@ def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
 // ARMv8.1a atomic CASP register operands
 
 
-def WSeqPairs : RegisterTuples<[sube32, subo32], 
-                               [(rotl GPR32, 0), (rotl GPR32, 1)]>;
-def XSeqPairs : RegisterTuples<[sube64, subo64], 
-                               [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+                               [(decimate (rotl GPR32, 0), 2),
+                                (decimate (rotl GPR32, 1), 2)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+                               [(decimate (rotl GPR64, 0), 2),
+                                (decimate (rotl GPR64, 1), 2)]>;
 
 def WSeqPairsClass   : RegisterClass<"AArch64", [untyped], 32, 
                                      (add WSeqPairs)>{
diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index e9699b0367d3d5..50300305abe337 100644
--- a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -103,6 +103,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
@@ -146,25 +147,31 @@ class AArch64SpeculationHardening : public MachineFunctionPass {
   BitVector RegsAlreadyMasked;
 
   bool functionUsesHardeningRegister(MachineFunction &MF) const;
-  bool instrumentControlFlow(MachineBasicBlock &MBB);
+  bool instrumentControlFlow(MachineBasicBlock &MBB,
+                             bool &UsesFullSpeculationBarrier);
   bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                AArch64CC::CondCode &CondCode) const;
   void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
                           AArch64CC::CondCode &CondCode, DebugLoc DL) const;
-  void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+  void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const;
-  void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+  void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned TmpReg) const;
+  void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    DebugLoc DL) const;
 
   bool slhLoads(MachineBasicBlock &MBB);
   bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineInstr &MI, unsigned Reg);
-  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
+                                        bool UsesFullSpeculationBarrier);
   bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MBBI);
+                                  MachineBasicBlock::iterator MBBI,
+                                  bool UsesFullSpeculationBarrier);
   bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                   DebugLoc DL);
 };
@@ -207,15 +214,19 @@ bool AArch64SpeculationHardening::endsWithCondControlFlow(
   return true;
 }
 
+void AArch64SpeculationHardening::insertFullSpeculationBarrier(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    DebugLoc DL) const {
+  // A full control flow speculation barrier consists of (DSB SYS + ISB)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
+}
+
 void AArch64SpeculationHardening::insertTrackingCode(
     MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
     DebugLoc DL) const {
   if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
-        .addImm(0xf);
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
-        .addImm(0xf);
+    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
   } else {
     BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
         .addDef(MisspeculatingTaintReg)
@@ -227,7 +238,7 @@ void AArch64SpeculationHardening::insertTrackingCode(
 }
 
 bool AArch64SpeculationHardening::instrumentControlFlow(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
   LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
 
   bool Modified = false;
@@ -263,55 +274,105 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
   }
 
   // Perform correct code generation around function calls and before returns.
-  {
-    SmallVector<MachineInstr *, 4> ReturnInstructions;
-    SmallVector<MachineInstr *, 4> CallInstructions;
+  // The below variables record the return/terminator instructions and the call
+  // instructions respectively; including which register is available as a
+  // temporary register just before the recorded instructions.
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
+  // If a temporary register is not available for at least one of the
+  // instructions for which we need to transfer taint to the stack pointer, we
+  // need to insert a full speculation barrier.
+  // TmpRegisterNotAvailableEverywhere tracks that condition.
+  bool TmpRegisterNotAvailableEverywhere = false;
+
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);
+
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+    MachineInstr &MI = *I;
+    if (!MI.isReturn() && !MI.isCall())
+      continue;
 
-    for (MachineInstr &MI : MBB) {
-      if (MI.isReturn())
-        ReturnInstructions.push_back(&MI);
-      else if (MI.isCall())
-        CallInstructions.push_back(&MI);
-    }
+    // The RegScavenger represents registers available *after* the MI
+    // instruction pointed to by RS.getCurrentPosition().
+    // We need to have a register that is available *before* the MI is executed.
+    if (I != MBB.begin())
+      RS.forward(std::prev(I));
+    // FIXME: The below just finds *an* unused register. Maybe code could be
+    // optimized more if this looks for the register that isn't used for the
+    // longest time around this place, to enable more scheduling freedom. Not
+    // sure if that would actually result in a big performance difference
+    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
+    // already to do this - but it's unclear if that could easily be used here.
+    unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+    LLVM_DEBUG(dbgs() << "RS finds "
+                      << ((TmpReg == 0) ? "no register " : "register ");
+               if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
+               dbgs() << "to be available at MI " << MI);
+    if (TmpReg == 0)
+      TmpRegisterNotAvailableEverywhere = true;
+    if (MI.isReturn())
+      ReturnInstructions.push_back({&MI, TmpReg});
+    else if (MI.isCall())
+      CallInstructions.push_back({&MI, TmpReg});
+  }
 
-    Modified |=
-        (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+  if (TmpRegisterNotAvailableEverywhere) {
+    // When a temporary register is not available everywhere in this basic
+    // block where a propagate-taint-to-sp operation is needed, just
+    // emit a full speculation barrier at the start of this basic block, which
+    // renders the taint/speculation tracking in this basic block unnecessary.
+    insertFullSpeculationBarrier(MBB, MBB.begin(),
+                                 (MBB.begin())->getDebugLoc());
+    UsesFullSpeculationBarrier = true;
+    Modified = true;
+  } else {
+    for (auto MI_Reg : ReturnInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(
+          dbgs()
+          << " About to insert Reg to SP taint propagation with temp register "
+          << printReg(MI_Reg.second, TRI)
+          << " on instruction: " << *MI_Reg.first);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
+    }
 
-    for (MachineInstr *Return : ReturnInstructions)
-      insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
-    for (MachineInstr *Call : CallInstructions) {
+    for (auto MI_Reg : CallInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
+                           "propagation with temp register "
+                        << printReg(MI_Reg.second, TRI)
+                        << " around instruction: " << *MI_Reg.first);
       // Just after the call:
-      MachineBasicBlock::iterator i = Call;
-      i++;
-      insertSPToRegTaintPropagation(Call->getParent(), i);
+      insertSPToRegTaintPropagation(
+          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
       // Just before the call:
-      insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
     }
   }
-
   return Modified;
 }
 
 void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
   // If full control flow speculation barriers are used, emit a control flow
   // barrier to block potential miss-speculation in flight coming in to this
   // function.
   if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
     return;
   }
 
   // CMP   SP, #0   === SUBS   xzr, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
       .addDef(AArch64::XZR)
       .addUse(AArch64::SP)
       .addImm(0)
       .addImm(0); // no shift
   // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
       .addDef(MisspeculatingTaintReg)
       .addUse(AArch64::XZR)
       .addUse(AArch64::XZR)
@@ -319,7 +380,7 @@ void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
 }
 
 void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     unsigned TmpReg) const {
   // If full control flow speculation barriers are used, there will not be
   // miss-speculation when returning from this function, and therefore, also
@@ -328,19 +389,19 @@ void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
     return;
 
   // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
       .addDef(TmpReg)
       .addUse(AArch64::SP)
       .addImm(0)
       .addImm(0); // no shift
   // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
       .addDef(TmpReg, RegState::Renamable)
       .addUse(TmpReg, RegState::Kill | RegState::Renamable)
       .addUse(MisspeculatingTaintReg, RegState::Kill)
       .addImm(0);
   // mov   SP, Xtmp === ADD SP, Xtmp, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
       .addDef(AArch64::SP)
       .addUse(TmpReg, RegState::Kill)
       .addImm(0)
@@ -484,7 +545,8 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
 /// \brief If MBBI references a pseudo instruction that should be expanded
 /// here, do the expansion and return true. Otherwise return false.
 bool AArch64SpeculationHardening::expandSpeculationSafeValue(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    bool UsesFullSpeculationBarrier) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   bool Is64Bit = true;
@@ -499,7 +561,7 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue(
     // Just remove the SpeculationSafe pseudo's if control flow
     // miss-speculation isn't happening because we're already inserting barriers
     // to guarantee that.
-    if (!UseControlFlowSpeculationBarrier) {
+    if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
       // Mark this register and all its aliasing registers as needing to be
@@ -537,7 +599,7 @@ bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
 }
 
 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
   bool Modified = false;
 
   RegsNeedingCSDBBeforeUse.reset();
@@ -572,15 +634,16 @@ bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
           break;
         }
 
-    if (NeedToEmitBarrier)
+    if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
       Modified |= insertCSDB(MBB, MBBI, DL);
 
-    Modified |= expandSpeculationSafeValue(MBB, MBBI);
+    Modified |=
+        expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
 
     MBBI = NMBBI;
   }
 
-  if (RegsNeedingCSDBBeforeUse.any())
+  if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
     Modified |= insertCSDB(MBB, MBBI, DL);
 
   return Modified;
@@ -609,7 +672,7 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
       Modified |= slhLoads(MBB);
   }
 
-  // 2.a Add instrumentation code to function entry and exits.
+  // 2. Add instrumentation code to function entry and exits.
   LLVM_DEBUG(
       dbgs()
       << "***** AArch64SpeculationHardening - track control flow *****\n");
@@ -620,17 +683,15 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
     EntryBlocks.push_back(LPI.LandingPadBlock);
   for (auto Entry : EntryBlocks)
     insertSPToRegTaintPropagation(
-        Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
-
-  // 2.b Add instrumentation code to every basic block.
-  for (auto &MBB : MF)
-    Modified |= instrumentControlFlow(MBB);
+        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
 
-  LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
-                       "SpeculationSafeValue Pseudos *****\n");
-  // Step 3: Lower SpeculationSafeValue pseudo instructions.
-  for (auto &MBB : MF)
-    Modified |= lowerSpeculationSafeValuePseudos(MBB);
+  // 3. Add instrumentation code to every basic block.
+  for (auto &MBB : MF) {
+    bool UsesFullSpeculationBarrier = false;
+    Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
+    Modified |=
+        lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
+  }
 
   return Modified;
 }
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4102f1eb5cc12c..64afabd450c1fa 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1779,8 +1779,8 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
   if (RegNo & 0x1)
     return Fail;
 
-  unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
-  Inst.addOperand(MCOperand::createReg(Register));
+  unsigned Reg = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo / 2);
+  Inst.addOperand(MCOperand::createReg(Reg));
   return Success;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index bb7801c172f60b..55668867cc8e2c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -254,7 +254,7 @@ namespace AMDGPUAS {
 
     FLAT_ADDRESS = 0,     ///< Address space for flat memory.
     GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
-    REGION_ADDRESS = 2,   ///< Address space for region memory.
+    REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)
 
     CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
     LOCAL_ADDRESS = 3,    ///< Address space for local memory.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 6951c915b1772d..8d36511a28303f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
+  NODE_NAME_CASE(DS_ORDERED_COUNT)
   NODE_NAME_CASE(ATOMIC_CMP_SWAP)
   NODE_NAME_CASE(ATOMIC_INC)
   NODE_NAME_CASE(ATOMIC_DEC)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 0d22cb2e3e20bc..d4a751d00a503d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -474,6 +474,7 @@ enum NodeType : unsigned {
   TBUFFER_STORE_FORMAT_D16,
   TBUFFER_LOAD_FORMAT,
   TBUFFER_LOAD_FORMAT_D16,
+  DS_ORDERED_COUNT,
   ATOMIC_CMP_SWAP,
   ATOMIC_INC,
   ATOMIC_DEC,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 9dbd7751b4d885..4d0962f65fdc1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
 def : SourceOfDivergence<int_amdgcn_ps_live>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
 
 foreach intr = AMDGPUImageDimAtomicIntrinsics in
 def : SourceOfDivergence<intr>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 11e4ba4b5010dc..62e7e44ddb80c6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
   switch (Inst->getIntrinsicID()) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 31d2ebef481d20..9c7097e9a52066 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;
 
 defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
 
+def : Pat <
+  (SIds_ordered_count i32:$value, i16:$offset),
+  (DS_ORDERED_COUNT $value, (as_i16imm $offset))
+>;
+
 //===----------------------------------------------------------------------===//
 // Real instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c6396de89c4f6e..69ddbfb5395833 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) {
   }
 }
 
-static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
+                                    const MachineInstr &MI) {
+  if (TII.isAlwaysGDS(MI.getOpcode())) // e.g. DS_ORDERED_COUNT, DS_GWS_*
+    return true;
+
   switch (MI.getOpcode()) {
   case AMDGPU::S_SENDMSG:
   case AMDGPU::S_SENDMSGHALT:
   case AMDGPU::S_TTRACEDATA:
     return true;
+  // These DS opcodes don't support GDS; return before the gds operand lookup.
+  case AMDGPU::DS_NOP:
+  case AMDGPU::DS_PERMUTE_B32:
+  case AMDGPU::DS_BPERMUTE_B32:
+    return false;
   default:
-    // TODO: GDS
+    if (TII.isDS(MI.getOpcode())) { // assumes op has a gds operand
+      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                           AMDGPU::OpName::gds);
+      if (MI.getOperand(GDS).getImm()) // gds modifier set on this DS op
+        return true;
+    }
     return false;
   }
 }
@@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
@@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
                                            isSMovRel(MI->getOpcode())))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
   return WaitStates;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0ba921647097d3..12113fcc1fcb8c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   switch (IntrID) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
@@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
   switch (II->getIntrinsicID()) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
@@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   SDLoc DL(Op);
 
   switch (IntrID) {
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap: {
+    MemSDNode *M = cast<MemSDNode>(Op);
+    SDValue Chain = M->getOperand(0);
+    SDValue M0 = M->getOperand(2);
+    SDValue Value = M->getOperand(3);
+    unsigned OrderedCountIndex = M->getConstantOperandVal(7);
+    unsigned WaveRelease = M->getConstantOperandVal(8);
+    unsigned WaveDone = M->getConstantOperandVal(9);
+    unsigned ShaderType;
+    unsigned Instruction;
+
+    switch (IntrID) {
+    case Intrinsic::amdgcn_ds_ordered_add:
+      Instruction = 0;
+      break;
+    case Intrinsic::amdgcn_ds_ordered_swap:
+      Instruction = 1;
+      break;
+    }
+
+    if (WaveDone && !WaveRelease)
+      report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+
+    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
+    case CallingConv::AMDGPU_CS:
+    case CallingConv::AMDGPU_KERNEL:
+      ShaderType = 0;
+      break;
+    case CallingConv::AMDGPU_PS:
+      ShaderType = 1;
+      break;
+    case CallingConv::AMDGPU_VS:
+      ShaderType = 2;
+      break;
+    case CallingConv::AMDGPU_GS:
+      ShaderType = 3;
+      break;
+    default:
+      report_fatal_error("ds_ordered_count unsupported for this calling conv");
+    }
+
+    unsigned Offset0 = OrderedCountIndex << 2;
+    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+                       (Instruction << 4);
+    unsigned Offset = Offset0 | (Offset1 << 8);
+
+    SDValue Ops[] = {
+      Chain,
+      Value,
+      DAG.getTargetConstant(Offset, DL, MVT::i16),
+      copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
+    };
+    return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
+                                   M->getVTList(), Ops, M->getMemoryVT(),
+                                   M->getMemOperand());
+  }
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
   case Intrinsic::amdgcn_ds_fadd:
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index afc0b446761095..3c13bccd94fa85 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -536,10 +536,13 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
             CurrScore);
       }
       if (Inst.mayStore()) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
-            CurrScore);
+        if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                       AMDGPU::OpName::data0) != -1) {
+          setExpScore(
+              &Inst, TII, TRI, MRI,
+              AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
+              CurrScore);
+        }
         if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                        AMDGPU::OpName::data1) != -1) {
           setExpScore(&Inst, TII, TRI, MRI,
@@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
   // bracket and the destination operand scores.
   // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
   if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
-    if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
+    if (TII->isAlwaysGDS(Inst.getOpcode()) ||
+        TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
       ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
       ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
     } else {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2370d5fa7b27b3..7f7f1807987ab2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
          changesVGPRIndexingMode(MI);
 }
 
+bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
+  return Opcode == AMDGPU::DS_ORDERED_COUNT || // Callers treat these as GDS
+         Opcode == AMDGPU::DS_GWS_INIT ||      // accesses without consulting
+         Opcode == AMDGPU::DS_GWS_SEMA_V ||    // the gds modifier operand.
+         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
+         Opcode == AMDGPU::DS_GWS_SEMA_P ||
+         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
+         Opcode == AMDGPU::DS_GWS_BARRIER;
+}
+
 bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
 
@@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
   //       EXEC = 0, but checking for that case here seems not worth it
   //       given the typical code patterns.
   if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
-      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
+      Opcode == AMDGPU::DS_ORDERED_COUNT)
     return true;
 
   if (MI.isInlineAsm())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 5b1a05f3785ec7..8847fd6babb362 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -450,6 +450,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::DS;
   }
 
+  bool isAlwaysGDS(uint16_t Opcode) const;
+
   static bool isMIMG(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 13afa4d4974bf1..180a7b0601d74e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
   [SDNPMayLoad, SDNPMemOperand]
 >;
 
+def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
+  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
+  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
+>;
+
 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
   [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
 >;
diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index f39c21fc8aa2b0..5e9b108c2de3a1 100644
--- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -17,6 +17,7 @@
 #include "MSP430InstrInfo.h"
 #include "MSP430MCInstLower.h"
 #include "MSP430TargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,6 +29,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -44,6 +46,8 @@ namespace {
 
     StringRef getPassName() const override { return "MSP430 Assembly Printer"; }
 
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
     void printOperand(const MachineInstr *MI, int OpNum,
                       raw_ostream &O, const char* Modifier = nullptr);
     void printSrcMemOperand(const MachineInstr *MI, int OpNum,
@@ -55,6 +59,8 @@ namespace {
                                unsigned OpNo, unsigned AsmVariant,
                                const char *ExtraCode, raw_ostream &O) override;
     void EmitInstruction(const MachineInstr *MI) override;
+
+    void EmitInterruptVectorSection(MachineFunction &ISR);
   };
 } // end of anonymous namespace
 
@@ -153,6 +159,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
+  MCSection *Cur = OutStreamer->getCurrentSectionOnly(); // restored at the end
+  const auto *F = &ISR.getFunction();
+  assert(F->hasFnAttribute("interrupt") &&
+         "Functions with MSP430_INTR CC should have 'interrupt' attribute");
+  StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString();
+  MCSection *IV = OutStreamer->getContext().getELFSection(
+    "__interrupt_vector_" + IVIdx, // name ends with the attribute value
+    ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+  OutStreamer->SwitchSection(IV);
+  // Emit the ISR's symbol value (one program pointer wide) into that section.
+  const MCSymbol *FunctionSymbol = getSymbol(F);
+  OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+  OutStreamer->SwitchSection(Cur);
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  // For an ISR (MSP430_INTR calling conv), emit its interrupt vector section.
+  if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR)
+    EmitInterruptVectorSection(MF);
+
+  SetupMachineFunction(MF);
+  EmitFunctionBody();
+  return false; // presumably "function not modified" (pass convention)
+}
+
 // Force static initialization.
 extern "C" void LLVMInitializeMSP430AsmPrinter() {
   RegisterAsmPrinter<MSP430AsmPrinter> X(getTheMSP430Target());
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d2fed6861477b9..f10d100bfe11ce 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -65,10 +65,7 @@ class MCInstrInfo;
 
 } // end namespace llvm
 
-static cl::opt<bool>
-EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
-              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
-              cl::init(true));
+extern cl::opt<bool> EmitJalrReloc;
 
 namespace {
 
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 18d7dd99be34a7..2f2dd4e03c40ae 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -15,6 +15,13 @@
 
 using namespace llvm;
 
+// Note: this option is defined here to be visible from libLLVMMipsAsmParser
+//       and libLLVMMipsCodeGen
+cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+              cl::init(true));
+
 namespace {
 static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
 
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index a90db2384c4664..ab8a6753eadcf2 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -89,7 +89,10 @@ namespace MipsII {
     MO_GOT_HI16,
     MO_GOT_LO16,
     MO_CALL_HI16,
-    MO_CALL_LO16
+    MO_CALL_LO16,
+
+    /// Helper operand used to generate R_MIPS_JALR
+    MO_JALR
   };
 
   enum {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index f43a4d980f92aa..e3dcbaccfd0806 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -614,8 +614,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
       llvm_unreachable("Unhandled fixup kind!");
       break;
     case MipsMCExpr::MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-      break;
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getExprOpValue(MipsExpr->getSubExpr(), Fixups, STI);
     case MipsMCExpr::MEK_CALL_HI16:
       FixupKind = Mips::fixup_Mips_CALL_HI16;
       break;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 99857e083c6ca8..2d7312725205fc 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -44,8 +44,10 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
     llvm_unreachable("MEK_None and MEK_Special are invalid");
     break;
   case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
+    // MEK_DTPREL is used for marking TLS DIEExpr only
+    // and contains a regular sub-expression.
+    getSubExpr()->print(OS, MAI, true);
+    return;
   case MEK_CALL_HI16:
     OS << "%call_hi";
     break;
@@ -161,7 +163,9 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
     case MEK_Special:
       llvm_unreachable("MEK_None and MEK_Special are invalid");
     case MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
     case MEK_DTPREL_HI:
     case MEK_DTPREL_LO:
     case MEK_GOT:
@@ -249,9 +253,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
   case MEK_Special:
     llvm_unreachable("MEK_None and MEK_Special are invalid");
     break;
-  case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
   case MEK_CALL_HI16:
   case MEK_CALL_LO16:
   case MEK_GOT:
@@ -274,6 +275,7 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
     if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr()))
       E->fixELFSymbolsInTLSFixups(Asm);
     break;
+  case MEK_DTPREL:
   case MEK_DTPREL_HI:
   case MEK_DTPREL_LO:
   case MEK_TLSLDM:
diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 814918d25e70d7..c441aa76ad40f7 100644
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -460,6 +460,7 @@ class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
   let isCall = 1;
   let hasDelaySlot = 0;
   let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
 
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index af380a0ec71e0a..ccc4f04bb92d82 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -426,6 +426,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
   let isCall = 1;
   let hasDelaySlot = 1;
   let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 
 // 16-bit Jump Reg
diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 2bd0cf2d59a64d..fb239f572ef22c 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1105,7 +1105,7 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
 
 // Pseudo instructions
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
-    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT], hasPostISelHook = 1 in {
   class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
     PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
     PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 362431fd42a6c0..a7a748b0840e19 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -68,6 +68,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "mips-asm-printer"
 
+extern cl::opt<bool> EmitJalrReloc;
+
 MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
   return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
 }
@@ -148,6 +150,40 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
   EmitToStreamer(OutStreamer, TmpInst0);
 }
 
+// If MI carries an MO_JALR symbol operand, emit:
+//
+// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+// tmplabel:
+//
+// just before the instruction. This is an optimization hint for the linker,
+// which may then replace the indirect call with a direct branch.
+static void emitDirectiveRelocJalr(const MachineInstr &MI,
+                                   MCContext &OutContext,
+                                   TargetMachine &TM,
+                                   MCStreamer &OutStreamer,
+                                   const MipsSubtarget &Subtarget) {
+  for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
+       I < E; ++I) { // only operands past the MCInstrDesc-declared ones
+    const MachineOperand &MO = MI.getOperand(I);
+    if (MO.isMCSymbol() && MO.getTargetFlags() == MipsII::MO_JALR) {
+      MCSymbol *Callee = MO.getMCSymbol();
+      if (Callee && !Callee->getName().empty()) {
+        MCSymbol *OffsetLabel = OutContext.createTempSymbol();
+        const MCExpr *OffsetExpr =
+            MCSymbolRefExpr::create(OffsetLabel, OutContext);
+        const MCExpr *CalleeExpr =
+            MCSymbolRefExpr::create(Callee, OutContext);
+        OutStreamer.EmitRelocDirective
+            (*OffsetExpr,
+             Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+             CalleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
+        OutStreamer.EmitLabel(OffsetLabel);
+        return; // at most one .reloc per instruction
+      }
+    }
+  }
+}
+
 void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   MipsTargetStreamer &TS = getTargetStreamer();
   unsigned Opc = MI->getOpcode();
@@ -207,6 +243,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  if (EmitJalrReloc &&
+      (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) {
+    emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget);
+  }
+
   MachineBasicBlock::const_instr_iterator I = MI->getIterator();
   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
 
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 22ade31a72cdbb..a18416b9e86100 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -56,6 +56,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -75,6 +76,8 @@
 
 using namespace llvm;
 
+extern cl::opt<bool> EmitJalrReloc;
+
 namespace {
 
 class MipsFastISel final : public FastISel {
@@ -1551,6 +1554,16 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   CLI.Call = MIB;
 
+  if (EmitJalrReloc && !Subtarget->inMips16Mode()) {
+    // Attach callee address to the instruction, let asm printer emit
+    // .reloc R_MIPS_JALR.
+    if (Symbol)
+      MIB.addSym(Symbol, MipsII::MO_JALR);
+    else
+      MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol(
+	                   Addr.getGlobalValue()->getName()), MipsII::MO_JALR);
+  }
+
   // Finish off the call including any return values.
   return finishCall(CLI, RetVT, NumBytes);
 }
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 8c2a364cdfa951..0f9c075ba0ccf3 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -57,6 +57,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -91,6 +92,8 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
                cl::desc("MIPS: Don't trap on integer division by zero."),
                cl::init(false));
 
+extern cl::opt<bool> EmitJalrReloc;
+
 static const MCPhysReg Mips64DPRegs[8] = {
   Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
   Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@@ -2879,6 +2882,54 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
     Ops.push_back(InFlag);
 }
 
+/// Post-instruction-selection hook for register-indirect calls and tail
+/// calls: when the callee recorded on the DAG node is a global address or an
+/// external symbol, append it to MI as an MCSymbol operand flagged MO_JALR.
+void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                                       SDNode *Node) const {
+  switch (MI.getOpcode()) {
+  case Mips::JALR:
+  case Mips::JALRPseudo:
+  case Mips::JALR64:
+  case Mips::JALR64Pseudo:
+  case Mips::JALR16_MM:
+  case Mips::JALRC16_MMR6:
+  case Mips::TAILCALLREG:
+  case Mips::TAILCALLREG64:
+  case Mips::TAILCALLR6REG:
+  case Mips::TAILCALL64R6REG:
+  case Mips::TAILCALLREG_MM:
+  case Mips::TAILCALLREG_MMR6:
+    break;
+  default:
+    return;
+  }
+
+  // Bail out if the relocation is switched off, in MIPS16 mode, for non-PIC
+  // code, or if the node does not carry the operand holding the callee.
+  if (!EmitJalrReloc || Subtarget.inMips16Mode() || !isPositionIndependent() ||
+      Node->getNumOperands() < 1 || Node->getOperand(0).getNumOperands() < 2)
+    return;
+
+  // We are after the callee address, set by LowerCall().
+  // If added to MI, asm printer will emit .reloc R_MIPS_JALR for the
+  // symbol.
+  const SDValue Callee = Node->getOperand(0).getOperand(1);
+  StringRef CalleeName;
+  if (const auto *GA = dyn_cast_or_null<const GlobalAddressSDNode>(Callee))
+    CalleeName = GA->getGlobal()->getName();
+  else if (const auto *ExtSym =
+               dyn_cast_or_null<const ExternalSymbolSDNode>(Callee))
+    CalleeName = ExtSym->getSymbol();
+
+  if (CalleeName.empty())
+    return;
+
+  MCSymbol *CalleeSym =
+      MI.getParent()->getParent()->getContext().getOrCreateSymbol(CalleeName);
+  MI.addOperand(MachineOperand::CreateMCSymbol(CalleeSym, MipsII::MO_JALR));
+}
+
 /// LowerCall - functions arguments are copied from virtual regs to
 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
 SDValue
@@ -2930,7 +2981,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // the maximum out going argument area (including the reserved area), and
   // preallocates the stack space on entrance to the caller.
   //
-  // FIXME: We should do the same for efficency and space.
+  // FIXME: We should do the same for efficiency and space.
 
   // Note: The check on the calling convention below must match
   //       MipsABIInfo::GetCalleeAllocdArgSizeInBytes().
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index e043f133a09fd9..c88633be02b77c 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -341,6 +341,9 @@ class TargetRegisterClass;
     EmitInstrWithCustomInserter(MachineInstr &MI,
                                 MachineBasicBlock *MBB) const override;
 
+    void AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                       SDNode *Node) const override;
+
     void HandleByVal(CCState *, unsigned &, unsigned) const override;
 
     unsigned getRegisterByName(const char* RegName, EVT VT,
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index bfb4c775205de6..e38bef4663a7d3 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -653,6 +653,16 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
 
     MIB.addImm(0);
 
+    // If I has an MCSymbol operand (used by asm printer, to emit R_MIPS_JALR),
+    // add it to the new instruction.
+    for (unsigned J = I->getDesc().getNumOperands(), E = I->getNumOperands();
+         J < E; ++J) {
+      const MachineOperand &MO = I->getOperand(J);
+      if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR))
+        MIB.addSym(MO.getMCSymbol(), MipsII::MO_JALR);
+    }
+
+
   } else {
     for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
       if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
@@ -825,7 +835,8 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
     {MO_GOT_HI16,     "mips-got-hi16"},
     {MO_GOT_LO16,     "mips-got-lo16"},
     {MO_CALL_HI16,    "mips-call-hi16"},
-    {MO_CALL_LO16,    "mips-call-lo16"}
+    {MO_CALL_LO16,    "mips-call-lo16"},
+    {MO_JALR,         "mips-jalr"}
   };
   return makeArrayRef(Flags);
 }
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index d9398b7d6024a5..46721e6cb9e5fa 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -1623,11 +1623,15 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in {
   class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
                           Register RetReg, RegisterOperand ResRO = RO>:
     PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
-    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
+    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 
   class JumpLinkReg<string opstr, RegisterOperand RO>:
     InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
-           [], II_JALR, FrmR, opstr>;
+           [], II_JALR, FrmR, opstr> {
+    let hasPostISelHook = 1;
+  }
 
   class BGEZAL_FT<string opstr, DAGOperand opnd,
                   RegisterOperand RO> :
@@ -1646,7 +1650,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
 
   class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
     PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
-    PseudoInstExpansion<(JumpInst RO:$rs)>;
+    PseudoInstExpansion<(JumpInst RO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 }
 
 class BAL_BR_Pseudo<Instruction RealInst, DAGOperand opnd> :
diff --git a/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 46b37ceae39181..4a7c0ce2be19b8 100644
--- a/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -117,6 +117,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case MipsII::MO_CALL_LO16:
     TargetKind = MipsMCExpr::MEK_CALL_LO16;
     break;
+  case MipsII::MO_JALR:
+    return MCOperand();
   }
 
   switch (MOTy) {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2a825c1316f3bb..607e55bf71c872 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -5618,55 +5618,96 @@ SDValue SystemZTargetLowering::combineBSWAP(
 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
   // set by the CCReg instruction using the CCValid / CCMask masks,
-  // If the CCReg instruction is itself a (ICMP (SELECT_CCMASK)) testing
-  // the condition code set by some other instruction, see whether we
-  // can directly use that condition code.
-  bool Invert = false;
+  // If the CCReg instruction is itself a ICMP testing the condition
+  // code set by some other instruction, see whether we can directly
+  // use that condition code.
 
-  // Verify that we have an appropriate mask for a EQ or NE comparison.
+  // Verify that we have an ICMP against some constant.
   if (CCValid != SystemZ::CCMASK_ICMP)
     return false;
-  if (CCMask == SystemZ::CCMASK_CMP_NE)
-    Invert = !Invert;
-  else if (CCMask != SystemZ::CCMASK_CMP_EQ)
-    return false;
-
-  // Verify that we have an ICMP that is the user of a SELECT_CCMASK.
-  SDNode *ICmp = CCReg.getNode();
+  auto *ICmp = CCReg.getNode();
   if (ICmp->getOpcode() != SystemZISD::ICMP)
     return false;
-  SDNode *Select = ICmp->getOperand(0).getNode();
-  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+  auto *CompareLHS = ICmp->getOperand(0).getNode();
+  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
+  if (!CompareRHS)
     return false;
 
-  // Verify that the ICMP compares against one of select values.
-  auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
-  if (!CompareVal)
-    return false;
-  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
-  if (!TrueVal)
-    return false;
-  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
-  if (!FalseVal)
-    return false;
-  if (CompareVal->getZExtValue() == FalseVal->getZExtValue())
-    Invert = !Invert;
-  else if (CompareVal->getZExtValue() != TrueVal->getZExtValue())
-    return false;
+  // Optimize the case where CompareLHS is a SELECT_CCMASK.
+  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
+    // Verify that we have an appropriate mask for a EQ or NE comparison.
+    bool Invert = false;
+    if (CCMask == SystemZ::CCMASK_CMP_NE)
+      Invert = !Invert;
+    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+      return false;
 
-  // Compute the effective CC mask for the new branch or select.
-  auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
-  auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
-  if (!NewCCValid || !NewCCMask)
-    return false;
-  CCValid = NewCCValid->getZExtValue();
-  CCMask = NewCCMask->getZExtValue();
-  if (Invert)
-    CCMask ^= CCValid;
+    // Verify that the ICMP compares against one of select values.
+    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
+    if (!TrueVal)
+      return false;
+    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
+    if (!FalseVal)
+      return false;
+    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
+      Invert = !Invert;
+    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
+      return false;
 
-  // Return the updated CCReg link.
-  CCReg = Select->getOperand(4);
-  return true;
+    // Compute the effective CC mask for the new branch or select.
+    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
+    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
+    if (!NewCCValid || !NewCCMask)
+      return false;
+    CCValid = NewCCValid->getZExtValue();
+    CCMask = NewCCMask->getZExtValue();
+    if (Invert)
+      CCMask ^= CCValid;
+
+    // Return the updated CCReg link.
+    CCReg = CompareLHS->getOperand(4);
+    return true;
+  }
+
+  // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
+  if (CompareLHS->getOpcode() == ISD::SRA) {
+    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
+    if (!SRACount || SRACount->getZExtValue() != 30)
+      return false;
+    auto *SHL = CompareLHS->getOperand(0).getNode();
+    if (SHL->getOpcode() != ISD::SHL)
+      return false;
+    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
+    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
+      return false;
+    auto *IPM = SHL->getOperand(0).getNode();
+    if (IPM->getOpcode() != SystemZISD::IPM)
+      return false;
+
+    // Avoid introducing CC spills (because SRA would clobber CC).
+    if (!CompareLHS->hasOneUse())
+      return false;
+    // Verify that the ICMP compares against zero.
+    if (CompareRHS->getZExtValue() != 0)
+      return false;
+
+    // Compute the effective CC mask for the new branch or select.
+    switch (CCMask) {
+    case SystemZ::CCMASK_CMP_EQ: break;
+    case SystemZ::CCMASK_CMP_NE: break;
+    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
+    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
+    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
+    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
+    default: return false;
+    }
+
+    // Return the updated CCReg link.
+    CCReg = IPM->getOperand(0);
+    return true;
+  }
+
+  return false;
 }
 
 SDValue SystemZTargetLowering::combineBR_CCMASK(
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b03b4edaa4abc8..8aab5c2c49998e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -557,80 +557,6 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
   return false;
 }
 
-// If Reg is a virtual register, return its definition, otherwise return null.
-static MachineInstr *getDef(unsigned Reg,
-                            const MachineRegisterInfo *MRI) {
-  if (TargetRegisterInfo::isPhysicalRegister(Reg))
-    return nullptr;
-  return MRI->getUniqueVRegDef(Reg);
-}
-
-// Return true if MI is a shift of type Opcode by Imm bits.
-static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) {
-  return (MI->getOpcode() == Opcode &&
-          !MI->getOperand(2).getReg() &&
-          MI->getOperand(3).getImm() == Imm);
-}
-
-// If the destination of MI has no uses, delete it as dead.
-static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
-  if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
-    MI->eraseFromParent();
-}
-
-// Compare compares SrcReg against zero.  Check whether SrcReg contains
-// the result of an IPM sequence whose input CC survives until Compare,
-// and whether Compare is therefore redundant.  Delete it and return
-// true if so.
-static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg,
-                                  const MachineRegisterInfo *MRI,
-                                  const TargetRegisterInfo *TRI) {
-  MachineInstr *LGFR = nullptr;
-  MachineInstr *RLL = getDef(SrcReg, MRI);
-  if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
-    LGFR = RLL;
-    RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
-  }
-  if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
-    return false;
-
-  MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
-  if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
-    return false;
-
-  MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
-  if (!IPM || IPM->getOpcode() != SystemZ::IPM)
-    return false;
-
-  // Check that there are no assignments to CC between the IPM and Compare,
-  if (IPM->getParent() != Compare.getParent())
-    return false;
-  MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator();
-  for (++MBBI; MBBI != MBBE; ++MBBI) {
-    MachineInstr &MI = *MBBI;
-    if (MI.modifiesRegister(SystemZ::CC, TRI))
-      return false;
-  }
-
-  Compare.eraseFromParent();
-  if (LGFR)
-    eraseIfDead(LGFR, MRI);
-  eraseIfDead(RLL, MRI);
-  eraseIfDead(SRL, MRI);
-  eraseIfDead(IPM, MRI);
-
-  return true;
-}
-
-bool SystemZInstrInfo::optimizeCompareInstr(
-    MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask,
-    int Value, const MachineRegisterInfo *MRI) const {
-  assert(!SrcReg2 && "Only optimizing constant comparisons so far");
-  bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0;
-  return Value == 0 && !IsLogical &&
-         removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
-}
-
 bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                        ArrayRef<MachineOperand> Pred,
                                        unsigned TrueReg, unsigned FalseReg,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 216139eb7c7969..0392430ed872c3 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -208,9 +208,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                         int *BytesAdded = nullptr) const override;
   bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                       unsigned &SrcReg2, int &Mask, int &Value) const override;
-  bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
-                            unsigned SrcReg2, int Mask, int Value,
-                            const MachineRegisterInfo *MRI) const override;
   bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
                        unsigned, unsigned, int&, int&, int&) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index e0d7bca9a94b9e..4592e82eea718b 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -164,17 +164,17 @@ static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
 }
 
 // Convert the current CC value into an integer that is 0 if CC == 0,
-// less than zero if CC == 1 and greater than zero if CC >= 2.
+// greater than zero if CC == 1 and less than zero if CC >= 2.
 // The sequence starts with IPM, which puts CC into bits 29 and 28
 // of an integer and clears bits 30 and 31.
 static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg,
                               SelectionDAG &DAG) {
   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
-  SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
-                            DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
-  SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
-                             DAG.getConstant(31, DL, MVT::i32));
-  return ROTL;
+  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM,
+                            DAG.getConstant(30 - SystemZ::IPM_CC, DL, MVT::i32));
+  SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL,
+                            DAG.getConstant(30, DL, MVT::i32));
+  return SRA;
 }
 
 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
@@ -184,7 +184,8 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
   if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
     uint64_t Bytes = CSize->getZExtValue();
     assert(Bytes > 0 && "Caller should have handled 0-size case");
-    SDValue CCReg = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+    // Swap operands to invert CC == 1 vs. CC == 2 cases.
+    SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
     Chain = CCReg.getValue(1);
     return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
   }
@@ -232,7 +233,8 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp(
     SDValue Src2, MachinePointerInfo Op1PtrInfo,
     MachinePointerInfo Op2PtrInfo) const {
   SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other);
-  SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+  // Swap operands to invert CC == 1 vs. CC == 2 cases.
+  SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src2, Src1,
                                DAG.getConstant(0, DL, MVT::i32));
   SDValue CCReg = Unused.getValue(1);
   Chain = Unused.getValue(2);
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 50143fb0ece362..7caeebb1a9aad7 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -113,8 +113,15 @@ void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
 }
 
 void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym,
-                                                    StringRef ModuleName) {
-  OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n';
+                                                    StringRef ImportModule) {
+  OS << "\t.import_module\t" << Sym->getName() << ", "
+                             << ImportModule << '\n';
+}
+
+// Emit the .import_name directive naming ImportName as Sym's import name.
+void WebAssemblyTargetAsmStreamer::emitImportName(const MCSymbolWasm *Sym,
+                                                  StringRef ImportName) {
+  OS << "\t.import_name\t" << Sym->getName() << ", " << ImportName << '\n';
+}
 
 void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 3073938118b458..2ee9956c8e3840 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -45,7 +45,10 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer {
   virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
   /// .import_module
   virtual void emitImportModule(const MCSymbolWasm *Sym,
-                                StringRef ModuleName) = 0;
+                                StringRef ImportModule) = 0;
+  /// .import_name
+  virtual void emitImportName(const MCSymbolWasm *Sym,
+                              StringRef ImportName) = 0;
 
 protected:
   void emitValueType(wasm::ValType Type);
@@ -67,7 +70,8 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
   void emitIndIdx(const MCExpr *Value) override;
   void emitGlobalType(const MCSymbolWasm *Sym) override;
   void emitEventType(const MCSymbolWasm *Sym) override;
-  void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override;
+  void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override;
+  void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override;
 };
 
 /// This part is for Wasm object output
@@ -82,7 +86,9 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
   void emitGlobalType(const MCSymbolWasm *Sym) override {}
   void emitEventType(const MCSymbolWasm *Sym) override {}
   void emitImportModule(const MCSymbolWasm *Sym,
-                        StringRef ModuleName) override {}
+                        StringRef ImportModule) override {}
+  void emitImportName(const MCSymbolWasm *Sym,
+                      StringRef ImportName) override {}
 };
 
 /// This part is for null output
@@ -98,6 +104,7 @@ class WebAssemblyTargetNullStreamer final : public WebAssemblyTargetStreamer {
   void emitGlobalType(const MCSymbolWasm *) override {}
   void emitEventType(const MCSymbolWasm *) override {}
   void emitImportModule(const MCSymbolWasm *, StringRef) override {}
+  void emitImportName(const MCSymbolWasm *, StringRef) override {}
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index c4f03dfa7f9e45..b492d114695072 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -111,9 +111,16 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
           F.hasFnAttribute("wasm-import-module")) {
         StringRef Name =
             F.getFnAttribute("wasm-import-module").getValueAsString();
-        Sym->setModuleName(Name);
+        Sym->setImportModule(Name);
         getTargetStreamer()->emitImportModule(Sym, Name);
       }
+      if (TM.getTargetTriple().isOSBinFormatWasm() &&
+          F.hasFnAttribute("wasm-import-name")) {
+        StringRef Name =
+            F.getFnAttribute("wasm-import-name").getValueAsString();
+        Sym->setImportName(Name);
+        getTargetStreamer()->emitImportName(Sym, Name);
+      }
     }
   }
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index 1a416520f97d09..13f37f611ed058 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -36,11 +36,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-fix-function-bitcasts"
 
-static cl::opt<bool>
-    TemporaryWorkarounds("wasm-temporary-workarounds",
-                         cl::desc("Apply certain temporary workarounds"),
-                         cl::init(true), cl::Hidden);
-
 namespace {
 class FixFunctionBitcasts final : public ModulePass {
   StringRef getPassName() const override {
@@ -227,6 +222,17 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
   return Wrapper;
 }
 
+// Test whether a main function with type FuncTy should be rewritten to have
+// type MainTy. File-local helper, hence static.
+static bool shouldFixMainFunction(FunctionType *FuncTy, FunctionType *MainTy) {
+  // Only fix the main function if it's the standard zero-arg form. That way,
+  // the standard cases will work as expected, and users will see signature
+  // mismatches from the linker for non-standard cases.
+  return FuncTy->getReturnType() == MainTy->getReturnType() &&
+         FuncTy->getNumParams() == 0 &&
+         !FuncTy->isVarArg();
+}
+
 bool FixFunctionBitcasts::runOnModule(Module &M) {
   LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
 
@@ -243,14 +249,14 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
     // "int main(int argc, char *argv[])", create an artificial call with it
     // bitcasted to that type so that we generate a wrapper for it, so that
     // the C runtime can call it.
-    if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") {
+    if (F.getName() == "main") {
       Main = &F;
       LLVMContext &C = M.getContext();
       Type *MainArgTys[] = {Type::getInt32Ty(C),
                             PointerType::get(Type::getInt8PtrTy(C), 0)};
       FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
                                                /*isVarArg=*/false);
-      if (F.getFunctionType() != MainTy) {
+      if (shouldFixMainFunction(F.getFunctionType(), MainTy)) {
         LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
                           << *F.getFunctionType() << "\n");
         Value *Args[] = {UndefValue::get(MainArgTys[0]),
@@ -298,12 +304,18 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
     Main->setName("__original_main");
     Function *MainWrapper =
         cast<Function>(CallMain->getCalledValue()->stripPointerCasts());
-    MainWrapper->setName("main");
-    MainWrapper->setLinkage(Main->getLinkage());
-    MainWrapper->setVisibility(Main->getVisibility());
-    Main->setLinkage(Function::PrivateLinkage);
-    Main->setVisibility(Function::DefaultVisibility);
     delete CallMain;
+    if (Main->isDeclaration()) {
+      // The wrapper is not needed in this case as we don't need to export
+      // it to anyone else.
+      MainWrapper->eraseFromParent();
+    } else {
+      // Otherwise give the wrapper the same linkage as the original main
+      // function, so that it can be called from the same places.
+      MainWrapper->setName("main");
+      MainWrapper->setLinkage(Main->getLinkage());
+      MainWrapper->setVisibility(Main->getVisibility());
+    }
   }
 
   return true;
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 899b50d0f78f3e..81391b96d12676 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1115,8 +1115,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
   }
 
   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
-  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
-    RegNo = X86::ST0;
+  if (RegNo == X86::ST0) {
     Parser.Lex(); // Eat 'st'
 
     // Check to see if we have '(4)' after %st.
diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 0e861d5ddbc9d9..3a074818c762b6 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -200,3 +200,14 @@ void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
   O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
     << markup(">");
 }
+
+// Print an ST(i) register operand, spelling ST0 as "%st(0)" instead of "st".
+void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &OS) {
+  const unsigned StackReg = MI->getOperand(OpNo).getReg();
+  if (StackReg != X86::ST0) {
+    printRegName(OS, StackReg);
+    return;
+  }
+  OS << markup("<reg:") << "%st(0)" << markup(">");
+}
diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 57422bc9a0b2a3..584dc9c286e60e 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -44,6 +44,7 @@ class X86ATTInstPrinter final : public X86InstPrinterCommon {
   void printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
   void printDstIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
   void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
 
   void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
     printMemReference(MI, OpNo, O);
diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 044b715641520e..b31f8ab80838d9 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -160,3 +160,14 @@ void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
 
   O << formatImm(MI->getOperand(Op).getImm() & 0xff);
 }
+
+// Print an ST(i) register operand, spelling ST0 as "st(0)" instead of "st".
+void X86IntelInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &OS) {
+  const unsigned StackReg = MI->getOperand(OpNo).getReg();
+  if (StackReg != X86::ST0) {
+    printRegName(OS, StackReg);
+    return;
+  }
+  OS << "st(0)";
+}
diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 3b34a8052becba..fe52bd482a262d 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -39,6 +39,7 @@ class X86IntelInstPrinter final : public X86InstPrinterCommon {
   void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
 
   void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
     printMemReference(MI, OpNo, O);
diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
index 3654bf04f4e988..6bee20b617dde9 100644
--- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -27,6 +27,14 @@ using namespace llvm;
 
 #define DEBUG_TYPE "x86-discriminate-memops"
 
+// Hidden flag (default off) that gates X86DiscriminateMemOps.
+static cl::opt<bool> EnableDiscriminateMemops(
+    DEBUG_TYPE, cl::init(false), cl::Hidden,
+    cl::desc("Generate unique debug info for each instruction with a memory "
+             "operand. Should be enabled for profile-driven cache prefetching, "
+             "both in the build of the binary being profiled, as well as in "
+             "the build of the binary consuming the profile."));
+
 namespace {
 
 using Location = std::pair<StringRef, unsigned>;
@@ -67,6 +75,9 @@ char X86DiscriminateMemOps::ID = 0;
 X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
 
 bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+  if (!EnableDiscriminateMemops)
+    return false;
+
   DISubprogram *FDI = MF.getFunction().getSubprogram();
   if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
     return false;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b6a692ee187d83..f4f37a894620ed 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27202,6 +27202,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VSHLI:              return "X86ISD::VSHLI";
   case X86ISD::VSRLI:              return "X86ISD::VSRLI";
   case X86ISD::VSRAI:              return "X86ISD::VSRAI";
+  case X86ISD::VSHLV:              return "X86ISD::VSHLV";
+  case X86ISD::VSRLV:              return "X86ISD::VSRLV";
   case X86ISD::VSRAV:              return "X86ISD::VSRAV";
   case X86ISD::VROTLI:             return "X86ISD::VROTLI";
   case X86ISD::VROTRI:             return "X86ISD::VROTRI";
@@ -42505,6 +42507,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     if (StringRef("{flags}").equals_lower(Constraint))
       return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
 
+    // dirflag -> DF
+    if (StringRef("{dirflag}").equals_lower(Constraint))
+      return std::make_pair(X86::DF, &X86::DFCCRRegClass);
+
+    // fpsr -> FPSW
+    if (StringRef("{fpsr}").equals_lower(Constraint))
+      return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
+
     // 'A' means [ER]AX + [ER]DX.
     if (Constraint == "A") {
       if (Subtarget.is64Bit())
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 910acd80e8b8cb..66d5d43946a264 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -315,10 +315,8 @@ namespace llvm {
       // Vector shift elements
       VSHL, VSRL, VSRA,
 
-      // Vector variable shift right arithmetic.
-      // Unlike ISD::SRA, in case shift count greater then element size
-      // use sign bit to fill destination data element.
-      VSRAV,
+      // Vector variable shift
+      VSHLV, VSRLV, VSRAV,
 
       // Vector shift elements by immediate
       VSHLI, VSRLI, VSRAI,
diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 30b46a09ef0f90..8bd57aa2278bad 100644
--- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -34,7 +34,8 @@ using namespace sampleprof;
 
 static cl::opt<std::string>
     PrefetchHintsFile("prefetch-hints-file",
-                      cl::desc("Path to the prefetch hints profile."),
+                      cl::desc("Path to the prefetch hints profile. See also "
+                               "-x86-discriminate-memops"),
                       cl::Hidden);
 namespace {
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7423cb85acd250..85676f102be0ac 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6445,52 +6445,53 @@ defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoV
 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
 
 // Special handing for handling VPSRAV intrinsics.
-multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
-                                         list<Predicate> p> {
+multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode,
+                                         X86VectorVTInfo _, list<Predicate> p> {
   let Predicates = p in {
-    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
+    def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
               (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
                _.RC:$src2)>;
-    def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
+    def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
               (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
                _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
+                     (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)),
               (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
                _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
+                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                      _.RC:$src0)),
               (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
+                     (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
               (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
                _.RC:$src1, _.RC:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
+                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                      _.ImmAllZerosV)),
               (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
                _.RC:$src1, addr:$src2)>;
   }
 }
 
-multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
-                                         list<Predicate> p> :
-           avx512_var_shift_int_lowering<InstrStr, _, p> {
+multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode,
+                                            X86VectorVTInfo _,
+                                            list<Predicate> p> :
+           avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> {
   let Predicates = p in {
-    def : Pat<(_.VT (X86vsrav _.RC:$src1,
+    def : Pat<(_.VT (OpNode _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
               (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
                _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1,
+                     (OpNode _.RC:$src1,
                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                      _.RC:$src0)),
               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1,
+                     (OpNode _.RC:$src1,
                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                      _.ImmAllZerosV)),
               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
@@ -6498,15 +6499,47 @@ multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
   }
 }
 
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
+multiclass avx512_var_shift_int_lowering_vl<string InstrStr, SDNode OpNode,
+                                            AVX512VLVectorVTInfo VTInfo,
+                                            Predicate p> {
+  defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info512, [p]>;
+  defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info256,
+                                       [HasVLX, p]>;
+  defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info128,
+                                       [HasVLX, p]>;
+}
+
+multiclass avx512_var_shift_int_lowering_mb_vl<string InstrStr, SDNode OpNode,
+                                               AVX512VLVectorVTInfo VTInfo,
+                                               Predicate p> {
+  defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info512, [p]>;
+  defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info256,
+                                          [HasVLX, p]>;
+  defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info128,
+                                          [HasVLX, p]>;
+}
+
+defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info,
+                                        HasBWI>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav,
+                                           avx512vl_i32_info, HasAVX512>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav,
+                                           avx512vl_i64_info, HasAVX512>;
+
+defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info,
+                                        HasBWI>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv,
+                                           avx512vl_i32_info, HasAVX512>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv,
+                                           avx512vl_i64_info, HasAVX512>;
+
+defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info,
+                                        HasBWI>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv,
+                                           avx512vl_i32_info, HasAVX512>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv,
+                                           avx512vl_i64_info, HasAVX512>;
+
 
 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
 let Predicates = [HasAVX512, NoVLX] in {
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index 5912a31996131b..8e12efff77eab8 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -230,7 +230,7 @@ def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src),
 } // mayLoad = 1, hasSideEffects = 1
 }
 
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
 // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
@@ -258,42 +258,42 @@ defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
 } // Defs = [FPSW]
 
 class FPST0rInst<Format fp, string asm>
-  : FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
+  : FPI<0xD8, fp, (outs), (ins RSTi:$op), asm>;
 class FPrST0Inst<Format fp, string asm>
-  : FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
+  : FPI<0xDC, fp, (outs), (ins RSTi:$op), asm>;
 class FPrST0PInst<Format fp, string asm>
-  : FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
+  : FPI<0xDE, fp, (outs), (ins RSTi:$op), asm>;
 
 // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
 // of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
 // we have to put some 'r's in and take them out of weird places.
-let SchedRW = [WriteFAdd] in {
-def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t$op">;
-def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
-def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t$op">;
-def SUBR_FST0r  : FPST0rInst <MRM5r, "fsubr\t$op">;
-def SUB_FrST0   : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
-def SUB_FPrST0  : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
-def SUB_FST0r   : FPST0rInst <MRM4r, "fsub\t$op">;
-def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
-def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
+let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in {
+def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t{$op, %st|st, $op}">;
+def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st, $op|$op, st}">;
+def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t{%st, $op|$op, st}">;
+def SUBR_FST0r  : FPST0rInst <MRM5r, "fsubr\t{$op, %st|st, $op}">;
+def SUB_FrST0   : FPrST0Inst <MRM5r, "fsub{r}\t{%st, $op|$op, st}">;
+def SUB_FPrST0  : FPrST0PInst<MRM5r, "fsub{r}p\t{%st, $op|$op, st}">;
+def SUB_FST0r   : FPST0rInst <MRM4r, "fsub\t{$op, %st|st, $op}">;
+def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st, $op|$op, st}">;
+def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t{%st, $op|$op, st}">;
 } // SchedRW
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in {
 def COM_FST0r   : FPST0rInst <MRM2r, "fcom\t$op">;
 def COMP_FST0r  : FPST0rInst <MRM3r, "fcomp\t$op">;
 } // SchedRW
-let SchedRW = [WriteFMul] in {
-def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t$op">;
-def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
-def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t$op">;
+let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in {
+def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t{$op, %st|st, $op}">;
+def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st, $op|$op, st}">;
+def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t{%st, $op|$op, st}">;
 } // SchedRW
-let SchedRW = [WriteFDiv] in {
-def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t$op">;
-def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
-def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
-def DIV_FST0r   : FPST0rInst <MRM6r, "fdiv\t$op">;
-def DIVR_FrST0  : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
-def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
+let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in {
+def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t{$op, %st|st, $op}">;
+def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st, $op|$op, st}">;
+def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t{%st, $op|$op, st}">;
+def DIV_FST0r   : FPST0rInst <MRM6r, "fdiv\t{$op, %st|st, $op}">;
+def DIVR_FrST0  : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st, $op|$op, st}">;
+def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t{%st, $op|$op, st}">;
 } // SchedRW
 
 // Unary operations.
@@ -307,7 +307,7 @@ def _Fp80  : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
 def _F     : FPI<0xD9, fp, (outs), (ins), asmstring>;
 }
 
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
 
 let SchedRW = [WriteFSign] in {
 defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
@@ -335,7 +335,7 @@ def TST_F  : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
 
 // Versions of FP instructions that take a single memory operand.  Added for the
 //   disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFComLd] in {
+let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in {
 def FCOM32m  : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
 def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
 
@@ -398,22 +398,22 @@ defm CMOVNP : FPCMov<X86_COND_NP>;
 
 let Predicates = [HasCMov] in {
 // These are not factored because there's no clean way to pass DA/DB.
-def CMOVB_F  : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
-                  "fcmovb\t{$op, %st(0)|st(0), $op}">;
-def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
-                  "fcmovbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVE_F  : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
-                  "fcmove\t{$op, %st(0)|st(0), $op}">;
-def CMOVP_F  : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
-                  "fcmovu\t{$op, %st(0)|st(0), $op}">;
-def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
-                  "fcmovnb\t{$op, %st(0)|st(0), $op}">;
-def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
-                  "fcmovnbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
-                  "fcmovne\t{$op, %st(0)|st(0), $op}">;
-def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
-                  "fcmovnu\t{$op, %st(0)|st(0), $op}">;
+def CMOVB_F  : FPI<0xDA, MRM0r, (outs), (ins RSTi:$op),
+                  "fcmovb\t{$op, %st|st, $op}">;
+def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RSTi:$op),
+                  "fcmovbe\t{$op, %st|st, $op}">;
+def CMOVE_F  : FPI<0xDA, MRM1r, (outs), (ins RSTi:$op),
+                  "fcmove\t{$op, %st|st, $op}">;
+def CMOVP_F  : FPI<0xDA, MRM3r, (outs), (ins RSTi:$op),
+                  "fcmovu\t{$op, %st|st, $op}">;
+def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RSTi:$op),
+                  "fcmovnb\t{$op, %st|st, $op}">;
+def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RSTi:$op),
+                  "fcmovnbe\t{$op, %st|st, $op}">;
+def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RSTi:$op),
+                  "fcmovne\t{$op, %st|st, $op}">;
+def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op),
+                  "fcmovnu\t{$op, %st|st, $op}">;
 } // Predicates = [HasCMov]
 } // SchedRW
 
@@ -454,7 +454,7 @@ def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
 } // SchedRW
 
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStore], Uses = [FPCW] in {
 def ST_Fp32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
                   [(store RFP32:$src, addr:$op)]>;
 def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
@@ -489,7 +489,7 @@ def IST_Fp16m80  : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp32m80  : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp64m80  : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
 } // mayStore
-} // SchedRW
+} // SchedRW, Uses = [FPCW]
 
 let mayLoad = 1, SchedRW = [WriteLoad] in {
 def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
@@ -499,7 +499,7 @@ def ILD_F16m  : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
 def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
 def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
 }
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
 def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
 def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
 def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
@@ -513,7 +513,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
 }
 
 // FISTTP requires SSE3 even though it's a FPStack op.
-let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
+let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in {
 def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
                     [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
 def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
@@ -534,7 +534,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
                     [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
 } // Predicates = [HasSSE3]
 
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
 def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
 def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
 def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
@@ -542,10 +542,10 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">
 
 // FP Stack manipulation instructions.
 let SchedRW = [WriteMove] in {
-def LD_Frr   : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
-def ST_Frr   : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
-def ST_FPrr  : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
-def XCH_F    : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
+def LD_Frr   : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
+def ST_Frr   : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
+def ST_FPrr  : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
+def XCH_F    : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
 }
 
 // Floating point constant loads.
@@ -570,7 +570,7 @@ def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
 let SchedRW = [WriteFLD1] in
 def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
 
-let SchedRW = [WriteFLDC], Defs = [FPSW] in {
+let SchedRW = [WriteFLDC] in {
 def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
 def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
 def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
@@ -579,7 +579,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
 } // SchedRW
 
 // Floating point compares.
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Uses = [FPCW] in {
 def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                         [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
 def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -591,37 +591,37 @@ def UCOM_Fpr80 : FpI_  <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
 
 let SchedRW = [WriteFCom] in {
 // CC = ST(0) cmp ST(i)
-let Defs = [EFLAGS, FPSW] in {
-let Predicates = [FPStackf32, HasCMov] in
-def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
-let Predicates = [FPStackf64, HasCMov] in
-def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
-let Predicates = [HasCMov] in
+let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
+def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>,
+                  Requires<[FPStackf32, HasCMov]>;
+def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>,
+                  Requires<[FPStackf64, HasCMov]>;
 def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>,
+                  Requires<[HasCMov]>;
 }
 
-let Defs = [FPSW], Uses = [ST0] in {
+let Defs = [FPSW], Uses = [ST0, FPCW] in {
 def UCOM_Fr    : FPI<0xDD, MRM4r,    // FPSW = cmp ST(0) with ST(i)
-                    (outs), (ins RST:$reg), "fucom\t$reg">;
+                    (outs), (ins RSTi:$reg), "fucom\t$reg">;
 def UCOM_FPr   : FPI<0xDD, MRM5r,    // FPSW = cmp ST(0) with ST(i), pop
-                    (outs), (ins RST:$reg), "fucomp\t$reg">;
+                    (outs), (ins RSTi:$reg), "fucomp\t$reg">;
 def UCOM_FPPr  : FPI<0xDA, MRM_E9,       // cmp ST(0) with ST(1), pop, pop
                     (outs), (ins), "fucompp">;
 }
 
-let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
+let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in {
 def UCOM_FIr   : FPI<0xDB, MRM5r,     // CC = cmp ST(0) with ST(i)
-                    (outs), (ins RST:$reg), "fucomi\t$reg">;
+                    (outs), (ins RSTi:$reg), "fucomi\t{$reg, %st|st, $reg}">;
 def UCOM_FIPr  : FPI<0xDF, MRM5r,     // CC = cmp ST(0) with ST(i), pop
-                    (outs), (ins RST:$reg), "fucompi\t$reg">;
-}
+                    (outs), (ins RSTi:$reg), "fucompi\t{$reg, %st|st, $reg}">;
 
-let Defs = [EFLAGS, FPSW] in {
-def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
-def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
+def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RSTi:$reg),
+                  "fcomi\t{$reg, %st|st, $reg}">;
+def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
+                   "fcompi\t{$reg, %st|st, $reg}">;
 }
 } // SchedRW
 
@@ -631,12 +631,12 @@ let Defs = [AX], Uses = [FPSW] in
 def FNSTSW16r : I<0xDF, MRM_E0,                  // AX = fp flags
                   (outs), (ins), "fnstsw\t{%ax|ax}",
                   [(set AX, (X86fp_stsw FPSW))]>;
-let Defs = [FPSW] in
+let Defs = [FPSW], Uses = [FPCW] in
 def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
                   (outs), (ins i16mem:$dst), "fnstcw\t$dst",
                   [(X86fp_cwd_get16 addr:$dst)]>;
 } // SchedRW
-let Defs = [FPSW], mayLoad = 1 in
+let Defs = [FPSW,FPCW], mayLoad = 1 in
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
                 Sched<[WriteLoad]>;
@@ -645,8 +645,8 @@ def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
 let SchedRW = [WriteMicrocoded] in {
 let Defs = [FPSW] in {
 def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
-def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
-def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
+def FFREE : FPI<0xDD, MRM0r, (outs), (ins RSTi:$reg), "ffree\t$reg">;
+def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RSTi:$reg), "ffreep\t$reg">;
 
 // Clear exceptions
 def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 11a27ba90586be..3d508e2c34f39a 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -198,6 +198,8 @@ def X86vsra    : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
 def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                              SDTCisSameAs<0,2>, SDTCisInt<0>]>;
 
+def X86vshlv   : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
+def X86vsrlv   : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
 def X86vsrav   : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
 
 def X86vshli   : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index e53f83baa3c62d..4ec4d566ca9986 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -3231,39 +3231,39 @@ def : InstAlias<"fucompi",      (UCOM_FIPr   ST1), 0>;
 // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
 // gas.
 multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
- def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
-                 (Inst RST:$op), EmitAlias>;
- def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
+ def : InstAlias<!strconcat(Mnemonic, "\t$op"),
+                 (Inst RSTi:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, "\t{%st, %st|st, st}"),
                  (Inst ST0), EmitAlias>;
 }
 
-defm : FpUnaryAlias<"fadd",   ADD_FST0r>;
+defm : FpUnaryAlias<"fadd",   ADD_FST0r, 0>;
 defm : FpUnaryAlias<"faddp",  ADD_FPrST0, 0>;
-defm : FpUnaryAlias<"fsub",   SUB_FST0r>;
-defm : FpUnaryAlias<"fsub{|r}p",  SUBR_FPrST0>;
-defm : FpUnaryAlias<"fsubr",  SUBR_FST0r>;
-defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
-defm : FpUnaryAlias<"fmul",   MUL_FST0r>;
-defm : FpUnaryAlias<"fmulp",  MUL_FPrST0>;
-defm : FpUnaryAlias<"fdiv",   DIV_FST0r>;
-defm : FpUnaryAlias<"fdiv{|r}p",  DIVR_FPrST0>;
-defm : FpUnaryAlias<"fdivr",  DIVR_FST0r>;
-defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
+defm : FpUnaryAlias<"fsub",   SUB_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{|r}p",  SUBR_FPrST0, 0>;
+defm : FpUnaryAlias<"fsubr",  SUBR_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0, 0>;
+defm : FpUnaryAlias<"fmul",   MUL_FST0r, 0>;
+defm : FpUnaryAlias<"fmulp",  MUL_FPrST0, 0>;
+defm : FpUnaryAlias<"fdiv",   DIV_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{|r}p",  DIVR_FPrST0, 0>;
+defm : FpUnaryAlias<"fdivr",  DIVR_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0, 0>;
 defm : FpUnaryAlias<"fcomi",   COM_FIr, 0>;
 defm : FpUnaryAlias<"fucomi",  UCOM_FIr, 0>;
-defm : FpUnaryAlias<"fcompi",   COM_FIPr>;
-defm : FpUnaryAlias<"fucompi",  UCOM_FIPr>;
+defm : FpUnaryAlias<"fcompi",   COM_FIPr, 0>;
+defm : FpUnaryAlias<"fucompi",  UCOM_FIPr, 0>;
 
 
-// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// Handle "f{mulp,addp} $op, %st(0)" the same as "f{mulp,addp} $op", since they
 // commute.  We also allow fdiv[r]p/fsubrp even though they don't commute,
 // solely because gas supports it.
-def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
-def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
-def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
-def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
-def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
-def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
+def : InstAlias<"faddp\t{$op, %st|st, $op}", (ADD_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fmulp\t{$op, %st|st, $op}", (MUL_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{|r}p\t{$op, %st|st, $op}", (SUBR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{r|}p\t{$op, %st|st, $op}", (SUB_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{|r}p\t{$op, %st|st, $op}", (DIVR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{r|}p\t{$op, %st|st, $op}", (DIV_FPrST0 RSTi:$op), 0>;
 
 def : InstAlias<"fnstsw"     , (FNSTSW16r), 0>;
 
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index e2bcd18ce66079..ddfc369b1180ec 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8318,7 +8318,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
 // Variable Bit Shifts
 //
 multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          ValueType vt128, ValueType vt256> {
+                          SDNode IntrinNode, ValueType vt128, ValueType vt256> {
   def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -8347,23 +8347,23 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        (vt256 (load addr:$src2)))))]>,
              VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                    SchedWriteVarVecShift.YMM.ReadAfterFold]>;
+
+  def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)),
+            (!cast<Instruction>(NAME#"rr") VR128:$src1, VR128:$src2)>;
+  def : Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))),
+            (!cast<Instruction>(NAME#"rm") VR128:$src1, addr:$src2)>;
+  def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)),
+            (!cast<Instruction>(NAME#"Yrr") VR256:$src1, VR256:$src2)>;
+  def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))),
+            (!cast<Instruction>(NAME#"Yrm") VR256:$src1, addr:$src2)>;
 }
 
 let Predicates = [HasAVX2, NoVLX] in {
-  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
-  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
-  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
-  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
-  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
-
-  def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
-            (VPSRAVDrr VR128:$src1, VR128:$src2)>;
-  def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
-            (VPSRAVDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
-            (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
-            (VPSRAVDYrm VR256:$src1, addr:$src2)>;
+  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>;
+  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W;
+  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>;
+  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W;
+  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 151e1b9136c4a4..acb3d48463de68 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -389,10 +389,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
   X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
@@ -405,10 +405,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
   X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -943,11 +943,11 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
   X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
@@ -971,11 +971,11 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 55842a4a209144..bc39cee34c4adb 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -497,6 +497,9 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   const X86FrameLowering *TFI = getFrameLowering(MF);
 
+  // Set the floating point control register as reserved.
+  Reserved.set(X86::FPCW);
+
   // Set the stack-pointer register and its aliases as reserved.
   for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
        ++I)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index aa20273f89abea..6a0538138528b0 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -278,7 +278,7 @@ def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
 // pseudo registers, but we still mark them as aliasing FP registers. That
 // way both kinds can be live without exceeding the stack depth. ST registers
 // are only live around inline assembly.
-def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
+def ST0 : X86Reg<"st", 0>, DwarfRegNum<[33, 12, 11]>;
 def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
 def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
 def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
@@ -288,7 +288,10 @@ def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
 def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
 
 // Floating-point status word
-def FPSW : X86Reg<"fpsw", 0>;
+def FPSW : X86Reg<"fpsr", 0>;
+
+// Floating-point control word
+def FPCW : X86Reg<"fpcr", 0>;
 
 // Status flags register.
 //
@@ -539,6 +542,9 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
   let isAllocatable = 0;
 }
 
+// Helper to allow %st to print as %st(0) when it's encoded in the instruction.
+def RSTi : RegisterOperand<RST, "printSTiRegOperand">;
+
 // Generic vector registers: VR64 and VR128.
 // Ensure that float types are declared first - only float is legal on SSE1.
 def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index be7d43bbcf2c32..f530ee1246e8c2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3065,9 +3065,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
       I->isTerminator())
     return false;
 
-  // Do not sink alloca instructions out of the entry block.
-  if (isa<AllocaInst>(I) && I->getParent() ==
-        &DestBlock->getParent()->getEntryBlock())
+  // Do not sink static or dynamic alloca instructions. Static allocas must
+  // remain in the entry block, and dynamic allocas must not be sunk in between
+  // a stacksave / stackrestore pair, which would incorrectly shorten its
+  // lifetime.
+  if (isa<AllocaInst>(I))
     return false;
 
   // Do not sink into catchswitch blocks.
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index e6573af2077dcc..b2230afa13d8a1 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -321,6 +321,7 @@ static cl::opt<unsigned long long> ClOriginBase("msan-origin-base",
        cl::desc("Define custom MSan OriginBase"),
        cl::Hidden, cl::init(0));
 
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
 static const char *const kMsanInitName = "__msan_init";
 
 namespace {
@@ -586,6 +587,8 @@ class MemorySanitizer {
 
   /// An empty volatile inline asm that prevents callback merge.
   InlineAsm *EmptyAsm;
+
+  Function *MsanCtorFunction;
 };
 
 /// A legacy function pass for msan instrumentation.
@@ -839,6 +842,8 @@ Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
 }
 
 /// Module-level initialization.
+///
+/// Inserts a call to __msan_init into the module's constructor list.
 void MemorySanitizer::initializeModule(Module &M) {
   auto &DL = M.getDataLayout();
 
@@ -913,7 +918,22 @@ void MemorySanitizer::initializeModule(Module &M) {
   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
 
   if (!CompileKernel) {
-    getOrCreateInitFunction(M, kMsanInitName);
+    std::tie(MsanCtorFunction, std::ignore) =
+        getOrCreateSanitizerCtorAndInitFunctions(
+            M, kMsanModuleCtorName, kMsanInitName,
+            /*InitArgTypes=*/{},
+            /*InitArgs=*/{},
+            // This callback is invoked when the functions are created the first
+            // time. Hook them into the global ctors list in that case:
+            [&](Function *Ctor, Function *) {
+              if (!ClWithComdat) {
+                appendToGlobalCtors(M, Ctor, 0);
+                return;
+              }
+              Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+              Ctor->setComdat(MsanCtorComdat);
+              appendToGlobalCtors(M, Ctor, 0, Ctor);
+            });
 
     if (TrackOrigins)
       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
@@ -4458,6 +4478,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
 }
 
 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+  if (!CompileKernel && (&F == MsanCtorFunction))
+    return false;
   MemorySanitizerVisitor Visitor(F, *this, TLI);
 
   // Clear out readonly/readnone attributes.
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index eab77cf4cda9b2..68ca6c47c8f1a4 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3031,7 +3031,10 @@ class llvm::sroa::AllocaSliceRewriter
     ConstantInt *Size =
         ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
                          NewEndOffset - NewBeginOffset);
-    Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+    // Lifetime intrinsics always expect an i8* so directly get such a pointer
+    // for the new alloca slice.
+    Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+    Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
     Value *New;
     if (II.getIntrinsicID() == Intrinsic::lifetime_start)
       New = IRB.CreateLifetimeStart(Ptr, Size);
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index a9772e31da5095..81d63ee80394ef 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -249,6 +249,8 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
   bool DoPromote = false;
   if (GV.hasLocalLinkage() &&
       ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+    // Save the original name string before we rename GV below.
+    auto Name = GV.getName().str();
     // Once we change the name or linkage it is difficult to determine
     // again whether we should promote since shouldPromoteLocalToGlobal needs
     // to locate the summary (based on GUID from name and linkage). Therefore,
@@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
     GV.setLinkage(getLinkage(&GV, DoPromote));
     if (!GV.hasLocalLinkage())
       GV.setVisibility(GlobalValue::HiddenVisibility);
+
+    // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+    // for later renaming as well. This is required for COFF.
+    if (const auto *C = GV.getComdat())
+      if (C->getName() == Name)
+        RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
   } else
     GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
 
@@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
     processGlobalForThinLTO(SF);
   for (GlobalAlias &GA : M.aliases())
     processGlobalForThinLTO(GA);
+
+  // Replace any COMDATs that required renaming (because the COMDAT leader was
+  // promoted and renamed).
+  if (!RenamedComdats.empty())
+    for (auto &GO : M.global_objects())
+      if (auto *C = GO.getComdat()) {
+        auto Replacement = RenamedComdats.find(C);
+        if (Replacement != RenamedComdats.end())
+          GO.setComdat(Replacement->second);
+      }
 }
 
 bool FunctionImportGlobalProcessing::run() {
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index a93d1aeb62ef91..112e80d27e345d 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -217,7 +217,10 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
     // When the value is absent it is interpreted as 'attribute set'.
     return true;
   case 2:
-    return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+    if (ConstantInt *IntMD =
+            mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
+      return IntMD->getZExtValue();
+    return true;
   }
   llvm_unreachable("unexpected number of options");
 }
@@ -376,17 +379,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   Optional<int> InterleaveCount =
       getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
 
-  if (Enable == true) {
-    // 'Forcing' vector width and interleave count to one effectively disables
-    // this tranformation.
-    if (VectorizeWidth == 1 && InterleaveCount == 1)
-      return TM_SuppressedByUser;
-    return TM_ForcedByUser;
-  }
+  // 'Forcing' vector width and interleave count to one effectively disables
+  // this transformation.
+  if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+    return TM_SuppressedByUser;
 
   if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
     return TM_Disable;
 
+  if (Enable == true)
+    return TM_ForcedByUser;
+
   if (VectorizeWidth == 1 && InterleaveCount == 1)
     return TM_Disable;
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e856a7e6802ed..a07fffe9b98b9a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
   // If any of the scalars is marked as a value that needs to stay scalar, then
   // we need to gather the scalars.
+  // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
   for (unsigned i = 0, e = VL.size(); i != e; ++i) {
-    if (MustGather.count(VL[i])) {
+    if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
       LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
       newTreeEntry(VL, false, UserTreeIdx);
       return;
diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
new file mode 100644
index 00000000000000..a785533e8db9be
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
@@ -0,0 +1,441 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[2]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    zip1 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v0.s[3], wzr
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[2]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+; This would crash because we did not expect to create
+; a shuffle for a vector where the source operand is
+; not the same size as the result.
+; TODO: Should we handle this pattern? I.e., is moving to/from
+; registers the optimal code?
+
+define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
+; CHECK-LABEL: larger_bv_than_source:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %t1 = extractelement <4 x i16> %t0, i32 2
+  %vgetq_lane = zext i16 %t1 to i32
+  %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0
+  ret <4 x i32> %t2
+}
+
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
new file mode 100644
index 00000000000000..9c6d8cc2053312
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple arm64-apple-ios -mattr=+lse %s -o - | FileCheck %s
+
+; Only "even,even+1" pairs are valid for CASP instructions. Make sure LLVM
+; doesn't allocate odd ones and that it can copy them around properly. N.b. we
+; don't actually check that they're sequential because FileCheck can't; odd/even
+; will have to be good enough.
+define void @test_atomic_cmpxchg_i128_register_shuffling(i128* %addr, i128 %desired, i128 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i128_register_shuffling:
+; CHECK-DAG: mov [[DESIRED_LO:x[0-9]*[02468]]], x1
+; CHECK-DAG: mov [[DESIRED_HI:x[0-9]*[13579]]], x2
+; CHECK-DAG: mov [[NEW_LO:x[0-9]*[02468]]], x3
+; CHECK-DAG: mov [[NEW_HI:x[0-9]*[13579]]], x4
+; CHECK: caspal [[DESIRED_LO]], [[DESIRED_HI]], [[NEW_LO]], [[NEW_HI]], [x0]
+
+  %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/eh_recoverfp.ll b/llvm/test/CodeGen/AArch64/eh_recoverfp.ll
new file mode 100644
index 00000000000000..777bcee543827b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/eh_recoverfp.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple arm64-windows %s -o - 2>&1 | FileCheck %s
+
+define i8* @foo(i8* %a) {
+; CHECK-LABEL: foo
+; CHECK-NOT: llvm.x86.seh.recoverfp
+  %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @f to i8*), i8* %a)
+  ret i8* %1
+}
+
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+declare i32 @f()
diff --git a/llvm/test/CodeGen/AArch64/seh-finally.ll b/llvm/test/CodeGen/AArch64/seh-finally.ll
new file mode 100644
index 00000000000000..3cbbd03385c5fb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/seh-finally.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple arm64-windows -o - %s | FileCheck %s
+
+; Function Attrs: noinline optnone uwtable
+define dso_local i32 @foo() {
+entry:
+; CHECK-LABEL: foo
+; CHECK: orr     w8, wzr, #0x1
+; CHECK: mov     w0, wzr
+; CHECK: mov     x1, x29
+; CHECK: .set .Lfoo$frame_escape_0, -4
+; CHECK: stur    w8, [x29, #-4]
+; CHECK: bl      "?fin$0@0@foo@@"
+; CHECK: ldur    w0, [x29, #-4]
+
+  %count = alloca i32, align 4
+  call void (...) @llvm.localescape(i32* %count)
+  store i32 0, i32* %count, align 4
+  %0 = load i32, i32* %count, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %count, align 4
+  %1 = call i8* @llvm.localaddress()
+  call void @"?fin$0@0@foo@@"(i8 0, i8* %1)
+  %2 = load i32, i32* %count, align 4
+  ret i32 %2
+}
+
+define internal void @"?fin$0@0@foo@@"(i8 %abnormal_termination, i8* %frame_pointer) {
+entry:
+; CHECK-LABEL: @"?fin$0@0@foo@@"
+; CHECK: sub     sp, sp, #16
+; CHECK: str     x1, [sp, #8]
+; CHECK: strb    w0, [sp, #7]
+; CHECK: movz    x8, #:abs_g1_s:.Lfoo$frame_escape_0
+; CHECK: movk    x8, #:abs_g0_nc:.Lfoo$frame_escape_0
+; CHECK: add     x8, x1, x8
+; CHECK: ldr     w9, [x8]
+; CHECK: add     w9, w9, #1
+; CHECK: str     w9, [x8]
+
+  %frame_pointer.addr = alloca i8*, align 8
+  %abnormal_termination.addr = alloca i8, align 1
+  %0 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @foo to i8*), i8* %frame_pointer, i32 0)
+  %count = bitcast i8* %0 to i32*
+  store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+  store i8 %abnormal_termination, i8* %abnormal_termination.addr, align 1
+  %1 = zext i8 %abnormal_termination to i32
+  %cmp = icmp eq i32 %1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = load i32, i32* %count, align 4
+  %add = add nsw i32 %2, 1
+  store i32 %add, i32* %count, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.localaddress()
+
+; Function Attrs: nounwind
+declare void @llvm.localescape(...)
diff --git a/llvm/test/CodeGen/AArch64/seh-localescape.ll b/llvm/test/CodeGen/AArch64/seh-localescape.ll
new file mode 100644
index 00000000000000..0a1675014f62ff
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/seh-localescape.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple arm64-windows %s -o - | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @foo() {
+entry:
+; CHECK-LABEL: foo
+; CHECK: .set .Lfoo$frame_escape_0, -4
+
+  %count = alloca i32, align 4
+  call void (...) @llvm.localescape(i32* %count)
+  ret i32 0
+}
+
+define internal i32 @"?filt$0@0@foo@@"(i8* %exception_pointers, i8* %frame_pointer) {
+entry:
+; CHECK-LABEL: @"?filt$0@0@foo@@"
+; CHECK: movz    x8, #:abs_g1_s:.Lfoo$frame_escape_0
+; CHECK: movk    x8, #:abs_g0_nc:.Lfoo$frame_escape_0
+
+  %0 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @foo to i8*), i8* %frame_pointer, i32 0)
+  %count = bitcast i8* %0 to i32*
+  %1 = load i32, i32* %count, align 4
+  ret i32 %1
+}
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+
+; Function Attrs: nounwind
+declare void @llvm.localescape(...)
diff --git a/llvm/test/CodeGen/AArch64/seqpaircopy.mir b/llvm/test/CodeGen/AArch64/seqpaircopy.mir
new file mode 100644
index 00000000000000..89511cbf726bd0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/seqpaircopy.mir
@@ -0,0 +1,23 @@
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+v8.1a -run-pass=postrapseudos | FileCheck %s
+---
+# CHECK-LABEL: name: copy_xseqpairs
+name: copy_xseqpairs
+body: |
+  bb.0:
+    ; CHECK: $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0
+    ; CHECK: $x0 = ORRXrs $xzr, $x4, 0
+    ; CHECK: $x1 = ORRXrs $xzr, $x5, 0
+    $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0
+    $x0_x1 = COPY $x4_x5
+...
+---
+# CHECK-LABEL: name: copy_wseqpairs
+name: copy_wseqpairs
+body: |
+  bb.0:
+    ; CHECK: $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0
+    ; CHECK: $w0 = ORRWrs $wzr, $w4, 0
+    ; CHECK: $w1 = ORRWrs $wzr, $w5, 0
+    $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0
+    $w0_w1 = COPY $w4_w5
+...
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll
index 0b8f8d31b3162e..e90fb19a522a1f 100644
--- a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll
@@ -11,10 +11,10 @@ entry:
 ; CHECK-NEXT: and   x8, x8, x16
 ; CHECK-NEXT: and   x1, x1, x16
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
-; CHECK-NEXT: and x17, x17, x16
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
 ; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -29,9 +29,9 @@ entry:
 ; CHECK-NEXT: and   x0, x0, x16
 ; CHECK-NEXT: csdb
 ; CHECK-NEXT: ldr   d0, [x0]
-; CHECK-NEXT: mov x17, sp
-; CHECK-NEXT: and x17, x17, x16
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -51,12 +51,12 @@ entry:
 ; CHECK-NEXT: and x8, x8, x16
 ; csdb instruction must occur before the add instruction with w8 as operand.
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
 ; CHECK-NEXT: add w9, w1, w8
 ; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: and x17, x17, x16
 ; CHECK-NEXT: csel w0, w1, w9, eq
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -76,12 +76,12 @@ entry:
 ; CHECK-NEXT: and w8, w8, w16
 ; csdb instruction must occur before the add instruction with x8 as operand.
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
 ; CHECK-NEXT: add x9, x1, x8
 ; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: and x17, x17, x16
 ; CHECK-NEXT: csel x0, x1, x9, eq
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -112,11 +112,11 @@ entry:
 ; CHECK-NEXT:  and     x1, x1, x16
 ; CHECK-NEXT:  csdb
 ; CHECK-NEXT:  ldr     d0, [x1]
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
 ; CHECK-NEXT:  mov     v0.d[1], v0.d[0]
 ; CHECK-NEXT:  str     q0, [x0]
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
 }
 
@@ -129,9 +129,9 @@ entry:
 ; CHECK-NEXT:  and     x1, x1, x16
 ; CHECK-NEXT:  csdb
 ; CHECK-NEXT:  ld1     { v0.d }[0], [x1]
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
   %0 = load double, double* %b, align 16
   %vld1_lane = insertelement <2 x double> <double undef, double 0.000000e+00>, double %0, i32 0
@@ -147,9 +147,9 @@ entry:
 ; CHECK-NEXT:  .cfi_def_cfa_offset 16
 ; CHECK-NEXT:  ldr     w8, [sp, #12]
 ; CHECK-NEXT:  add     sp, sp, #16
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
   %a = alloca i32, align 4
   %val = load volatile i32, i32* %a, align 4
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll
index 3535b63c32cc8f..51156f68dec8c9 100644
--- a/llvm/test/CodeGen/AArch64/speculation-hardening.ll
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll
@@ -1,9 +1,9 @@
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
 
 define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: f
@@ -13,12 +13,12 @@ entry:
 ; NOSLH-NOT:  cmp sp, #0
 ; NOSLH-NOT:  csetm x16, ne
 
-; SLH:  mov x17, sp
-; SLH:  and x17, x17, x16
-; SLH:  mov sp, x17
-; NOSLH-NOT:  mov x17, sp
-; NOSLH-NOT:  and x17, x17, x16
-; NOSLH-NOT:  mov sp, x17
+; SLH:  mov [[TMPREG:x[0-9]+]], sp
+; SLH:  and [[TMPREG]], [[TMPREG]], x16
+; SLH:  mov sp, [[TMPREG]]
+; NOSLH-NOT:  mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT:  and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT:  mov sp, [[TMPREG]]
   %call = tail call i32 @tail_callee(i32 %i)
 ; SLH:  cmp sp, #0
 ; SLH:  csetm x16, ne
@@ -43,29 +43,37 @@ if.then:                                          ; preds = %entry
 ; NOSLH-NOT: csel x16, x16, xzr, [[COND]]
 return:                                           ; preds = %entry, %if.then
   %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ]
-; SLH:  mov x17, sp
-; SLH:  and x17, x17, x16
-; SLH:  mov sp, x17
-; NOSLH-NOT:  mov x17, sp
-; NOSLH-NOT:  and x17, x17, x16
-; NOSLH-NOT:  mov sp, x17
+; SLH:  mov [[TMPREG:x[0-9]+]], sp
+; SLH:  and [[TMPREG]], [[TMPREG]], x16
+; SLH:  mov sp, [[TMPREG]]
+; NOSLH-NOT:  mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT:  and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT:  mov sp, [[TMPREG]]
   ret i32 %retval.0
 }
 
 ; Make sure that for a tail call, taint doesn't get put into SP twice.
 define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: tail_caller:
-; SLH:     mov     x17, sp
-; SLH:     and     x17, x17, x16
-; SLH:     mov     sp, x17
-; NOSLH-NOT:     mov     x17, sp
-; NOSLH-NOT:     and     x17, x17, x16
-; NOSLH-NOT:     mov     sp, x17
+; NOGISELSLH:     mov [[TMPREG:x[0-9]+]], sp
+; NOGISELSLH:     and [[TMPREG]], [[TMPREG]], x16
+; NOGISELSLH:     mov sp, [[TMPREG]]
+; NOGISELNOSLH-NOT:     mov [[TMPREG:x[0-9]+]], sp
+; NOGISELNOSLH-NOT:     and [[TMPREG]], [[TMPREG]], x16
+; NOGISELNOSLH-NOT:     mov sp, [[TMPREG]]
+; GISELSLH:     mov [[TMPREG:x[0-9]+]], sp
+; GISELSLH:     and [[TMPREG]], [[TMPREG]], x16
+; GISELSLH:     mov sp, [[TMPREG]]
+; GISELNOSLH-NOT:     mov [[TMPREG:x[0-9]+]], sp
+; GISELNOSLH-NOT:     and [[TMPREG]], [[TMPREG]], x16
+; GISELNOSLH-NOT:     mov sp, [[TMPREG]]
 ;  GlobalISel doesn't optimize tail calls (yet?), so only check that
 ;  cross-call taint register setup code is missing if a tail call was
 ;  actually produced.
-; SLH:     {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}}
-; SLH-NOT: cmp sp, #0
+; NOGISELSLH:     b tail_callee
+; GISELSLH:       bl tail_callee
+; GISELSLH:       cmp sp, #0
+; SLH-NOT:        cmp sp, #0
   %call = tail call i32 @tail_callee(i32 %a)
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir
index cf8357d9558b00..5991c4df0407f7 100644
--- a/llvm/test/CodeGen/AArch64/speculation-hardening.mir
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir
@@ -25,6 +25,22 @@
   define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening {
    ret void
   }
+  ; Also check that a non-default temporary register gets picked correctly to
+  ; hold the SP value while it is ANDed with the taint register, when the
+  ; default temporary isn't available.
+  define void @indirect_call_x17(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  @g = common dso_local local_unnamed_addr global i64 (...)* null, align 8
+  define void @indirect_tailcall_x17(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  define void @indirect_call_lr(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  define void @RS_cannot_find_available_regs() speculative_load_hardening {
+   ret void
+  }
 ...
 ---
 name:            nobranch_fallthrough
@@ -115,3 +131,72 @@ body:             |
   ; CHECK-NOT: csel
    RET undef $lr, implicit $x0
 ...
+---
+name:            indirect_call_x17
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x17
+    ; CHECK-LABEL: indirect_call_x17
+    ; CHECK:       mov x0, sp
+    ; CHECK:       and x0, x0, x16
+    ; CHECK:       mov sp, x0
+    ; CHECK:       blr x17
+    BLR killed renamable $x17, implicit-def dead $lr, implicit $sp
+    RET undef $lr, implicit undef $w0
+...
+---
+name:           indirect_tailcall_x17
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: indirect_tailcall_x17
+    ; CHECK:       mov x1, sp
+    ; CHECK:       and x1, x1, x16
+    ; CHECK:       mov sp, x1
+    ; CHECK:       br x17
+    $x8 = ADRP target-flags(aarch64-page) @g
+    $x17 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @g
+    TCRETURNri killed $x17, 0, implicit $sp, implicit $x0
+...
+---
+name:           indirect_call_lr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: indirect_call_lr
+    ; CHECK:            mov x1, sp
+    ; CHECK-NEXT:       and x1, x1, x16
+    ; CHECK-NEXT:       mov sp, x1
+    ; CHECK-NEXT:       blr x30
+    liveins: $x0, $lr
+    BLR killed renamable $lr, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+    $w0 = nsw ADDWri killed $w0, 1, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:           RS_cannot_find_available_regs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; In the rare case when no free temporary register is available for the
+    ; propagate taint-to-sp operation, just put in a full speculation barrier
+    ; (dsb sy + isb) at the start of the basic block. And don't put masks on
+    ; instructions for the rest of the basic block, since the barrier already
+    ; stops speculation in that basic block, so no masking is needed.
+    ; CHECK-LABEL: RS_cannot_find_available_regs
+    ; CHECK:       dsb sy
+    ; CHECK-NEXT:  isb
+    ; CHECK-NEXT:  ldr x0, [x0]
+    ; The following 2 instructions come from propagating the taint encoded in
+    ; sp at function entry to x16. It turns out the taint info in x16 is not
+    ; used in this function, so those instructions could be optimized away. An
+    ; optimization for later if it turns out this situation occurs often enough.
+    ; CHECK-NEXT:  cmp sp, #0
+    ; CHECK-NEXT:  csetm x16, ne
+    ; CHECK-NEXT:  ret
+    liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr
+     $x0 = LDRXui killed $x0, 0
+     RET undef $lr, implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/wineh4.mir b/llvm/test/CodeGen/AArch64/wineh4.mir
index 4d4cc892c2e82f..3a324324431c7c 100644
--- a/llvm/test/CodeGen/AArch64/wineh4.mir
+++ b/llvm/test/CodeGen/AArch64/wineh4.mir
@@ -1,7 +1,7 @@
 # RUN: llc -o - %s -mtriple=aarch64-windows -start-after=prologepilog \
 # RUN:   -disable-branch-fold  -filetype=obj \
 # RUN: | llvm-readobj -unwind | FileCheck %s
-# Check that multiple epilgoues are correctly placed in .xdata.
+# Check that multiple identical epilogues are correctly shared in .xdata.
 
 # CHECK:        ExceptionData {
 # CHECK-NEXT:      FunctionLength: 164
@@ -9,7 +9,7 @@
 # CHECK-NEXT:      ExceptionData: No
 # CHECK-NEXT:      EpiloguePacked: No
 # CHECK-NEXT:      EpilogueScopes: 2
-# CHECK-NEXT:      ByteCodeLength: 48
+# CHECK-NEXT:      ByteCodeLength: 32
 # CHECK-NEXT:      Prologue [
 # CHECK-NEXT:        0xc80c              ; stp x19, x20, [sp, #96]
 # CHECK-NEXT:        0xc88a              ; stp x21, x22, [sp, #80]
@@ -37,7 +37,7 @@
 # CHECK-NEXT:        }
 # CHECK-NEXT:        EpilogueScope {
 # CHECK-NEXT:          StartOffset: 33
-# CHECK-NEXT:          EpilogueStartIndex: 30
+# CHECK-NEXT:          EpilogueStartIndex: 15
 # CHECK-NEXT:          Opcodes [
 # CHECK-NEXT:            0xc80c              ; ldp x19, x20, [sp, #96]
 # CHECK-NEXT:            0xc88a              ; ldp x21, x22, [sp, #80]
diff --git a/llvm/test/CodeGen/AArch64/wineh8.mir b/llvm/test/CodeGen/AArch64/wineh8.mir
new file mode 100644
index 00000000000000..606bc140b232dc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/wineh8.mir
@@ -0,0 +1,225 @@
+# RUN: llc -o - %s -mtriple=aarch64-windows -start-after=prologepilog \
+# RUN:   -disable-branch-fold  -filetype=obj \
+# RUN: | llvm-readobj -unwind | FileCheck %s
+# Check that multiple non-identical epilogues are emitted separately (not shared) in .xdata.
+
+# CHECK:        ExceptionData {
+# CHECK-NEXT:      FunctionLength: 160
+# CHECK-NEXT:      Version: 0
+# CHECK-NEXT:      ExceptionData: No
+# CHECK-NEXT:      EpiloguePacked: No
+# CHECK-NEXT:      EpilogueScopes: 2
+# CHECK-NEXT:      ByteCodeLength: 44
+# CHECK-NEXT:      Prologue [
+# CHECK-NEXT:        0xc80c              ; stp x19, x20, [sp, #96]
+# CHECK-NEXT:        0xc88a              ; stp x21, x22, [sp, #80]
+# CHECK-NEXT:        0xc908              ; stp x23, x24, [sp, #64]
+# CHECK-NEXT:        0xc986              ; stp x25, x26, [sp, #48]
+# CHECK-NEXT:        0xca04              ; stp x27, x28, [sp, #32]
+# CHECK-NEXT:        0xd802              ; stp d8, d9, [sp, #16]
+# CHECK-NEXT:        0xda8d              ; stp d10, d11, [sp, #-112]!
+# CHECK-NEXT:        0xe4                ; end
+# CHECK-NEXT:      ]
+# CHECK-NEXT:      EpilogueScopes [
+# CHECK-NEXT:        EpilogueScope {
+# CHECK-NEXT:          StartOffset: 16
+# CHECK-NEXT:          EpilogueStartIndex: 15
+# CHECK-NEXT:          Opcodes [
+# CHECK-NEXT:            0xc80c              ; ldp x19, x20, [sp, #96]
+# CHECK-NEXT:            0xc88a              ; ldp x21, x22, [sp, #80]
+# CHECK-NEXT:            0xc908              ; ldp x23, x24, [sp, #64]
+# CHECK-NEXT:            0xc986              ; ldp x25, x26, [sp, #48]
+# CHECK-NEXT:            0xd802              ; ldp d8, d9, [sp, #16]
+# CHECK-NEXT:            0xda8d              ; ldp d10, d11, [sp], #112
+# CHECK-NEXT:            0xe4                ; end
+# CHECK-NEXT:          ]
+# CHECK-NEXT:        }
+# CHECK-NEXT:        EpilogueScope {
+# CHECK-NEXT:          StartOffset: 32
+# CHECK-NEXT:          EpilogueStartIndex: 28
+# CHECK-NEXT:          Opcodes [
+# CHECK-NEXT:            0xc80c              ; ldp x19, x20, [sp, #96]
+# CHECK-NEXT:            0xc88a              ; ldp x21, x22, [sp, #80]
+# CHECK-NEXT:            0xc908              ; ldp x23, x24, [sp, #64]
+# CHECK-NEXT:            0xc986              ; ldp x25, x26, [sp, #48]
+# CHECK-NEXT:            0xca04              ; ldp x27, x28, [sp, #32]
+# CHECK-NEXT:            0xd802              ; ldp d8, d9, [sp, #16]
+# CHECK-NEXT:            0xda8d              ; ldp d10, d11, [sp], #112
+# CHECK-NEXT:            0xe4                ; end
+# CHECK-NEXT:          ]
+# CHECK-NEXT:        }
+# CHECK-NEXT:      ]
+# CHECK-NEXT:    }
+...
+---
+name:            test
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       true
+registers:
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       112
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: true
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x19', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x20', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x21', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x22', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 4, name: '', type: spill-slot, offset: -40, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x23', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 5, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x24', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 6, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x25', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 7, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x26', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 8, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x27', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 9, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$x28', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 10, name: '', type: spill-slot, offset: -88, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$d8', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 11, name: '', type: spill-slot, offset: -96, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$d9', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 12, name: '', type: spill-slot, offset: -104, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$d10', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 13, name: '', type: spill-slot, offset: -112, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '$d11', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20
+
+    early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store 8 into %stack.12), (store 8 into %stack.13)
+    frame-setup SEH_SaveFRegP_X 10, 11, -112
+    frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store 8 into %stack.10), (store 8 into %stack.11)
+    frame-setup SEH_SaveFRegP 8, 9, 16
+    frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store 8 into %stack.8), (store 8 into %stack.9)
+    frame-setup SEH_SaveRegP 27, 28, 32
+    frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store 8 into %stack.6), (store 8 into %stack.7)
+    frame-setup SEH_SaveRegP 25, 26, 48
+    frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store 8 into %stack.4), (store 8 into %stack.5)
+    frame-setup SEH_SaveRegP 23, 24, 64
+    frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store 8 into %stack.2), (store 8 into %stack.3)
+    frame-setup SEH_SaveRegP 21, 22, 80
+    frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store 8 into %stack.0), (store 8 into %stack.1)
+    frame-setup SEH_SaveRegP 19, 20, 96
+    frame-setup SEH_PrologEnd
+    frame-setup CFI_INSTRUCTION def_cfa_offset 112
+    frame-setup CFI_INSTRUCTION offset $w19, -8
+    frame-setup CFI_INSTRUCTION offset $w20, -16
+    frame-setup CFI_INSTRUCTION offset $w21, -24
+    frame-setup CFI_INSTRUCTION offset $w22, -32
+    frame-setup CFI_INSTRUCTION offset $w23, -40
+    frame-setup CFI_INSTRUCTION offset $w24, -48
+    frame-setup CFI_INSTRUCTION offset $w25, -56
+    frame-setup CFI_INSTRUCTION offset $w26, -64
+    frame-setup CFI_INSTRUCTION offset $w27, -72
+    frame-setup CFI_INSTRUCTION offset $w28, -80
+    frame-setup CFI_INSTRUCTION offset $b8, -88
+    frame-setup CFI_INSTRUCTION offset $b9, -96
+    frame-setup CFI_INSTRUCTION offset $b10, -104
+    frame-setup CFI_INSTRUCTION offset $b11, -112
+    $x19 = ADDXrr $x0, killed $x1
+    $d8 = FADDDrr killed $d0, $d1
+    $d9 = FADDDrr $d8, $d1
+    $d10 = FADDDrr $d9, $d8
+    $d11 = FADDDrr killed $d9, $d10
+    $x20 = SUBSXrr $x19, killed $x0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit killed $nzcv
+    B %bb.1
+
+  bb.1:
+    liveins: $x19, $x20
+
+    $x21 = ADDXrr $x20, killed $x19
+    $x22 = ADDXrr $x21, killed $x20
+    $x23 = ADDXrr $x22, killed $x21
+    $x24 = ADDXrr $x23, killed $x22
+    $x25 = ADDXrr $x24, killed $x23
+    $x26 = ADDXrr $x25, killed $x24
+    $x27 = ADDXrr $x26, killed $x25
+    $x28 = ADDXrr $x27, killed $x26
+    $x0 = COPY $x28
+    frame-destroy SEH_EpilogStart
+    $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1)
+    frame-destroy SEH_SaveRegP 19, 20, 96
+    $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3)
+    frame-destroy SEH_SaveRegP 21, 22, 80
+    $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5)
+    frame-destroy SEH_SaveRegP 23, 24, 64
+    $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7)
+    frame-destroy SEH_SaveRegP 25, 26, 48
+    $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9)
+    frame-destroy SEH_SaveRegP 27, 28, 32
+    $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11)
+    frame-destroy SEH_SaveFRegP 8, 9, 16
+    early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13)
+    frame-destroy SEH_SaveFRegP_X 10, 11, -112
+    frame-destroy SEH_EpilogEnd
+    RET_ReallyLR implicit $x0
+
+  bb.2:
+    liveins: $x28, $d11
+
+    $x0 = COPY $d11
+    $x0 = ADDXrr $x0, killed $x28
+    frame-destroy SEH_EpilogStart
+    $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1)
+    frame-destroy SEH_SaveRegP 19, 20, 96
+    $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3)
+    frame-destroy SEH_SaveRegP 21, 22, 80
+    $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5)
+    frame-destroy SEH_SaveRegP 23, 24, 64
+    $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7)
+    frame-destroy SEH_SaveRegP 25, 26, 48
+    $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11)
+    frame-destroy SEH_SaveFRegP 8, 9, 16
+    early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13)
+    frame-destroy SEH_SaveFRegP_X 10, 11, -112
+    frame-destroy SEH_EpilogEnd
+    RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
new file mode 100644
index 00000000000000..ad489debc46cb1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
+
+; FUNC-LABEL: {{^}}ds_ordered_add:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; Below are various modifications of input operands and shader types.
+
+; FUNC-LABEL: {{^}}ds_ordered_add_counter2:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
+define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_nodone:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
+define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_norelease:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
+define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_cs:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, s0
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_ps:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, s0
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_vs:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, s0
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_gs:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, s0
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
new file mode 100644
index 00000000000000..acb1133c6a0b8b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
+
+; FUNC-LABEL: {{^}}ds_ordered_swap:
+; GCN: s_mov_b32 m0, s0
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
+  %val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_swap_conditional:
+; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
+; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
+; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
+; GCN: s_cbranch_execz [[BB:BB._.]]
+; GCN: s_mov_b32 m0, s0
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
+; GCN-NEXT: [[BB]]:
+; // Wait for expcnt(0) before modifying EXEC
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
+entry:
+  %c = icmp ne i32 %value, 0
+  br i1 %c, label %if-true, label %endif
+
+if-true:
+  %val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  br label %endif
+
+endif:
+  %v = phi i32 [ %val, %if-true ], [ undef, %entry ]
+  %r = bitcast i32 %v to float
+  ret float %r
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/MIR/X86/memory-operands.mir b/llvm/test/CodeGen/MIR/X86/memory-operands.mir
index 2ac7bea2fc9b91..89b28126b9167b 100644
--- a/llvm/test/CodeGen/MIR/X86/memory-operands.mir
+++ b/llvm/test/CodeGen/MIR/X86/memory-operands.mir
@@ -359,8 +359,8 @@ body: |
     CFI_INSTRUCTION def_cfa_offset 32
     LD_F80m $rsp, 1, $noreg, 32, $noreg, implicit-def dead $fpsw
   ; CHECK: name: stack_psv
-  ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw :: (store 10 into stack, align 16)
-    ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw :: (store 10 into stack, align 16)
+  ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16)
+    ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16)
     CALL64pcrel32 &cosl, csr_64, implicit $rsp, implicit-def $rsp, implicit-def $fp0
     $rsp = ADD64ri8 $rsp, 24, implicit-def dead $eflags
     RETQ
diff --git a/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll b/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
index c3d69c7c0db5e2..be82e98dffe370 100644
--- a/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
+++ b/llvm/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
 target triple = "msp430-unknown-linux-gnu"
 
-define msp430_intrcc void @foo() nounwind {
+define msp430_intrcc void @foo() nounwind #0 {
 entry:
 	%fa = call i8* @llvm.frameaddress(i32 0)
 	store i8 0, i8* %fa
@@ -11,3 +11,5 @@ entry:
 }
 
 declare i8* @llvm.frameaddress(i32)
+
+attributes #0 = { noinline nounwind optnone "interrupt"="2" }
diff --git a/llvm/test/CodeGen/MSP430/fp.ll b/llvm/test/CodeGen/MSP430/fp.ll
index e7d7c519657eee..bf603704a91b68 100644
--- a/llvm/test/CodeGen/MSP430/fp.ll
+++ b/llvm/test/CodeGen/MSP430/fp.ll
@@ -27,3 +27,5 @@ define msp430_intrcc void @fpb_alloced() #0 {
   call void asm sideeffect "nop", "r"(i8 0)
   ret void
 }
+
+attributes #0 = { noinline nounwind optnone "interrupt"="2" }
diff --git a/llvm/test/CodeGen/MSP430/interrupt.ll b/llvm/test/CodeGen/MSP430/interrupt.ll
index 5fa0c849c2602a..94fb3bc457a356 100644
--- a/llvm/test/CodeGen/MSP430/interrupt.ll
+++ b/llvm/test/CodeGen/MSP430/interrupt.ll
@@ -13,6 +13,9 @@ target triple = "msp430-generic-generic"
 ; instruction RETI, which restores the SR register and branches to the PC where
 ; the interrupt occurred.
 
+; CHECK:      .section	__interrupt_vector_2,"ax",@progbits
+; CHECK-NEXT:	.short	ISR
+
 @g = global float 0.0
 
 define msp430_intrcc void @ISR() #0 {
@@ -47,3 +50,4 @@ entry:
   ret void
 }
 
+attributes #0 = { noinline nounwind optnone "interrupt"="2" }
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index 6a07c4f34564de..5c7c3f424c380f 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
-; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
-; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
 ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
-; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
 ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
-; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL
 
 ; Test that vector types are passed through the integer register set whether or
 ; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility
diff --git a/llvm/test/CodeGen/Mips/gprestore.ll b/llvm/test/CodeGen/Mips/gprestore.ll
index 88ac047b660954..a1e696b0ac08b8 100644
--- a/llvm/test/CodeGen/Mips/gprestore.ll
+++ b/llvm/test/CodeGen/Mips/gprestore.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=O32
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=N64
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 | FileCheck %s --check-prefix=N32
-; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3O32
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3N64
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 | FileCheck %s --check-prefix=O3N32
+; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=O32
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=N64
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -mips-jalr-reloc=false | FileCheck %s --check-prefix=N32
+; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3O32
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N64
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N32
 
 ; Test that PIC calls use the $25 register. This is an ABI requirement.
 
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
index e54eaa63222a05..af3d4f50f3fe4f 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: sdiv_i1:
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/srem.ll b/llvm/test/CodeGen/Mips/llvm-ir/srem.ll
index ef0502c85d59bb..487a5b9b6cbc5e 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/srem.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/srem.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define signext i1 @srem_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: srem_i1:
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll
index 8694a9f92b65ab..3b7243712024b6 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R2
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
 ; GP32-LABEL: udiv_i1:
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/urem.ll b/llvm/test/CodeGen/Mips/llvm-ir/urem.ll
index b744f706cbf9ce..4105d67da6f1ac 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/urem.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/urem.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define signext i1 @urem_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: urem_i1:
diff --git a/llvm/test/CodeGen/Mips/long-call-attr.ll b/llvm/test/CodeGen/Mips/long-call-attr.ll
index 5b6ba94aaa3567..beda290a9725b4 100644
--- a/llvm/test/CodeGen/Mips/long-call-attr.ll
+++ b/llvm/test/CodeGen/Mips/long-call-attr.ll
@@ -1,11 +1,11 @@
 ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=O32 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s
 ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=O32 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s
 ; RUN: llc -march=mips64 -target-abi n64 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=N64 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s
 ; RUN: llc -march=mips64 -target-abi n64 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=N64 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s
 
 declare void @far() #0
 
diff --git a/llvm/test/CodeGen/Mips/long-call-mcount.ll b/llvm/test/CodeGen/Mips/long-call-mcount.ll
index 70a4410d060ba3..580f452526f736 100644
--- a/llvm/test/CodeGen/Mips/long-call-mcount.ll
+++ b/llvm/test/CodeGen/Mips/long-call-mcount.ll
@@ -1,8 +1,8 @@
 ; Check call to mcount in case of long/short call options.
 ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,LONG %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,LONG %s
 ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,SHORT %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,SHORT %s
 
 ; Function Attrs: noinline nounwind optnone
 define void @foo() #0 {
diff --git a/llvm/test/CodeGen/Mips/micromips-b-range.ll b/llvm/test/CodeGen/Mips/micromips-b-range.ll
index 5831ae81baedad..27a0db545f743f 100644
--- a/llvm/test/CodeGen/Mips/micromips-b-range.ll
+++ b/llvm/test/CodeGen/Mips/micromips-b-range.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT:    1e:	fb fd 00 00 	sw	$ra, 0($sp)
 ; CHECK-NEXT:    22:	41 a1 00 01 	lui	$1, 1
 ; CHECK-NEXT:    26:	40 60 00 02 	bal	8 <foo+0x2e>
-; CHECK-NEXT:    2a:	30 21 04 68 	addiu	$1, $1, 1128
+; CHECK-NEXT:    2a:	30 21 04 69 	addiu	$1, $1, 1129
 ; CHECK-NEXT:    2e:	00 3f 09 50 	addu	$1, $ra, $1
 ; CHECK-NEXT:    32:	ff fd 00 00 	lw	$ra, 0($sp)
 ; CHECK-NEXT:    36:	00 01 0f 3c 	jr	$1
@@ -27,7 +27,7 @@
 ; CHECK-NEXT:    56:	fb fd 00 00 	sw	$ra, 0($sp)
 ; CHECK-NEXT:    5a:	41 a1 00 01 	lui	$1, 1
 ; CHECK-NEXT:    5e:	40 60 00 02 	bal	8 <foo+0x66>
-; CHECK-NEXT:    62:	30 21 04 5c 	addiu	$1, $1, 1116
+; CHECK-NEXT:    62:	30 21 04 5d 	addiu	$1, $1, 1117
 ; CHECK-NEXT:    66:	00 3f 09 50 	addu	$1, $ra, $1
 ; CHECK-NEXT:    6a:	ff fd 00 00 	lw	$ra, 0($sp)
 ; CHECK-NEXT:    6e:	00 01 0f 3c 	jr	$1
@@ -39,7 +39,7 @@
 ; CHECK-NEXT:    86:	fb fd 00 00 	sw	$ra, 0($sp)
 ; CHECK-NEXT:    8a:	41 a1 00 01 	lui	$1, 1
 ; CHECK-NEXT:    8e:	40 60 00 02 	bal	8 <foo+0x96>
-; CHECK-NEXT:    92:	30 21 04 2c 	addiu	$1, $1, 1068
+; CHECK-NEXT:    92:	30 21 04 2d 	addiu	$1, $1, 1069
 ; CHECK-NEXT:    96:	00 3f 09 50 	addu	$1, $ra, $1
 ; CHECK-NEXT:    9a:	ff fd 00 00 	lw	$ra, 0($sp)
 ; CHECK-NEXT:    9e:	00 01 0f 3c 	jr	$1
@@ -51,7 +51,7 @@
 ; CHECK-NEXT: 10476:	fb fd 00 00 	sw	$ra, 0($sp)
 ; CHECK-NEXT: 1047a:	41 a1 00 01 	lui	$1, 1
 ; CHECK-NEXT: 1047e:	40 60 00 02 	bal	8 <foo+0x10486>
-; CHECK-NEXT: 10482:	30 21 04 00 	addiu	$1, $1, 1024
+; CHECK-NEXT: 10482:	30 21 04 01 	addiu	$1, $1, 1025
 ; CHECK-NEXT: 10486:	00 3f 09 50 	addu	$1, $ra, $1
 ; CHECK-NEXT: 1048a:	ff fd 00 00 	lw	$ra, 0($sp)
 ; CHECK-NEXT: 1048e:	00 01 0f 3c 	jr	$1
diff --git a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
index 9105e9249d4f02..8544a75c50a628 100644
--- a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
+++ b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR5,MIPS32-O32,MIPS32R5-O32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N32,MIPS64R5-N32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N64,MIPS64R5-N64
 
 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR6,MIPSR6-O32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N32,MIPSR6-N32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N64,MIPSR6-N64
 
 
diff --git a/llvm/test/CodeGen/Mips/o32_cc_byval.ll b/llvm/test/CodeGen/Mips/o32_cc_byval.ll
index 19eb80b79bafe8..d9951ebeaf3a99 100644
--- a/llvm/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/llvm/test/CodeGen/Mips/o32_cc_byval.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic \
+; RUN:     -mips-jalr-reloc=false < %s | FileCheck %s
 
 %0 = type { i8, i16, i32, i64, double, i32, [4 x i8] }
 %struct.S1 = type { i8, i16, i32, i64, double, i32 }
diff --git a/llvm/test/CodeGen/Mips/reloc-jalr.ll b/llvm/test/CodeGen/Mips/reloc-jalr.ll
new file mode 100644
index 00000000000000..f8fd903110045d
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/reloc-jalr.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips32r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R6,TAILCALL-32R6
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips64r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R6,TAILCALL-64R6
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips32r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips64r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mattr=+micromips -mcpu=mips32r2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM,TAILCALL-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mattr=+micromips -mcpu=mips32r6 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips32r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R6
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips64r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R6
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips32r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips64r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mattr=+micromips -mcpu=mips32r2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mattr=+micromips -mcpu=mips32r6 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O0 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \
+; RUN:     -O0 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -mips-tail-calls=1 \
+; RUN:     -O2 -relocation-model=static < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static \
+; RUN:     -O0 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+define internal void @foo() noinline {
+entry:
+  ret void
+}
+
+define void @checkCall() {
+entry:
+; ALL-LABEL: checkCall:
+  call void @foo()
+;	JALR-32R2: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; JALR-32R2-NEXT: [[TMPLABEL]]:
+;	JALR-32R2-NEXT: 	jalr	$25
+
+;	JALR-64R2: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; JALR-64R2-NEXT: [[TMPLABEL]]:
+;	JALR-64R2-NEXT: 	jalr	$25
+
+;	JALR-MM: 	.reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo
+; JALR-MM-NEXT: [[TMPLABEL]]:
+;	JALR-MM-NEXT: 	jalr	$25
+
+;	JALR-32R6: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; JALR-32R6-NEXT: [[TMPLABEL]]:
+;	JALR-32R6-NEXT: 	jalrc	$25
+
+;	JALR-64R6: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; JALR-64R6-NEXT: [[TMPLABEL]]:
+;	JALR-64R6-NEXT: 	jalrc	$25
+
+; NORELOC-NOT: R_MIPS_JALR
+ ret void
+}
+
+define void @checkTailCall() {
+entry:
+; ALL-LABEL: checkTailCall:
+  tail call void @foo()
+;	TAILCALL-32R2: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; TAILCALL-32R2-NEXT: [[TMPLABEL]]:
+;	TAILCALL-32R2-NEXT: 	jr	$25
+
+;	TAILCALL-64R2: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; TAILCALL-64R2-NEXT: [[TMPLABEL]]:
+;	TAILCALL-64R2-NEXT: 	jr	$25
+
+;	TAILCALL-MM: 	.reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo
+; TAILCALL-MM-NEXT: [[TMPLABEL]]:
+;	TAILCALL-MM-NEXT: 	jrc	$25
+
+;	TAILCALL-32R6: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; TAILCALL-32R6-NEXT: [[TMPLABEL]]:
+;	TAILCALL-32R6-NEXT: 	jrc	$25
+
+;	TAILCALL-64R6: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; TAILCALL-64R6-NEXT: [[TMPLABEL]]:
+;	TAILCALL-64R6-NEXT: 	jrc	$25
+
+; NORELOC-NOT: R_MIPS_JALR
+  ret void
+}
diff --git a/llvm/test/CodeGen/Mips/shrink-wrapping.ll b/llvm/test/CodeGen/Mips/shrink-wrapping.ll
index 54ae8699d1c19c..b08d2f1b64678f 100644
--- a/llvm/test/CodeGen/Mips/shrink-wrapping.ll
+++ b/llvm/test/CodeGen/Mips/shrink-wrapping.ll
@@ -9,11 +9,11 @@
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-STATIC
 
 ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=SHRINK-WRAP-PIC
 
 ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=false \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-PIC
 
 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \
@@ -25,11 +25,11 @@
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-STATIC
 
 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=SHRINK-WRAP-64-PIC
 
 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=false \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-PIC
 
 declare void @f(i32 signext)
diff --git a/llvm/test/CodeGen/SystemZ/memcmp-01.ll b/llvm/test/CodeGen/SystemZ/memcmp-01.ll
index ac980e49d60bc1..740a86750dd8a6 100644
--- a/llvm/test/CodeGen/SystemZ/memcmp-01.ll
+++ b/llvm/test/CodeGen/SystemZ/memcmp-01.ll
@@ -16,10 +16,10 @@ define i32 @f1(i8 *%src1, i8 *%src2) {
 ; Check a case where the result is used as an integer.
 define i32 @f2(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f2:
-; CHECK: clc 0(2,%r2), 0(%r3)
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: srl [[REG]], 28
-; CHECK: rll %r2, [[REG]], 31
+; CHECK: clc 0(2,%r3), 0(%r2)
+; CHECK: ipm %r2
+; CHECK: sll %r2, 2
+; CHECK: sra %r2, 30
 ; CHECK: br %r14
   %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
   ret i32 %res
@@ -28,7 +28,7 @@ define i32 @f2(i8 *%src1, i8 *%src2) {
 ; Check a case where the result is tested for equality.
 define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f3:
-; CHECK: clc 0(3,%r2), 0(%r3)
+; CHECK: clc 0(3,%r3), 0(%r2)
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
   %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
@@ -46,7 +46,7 @@ exit:
 ; Check a case where the result is tested for inequality.
 define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f4:
-; CHECK: clc 0(4,%r2), 0(%r3)
+; CHECK: clc 0(4,%r3), 0(%r2)
 ; CHECK-NEXT: blhr %r14
 ; CHECK: br %r14
 entry:
@@ -65,8 +65,8 @@ exit:
 ; Check a case where the result is tested via slt.
 define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f5:
-; CHECK: clc 0(5,%r2), 0(%r3)
-; CHECK-NEXT: blr %r14
+; CHECK: clc 0(5,%r3), 0(%r2)
+; CHECK-NEXT: bhr %r14
 ; CHECK: br %r14
 entry:
   %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
@@ -84,8 +84,8 @@ exit:
 ; Check a case where the result is tested for sgt.
 define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f6:
-; CHECK: clc 0(6,%r2), 0(%r3)
-; CHECK-NEXT: bhr %r14
+; CHECK: clc 0(6,%r3), 0(%r2)
+; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
   %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
@@ -104,10 +104,10 @@ exit:
 ; an integer and for branching.
 define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f7:
-; CHECK: clc 0(256,%r2), 0(%r3)
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: srl [[REG]], 28
-; CHECK: rll %r2, [[REG]], 31
+; CHECK: clc 0(256,%r3), 0(%r2)
+; CHECK: ipm %r2
+; CHECK: sll %r2, 2
+; CHECK: sra %r2, 30
 ; CHECK: blr %r14
 ; CHECK: br %r14
 entry:
@@ -126,9 +126,9 @@ exit:
 ; 257 bytes needs two CLCs.
 define i32 @f8(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f8:
-; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: clc 0(256,%r3), 0(%r2)
 ; CHECK: jlh [[LABEL:\..*]]
-; CHECK: clc 256(1,%r2), 256(%r3)
+; CHECK: clc 256(1,%r3), 256(%r2)
 ; CHECK: [[LABEL]]:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: br %r14
@@ -139,11 +139,11 @@ define i32 @f8(i8 *%src1, i8 *%src2) {
 ; Test a comparison of 258 bytes in which the CC result can be used directly.
 define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f9:
-; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: clc 0(256,%r3), 0(%r2)
 ; CHECK: jlh [[LABEL:\..*]]
-; CHECK: clc 256(1,%r2), 256(%r3)
+; CHECK: clc 256(1,%r3), 256(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK-NEXT: blr %r14
+; CHECK-NEXT: bhr %r14
 ; CHECK: br %r14
 entry:
   %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
@@ -161,9 +161,9 @@ exit:
 ; Test the largest size that can use two CLCs.
 define i32 @f10(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f10:
-; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: clc 0(256,%r3), 0(%r2)
 ; CHECK: jlh [[LABEL:\..*]]
-; CHECK: clc 256(256,%r2), 256(%r3)
+; CHECK: clc 256(256,%r3), 256(%r2)
 ; CHECK: [[LABEL]]:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: br %r14
@@ -174,11 +174,11 @@ define i32 @f10(i8 *%src1, i8 *%src2) {
 ; Test the smallest size that needs 3 CLCs.
 define i32 @f11(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f11:
-; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: clc 0(256,%r3), 0(%r2)
 ; CHECK: jlh [[LABEL:\..*]]
-; CHECK: clc 256(256,%r2), 256(%r3)
+; CHECK: clc 256(256,%r3), 256(%r2)
 ; CHECK: jlh [[LABEL]]
-; CHECK: clc 512(1,%r2), 512(%r3)
+; CHECK: clc 512(1,%r3), 512(%r2)
 ; CHECK: [[LABEL]]:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: br %r14
@@ -189,11 +189,11 @@ define i32 @f11(i8 *%src1, i8 *%src2) {
 ; Test the largest size than can use 3 CLCs.
 define i32 @f12(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f12:
-; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: clc 0(256,%r3), 0(%r2)
 ; CHECK: jlh [[LABEL:\..*]]
-; CHECK: clc 256(256,%r2), 256(%r3)
+; CHECK: clc 256(256,%r3), 256(%r2)
 ; CHECK: jlh [[LABEL]]
-; CHECK: clc 512(256,%r2), 512(%r3)
+; CHECK: clc 512(256,%r3), 512(%r2)
 ; CHECK: [[LABEL]]:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: br %r14
@@ -207,12 +207,12 @@ define i32 @f13(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f13:
 ; CHECK: lghi [[COUNT:%r[0-5]]], 3
 ; CHECK: [[LOOP:.L[^:]*]]:
-; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: clc 0(256,%r3), 0(%r2)
 ; CHECK: jlh [[LABEL:\..*]]
 ; CHECK-DAG: la %r2, 256(%r2)
 ; CHECK-DAG: la %r3, 256(%r3)
 ; CHECK: brctg [[COUNT]], [[LOOP]]
-; CHECK: clc 0(1,%r2), 0(%r3)
+; CHECK: clc 0(1,%r3), 0(%r2)
 ; CHECK: [[LABEL]]:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: br %r14
diff --git a/llvm/test/CodeGen/SystemZ/strcmp-01.ll b/llvm/test/CodeGen/SystemZ/strcmp-01.ll
index ef05d832e73e95..a3e3bbbb23be7f 100644
--- a/llvm/test/CodeGen/SystemZ/strcmp-01.ll
+++ b/llvm/test/CodeGen/SystemZ/strcmp-01.ll
@@ -9,12 +9,12 @@ define i32 @f1(i8 *%src1, i8 *%src2) {
 ; CHECK-LABEL: f1:
 ; CHECK: lhi %r0, 0
 ; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: clst %r2, %r3
+; CHECK: clst %r3, %r2
 ; CHECK-NEXT: jo [[LABEL]]
 ; CHECK-NEXT: %bb.{{[0-9]+}}
-; CHECK-NEXT: ipm [[REG:%r[0-5]]]
-; CHECK: srl [[REG]], 28
-; CHECK: rll %r2, [[REG]], 31
+; CHECK-NEXT: ipm %r2
+; CHECK: sll %r2, 2
+; CHECK: sra %r2, 30
 ; CHECK: br %r14
   %res = call i32 @strcmp(i8 *%src1, i8 *%src2)
   ret i32 %res
@@ -25,7 +25,7 @@ define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f2:
 ; CHECK: lhi %r0, 0
 ; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: clst %r2, %r3
+; CHECK: clst %r3, %r2
 ; CHECK-NEXT: jo [[LABEL]]
 ; CHECK-NEXT: %bb.{{[0-9]+}}
 ; CHECK-NEXT: ber %r14
@@ -48,12 +48,12 @@ define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
 ; CHECK-LABEL: f3:
 ; CHECK: lhi %r0, 0
 ; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: clst %r2, %r3
+; CHECK: clst %r3, %r2
 ; CHECK-NEXT: jo [[LABEL]]
 ; CHECK-NEXT: %bb.{{[0-9]+}}
-; CHECK-NEXT: ipm [[REG:%r[0-5]]]
-; CHECK: srl [[REG]], 28
-; CHECK: rll %r2, [[REG]], 31
+; CHECK-NEXT: ipm %r2
+; CHECK: sll %r2, 2
+; CHECK: sra %r2, 30
 ; CHECK: blr %r14
 ; CHECK: br %r14
 entry:
diff --git a/llvm/test/CodeGen/WebAssembly/call.ll b/llvm/test/CodeGen/WebAssembly/call.ll
index db666a6c36686e..77f17c850edab4 100644
--- a/llvm/test/CodeGen/WebAssembly/call.ll
+++ b/llvm/test/CodeGen/WebAssembly/call.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-temporary-workarounds=false -mattr=+sign-ext,+simd128 | FileCheck %s
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel -fast-isel-abort=1 -wasm-temporary-workarounds=false -mattr=+sign-ext,+simd128 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -mattr=+sign-ext,+simd128 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel -fast-isel-abort=1 -mattr=+sign-ext,+simd128 | FileCheck %s
 
 ; Test that basic call operations assemble as expected.
 
diff --git a/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll b/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll
index 515c5703d86c06..b542276e068f6d 100644
--- a/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll
+++ b/llvm/test/CodeGen/WebAssembly/function-bitcasts-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false -wasm-keep-registers | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -wasm-keep-registers | FileCheck %s
 
 ; Test that function pointer casts casting away varargs are replaced with
 ; wrappers.
diff --git a/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll b/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll
index a779cbe414225e..813e8420ae54bb 100644
--- a/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll
+++ b/llvm/test/CodeGen/WebAssembly/function-bitcasts.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions -wasm-temporary-workarounds=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s
 
 ; Test that function pointer casts are replaced with wrappers.
 
diff --git a/llvm/test/CodeGen/WebAssembly/import-module.ll b/llvm/test/CodeGen/WebAssembly/import-module.ll
index a8202a77acb5ae..0cf0f2f25e0b93 100644
--- a/llvm/test/CodeGen/WebAssembly/import-module.ll
+++ b/llvm/test/CodeGen/WebAssembly/import-module.ll
@@ -12,8 +12,9 @@ define void @test() {
 declare void @foo() #0
 declare void @plain()
 
-attributes #0 = { "wasm-import-module"="bar" }
+attributes #0 = { "wasm-import-module"="bar" "wasm-import-name"="qux" }
 
 ; CHECK-NOT: .import_module plain
 ;     CHECK: .import_module foo, bar
+;     CHECK: .import_name foo, qux
 ; CHECK-NOT: .import_module plain
diff --git a/llvm/test/CodeGen/WebAssembly/main-declaration.ll b/llvm/test/CodeGen/WebAssembly/main-declaration.ll
index f9d68db2bae8e7..544f5588c5043c 100644
--- a/llvm/test/CodeGen/WebAssembly/main-declaration.ll
+++ b/llvm/test/CodeGen/WebAssembly/main-declaration.ll
@@ -1,20 +1,18 @@
-; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
 
 ; Test main functions with alternate signatures.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-declare void @main()
+declare i32 @main()
 
-define void @foo() {
-  call void @main()
-  ret void
+define i32 @foo() {
+  %t = call i32 @main()
+  ret i32 %t
 }
 
-; CHECK-NOT:   __original_main
 ; CHECK-LABEL: foo:
-; CHECK-NEXT:    .functype foo () -> ()
-; CHECK-NEXT:    call main@FUNCTION
+; CHECK-NEXT:    .functype foo () -> (i32)
+; CHECK-NEXT:    call __original_main@FUNCTION
 ; CHECK-NEXT:    end_function
-; CHECK-NOT:   __original_main
diff --git a/llvm/test/CodeGen/WebAssembly/main-no-args.ll b/llvm/test/CodeGen/WebAssembly/main-no-args.ll
index 0bc46717d97be3..97023e269454b6 100644
--- a/llvm/test/CodeGen/WebAssembly/main-no-args.ll
+++ b/llvm/test/CodeGen/WebAssembly/main-no-args.ll
@@ -1,18 +1,19 @@
-; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
 
 ; Test main functions with alternate signatures.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-define void @main() {
-  ret void
+define i32 @main() {
+  ret i32 0
 }
 
-; CHECK-LABEL: .L__original_main:
-; CHECK-NEXT: .functype .L__original_main () -> ()
+; CHECK-LABEL: __original_main:
+; CHECK-NEXT: .functype __original_main () -> (i32)
+; CHECK-NEXT: i32.const 0
 ; CHECK-NEXT: end_function
 
 ; CHECK-LABEL: main:
 ; CHECK-NEXT: .functype main (i32, i32) -> (i32)
-; CHECK:      call .L__original_main@FUNCTION
+; CHECK:      call __original_main@FUNCTION
diff --git a/llvm/test/CodeGen/WebAssembly/main-three-args.ll b/llvm/test/CodeGen/WebAssembly/main-three-args.ll
new file mode 100644
index 00000000000000..77b3e5b8c30631
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/main-three-args.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that main function with a non-standard third argument is
+; not wrapped.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+define i32 @main(i32 %a, i8** %b, i8** %c) {
+  ret i32 0
+}
+
+; CHECK-LABEL: main:
+; CHECK-NEXT: .functype main (i32, i32, i32) -> (i32)
+
+; CHECK-NOT: __original_main:
diff --git a/llvm/test/CodeGen/WebAssembly/main-with-args.ll b/llvm/test/CodeGen/WebAssembly/main-with-args.ll
index d4a11ef14d46e5..205cb133f8ca68 100644
--- a/llvm/test/CodeGen/WebAssembly/main-with-args.ll
+++ b/llvm/test/CodeGen/WebAssembly/main-with-args.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -wasm-temporary-workarounds=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
 
 ; Test that main function with expected signature is not wrapped
 
diff --git a/llvm/test/CodeGen/X86/and-su.ll b/llvm/test/CodeGen/X86/and-su.ll
index 55bfa8def44f2e..de384368bfca5f 100644
--- a/llvm/test/CodeGen/X86/and-su.ll
+++ b/llvm/test/CodeGen/X86/and-su.ll
@@ -49,7 +49,7 @@ define fastcc double @bar(i32 %hash, double %x, double %y) nounwind {
 ; CHECK-NEXT:    fchs
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:  .LBB1_5: # %bb16
-; CHECK-NEXT:    faddp %st(1)
+; CHECK-NEXT:    faddp %st, %st(1)
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 617e198bce4c1b..de20c07d41399a 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1183,38 +1183,58 @@ define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
 define <4 x i32> @test_x86_avx2_psllv_d_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psllv_d_const:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X86-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
 ; X86-AVX512VL:       # %bb.0:
-; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u>
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT:    vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
+; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X86-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_d_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X64-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u>
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X64-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
   %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1,  i32 -1>)
@@ -1241,38 +1261,62 @@ define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 define <8 x i32> @test_x86_avx2_psllv_d_256_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8]
-; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X86-AVX512VL:       # %bb.0:
-; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0>
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT:    vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8]
-; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0>
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
   %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 -1>)
@@ -1316,14 +1360,20 @@ define <2 x i64> @test_x86_avx2_psllv_q_const() {
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_q_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00]
-; X64-AVX-NEXT:    vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [4,18446744073709551615]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00]
-; X64-AVX512VL-NEXT:    vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,18446744073709551615]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
   ret <2 x i64> %res
@@ -1366,15 +1416,19 @@ define <4 x i64> @test_x86_avx2_psllv_q_256_const() {
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [8,8,8,8]
-; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8]
-; X64-AVX512VL-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,18446744073709551615]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
@@ -1400,38 +1454,62 @@ define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
 define <4 x i32> @test_x86_avx2_psrlv_d_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
 ; X86-AVX512VL:       # %bb.0:
-; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u>
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT:    vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
+; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u>
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
   %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1,  i32 -1>)
@@ -1458,38 +1536,62 @@ define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 define <8 x i32> @test_x86_avx2_psrlv_d_256_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
-; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X86-AVX512VL:       # %bb.0:
-; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0>
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT:    vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
-; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0>
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
   %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 -1>)
@@ -1534,14 +1636,20 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() {
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
-; X64-AVX-NEXT:    vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [4,4]
+; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
-; X64-AVX512VL-NEXT:    vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4]
+; X64-AVX512VL-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
   ret <2 x i64> %res
@@ -1585,15 +1693,19 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() {
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [2,2,2,2]
-; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
+; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2]
-; X64-AVX512VL-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4]
+; X64-AVX512VL-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT:    vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index a0e8393309de0b..6a1d9d3da91709 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -5229,8 +5229,11 @@ define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 define <16 x i32> @test_x86_avx512_psllv_d_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psllv_d_512_const:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,u,0,32,5,u,0,80,3,u,0>
-; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0]
+; CHECK-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295]
+; CHECK-NEXT:    vpsllvd {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
   %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 -1>)
@@ -5277,8 +5280,11 @@ define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
 define <8 x i64> @test_x86_avx512_psllv_q_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psllv_q_512_const:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,18446744056529682432,0>
-; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0]
+; CHECK-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615]
+; CHECK-NEXT:    vpsllvq {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1,i64 2, i64 0, i64 34, i64 -2>)
   %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1,  i64 1, i64 1, i64 1, i64 -1>)
@@ -5397,8 +5403,11 @@ define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 define <16 x i32> @test_x86_avx512_psrlv_d_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psrlv_d_512_const:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,u,0,0,5,u,0,0,3,u,0>
-; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0]
+; CHECK-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295]
+; CHECK-NEXT:    vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
   %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 -1  >)
@@ -5445,8 +5454,11 @@ define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
 define <8 x i64> @test_x86_avx512_psrlv_q_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psrlv_q_512_const:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,1073741823,0>
-; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0]
+; CHECK-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615]
+; CHECK-NEXT:    vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1,i64 2, i64 0, i64 34, i64 -2>)
   %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1,  i64 1, i64 1, i64 1, i64 -1>)
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
index 1136a3a50693e8..985860166a3f23 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -508,17 +508,17 @@ define x86_regcallcc double @test_CallargRetDouble(double %a)  {
 define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
 ; X32-LABEL: test_argRetf80:
 ; X32:       # %bb.0:
-; X32-NEXT:    fadd %st(0), %st(0)
+; X32-NEXT:    fadd %st, %st(0)
 ; X32-NEXT:    retl
 ;
 ; WIN64-LABEL: test_argRetf80:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    fadd %st(0), %st(0)
+; WIN64-NEXT:    fadd %st, %st(0)
 ; WIN64-NEXT:    retq
 ;
 ; LINUXOSX64-LABEL: test_argRetf80:
 ; LINUXOSX64:       # %bb.0:
-; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
+; LINUXOSX64-NEXT:    fadd %st, %st(0)
 ; LINUXOSX64-NEXT:    retq
   %r0 = fadd x86_fp80 %a0, %a0
   ret x86_fp80 %r0
@@ -529,9 +529,9 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
 ; X32-LABEL: test_CallargRetf80:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %esp
-; X32-NEXT:    fadd %st(0), %st(0)
+; X32-NEXT:    fadd %st, %st(0)
 ; X32-NEXT:    calll _test_argRetf80
-; X32-NEXT:    fadd %st(0), %st(0)
+; X32-NEXT:    fadd %st, %st(0)
 ; X32-NEXT:    popl %esp
 ; X32-NEXT:    retl
 ;
@@ -540,9 +540,9 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
 ; WIN64-NEXT:    pushq %rsp
 ; WIN64-NEXT:    .seh_pushreg 4
 ; WIN64-NEXT:    .seh_endprologue
-; WIN64-NEXT:    fadd %st(0), %st(0)
+; WIN64-NEXT:    fadd %st, %st(0)
 ; WIN64-NEXT:    callq test_argRetf80
-; WIN64-NEXT:    fadd %st(0), %st(0)
+; WIN64-NEXT:    fadd %st, %st(0)
 ; WIN64-NEXT:    popq %rsp
 ; WIN64-NEXT:    retq
 ; WIN64-NEXT:    .seh_handlerdata
@@ -554,9 +554,9 @@ define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
 ; LINUXOSX64-NEXT:    pushq %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
-; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
+; LINUXOSX64-NEXT:    fadd %st, %st(0)
 ; LINUXOSX64-NEXT:    callq test_argRetf80
-; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
+; LINUXOSX64-NEXT:    fadd %st, %st(0)
 ; LINUXOSX64-NEXT:    popq %rsp
 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 8bcdc5d5c0298c..a220ab0ad73495 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1158,15 +1158,19 @@ declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind
 define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
 ; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X86-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
 ; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X64-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
 ; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,  i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
@@ -1377,15 +1381,19 @@ declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind r
 define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
 ; X86-LABEL: test_x86_avx512_psllv_w_512_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X86-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
 ; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_x86_avx512_psllv_w_512_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X64-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
 ; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4,  i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,  i16 1, i16 1, i16 -1>)
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 10ba0e646055f2..a01252f7d494b7 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2021,16 +2021,20 @@ define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1
 define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2]
-; X86-NEXT:    # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X86-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2]
-; X64-NEXT:    # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X64-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
   ret <8 x i16> %res
@@ -2041,16 +2045,20 @@ declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)
 define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X86-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X86-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X64-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X64-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
   ret <16 x i16> %res
@@ -2195,16 +2203,20 @@ define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1
 define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8]
-; X86-NEXT:    # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X86-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8]
-; X64-NEXT:    # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X64-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
   ret <8 x i16> %res
@@ -2216,16 +2228,20 @@ declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)
 define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X86-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X86-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
+; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X64-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X64-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
+; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
   ret <16 x i16> %res
diff --git a/llvm/test/CodeGen/X86/debug-loclists.ll b/llvm/test/CodeGen/X86/debug-loclists.ll
index 20bc0c40378b90..0c2ab3dfad5a90 100644
--- a/llvm/test/CodeGen/X86/debug-loclists.ll
+++ b/llvm/test/CodeGen/X86/debug-loclists.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:               DW_AT_type [DW_FORM_ref4]     (cu + 0x0040 => {0x00000040} "A")
 
 ; CHECK:      .debug_loclists contents:
-; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000017, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000015, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 ; CHECK-NEXT: 0x00000000:
 ; CHECK-NEXT:  [0x0000000000000000, 0x0000000000000004): DW_OP_breg5 RDI+0
 ; CHECK-NEXT:  [0x0000000000000004, 0x0000000000000012): DW_OP_breg3 RBX+0
@@ -32,13 +32,13 @@
 ; ASM-NEXT:  .byte 4                               # DW_LLE_offset_pair
 ; ASM-NEXT:  .uleb128 .Lfunc_begin0-.Lfunc_begin0  # starting offset
 ; ASM-NEXT:  .uleb128 .Ltmp0-.Lfunc_begin0         # ending offset
-; ASM-NEXT:  .short 2                              # Loc expr size
+; ASM-NEXT:  .byte 2                               # Loc expr size
 ; ASM-NEXT:  .byte 117                             # DW_OP_breg5
 ; ASM-NEXT:  .byte 0                               # 0
 ; ASM-NEXT:  .byte 4                               # DW_LLE_offset_pair
 ; ASM-NEXT:  .uleb128 .Ltmp0-.Lfunc_begin0         # starting offset
 ; ASM-NEXT:  .uleb128 .Ltmp1-.Lfunc_begin0         # ending offset
-; ASM-NEXT:  .short 2                              # Loc expr size
+; ASM-NEXT:  .byte 2                               # Loc expr size
 ; ASM-NEXT:  .byte 115                             # DW_OP_breg3
 ; ASM-NEXT:  .byte 0                               # 0
 ; ASM-NEXT:  .byte 0                               # DW_LLE_end_of_list
diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops.ll
index b77a91fafd2cd8..a30dc22a0d9c02 100644
--- a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll
+++ b/llvm/test/CodeGen/X86/discriminate-mem-ops.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -x86-discriminate-memops  < %s | FileCheck %s
 ;
 ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
 ; int sum(int* arr, int pos1, int pos2) {
diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll
index 35dbb68117ba27..6bb014858d0485 100644
--- a/llvm/test/CodeGen/X86/fcmove.ll
+++ b/llvm/test/CodeGen/X86/fcmove.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
 ; Test that we can generate an fcmove, and also that it passes verification.
 
 ; CHECK-LABEL: cmove_f
-; CHECK: fcmove %st({{[0-7]}}), %st(0)
+; CHECK: fcmove %st({{[0-7]}}), %st
 define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
   %test = icmp eq i32 %c, 0
   %add = fadd x86_fp80 %a, %b
diff --git a/llvm/test/CodeGen/X86/fmf-flags.ll b/llvm/test/CodeGen/X86/fmf-flags.ll
index 4fb2040b338d93..bb883e92dc1104 100644
--- a/llvm/test/CodeGen/X86/fmf-flags.ll
+++ b/llvm/test/CodeGen/X86/fmf-flags.ll
@@ -20,7 +20,7 @@ define float @fast_recip_sqrt(float %x) {
 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-NEXT:    fsqrt
 ; X86-NEXT:    fld1
-; X86-NEXT:    fdivp %st(1)
+; X86-NEXT:    fdivp %st, %st(1)
 ; X86-NEXT:    retl
   %y = call fast float @llvm.sqrt.f32(float %x)
   %z = fdiv fast float 1.0,  %y
@@ -95,7 +95,7 @@ define float @not_so_fast_recip_sqrt(float %x) {
 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-NEXT:    fsqrt
 ; X86-NEXT:    fld1
-; X86-NEXT:    fdiv %st(1)
+; X86-NEXT:    fdiv %st(1), %st
 ; X86-NEXT:    fxch %st(1)
 ; X86-NEXT:    fstps sqrt1
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/fp-cvt.ll b/llvm/test/CodeGen/X86/fp-cvt.ll
index ab3d40ddcaa527..71738cb85d2e42 100644
--- a/llvm/test/CodeGen/X86/fp-cvt.ll
+++ b/llvm/test/CodeGen/X86/fp-cvt.ll
@@ -486,7 +486,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
 ; X64-X87-NEXT:    xorl %eax, %eax
 ; X64-X87-NEXT:    fxch %st(1)
 ; X64-X87-NEXT:    fucompi %st(2)
-; X64-X87-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-X87-NEXT:    fcmovnbe %st(1), %st
 ; X64-X87-NEXT:    fstp %st(1)
 ; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
@@ -509,7 +509,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
 ; X64-SSSE3-NEXT:    xorl %eax, %eax
 ; X64-SSSE3-NEXT:    fxch %st(1)
 ; X64-SSSE3-NEXT:    fucompi %st(2)
-; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st
 ; X64-SSSE3-NEXT:    fstp %st(1)
 ; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
 ; X64-SSSE3-NEXT:    setbe %al
@@ -568,7 +568,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
 ; X64-X87-NEXT:    xorl %eax, %eax
 ; X64-X87-NEXT:    fxch %st(1)
 ; X64-X87-NEXT:    fucompi %st(2)
-; X64-X87-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-X87-NEXT:    fcmovnbe %st(1), %st
 ; X64-X87-NEXT:    fstp %st(1)
 ; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
@@ -591,7 +591,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
 ; X64-SSSE3-NEXT:    xorl %eax, %eax
 ; X64-SSSE3-NEXT:    fxch %st(1)
 ; X64-SSSE3-NEXT:    fucompi %st(2)
-; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st
 ; X64-SSSE3-NEXT:    fstp %st(1)
 ; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
 ; X64-SSSE3-NEXT:    setbe %al
diff --git a/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll b/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll
new file mode 100644
index 00000000000000..34a77ea5fecdbe
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i686 -stop-after=expand-isel-pseudos | FileCheck %s
+
+; CHECK: INLINEASM &"", 1, 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags
+define void @foo() {
+entry:
+  call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"()
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
index 1c36d31c480b58..db6127acb0ae72 100644
--- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -75,20 +75,20 @@ define void @test6(double %A, double %B, double %C, double %D, double %E) nounwi
 ; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    foo %st(0) %st(0)
+; CHECK-NEXT:    foo %st %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstp %st(0)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    bar %st(1) %st(0)
+; CHECK-NEXT:    bar %st(1) %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstp %st(1)
 ; CHECK-NEXT:    fstp %st(0)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    baz %st(1) %st(0)
+; CHECK-NEXT:    baz %st(1) %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstp %st(0)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    baz %st(0)
+; CHECK-NEXT:    baz %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstp %st(0)
 ; CHECK-NEXT:    retl
@@ -117,10 +117,10 @@ define void @testPR4185() {
 ; CHECK-NEXT:    flds LCPI6_0
 ; CHECK-NEXT:    fld %st(0)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retl
 return:
@@ -138,10 +138,10 @@ define void @testPR4185b() {
 ; CHECK:       ## %bb.0: ## %return
 ; CHECK-NEXT:    flds LCPI7_0
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistl %st(0)
+; CHECK-NEXT:    fistl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retl
 return:
@@ -163,7 +163,7 @@ define void @testPR4459(x86_fp80 %a) {
 ; CHECK-NEXT:    fld %st(0)
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstpt (%esp)
 ; CHECK-NEXT:    calll _test3
@@ -191,7 +191,7 @@ define void @testPR4484(x86_fp80 %a) {
 ; CHECK-NEXT:    calll _test1
 ; CHECK-NEXT:    fldt {{[0-9]+}}(%esp) ## 10-byte Folded Reload
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstpt (%esp)
 ; CHECK-NEXT:    calll _test3
@@ -211,18 +211,18 @@ define void @testPR4485(x86_fp80* %a) {
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    fldt (%eax)
 ; CHECK-NEXT:    flds LCPI10_0
-; CHECK-NEXT:    fmul %st(0), %st(1)
+; CHECK-NEXT:    fmul %st, %st(1)
 ; CHECK-NEXT:    flds LCPI10_1
-; CHECK-NEXT:    fmul %st(0), %st(2)
+; CHECK-NEXT:    fmul %st, %st(2)
 ; CHECK-NEXT:    fxch %st(2)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fldt (%eax)
-; CHECK-NEXT:    fmulp %st(1)
-; CHECK-NEXT:    fmulp %st(1)
+; CHECK-NEXT:    fmulp %st, %st(1)
+; CHECK-NEXT:    fmulp %st, %st(1)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fistpl %st(0)
+; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retl
 entry:
@@ -422,7 +422,7 @@ define i32 @PR10602() nounwind ssp {
 ; CHECK-NEXT:    fld %st(0)
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    fcomi %st(1), %st(0); pushf; pop %eax
+; CHECK-NEXT:    fcomi %st(1), %st; pushf; pop %eax
 ; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    fstp %st(0)
 ; CHECK-NEXT:    fstp %st(0)
@@ -505,9 +505,9 @@ define double @test_operand_rewrite() {
 ; CHECK-LABEL: test_operand_rewrite:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    ## InlineAsm Start
-; CHECK-NEXT:    foo %st(0), %st(1)
+; CHECK-NEXT:    foo %st, %st(1)
 ; CHECK-NEXT:    ## InlineAsm End
-; CHECK-NEXT:    fsubp %st(1)
+; CHECK-NEXT:    fsubp %st, %st(1)
 ; CHECK-NEXT:    retl
 entry:
   %0 = tail call { double, double } asm sideeffect "foo $0, $1", "={st},={st(1)},~{dirflag},~{fpsr},~{flags}"()
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll b/llvm/test/CodeGen/X86/insert-prefetch-inline.ll
index 5f8373f9480c96..62c02fa33291d2 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll
+++ b/llvm/test/CodeGen/X86/insert-prefetch-inline.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
 ;
 ; Verify we can insert prefetch instructions in code belonging to inlined
 ; functions.
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
index 004fb56a56eb8a..d0c4ac378b63b2 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
+++ b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
 ; ModuleID = 'prefetch.cc'
 source_filename = "prefetch.cc"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/CodeGen/X86/insert-prefetch.ll b/llvm/test/CodeGen/X86/insert-prefetch.ll
index 9e77772df7746b..fe0fd9877f193e 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch.ll
+++ b/llvm/test/CodeGen/X86/insert-prefetch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
 ;
 ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
 ; int sum(int* arr, int pos1, int pos2) {
diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
index 3e57ef21844267..2a557f2902a21d 100644
--- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-unknown"
 declare void @bar1()
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh
   call void @bar1()
   call void @bar2()
   ret void
diff --git a/llvm/test/CodeGen/X86/pr13577.ll b/llvm/test/CodeGen/X86/pr13577.ll
index e0e90f81bc42c3..3f9e2f953bae37 100644
--- a/llvm/test/CodeGen/X86/pr13577.ll
+++ b/llvm/test/CodeGen/X86/pr13577.ll
@@ -14,7 +14,7 @@ define x86_fp80 @foo(x86_fp80 %a) {
 ; CHECK-NEXT:    testb $-128, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    flds {{.*}}(%rip)
 ; CHECK-NEXT:    flds {{.*}}(%rip)
-; CHECK-NEXT:    fcmovne %st(1), %st(0)
+; CHECK-NEXT:    fcmovne %st(1), %st
 ; CHECK-NEXT:    fstp %st(1)
 ; CHECK-NEXT:    retq
   %1 = tail call x86_fp80 @copysignl(x86_fp80 0xK7FFF8000000000000000, x86_fp80 %a) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll
index 63edae044f87cb..9aa28384f4e81d 100644
--- a/llvm/test/CodeGen/X86/pr33349.ll
+++ b/llvm/test/CodeGen/X86/pr33349.ll
@@ -19,18 +19,18 @@ target triple = "x86_64-unknown-linux-gnu"
 ; KNL-NEXT:    fld1
 ; KNL-NEXT:    fldz
 ; KNL-NEXT:    fld %st(0)
-; KNL-NEXT:    fcmovne %st(2), %st(0)
+; KNL-NEXT:    fcmovne %st(2), %st
 ; KNL-NEXT:    testb $1, %cl
 ; KNL-NEXT:    fld %st(1)
-; KNL-NEXT:    fcmovne %st(3), %st(0)
+; KNL-NEXT:    fcmovne %st(3), %st
 ; KNL-NEXT:    kmovw %k2, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld %st(2)
-; KNL-NEXT:    fcmovne %st(4), %st(0)
+; KNL-NEXT:    fcmovne %st(4), %st
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fxch %st(3)
-; KNL-NEXT:    fcmovne %st(4), %st(0)
+; KNL-NEXT:    fcmovne %st(4), %st
 ; KNL-NEXT:    fstp %st(4)
 ; KNL-NEXT:    fxch %st(3)
 ; KNL-NEXT:    fstpt (%rdi)
@@ -55,18 +55,18 @@ target triple = "x86_64-unknown-linux-gnu"
 ; SKX-NEXT:    fld1
 ; SKX-NEXT:    fldz
 ; SKX-NEXT:    fld %st(0)
-; SKX-NEXT:    fcmovne %st(2), %st(0)
+; SKX-NEXT:    fcmovne %st(2), %st
 ; SKX-NEXT:    testb $1, %cl
 ; SKX-NEXT:    fld %st(1)
-; SKX-NEXT:    fcmovne %st(3), %st(0)
+; SKX-NEXT:    fcmovne %st(3), %st
 ; SKX-NEXT:    kmovd %k2, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld %st(2)
-; SKX-NEXT:    fcmovne %st(4), %st(0)
+; SKX-NEXT:    fcmovne %st(4), %st
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fxch %st(3)
-; SKX-NEXT:    fcmovne %st(4), %st(0)
+; SKX-NEXT:    fcmovne %st(4), %st
 ; SKX-NEXT:    fstp %st(4)
 ; SKX-NEXT:    fxch %st(3)
 ; SKX-NEXT:    fstpt (%rdi)
diff --git a/llvm/test/CodeGen/X86/pr34080.ll b/llvm/test/CodeGen/X86/pr34080.ll
index a709a4840e5303..0b23ab7d4b5d79 100644
--- a/llvm/test/CodeGen/X86/pr34080.ll
+++ b/llvm/test/CodeGen/X86/pr34080.ll
@@ -27,7 +27,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; SSE2-NEXT:    movsd %xmm0, -32(%rbp)
 ; SSE2-NEXT:    fsubl -32(%rbp)
 ; SSE2-NEXT:    flds {{.*}}(%rip)
-; SSE2-NEXT:    fmul %st(0), %st(1)
+; SSE2-NEXT:    fmul %st, %st(1)
 ; SSE2-NEXT:    fnstcw -2(%rbp)
 ; SSE2-NEXT:    movzwl -2(%rbp), %eax
 ; SSE2-NEXT:    movw $3199, -2(%rbp) ## imm = 0xC7F
@@ -41,7 +41,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; SSE2-NEXT:    movsd %xmm0, -56(%rbp)
 ; SSE2-NEXT:    movsd %xmm0, -24(%rbp)
 ; SSE2-NEXT:    fsubl -24(%rbp)
-; SSE2-NEXT:    fmulp %st(1)
+; SSE2-NEXT:    fmulp %st, %st(1)
 ; SSE2-NEXT:    fstpl -48(%rbp)
 ; SSE2-NEXT:    popq %rbp
 ; SSE2-NEXT:    retq
@@ -65,12 +65,12 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -64(%rbp)
 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -32(%rbp)
 ; SSE2-SCHEDULE-NEXT:    fsubl -32(%rbp)
-; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
 ; SSE2-SCHEDULE-NEXT:    flds {{.*}}(%rip)
+; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
+; SSE2-SCHEDULE-NEXT:    fmul %st, %st(1)
 ; SSE2-SCHEDULE-NEXT:    movzwl -2(%rbp), %eax
 ; SSE2-SCHEDULE-NEXT:    movw $3199, -2(%rbp) ## imm = 0xC7F
 ; SSE2-SCHEDULE-NEXT:    fldcw -2(%rbp)
-; SSE2-SCHEDULE-NEXT:    fmul %st(0), %st(1)
 ; SSE2-SCHEDULE-NEXT:    movw %ax, -2(%rbp)
 ; SSE2-SCHEDULE-NEXT:    fxch %st(1)
 ; SSE2-SCHEDULE-NEXT:    fistl -12(%rbp)
@@ -80,7 +80,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -56(%rbp)
 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -24(%rbp)
 ; SSE2-SCHEDULE-NEXT:    fsubl -24(%rbp)
-; SSE2-SCHEDULE-NEXT:    fmulp %st(1)
+; SSE2-SCHEDULE-NEXT:    fmulp %st, %st(1)
 ; SSE2-SCHEDULE-NEXT:    fstpl -48(%rbp)
 ; SSE2-SCHEDULE-NEXT:    popq %rbp
 ; SSE2-SCHEDULE-NEXT:    retq
@@ -100,7 +100,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; SSE3-NEXT:    movsd %xmm0, -24(%rbp)
 ; SSE3-NEXT:    fsubl -24(%rbp)
 ; SSE3-NEXT:    flds {{.*}}(%rip)
-; SSE3-NEXT:    fmul %st(0), %st(1)
+; SSE3-NEXT:    fmul %st, %st(1)
 ; SSE3-NEXT:    fld %st(1)
 ; SSE3-NEXT:    fisttpl -8(%rbp)
 ; SSE3-NEXT:    xorps %xmm0, %xmm0
@@ -109,7 +109,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; SSE3-NEXT:    movsd %xmm0, -16(%rbp)
 ; SSE3-NEXT:    fxch %st(1)
 ; SSE3-NEXT:    fsubl -16(%rbp)
-; SSE3-NEXT:    fmulp %st(1)
+; SSE3-NEXT:    fmulp %st, %st(1)
 ; SSE3-NEXT:    fstpl -32(%rbp)
 ; SSE3-NEXT:    popq %rbp
 ; SSE3-NEXT:    retq
@@ -129,7 +129,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; AVX-NEXT:    vmovsd %xmm0, -24(%rbp)
 ; AVX-NEXT:    fsubl -24(%rbp)
 ; AVX-NEXT:    flds {{.*}}(%rip)
-; AVX-NEXT:    fmul %st(0), %st(1)
+; AVX-NEXT:    fmul %st, %st(1)
 ; AVX-NEXT:    fld %st(1)
 ; AVX-NEXT:    fisttpl -8(%rbp)
 ; AVX-NEXT:    vcvtsi2sdl -8(%rbp), %xmm1, %xmm0
@@ -137,7 +137,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
 ; AVX-NEXT:    vmovsd %xmm0, -16(%rbp)
 ; AVX-NEXT:    fxch %st(1)
 ; AVX-NEXT:    fsubl -16(%rbp)
-; AVX-NEXT:    fmulp %st(1)
+; AVX-NEXT:    fmulp %st, %st(1)
 ; AVX-NEXT:    fstpl -32(%rbp)
 ; AVX-NEXT:    popq %rbp
 ; AVX-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll
index 3fe56277fb4b3e..f6b8dec3c3b39d 100644
--- a/llvm/test/CodeGen/X86/pr34177.ll
+++ b/llvm/test/CodeGen/X86/pr34177.ll
@@ -20,17 +20,17 @@ define void @test(<4x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unna
 ; CHECK-NEXT:    fld1
 ; CHECK-NEXT:    fldz
 ; CHECK-NEXT:    fld %st(0)
-; CHECK-NEXT:    fcmove %st(2), %st(0)
+; CHECK-NEXT:    fcmove %st(2), %st
 ; CHECK-NEXT:    cmpq %rax, %rsi
 ; CHECK-NEXT:    fld %st(1)
-; CHECK-NEXT:    fcmove %st(3), %st(0)
+; CHECK-NEXT:    fcmove %st(3), %st
 ; CHECK-NEXT:    cmpq %rdx, %r9
 ; CHECK-NEXT:    fld %st(2)
-; CHECK-NEXT:    fcmove %st(4), %st(0)
+; CHECK-NEXT:    fcmove %st(4), %st
 ; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:    cmpq %r8, %rax
 ; CHECK-NEXT:    fxch %st(3)
-; CHECK-NEXT:    fcmove %st(4), %st(0)
+; CHECK-NEXT:    fcmove %st(4), %st
 ; CHECK-NEXT:    fstp %st(4)
 ; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fstpt 70(%rdi)
@@ -40,15 +40,15 @@ define void @test(<4x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unna
 ; CHECK-NEXT:    fstpt 30(%rdi)
 ; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fstpt 10(%rdi)
-; CHECK-NEXT:    fadd %st(0), %st(0)
+; CHECK-NEXT:    fadd %st, %st(0)
 ; CHECK-NEXT:    fstpt 60(%rdi)
 ; CHECK-NEXT:    fxch %st(1)
-; CHECK-NEXT:    fadd %st(0), %st(0)
+; CHECK-NEXT:    fadd %st, %st(0)
 ; CHECK-NEXT:    fstpt 40(%rdi)
 ; CHECK-NEXT:    fxch %st(1)
-; CHECK-NEXT:    fadd %st(0), %st(0)
+; CHECK-NEXT:    fadd %st, %st(0)
 ; CHECK-NEXT:    fstpt 20(%rdi)
-; CHECK-NEXT:    fadd %st(0), %st(0)
+; CHECK-NEXT:    fadd %st, %st(0)
 ; CHECK-NEXT:    fstpt (%rdi)
   %1 = icmp eq <4 x i64> <i64 0, i64 1, i64 2, i64 3>, %a
   %2 = select <4 x i1> %1, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/pr40529.ll b/llvm/test/CodeGen/X86/pr40529.ll
new file mode 100644
index 00000000000000..9520ac22d7491c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr40529.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64 | FileCheck %s
+
+define x86_fp80 @rem_pio2l_min(x86_fp80 %z) {
+; CHECK-LABEL: rem_pio2l_min:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
+; CHECK-NEXT:    fldcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fistl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fisubl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    flds {{.*}}(%rip)
+; CHECK-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fmul %st, %st(1)
+; CHECK-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
+; CHECK-NEXT:    fldcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fxch %st(1)
+; CHECK-NEXT:    fistl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fisubl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fmulp %st, %st(1)
+; CHECK-NEXT:    retq
+entry:
+  %conv = fptosi x86_fp80 %z to i32
+  %conv1 = sitofp i32 %conv to x86_fp80
+  %sub = fsub x86_fp80 %z, %conv1
+  %mul = fmul x86_fp80 %sub, 0xK40178000000000000000
+  %conv2 = fptosi x86_fp80 %mul to i32
+  %conv3 = sitofp i32 %conv2 to x86_fp80
+  %sub4 = fsub x86_fp80 %mul, %conv3
+  %mul5 = fmul x86_fp80 %sub4, 0xK40178000000000000000
+  ret x86_fp80 %mul5
+}
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index 7ed61f8fdc7d24..92361efa49fac3 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1028,7 +1028,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_32_WIN-NEXT:    xorl %edx, %edx
 ; AVX512_32_WIN-NEXT:    fxch %st(1)
 ; AVX512_32_WIN-NEXT:    fucompi %st(2)
-; AVX512_32_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; AVX512_32_WIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_32_WIN-NEXT:    fstp %st(1)
 ; AVX512_32_WIN-NEXT:    fisttpll (%esp)
 ; AVX512_32_WIN-NEXT:    setbe %dl
@@ -1049,7 +1049,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_32_LIN-NEXT:    xorl %edx, %edx
 ; AVX512_32_LIN-NEXT:    fxch %st(1)
 ; AVX512_32_LIN-NEXT:    fucompi %st(2)
-; AVX512_32_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; AVX512_32_LIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_32_LIN-NEXT:    fstp %st(1)
 ; AVX512_32_LIN-NEXT:    fisttpll (%esp)
 ; AVX512_32_LIN-NEXT:    setbe %dl
@@ -1069,7 +1069,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_64_WIN-NEXT:    xorl %ecx, %ecx
 ; AVX512_64_WIN-NEXT:    fxch %st(1)
 ; AVX512_64_WIN-NEXT:    fucompi %st(2)
-; AVX512_64_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; AVX512_64_WIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_64_WIN-NEXT:    fstp %st(1)
 ; AVX512_64_WIN-NEXT:    fisttpll (%rsp)
 ; AVX512_64_WIN-NEXT:    setbe %cl
@@ -1090,7 +1090,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_64_LIN-NEXT:    xorl %ecx, %ecx
 ; AVX512_64_LIN-NEXT:    fxch %st(1)
 ; AVX512_64_LIN-NEXT:    fucompi %st(2)
-; AVX512_64_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; AVX512_64_LIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_64_LIN-NEXT:    fstp %st(1)
 ; AVX512_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
 ; AVX512_64_LIN-NEXT:    setbe %cl
@@ -1114,7 +1114,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_32_WIN-NEXT:    xorl %edx, %edx
 ; SSE3_32_WIN-NEXT:    fxch %st(1)
 ; SSE3_32_WIN-NEXT:    fucompi %st(2)
-; SSE3_32_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE3_32_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_32_WIN-NEXT:    fstp %st(1)
 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
 ; SSE3_32_WIN-NEXT:    setbe %dl
@@ -1135,7 +1135,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_32_LIN-NEXT:    xorl %edx, %edx
 ; SSE3_32_LIN-NEXT:    fxch %st(1)
 ; SSE3_32_LIN-NEXT:    fucompi %st(2)
-; SSE3_32_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE3_32_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_32_LIN-NEXT:    fstp %st(1)
 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
 ; SSE3_32_LIN-NEXT:    setbe %dl
@@ -1155,7 +1155,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_64_WIN-NEXT:    xorl %eax, %eax
 ; SSE3_64_WIN-NEXT:    fxch %st(1)
 ; SSE3_64_WIN-NEXT:    fucompi %st(2)
-; SSE3_64_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE3_64_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_64_WIN-NEXT:    fstp %st(1)
 ; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
 ; SSE3_64_WIN-NEXT:    setbe %al
@@ -1173,7 +1173,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_64_LIN-NEXT:    xorl %eax, %eax
 ; SSE3_64_LIN-NEXT:    fxch %st(1)
 ; SSE3_64_LIN-NEXT:    fucompi %st(2)
-; SSE3_64_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE3_64_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_64_LIN-NEXT:    fstp %st(1)
 ; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
 ; SSE3_64_LIN-NEXT:    setbe %al
@@ -1194,7 +1194,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_32_WIN-NEXT:    xorl %edx, %edx
 ; SSE2_32_WIN-NEXT:    fxch %st(1)
 ; SSE2_32_WIN-NEXT:    fucompi %st(2)
-; SSE2_32_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE2_32_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_32_WIN-NEXT:    fstp %st(1)
 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -1221,7 +1221,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_32_LIN-NEXT:    xorl %edx, %edx
 ; SSE2_32_LIN-NEXT:    fxch %st(1)
 ; SSE2_32_LIN-NEXT:    fucompi %st(2)
-; SSE2_32_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE2_32_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_32_LIN-NEXT:    fstp %st(1)
 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -1247,7 +1247,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_64_WIN-NEXT:    xorl %eax, %eax
 ; SSE2_64_WIN-NEXT:    fxch %st(1)
 ; SSE2_64_WIN-NEXT:    fucompi %st(2)
-; SSE2_64_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE2_64_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_64_WIN-NEXT:    fstp %st(1)
 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
 ; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
@@ -1271,7 +1271,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_64_LIN-NEXT:    xorl %eax, %eax
 ; SSE2_64_LIN-NEXT:    fxch %st(1)
 ; SSE2_64_LIN-NEXT:    fucompi %st(2)
-; SSE2_64_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE2_64_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_64_LIN-NEXT:    fstp %st(1)
 ; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
 ; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 947c95137206de..9429e1854d44c3 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -293,25 +293,25 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
 ; ATHLON-NEXT:    flds 4(%ecx)
 ; ATHLON-NEXT:    flds (%ecx)
 ; ATHLON-NEXT:    flds (%eax)
-; ATHLON-NEXT:    fmul %st(0), %st(0)
+; ATHLON-NEXT:    fmul %st, %st(0)
 ; ATHLON-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; ATHLON-NEXT:    fxch %st(1)
-; ATHLON-NEXT:    fcmove %st(1), %st(0)
+; ATHLON-NEXT:    fcmove %st(1), %st
 ; ATHLON-NEXT:    fstp %st(1)
 ; ATHLON-NEXT:    flds 4(%eax)
-; ATHLON-NEXT:    fmul %st(0), %st(0)
+; ATHLON-NEXT:    fmul %st, %st(0)
 ; ATHLON-NEXT:    fxch %st(2)
-; ATHLON-NEXT:    fcmove %st(2), %st(0)
+; ATHLON-NEXT:    fcmove %st(2), %st
 ; ATHLON-NEXT:    fstp %st(2)
 ; ATHLON-NEXT:    flds 8(%eax)
-; ATHLON-NEXT:    fmul %st(0), %st(0)
+; ATHLON-NEXT:    fmul %st, %st(0)
 ; ATHLON-NEXT:    fxch %st(3)
-; ATHLON-NEXT:    fcmove %st(3), %st(0)
+; ATHLON-NEXT:    fcmove %st(3), %st
 ; ATHLON-NEXT:    fstp %st(3)
 ; ATHLON-NEXT:    flds 12(%eax)
-; ATHLON-NEXT:    fmul %st(0), %st(0)
+; ATHLON-NEXT:    fmul %st, %st(0)
 ; ATHLON-NEXT:    fxch %st(4)
-; ATHLON-NEXT:    fcmove %st(4), %st(0)
+; ATHLON-NEXT:    fcmove %st(4), %st
 ; ATHLON-NEXT:    fstp %st(4)
 ; ATHLON-NEXT:    fxch %st(3)
 ; ATHLON-NEXT:    fstps 12(%ecx)
@@ -332,13 +332,13 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
 ; MCU-NEXT:    flds 4(%ecx)
 ; MCU-NEXT:    flds 8(%ecx)
 ; MCU-NEXT:    flds 12(%ecx)
-; MCU-NEXT:    fmul %st(0), %st(0)
+; MCU-NEXT:    fmul %st, %st(0)
 ; MCU-NEXT:    fxch %st(1)
-; MCU-NEXT:    fmul %st(0), %st(0)
+; MCU-NEXT:    fmul %st, %st(0)
 ; MCU-NEXT:    fxch %st(2)
-; MCU-NEXT:    fmul %st(0), %st(0)
+; MCU-NEXT:    fmul %st, %st(0)
 ; MCU-NEXT:    fxch %st(3)
-; MCU-NEXT:    fmul %st(0), %st(0)
+; MCU-NEXT:    fmul %st, %st(0)
 ; MCU-NEXT:    testl %eax, %eax
 ; MCU-NEXT:    flds (%edx)
 ; MCU-NEXT:    je .LBB5_2
diff --git a/llvm/test/CodeGen/X86/sincos-opt.ll b/llvm/test/CodeGen/X86/sincos-opt.ll
index b4330ea58ea5a3..b64450863427ba 100644
--- a/llvm/test/CodeGen/X86/sincos-opt.ll
+++ b/llvm/test/CodeGen/X86/sincos-opt.ll
@@ -115,13 +115,13 @@ entry:
 ; GNU_SINCOS: callq sincosl
 ; GNU_SINCOS: fldt 16(%rsp)
 ; GNU_SINCOS: fldt 32(%rsp)
-; GNU_SINCOS: faddp %st(1)
+; GNU_SINCOS: faddp %st, %st(1)
 
 ; GNU_SINCOS_FASTMATH-LABEL: test3:
 ; GNU_SINCOS_FASTMATH: callq sincosl
 ; GNU_SINCOS_FASTMATH: fldt 16(%{{[re]}}sp)
 ; GNU_SINCOS_FASTMATH: fldt 32(%{{[re]}}sp)
-; GNU_SINCOS_FASTMATH: faddp %st(1)
+; GNU_SINCOS_FASTMATH: faddp %st, %st(1)
   %call = tail call x86_fp80 @sinl(x86_fp80 %x) readnone
   %call1 = tail call x86_fp80 @cosl(x86_fp80 %x) readnone
   %add = fadd x86_fp80 %call, %call1
diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll
index 1921f8c75a3d10..599f313b13025f 100644
--- a/llvm/test/CodeGen/X86/x87-schedule.ll
+++ b/llvm/test/CodeGen/X86/x87-schedule.ll
@@ -180,8 +180,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fadd %st(0), %st(1)
-; GENERIC-NEXT:    fadd %st(2)
+; GENERIC-NEXT:    fadd %st, %st(1)
+; GENERIC-NEXT:    fadd %st(2), %st
 ; GENERIC-NEXT:    fadds (%ecx)
 ; GENERIC-NEXT:    faddl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -192,8 +192,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fadd %st(0), %st(1) # sched: [5:5.00]
-; ATOM-NEXT:    fadd %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fadd %st, %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fadd %st(2), %st # sched: [5:5.00]
 ; ATOM-NEXT:    fadds (%ecx) # sched: [5:5.00]
 ; ATOM-NEXT:    faddl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
@@ -204,8 +204,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; SLM-NEXT:    fadd %st(2) # sched: [3:1.00]
+; SLM-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; SLM-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; SLM-NEXT:    fadds (%ecx) # sched: [6:1.00]
 ; SLM-NEXT:    faddl (%eax) # sched: [6:1.00]
 ; SLM-NEXT:    #NO_APP
@@ -216,8 +216,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; SANDY-NEXT:    fadd %st(2) # sched: [3:1.00]
+; SANDY-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; SANDY-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; SANDY-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -228,8 +228,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; HASWELL-NEXT:    fadd %st(2) # sched: [3:1.00]
+; HASWELL-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; HASWELL-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; HASWELL-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; HASWELL-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -240,8 +240,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT:    fadd %st(2) # sched: [3:1.00]
+; BROADWELL-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; BROADWELL-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; BROADWELL-NEXT:    fadds (%ecx) # sched: [9:1.00]
 ; BROADWELL-NEXT:    faddl (%eax) # sched: [9:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -252,8 +252,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fadd %st(2) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; SKYLAKE-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; SKYLAKE-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -264,8 +264,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; SKX-NEXT:    fadd %st(2) # sched: [3:1.00]
+; SKX-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; SKX-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; SKX-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; SKX-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -276,8 +276,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fadd %st(0), %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fadd %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fadd %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fadd %st(2), %st # sched: [5:1.00]
 ; BDVER2-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -288,8 +288,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; BTVER2-NEXT:    fadd %st(2) # sched: [3:1.00]
+; BTVER2-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; BTVER2-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; BTVER2-NEXT:    fadds (%ecx) # sched: [8:1.00]
 ; BTVER2-NEXT:    faddl (%eax) # sched: [8:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -300,8 +300,8 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fadd %st(0), %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT:    fadd %st(2) # sched: [3:1.00]
+; ZNVER1-NEXT:    fadd %st, %st(1) # sched: [3:1.00]
+; ZNVER1-NEXT:    fadd %st(2), %st # sched: [3:1.00]
 ; ZNVER1-NEXT:    fadds (%ecx) # sched: [10:1.00]
 ; ZNVER1-NEXT:    faddl (%eax) # sched: [10:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -316,8 +316,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    faddp %st(1)
-; GENERIC-NEXT:    faddp %st(2)
+; GENERIC-NEXT:    faddp %st, %st(1)
+; GENERIC-NEXT:    faddp %st, %st(2)
 ; GENERIC-NEXT:    fiadds (%ecx)
 ; GENERIC-NEXT:    fiaddl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -328,8 +328,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    faddp %st(1) # sched: [5:5.00]
-; ATOM-NEXT:    faddp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    faddp %st, %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    faddp %st, %st(2) # sched: [5:5.00]
 ; ATOM-NEXT:    fiadds (%ecx) # sched: [5:5.00]
 ; ATOM-NEXT:    fiaddl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
@@ -340,8 +340,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    faddp %st(1) # sched: [3:1.00]
-; SLM-NEXT:    faddp %st(2) # sched: [3:1.00]
+; SLM-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; SLM-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; SLM-NEXT:    fiadds (%ecx) # sched: [6:1.00]
 ; SLM-NEXT:    fiaddl (%eax) # sched: [6:1.00]
 ; SLM-NEXT:    #NO_APP
@@ -352,8 +352,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    faddp %st(1) # sched: [3:1.00]
-; SANDY-NEXT:    faddp %st(2) # sched: [3:1.00]
+; SANDY-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; SANDY-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; SANDY-NEXT:    fiadds (%ecx) # sched: [13:2.00]
 ; SANDY-NEXT:    fiaddl (%eax) # sched: [13:2.00]
 ; SANDY-NEXT:    #NO_APP
@@ -364,8 +364,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    faddp %st(1) # sched: [3:1.00]
-; HASWELL-NEXT:    faddp %st(2) # sched: [3:1.00]
+; HASWELL-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; HASWELL-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; HASWELL-NEXT:    fiadds (%ecx) # sched: [13:2.00]
 ; HASWELL-NEXT:    fiaddl (%eax) # sched: [13:2.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -376,8 +376,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    faddp %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT:    faddp %st(2) # sched: [3:1.00]
+; BROADWELL-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; BROADWELL-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; BROADWELL-NEXT:    fiadds (%ecx) # sched: [12:2.00]
 ; BROADWELL-NEXT:    fiaddl (%eax) # sched: [12:2.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -388,8 +388,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    faddp %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT:    faddp %st(2) # sched: [3:1.00]
+; SKYLAKE-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; SKYLAKE-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; SKYLAKE-NEXT:    fiadds (%ecx) # sched: [13:2.00]
 ; SKYLAKE-NEXT:    fiaddl (%eax) # sched: [13:2.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -400,8 +400,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    faddp %st(1) # sched: [3:1.00]
-; SKX-NEXT:    faddp %st(2) # sched: [3:1.00]
+; SKX-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; SKX-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; SKX-NEXT:    fiadds (%ecx) # sched: [13:2.00]
 ; SKX-NEXT:    fiaddl (%eax) # sched: [13:2.00]
 ; SKX-NEXT:    #NO_APP
@@ -412,8 +412,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    faddp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    faddp %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    faddp %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    faddp %st, %st(2) # sched: [5:1.00]
 ; BDVER2-NEXT:    fiadds (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fiaddl (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -424,8 +424,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    faddp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT:    faddp %st(2) # sched: [3:1.00]
+; BTVER2-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; BTVER2-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; BTVER2-NEXT:    fiadds (%ecx) # sched: [8:1.00]
 ; BTVER2-NEXT:    fiaddl (%eax) # sched: [8:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -436,8 +436,8 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    faddp %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT:    faddp %st(2) # sched: [3:1.00]
+; ZNVER1-NEXT:    faddp %st, %st(1) # sched: [3:1.00]
+; ZNVER1-NEXT:    faddp %st, %st(2) # sched: [3:1.00]
 ; ZNVER1-NEXT:    fiadds (%ecx) # sched: [10:1.00]
 ; ZNVER1-NEXT:    fiaddl (%eax) # sched: [10:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -807,154 +807,154 @@ define void @test_fcmov() optsize {
 ; GENERIC-LABEL: test_fcmov:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fcmovb %st(1), %st(0)
-; GENERIC-NEXT:    fcmovbe %st(1), %st(0)
-; GENERIC-NEXT:    fcmove %st(1), %st(0)
-; GENERIC-NEXT:    fcmovnb %st(1), %st(0)
-; GENERIC-NEXT:    fcmovnbe %st(1), %st(0)
-; GENERIC-NEXT:    fcmovne %st(1), %st(0)
-; GENERIC-NEXT:    fcmovnu %st(1), %st(0)
-; GENERIC-NEXT:    fcmovu %st(1), %st(0)
+; GENERIC-NEXT:    fcmovb %st(1), %st
+; GENERIC-NEXT:    fcmovbe %st(1), %st
+; GENERIC-NEXT:    fcmove %st(1), %st
+; GENERIC-NEXT:    fcmovnb %st(1), %st
+; GENERIC-NEXT:    fcmovnbe %st(1), %st
+; GENERIC-NEXT:    fcmovne %st(1), %st
+; GENERIC-NEXT:    fcmovnu %st(1), %st
+; GENERIC-NEXT:    fcmovu %st(1), %st
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retl
 ;
 ; ATOM-LABEL: test_fcmov:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fcmovb %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmovbe %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmove %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmovnb %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmovnbe %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmovne %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmovnu %st(1), %st(0) # sched: [9:4.50]
-; ATOM-NEXT:    fcmovu %st(1), %st(0) # sched: [9:4.50]
+; ATOM-NEXT:    fcmovb %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmovbe %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmove %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmovnb %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmovnbe %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmovne %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmovnu %st(1), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcmovu %st(1), %st # sched: [9:4.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_fcmov:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fcmovb %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmove %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmovne %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:1.00]
-; SLM-NEXT:    fcmovu %st(1), %st(0) # sched: [3:1.00]
+; SLM-NEXT:    fcmovb %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmovbe %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmove %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmovnb %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmovnbe %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmovne %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmovnu %st(1), %st # sched: [3:1.00]
+; SLM-NEXT:    fcmovu %st(1), %st # sched: [3:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_fcmov:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fcmovb %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmove %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmovne %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:2.00]
-; SANDY-NEXT:    fcmovu %st(1), %st(0) # sched: [3:2.00]
+; SANDY-NEXT:    fcmovb %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmovbe %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmove %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmovnb %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmovnbe %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmovne %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmovnu %st(1), %st # sched: [3:2.00]
+; SANDY-NEXT:    fcmovu %st(1), %st # sched: [3:2.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcmov:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fcmovb %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmove %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmovne %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:1.00]
-; HASWELL-NEXT:    fcmovu %st(1), %st(0) # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovb %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovbe %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmove %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovnb %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovnbe %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovne %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovnu %st(1), %st # sched: [3:1.00]
+; HASWELL-NEXT:    fcmovu %st(1), %st # sched: [3:1.00]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retl # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_fcmov:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fcmovb %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmove %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmovne %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcmovu %st(1), %st(0) # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovb %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovbe %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmove %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovnb %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovnbe %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovne %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovnu %st(1), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcmovu %st(1), %st # sched: [3:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKYLAKE-LABEL: test_fcmov:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fcmovb %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmove %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmovne %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fcmovu %st(1), %st(0) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovb %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovbe %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmove %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovnb %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovnbe %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovne %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovnu %st(1), %st # sched: [3:1.00]
+; SKYLAKE-NEXT:    fcmovu %st(1), %st # sched: [3:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKX-LABEL: test_fcmov:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fcmovb %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmove %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmovne %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:1.00]
-; SKX-NEXT:    fcmovu %st(1), %st(0) # sched: [3:1.00]
+; SKX-NEXT:    fcmovb %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmovbe %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmove %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmovnb %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmovnbe %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmovne %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmovnu %st(1), %st # sched: [3:1.00]
+; SKX-NEXT:    fcmovu %st(1), %st # sched: [3:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retl # sched: [6:0.50]
 ;
 ; BDVER2-LABEL: test_fcmov:
 ; BDVER2:       # %bb.0:
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fcmovb %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmovbe %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmove %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmovnb %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmovnbe %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmovne %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmovnu %st(1), %st(0) # sched: [1:1.00]
-; BDVER2-NEXT:    fcmovu %st(1), %st(0) # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovb %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovbe %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmove %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovnb %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovnbe %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovne %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovnu %st(1), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcmovu %st(1), %st # sched: [1:1.00]
 ; BDVER2-NEXT:    #NO_APP
 ; BDVER2-NEXT:    retl # sched: [5:1.00]
 ;
 ; BTVER2-LABEL: test_fcmov:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fcmovb %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmovbe %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmove %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmovnb %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmovnbe %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmovne %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmovnu %st(1), %st(0) # sched: [3:1.00]
-; BTVER2-NEXT:    fcmovu %st(1), %st(0) # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovb %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovbe %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmove %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovnb %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovnbe %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovne %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovnu %st(1), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcmovu %st(1), %st # sched: [3:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retl # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_fcmov:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fcmovb %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmovbe %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmove %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmovnb %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmovnbe %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmovne %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmovnu %st(1), %st(0) # sched: [100:0.25]
-; ZNVER1-NEXT:    fcmovu %st(1), %st(0) # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovb %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovbe %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmove %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovnb %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovnbe %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovne %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovnu %st(1), %st # sched: [100:0.25]
+; ZNVER1-NEXT:    fcmovu %st(1), %st # sched: [100:0.25]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retl # sched: [1:0.50]
   tail call void asm sideeffect "fcmovb %st(1), %st(0) \0A\09 fcmovbe %st(1), %st(0) \0A\09 fcmove %st(1), %st(0) \0A\09 fcmovnb %st(1), %st(0) \0A\09 fcmovnbe %st(1), %st(0) \0A\09 fcmovne %st(1), %st(0) \0A\09 fcmovnu %st(1), %st(0) \0A\09 fcmovu %st(1), %st(0)", ""() nounwind
@@ -1248,88 +1248,88 @@ define void @test_fcomi_fcomip() optsize {
 ; GENERIC-LABEL: test_fcomi_fcomip:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fcomi %st(3)
-; GENERIC-NEXT:    fcompi %st(3)
+; GENERIC-NEXT:    fcomi %st(3), %st
+; GENERIC-NEXT:    fcompi %st(3), %st
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retl
 ;
 ; ATOM-LABEL: test_fcomi_fcomip:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fcomi %st(3) # sched: [9:4.50]
-; ATOM-NEXT:    fcompi %st(3) # sched: [9:4.50]
+; ATOM-NEXT:    fcomi %st(3), %st # sched: [9:4.50]
+; ATOM-NEXT:    fcompi %st(3), %st # sched: [9:4.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_fcomi_fcomip:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fcomi %st(3) # sched: [3:1.00]
-; SLM-NEXT:    fcompi %st(3) # sched: [3:1.00]
+; SLM-NEXT:    fcomi %st(3), %st # sched: [3:1.00]
+; SLM-NEXT:    fcompi %st(3), %st # sched: [3:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_fcomi_fcomip:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fcomi %st(3) # sched: [3:1.00]
-; SANDY-NEXT:    fcompi %st(3) # sched: [3:1.00]
+; SANDY-NEXT:    fcomi %st(3), %st # sched: [3:1.00]
+; SANDY-NEXT:    fcompi %st(3), %st # sched: [3:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fcomi_fcomip:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fcomi %st(3) # sched: [1:0.50]
-; HASWELL-NEXT:    fcompi %st(3) # sched: [1:0.50]
+; HASWELL-NEXT:    fcomi %st(3), %st # sched: [1:0.50]
+; HASWELL-NEXT:    fcompi %st(3), %st # sched: [1:0.50]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retl # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_fcomi_fcomip:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fcomi %st(3) # sched: [3:1.00]
-; BROADWELL-NEXT:    fcompi %st(3) # sched: [3:1.00]
+; BROADWELL-NEXT:    fcomi %st(3), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fcompi %st(3), %st # sched: [3:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKYLAKE-LABEL: test_fcomi_fcomip:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fcomi %st(3) # sched: [2:1.00]
-; SKYLAKE-NEXT:    fcompi %st(3) # sched: [2:1.00]
+; SKYLAKE-NEXT:    fcomi %st(3), %st # sched: [2:1.00]
+; SKYLAKE-NEXT:    fcompi %st(3), %st # sched: [2:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKX-LABEL: test_fcomi_fcomip:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fcomi %st(3) # sched: [2:1.00]
-; SKX-NEXT:    fcompi %st(3) # sched: [2:1.00]
+; SKX-NEXT:    fcomi %st(3), %st # sched: [2:1.00]
+; SKX-NEXT:    fcompi %st(3), %st # sched: [2:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retl # sched: [6:0.50]
 ;
 ; BDVER2-LABEL: test_fcomi_fcomip:
 ; BDVER2:       # %bb.0:
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fcomi %st(3) # sched: [1:1.00]
-; BDVER2-NEXT:    fcompi %st(3) # sched: [1:1.00]
+; BDVER2-NEXT:    fcomi %st(3), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fcompi %st(3), %st # sched: [1:1.00]
 ; BDVER2-NEXT:    #NO_APP
 ; BDVER2-NEXT:    retl # sched: [5:1.00]
 ;
 ; BTVER2-LABEL: test_fcomi_fcomip:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fcomi %st(3) # sched: [3:1.00]
-; BTVER2-NEXT:    fcompi %st(3) # sched: [3:1.00]
+; BTVER2-NEXT:    fcomi %st(3), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fcompi %st(3), %st # sched: [3:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retl # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_fcomi_fcomip:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fcomi %st(3) # sched: [9:0.50]
-; ZNVER1-NEXT:    fcompi %st(3) # sched: [9:0.50]
+; ZNVER1-NEXT:    fcomi %st(3), %st # sched: [9:0.50]
+; ZNVER1-NEXT:    fcompi %st(3), %st # sched: [9:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retl # sched: [1:0.50]
   tail call void asm sideeffect "fcomi %st(3) \0A\09 fcomip %st(3)", ""() nounwind
@@ -1504,8 +1504,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fdiv %st(0), %st(1)
-; GENERIC-NEXT:    fdiv %st(2)
+; GENERIC-NEXT:    fdiv %st, %st(1)
+; GENERIC-NEXT:    fdiv %st(2), %st
 ; GENERIC-NEXT:    fdivs (%ecx)
 ; GENERIC-NEXT:    fdivl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -1516,8 +1516,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdiv %st(0), %st(1) # sched: [34:17.00]
-; ATOM-NEXT:    fdiv %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdiv %st, %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdiv %st(2), %st # sched: [34:17.00]
 ; ATOM-NEXT:    fdivs (%ecx) # sched: [34:17.00]
 ; ATOM-NEXT:    fdivl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
@@ -1528,8 +1528,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdiv %st(0), %st(1) # sched: [19:17.00]
-; SLM-NEXT:    fdiv %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fdiv %st, %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdiv %st(2), %st # sched: [19:17.00]
 ; SLM-NEXT:    fdivs (%ecx) # sched: [22:17.00]
 ; SLM-NEXT:    fdivl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
@@ -1540,8 +1540,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdiv %st(0), %st(1) # sched: [14:14.00]
-; SANDY-NEXT:    fdiv %st(2) # sched: [14:14.00]
+; SANDY-NEXT:    fdiv %st, %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdiv %st(2), %st # sched: [14:14.00]
 ; SANDY-NEXT:    fdivs (%ecx) # sched: [31:1.00]
 ; SANDY-NEXT:    fdivl (%eax) # sched: [31:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1552,8 +1552,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fdiv %st(0), %st(1) # sched: [24:1.00]
-; HASWELL-NEXT:    fdiv %st(2) # sched: [20:1.00]
+; HASWELL-NEXT:    fdiv %st, %st(1) # sched: [24:1.00]
+; HASWELL-NEXT:    fdiv %st(2), %st # sched: [20:1.00]
 ; HASWELL-NEXT:    fdivs (%ecx) # sched: [31:1.00]
 ; HASWELL-NEXT:    fdivl (%eax) # sched: [31:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -1564,8 +1564,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fdiv %st(0), %st(1) # sched: [15:1.00]
-; BROADWELL-NEXT:    fdiv %st(2) # sched: [20:1.00]
+; BROADWELL-NEXT:    fdiv %st, %st(1) # sched: [15:1.00]
+; BROADWELL-NEXT:    fdiv %st(2), %st # sched: [20:1.00]
 ; BROADWELL-NEXT:    fdivs (%ecx) # sched: [21:1.00]
 ; BROADWELL-NEXT:    fdivl (%eax) # sched: [21:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -1576,8 +1576,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fdiv %st(0), %st(1) # sched: [15:1.00]
-; SKYLAKE-NEXT:    fdiv %st(2) # sched: [20:1.00]
+; SKYLAKE-NEXT:    fdiv %st, %st(1) # sched: [15:1.00]
+; SKYLAKE-NEXT:    fdiv %st(2), %st # sched: [20:1.00]
 ; SKYLAKE-NEXT:    fdivs (%ecx) # sched: [22:1.00]
 ; SKYLAKE-NEXT:    fdivl (%eax) # sched: [22:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -1588,8 +1588,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fdiv %st(0), %st(1) # sched: [15:1.00]
-; SKX-NEXT:    fdiv %st(2) # sched: [20:1.00]
+; SKX-NEXT:    fdiv %st, %st(1) # sched: [15:1.00]
+; SKX-NEXT:    fdiv %st(2), %st # sched: [20:1.00]
 ; SKX-NEXT:    fdivs (%ecx) # sched: [22:1.00]
 ; SKX-NEXT:    fdivl (%eax) # sched: [22:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -1600,8 +1600,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fdiv %st(0), %st(1) # sched: [9:9.50]
-; BDVER2-NEXT:    fdiv %st(2) # sched: [9:9.50]
+; BDVER2-NEXT:    fdiv %st, %st(1) # sched: [9:9.50]
+; BDVER2-NEXT:    fdiv %st(2), %st # sched: [9:9.50]
 ; BDVER2-NEXT:    fdivs (%ecx) # sched: [14:9.50]
 ; BDVER2-NEXT:    fdivl (%eax) # sched: [14:9.50]
 ; BDVER2-NEXT:    #NO_APP
@@ -1612,8 +1612,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fdiv %st(0), %st(1) # sched: [19:19.00]
-; BTVER2-NEXT:    fdiv %st(2) # sched: [19:19.00]
+; BTVER2-NEXT:    fdiv %st, %st(1) # sched: [19:19.00]
+; BTVER2-NEXT:    fdiv %st(2), %st # sched: [19:19.00]
 ; BTVER2-NEXT:    fdivs (%ecx) # sched: [24:19.00]
 ; BTVER2-NEXT:    fdivl (%eax) # sched: [24:19.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -1624,8 +1624,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fdiv %st(0), %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT:    fdiv %st(2) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdiv %st, %st(1) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdiv %st(2), %st # sched: [15:1.00]
 ; ZNVER1-NEXT:    fdivs (%ecx) # sched: [22:1.00]
 ; ZNVER1-NEXT:    fdivl (%eax) # sched: [22:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -1640,8 +1640,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fdivp %st(1)
-; GENERIC-NEXT:    fdivp %st(2)
+; GENERIC-NEXT:    fdivp %st, %st(1)
+; GENERIC-NEXT:    fdivp %st, %st(2)
 ; GENERIC-NEXT:    fidivs (%ecx)
 ; GENERIC-NEXT:    fidivl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -1652,8 +1652,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivp %st(1) # sched: [34:17.00]
-; ATOM-NEXT:    fdivp %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivp %st, %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivp %st, %st(2) # sched: [34:17.00]
 ; ATOM-NEXT:    fidivs (%ecx) # sched: [34:17.00]
 ; ATOM-NEXT:    fidivl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
@@ -1664,8 +1664,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdivp %st(1) # sched: [19:17.00]
-; SLM-NEXT:    fdivp %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fdivp %st, %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdivp %st, %st(2) # sched: [19:17.00]
 ; SLM-NEXT:    fidivs (%ecx) # sched: [22:17.00]
 ; SLM-NEXT:    fidivl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
@@ -1676,8 +1676,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdivp %st(1) # sched: [14:14.00]
-; SANDY-NEXT:    fdivp %st(2) # sched: [14:14.00]
+; SANDY-NEXT:    fdivp %st, %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdivp %st, %st(2) # sched: [14:14.00]
 ; SANDY-NEXT:    fidivs (%ecx) # sched: [34:1.00]
 ; SANDY-NEXT:    fidivl (%eax) # sched: [34:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1688,8 +1688,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fdivp %st(1) # sched: [24:1.00]
-; HASWELL-NEXT:    fdivp %st(2) # sched: [24:1.00]
+; HASWELL-NEXT:    fdivp %st, %st(1) # sched: [24:1.00]
+; HASWELL-NEXT:    fdivp %st, %st(2) # sched: [24:1.00]
 ; HASWELL-NEXT:    fidivs (%ecx) # sched: [34:1.00]
 ; HASWELL-NEXT:    fidivl (%eax) # sched: [34:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -1700,8 +1700,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fdivp %st(1) # sched: [15:1.00]
-; BROADWELL-NEXT:    fdivp %st(2) # sched: [15:1.00]
+; BROADWELL-NEXT:    fdivp %st, %st(1) # sched: [15:1.00]
+; BROADWELL-NEXT:    fdivp %st, %st(2) # sched: [15:1.00]
 ; BROADWELL-NEXT:    fidivs (%ecx) # sched: [24:1.00]
 ; BROADWELL-NEXT:    fidivl (%eax) # sched: [24:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -1712,8 +1712,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fdivp %st(1) # sched: [15:1.00]
-; SKYLAKE-NEXT:    fdivp %st(2) # sched: [15:1.00]
+; SKYLAKE-NEXT:    fdivp %st, %st(1) # sched: [15:1.00]
+; SKYLAKE-NEXT:    fdivp %st, %st(2) # sched: [15:1.00]
 ; SKYLAKE-NEXT:    fidivs (%ecx) # sched: [25:1.00]
 ; SKYLAKE-NEXT:    fidivl (%eax) # sched: [25:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -1724,8 +1724,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fdivp %st(1) # sched: [15:1.00]
-; SKX-NEXT:    fdivp %st(2) # sched: [15:1.00]
+; SKX-NEXT:    fdivp %st, %st(1) # sched: [15:1.00]
+; SKX-NEXT:    fdivp %st, %st(2) # sched: [15:1.00]
 ; SKX-NEXT:    fidivs (%ecx) # sched: [25:1.00]
 ; SKX-NEXT:    fidivl (%eax) # sched: [25:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -1736,8 +1736,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fdivp %st(1) # sched: [9:9.50]
-; BDVER2-NEXT:    fdivp %st(2) # sched: [9:9.50]
+; BDVER2-NEXT:    fdivp %st, %st(1) # sched: [9:9.50]
+; BDVER2-NEXT:    fdivp %st, %st(2) # sched: [9:9.50]
 ; BDVER2-NEXT:    fidivs (%ecx) # sched: [14:9.50]
 ; BDVER2-NEXT:    fidivl (%eax) # sched: [14:9.50]
 ; BDVER2-NEXT:    #NO_APP
@@ -1748,8 +1748,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fdivp %st(1) # sched: [19:19.00]
-; BTVER2-NEXT:    fdivp %st(2) # sched: [19:19.00]
+; BTVER2-NEXT:    fdivp %st, %st(1) # sched: [19:19.00]
+; BTVER2-NEXT:    fdivp %st, %st(2) # sched: [19:19.00]
 ; BTVER2-NEXT:    fidivs (%ecx) # sched: [24:19.00]
 ; BTVER2-NEXT:    fidivl (%eax) # sched: [24:19.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -1760,8 +1760,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fdivp %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT:    fdivp %st(2) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdivp %st, %st(1) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdivp %st, %st(2) # sched: [15:1.00]
 ; ZNVER1-NEXT:    fidivs (%ecx) # sched: [22:1.00]
 ; ZNVER1-NEXT:    fidivl (%eax) # sched: [22:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -1776,8 +1776,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fdivr %st(0), %st(1)
-; GENERIC-NEXT:    fdivr %st(2)
+; GENERIC-NEXT:    fdivr %st, %st(1)
+; GENERIC-NEXT:    fdivr %st(2), %st
 ; GENERIC-NEXT:    fdivrs (%ecx)
 ; GENERIC-NEXT:    fdivrl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -1788,8 +1788,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivr %st(0), %st(1) # sched: [34:17.00]
-; ATOM-NEXT:    fdivr %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivr %st, %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivr %st(2), %st # sched: [34:17.00]
 ; ATOM-NEXT:    fdivrs (%ecx) # sched: [34:17.00]
 ; ATOM-NEXT:    fdivrl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
@@ -1800,8 +1800,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdivr %st(0), %st(1) # sched: [19:17.00]
-; SLM-NEXT:    fdivr %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fdivr %st, %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdivr %st(2), %st # sched: [19:17.00]
 ; SLM-NEXT:    fdivrs (%ecx) # sched: [22:17.00]
 ; SLM-NEXT:    fdivrl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
@@ -1812,8 +1812,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdivr %st(0), %st(1) # sched: [14:14.00]
-; SANDY-NEXT:    fdivr %st(2) # sched: [14:14.00]
+; SANDY-NEXT:    fdivr %st, %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdivr %st(2), %st # sched: [14:14.00]
 ; SANDY-NEXT:    fdivrs (%ecx) # sched: [31:1.00]
 ; SANDY-NEXT:    fdivrl (%eax) # sched: [31:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1824,8 +1824,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fdivr %st(0), %st(1) # sched: [20:1.00]
-; HASWELL-NEXT:    fdivr %st(2) # sched: [24:1.00]
+; HASWELL-NEXT:    fdivr %st, %st(1) # sched: [20:1.00]
+; HASWELL-NEXT:    fdivr %st(2), %st # sched: [24:1.00]
 ; HASWELL-NEXT:    fdivrs (%ecx) # sched: [27:1.00]
 ; HASWELL-NEXT:    fdivrl (%eax) # sched: [27:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -1836,8 +1836,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fdivr %st(0), %st(1) # sched: [20:1.00]
-; BROADWELL-NEXT:    fdivr %st(2) # sched: [15:1.00]
+; BROADWELL-NEXT:    fdivr %st, %st(1) # sched: [20:1.00]
+; BROADWELL-NEXT:    fdivr %st(2), %st # sched: [15:1.00]
 ; BROADWELL-NEXT:    fdivrs (%ecx) # sched: [26:1.00]
 ; BROADWELL-NEXT:    fdivrl (%eax) # sched: [26:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -1848,8 +1848,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fdivr %st(0), %st(1) # sched: [20:1.00]
-; SKYLAKE-NEXT:    fdivr %st(2) # sched: [15:1.00]
+; SKYLAKE-NEXT:    fdivr %st, %st(1) # sched: [20:1.00]
+; SKYLAKE-NEXT:    fdivr %st(2), %st # sched: [15:1.00]
 ; SKYLAKE-NEXT:    fdivrs (%ecx) # sched: [27:1.00]
 ; SKYLAKE-NEXT:    fdivrl (%eax) # sched: [27:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -1860,8 +1860,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fdivr %st(0), %st(1) # sched: [20:1.00]
-; SKX-NEXT:    fdivr %st(2) # sched: [15:1.00]
+; SKX-NEXT:    fdivr %st, %st(1) # sched: [20:1.00]
+; SKX-NEXT:    fdivr %st(2), %st # sched: [15:1.00]
 ; SKX-NEXT:    fdivrs (%ecx) # sched: [27:1.00]
 ; SKX-NEXT:    fdivrl (%eax) # sched: [27:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -1872,8 +1872,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fdivr %st(0), %st(1) # sched: [9:9.50]
-; BDVER2-NEXT:    fdivr %st(2) # sched: [9:9.50]
+; BDVER2-NEXT:    fdivr %st, %st(1) # sched: [9:9.50]
+; BDVER2-NEXT:    fdivr %st(2), %st # sched: [9:9.50]
 ; BDVER2-NEXT:    fdivrs (%ecx) # sched: [14:9.50]
 ; BDVER2-NEXT:    fdivrl (%eax) # sched: [14:9.50]
 ; BDVER2-NEXT:    #NO_APP
@@ -1884,8 +1884,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fdivr %st(0), %st(1) # sched: [19:19.00]
-; BTVER2-NEXT:    fdivr %st(2) # sched: [19:19.00]
+; BTVER2-NEXT:    fdivr %st, %st(1) # sched: [19:19.00]
+; BTVER2-NEXT:    fdivr %st(2), %st # sched: [19:19.00]
 ; BTVER2-NEXT:    fdivrs (%ecx) # sched: [24:19.00]
 ; BTVER2-NEXT:    fdivrl (%eax) # sched: [24:19.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -1896,8 +1896,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fdivr %st(0), %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT:    fdivr %st(2) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdivr %st, %st(1) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdivr %st(2), %st # sched: [15:1.00]
 ; ZNVER1-NEXT:    fdivrs (%ecx) # sched: [22:1.00]
 ; ZNVER1-NEXT:    fdivrl (%eax) # sched: [22:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -1912,8 +1912,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fdivrp %st(1)
-; GENERIC-NEXT:    fdivrp %st(2)
+; GENERIC-NEXT:    fdivrp %st, %st(1)
+; GENERIC-NEXT:    fdivrp %st, %st(2)
 ; GENERIC-NEXT:    fidivrs (%ecx)
 ; GENERIC-NEXT:    fidivrl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -1924,8 +1924,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivrp %st(1) # sched: [34:17.00]
-; ATOM-NEXT:    fdivrp %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrp %st, %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrp %st, %st(2) # sched: [34:17.00]
 ; ATOM-NEXT:    fidivrs (%ecx) # sched: [34:17.00]
 ; ATOM-NEXT:    fidivrl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
@@ -1936,8 +1936,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdivrp %st(1) # sched: [19:17.00]
-; SLM-NEXT:    fdivrp %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fdivrp %st, %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdivrp %st, %st(2) # sched: [19:17.00]
 ; SLM-NEXT:    fidivrs (%ecx) # sched: [22:17.00]
 ; SLM-NEXT:    fidivrl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
@@ -1948,8 +1948,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdivrp %st(1) # sched: [14:14.00]
-; SANDY-NEXT:    fdivrp %st(2) # sched: [14:14.00]
+; SANDY-NEXT:    fdivrp %st, %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdivrp %st, %st(2) # sched: [14:14.00]
 ; SANDY-NEXT:    fidivrs (%ecx) # sched: [34:1.00]
 ; SANDY-NEXT:    fidivrl (%eax) # sched: [34:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1960,8 +1960,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fdivrp %st(1) # sched: [20:1.00]
-; HASWELL-NEXT:    fdivrp %st(2) # sched: [20:1.00]
+; HASWELL-NEXT:    fdivrp %st, %st(1) # sched: [20:1.00]
+; HASWELL-NEXT:    fdivrp %st, %st(2) # sched: [20:1.00]
 ; HASWELL-NEXT:    fidivrs (%ecx) # sched: [30:1.00]
 ; HASWELL-NEXT:    fidivrl (%eax) # sched: [30:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -1972,8 +1972,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fdivrp %st(1) # sched: [20:1.00]
-; BROADWELL-NEXT:    fdivrp %st(2) # sched: [20:1.00]
+; BROADWELL-NEXT:    fdivrp %st, %st(1) # sched: [20:1.00]
+; BROADWELL-NEXT:    fdivrp %st, %st(2) # sched: [20:1.00]
 ; BROADWELL-NEXT:    fidivrs (%ecx) # sched: [29:1.00]
 ; BROADWELL-NEXT:    fidivrl (%eax) # sched: [29:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -1984,8 +1984,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fdivrp %st(1) # sched: [20:1.00]
-; SKYLAKE-NEXT:    fdivrp %st(2) # sched: [20:1.00]
+; SKYLAKE-NEXT:    fdivrp %st, %st(1) # sched: [20:1.00]
+; SKYLAKE-NEXT:    fdivrp %st, %st(2) # sched: [20:1.00]
 ; SKYLAKE-NEXT:    fidivrs (%ecx) # sched: [30:1.00]
 ; SKYLAKE-NEXT:    fidivrl (%eax) # sched: [30:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -1996,8 +1996,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fdivrp %st(1) # sched: [20:1.00]
-; SKX-NEXT:    fdivrp %st(2) # sched: [20:1.00]
+; SKX-NEXT:    fdivrp %st, %st(1) # sched: [20:1.00]
+; SKX-NEXT:    fdivrp %st, %st(2) # sched: [20:1.00]
 ; SKX-NEXT:    fidivrs (%ecx) # sched: [30:1.00]
 ; SKX-NEXT:    fidivrl (%eax) # sched: [30:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -2008,8 +2008,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fdivrp %st(1) # sched: [9:9.50]
-; BDVER2-NEXT:    fdivrp %st(2) # sched: [9:9.50]
+; BDVER2-NEXT:    fdivrp %st, %st(1) # sched: [9:9.50]
+; BDVER2-NEXT:    fdivrp %st, %st(2) # sched: [9:9.50]
 ; BDVER2-NEXT:    fidivrs (%ecx) # sched: [14:9.50]
 ; BDVER2-NEXT:    fidivrl (%eax) # sched: [14:9.50]
 ; BDVER2-NEXT:    #NO_APP
@@ -2020,8 +2020,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fdivrp %st(1) # sched: [19:19.00]
-; BTVER2-NEXT:    fdivrp %st(2) # sched: [19:19.00]
+; BTVER2-NEXT:    fdivrp %st, %st(1) # sched: [19:19.00]
+; BTVER2-NEXT:    fdivrp %st, %st(2) # sched: [19:19.00]
 ; BTVER2-NEXT:    fidivrs (%ecx) # sched: [24:19.00]
 ; BTVER2-NEXT:    fidivrl (%eax) # sched: [24:19.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -2032,8 +2032,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fdivrp %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT:    fdivrp %st(2) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdivrp %st, %st(1) # sched: [15:1.00]
+; ZNVER1-NEXT:    fdivrp %st, %st(2) # sched: [15:1.00]
 ; ZNVER1-NEXT:    fidivrs (%ecx) # sched: [22:1.00]
 ; ZNVER1-NEXT:    fidivrl (%eax) # sched: [22:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -3243,8 +3243,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fmul %st(0), %st(1)
-; GENERIC-NEXT:    fmul %st(2)
+; GENERIC-NEXT:    fmul %st, %st(1)
+; GENERIC-NEXT:    fmul %st(2), %st
 ; GENERIC-NEXT:    fmuls (%ecx)
 ; GENERIC-NEXT:    fmull (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -3255,8 +3255,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fmul %st(0), %st(1) # sched: [4:4.00]
-; ATOM-NEXT:    fmul %st(2) # sched: [4:4.00]
+; ATOM-NEXT:    fmul %st, %st(1) # sched: [4:4.00]
+; ATOM-NEXT:    fmul %st(2), %st # sched: [4:4.00]
 ; ATOM-NEXT:    fmuls (%ecx) # sched: [4:4.00]
 ; ATOM-NEXT:    fmull (%eax) # sched: [4:4.00]
 ; ATOM-NEXT:    #NO_APP
@@ -3267,8 +3267,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fmul %st(0), %st(1) # sched: [5:2.00]
-; SLM-NEXT:    fmul %st(2) # sched: [5:2.00]
+; SLM-NEXT:    fmul %st, %st(1) # sched: [5:2.00]
+; SLM-NEXT:    fmul %st(2), %st # sched: [5:2.00]
 ; SLM-NEXT:    fmuls (%ecx) # sched: [8:2.00]
 ; SLM-NEXT:    fmull (%eax) # sched: [8:2.00]
 ; SLM-NEXT:    #NO_APP
@@ -3279,8 +3279,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fmul %st(0), %st(1) # sched: [5:1.00]
-; SANDY-NEXT:    fmul %st(2) # sched: [5:1.00]
+; SANDY-NEXT:    fmul %st, %st(1) # sched: [5:1.00]
+; SANDY-NEXT:    fmul %st(2), %st # sched: [5:1.00]
 ; SANDY-NEXT:    fmuls (%ecx) # sched: [12:1.00]
 ; SANDY-NEXT:    fmull (%eax) # sched: [12:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -3291,8 +3291,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fmul %st(0), %st(1) # sched: [5:1.00]
-; HASWELL-NEXT:    fmul %st(2) # sched: [5:1.00]
+; HASWELL-NEXT:    fmul %st, %st(1) # sched: [5:1.00]
+; HASWELL-NEXT:    fmul %st(2), %st # sched: [5:1.00]
 ; HASWELL-NEXT:    fmuls (%ecx) # sched: [12:1.00]
 ; HASWELL-NEXT:    fmull (%eax) # sched: [12:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -3303,8 +3303,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fmul %st(0), %st(1) # sched: [5:1.00]
-; BROADWELL-NEXT:    fmul %st(2) # sched: [5:1.00]
+; BROADWELL-NEXT:    fmul %st, %st(1) # sched: [5:1.00]
+; BROADWELL-NEXT:    fmul %st(2), %st # sched: [5:1.00]
 ; BROADWELL-NEXT:    fmuls (%ecx) # sched: [11:1.00]
 ; BROADWELL-NEXT:    fmull (%eax) # sched: [11:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -3315,8 +3315,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fmul %st(0), %st(1) # sched: [4:1.00]
-; SKYLAKE-NEXT:    fmul %st(2) # sched: [4:1.00]
+; SKYLAKE-NEXT:    fmul %st, %st(1) # sched: [4:1.00]
+; SKYLAKE-NEXT:    fmul %st(2), %st # sched: [4:1.00]
 ; SKYLAKE-NEXT:    fmuls (%ecx) # sched: [11:1.00]
 ; SKYLAKE-NEXT:    fmull (%eax) # sched: [11:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -3327,8 +3327,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fmul %st(0), %st(1) # sched: [4:1.00]
-; SKX-NEXT:    fmul %st(2) # sched: [4:1.00]
+; SKX-NEXT:    fmul %st, %st(1) # sched: [4:1.00]
+; SKX-NEXT:    fmul %st(2), %st # sched: [4:1.00]
 ; SKX-NEXT:    fmuls (%ecx) # sched: [11:1.00]
 ; SKX-NEXT:    fmull (%eax) # sched: [11:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -3339,8 +3339,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fmul %st(0), %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fmul %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fmul %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fmul %st(2), %st # sched: [5:1.00]
 ; BDVER2-NEXT:    fmuls (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fmull (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -3351,8 +3351,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fmul %st(0), %st(1) # sched: [2:1.00]
-; BTVER2-NEXT:    fmul %st(2) # sched: [2:1.00]
+; BTVER2-NEXT:    fmul %st, %st(1) # sched: [2:1.00]
+; BTVER2-NEXT:    fmul %st(2), %st # sched: [2:1.00]
 ; BTVER2-NEXT:    fmuls (%ecx) # sched: [7:1.00]
 ; BTVER2-NEXT:    fmull (%eax) # sched: [7:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -3363,8 +3363,8 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fmul %st(0), %st(1) # sched: [3:0.50]
-; ZNVER1-NEXT:    fmul %st(2) # sched: [3:0.50]
+; ZNVER1-NEXT:    fmul %st, %st(1) # sched: [3:0.50]
+; ZNVER1-NEXT:    fmul %st(2), %st # sched: [3:0.50]
 ; ZNVER1-NEXT:    fmuls (%ecx) # sched: [10:0.50]
 ; ZNVER1-NEXT:    fmull (%eax) # sched: [10:0.50]
 ; ZNVER1-NEXT:    #NO_APP
@@ -3379,8 +3379,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fmulp %st(1)
-; GENERIC-NEXT:    fmulp %st(2)
+; GENERIC-NEXT:    fmulp %st, %st(1)
+; GENERIC-NEXT:    fmulp %st, %st(2)
 ; GENERIC-NEXT:    fimuls (%ecx)
 ; GENERIC-NEXT:    fimull (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -3391,8 +3391,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fmulp %st(1) # sched: [4:4.00]
-; ATOM-NEXT:    fmulp %st(2) # sched: [4:4.00]
+; ATOM-NEXT:    fmulp %st, %st(1) # sched: [4:4.00]
+; ATOM-NEXT:    fmulp %st, %st(2) # sched: [4:4.00]
 ; ATOM-NEXT:    fimuls (%ecx) # sched: [4:4.00]
 ; ATOM-NEXT:    fimull (%eax) # sched: [4:4.00]
 ; ATOM-NEXT:    #NO_APP
@@ -3403,8 +3403,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fmulp %st(1) # sched: [5:2.00]
-; SLM-NEXT:    fmulp %st(2) # sched: [5:2.00]
+; SLM-NEXT:    fmulp %st, %st(1) # sched: [5:2.00]
+; SLM-NEXT:    fmulp %st, %st(2) # sched: [5:2.00]
 ; SLM-NEXT:    fimuls (%ecx) # sched: [8:2.00]
 ; SLM-NEXT:    fimull (%eax) # sched: [8:2.00]
 ; SLM-NEXT:    #NO_APP
@@ -3415,8 +3415,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fmulp %st(1) # sched: [5:1.00]
-; SANDY-NEXT:    fmulp %st(2) # sched: [5:1.00]
+; SANDY-NEXT:    fmulp %st, %st(1) # sched: [5:1.00]
+; SANDY-NEXT:    fmulp %st, %st(2) # sched: [5:1.00]
 ; SANDY-NEXT:    fimuls (%ecx) # sched: [15:1.00]
 ; SANDY-NEXT:    fimull (%eax) # sched: [15:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -3427,8 +3427,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fmulp %st(1) # sched: [5:1.00]
-; HASWELL-NEXT:    fmulp %st(2) # sched: [5:1.00]
+; HASWELL-NEXT:    fmulp %st, %st(1) # sched: [5:1.00]
+; HASWELL-NEXT:    fmulp %st, %st(2) # sched: [5:1.00]
 ; HASWELL-NEXT:    fimuls (%ecx) # sched: [15:1.00]
 ; HASWELL-NEXT:    fimull (%eax) # sched: [15:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -3439,8 +3439,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fmulp %st(1) # sched: [5:1.00]
-; BROADWELL-NEXT:    fmulp %st(2) # sched: [5:1.00]
+; BROADWELL-NEXT:    fmulp %st, %st(1) # sched: [5:1.00]
+; BROADWELL-NEXT:    fmulp %st, %st(2) # sched: [5:1.00]
 ; BROADWELL-NEXT:    fimuls (%ecx) # sched: [14:1.00]
 ; BROADWELL-NEXT:    fimull (%eax) # sched: [14:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -3451,8 +3451,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fmulp %st(1) # sched: [4:1.00]
-; SKYLAKE-NEXT:    fmulp %st(2) # sched: [4:1.00]
+; SKYLAKE-NEXT:    fmulp %st, %st(1) # sched: [4:1.00]
+; SKYLAKE-NEXT:    fmulp %st, %st(2) # sched: [4:1.00]
 ; SKYLAKE-NEXT:    fimuls (%ecx) # sched: [14:1.00]
 ; SKYLAKE-NEXT:    fimull (%eax) # sched: [14:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -3463,8 +3463,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fmulp %st(1) # sched: [4:1.00]
-; SKX-NEXT:    fmulp %st(2) # sched: [4:1.00]
+; SKX-NEXT:    fmulp %st, %st(1) # sched: [4:1.00]
+; SKX-NEXT:    fmulp %st, %st(2) # sched: [4:1.00]
 ; SKX-NEXT:    fimuls (%ecx) # sched: [14:1.00]
 ; SKX-NEXT:    fimull (%eax) # sched: [14:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -3475,8 +3475,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fmulp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fmulp %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fmulp %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fmulp %st, %st(2) # sched: [5:1.00]
 ; BDVER2-NEXT:    fimuls (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fimull (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -3487,8 +3487,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fmulp %st(1) # sched: [2:1.00]
-; BTVER2-NEXT:    fmulp %st(2) # sched: [2:1.00]
+; BTVER2-NEXT:    fmulp %st, %st(1) # sched: [2:1.00]
+; BTVER2-NEXT:    fmulp %st, %st(2) # sched: [2:1.00]
 ; BTVER2-NEXT:    fimuls (%ecx) # sched: [7:1.00]
 ; BTVER2-NEXT:    fimull (%eax) # sched: [7:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -3499,8 +3499,8 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fmulp %st(1) # sched: [3:0.50]
-; ZNVER1-NEXT:    fmulp %st(2) # sched: [3:0.50]
+; ZNVER1-NEXT:    fmulp %st, %st(1) # sched: [3:0.50]
+; ZNVER1-NEXT:    fmulp %st, %st(2) # sched: [3:0.50]
 ; ZNVER1-NEXT:    fimuls (%ecx) # sched: [10:0.50]
 ; ZNVER1-NEXT:    fimull (%eax) # sched: [10:0.50]
 ; ZNVER1-NEXT:    #NO_APP
@@ -4983,8 +4983,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fsub %st(0), %st(1)
-; GENERIC-NEXT:    fsub %st(2)
+; GENERIC-NEXT:    fsub %st, %st(1)
+; GENERIC-NEXT:    fsub %st(2), %st
 ; GENERIC-NEXT:    fsubs (%ecx)
 ; GENERIC-NEXT:    fsubl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -4995,8 +4995,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsub %st(0), %st(1) # sched: [5:5.00]
-; ATOM-NEXT:    fsub %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsub %st, %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsub %st(2), %st # sched: [5:5.00]
 ; ATOM-NEXT:    fsubs (%ecx) # sched: [5:5.00]
 ; ATOM-NEXT:    fsubl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
@@ -5007,8 +5007,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; SLM-NEXT:    fsub %st(2) # sched: [3:1.00]
+; SLM-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; SLM-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; SLM-NEXT:    fsubs (%ecx) # sched: [6:1.00]
 ; SLM-NEXT:    fsubl (%eax) # sched: [6:1.00]
 ; SLM-NEXT:    #NO_APP
@@ -5019,8 +5019,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; SANDY-NEXT:    fsub %st(2) # sched: [3:1.00]
+; SANDY-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; SANDY-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; SANDY-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -5031,8 +5031,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; HASWELL-NEXT:    fsub %st(2) # sched: [3:1.00]
+; HASWELL-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; HASWELL-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; HASWELL-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; HASWELL-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -5043,8 +5043,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT:    fsub %st(2) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; BROADWELL-NEXT:    fsubs (%ecx) # sched: [9:1.00]
 ; BROADWELL-NEXT:    fsubl (%eax) # sched: [9:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -5055,8 +5055,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fsub %st(2) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; SKYLAKE-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; SKYLAKE-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -5067,8 +5067,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; SKX-NEXT:    fsub %st(2) # sched: [3:1.00]
+; SKX-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; SKX-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; SKX-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; SKX-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -5079,8 +5079,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fsub %st(0), %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fsub %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fsub %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fsub %st(2), %st # sched: [5:1.00]
 ; BDVER2-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -5091,8 +5091,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; BTVER2-NEXT:    fsub %st(2) # sched: [3:1.00]
+; BTVER2-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; BTVER2-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; BTVER2-NEXT:    fsubs (%ecx) # sched: [8:1.00]
 ; BTVER2-NEXT:    fsubl (%eax) # sched: [8:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -5103,8 +5103,8 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fsub %st(0), %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT:    fsub %st(2) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsub %st, %st(1) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsub %st(2), %st # sched: [3:1.00]
 ; ZNVER1-NEXT:    fsubs (%ecx) # sched: [10:1.00]
 ; ZNVER1-NEXT:    fsubl (%eax) # sched: [10:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -5119,8 +5119,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fsubp %st(1)
-; GENERIC-NEXT:    fsubp %st(2)
+; GENERIC-NEXT:    fsubp %st, %st(1)
+; GENERIC-NEXT:    fsubp %st, %st(2)
 ; GENERIC-NEXT:    fisubs (%ecx)
 ; GENERIC-NEXT:    fisubl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -5131,8 +5131,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubp %st(1) # sched: [5:5.00]
-; ATOM-NEXT:    fsubp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubp %st, %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubp %st, %st(2) # sched: [5:5.00]
 ; ATOM-NEXT:    fisubs (%ecx) # sched: [5:5.00]
 ; ATOM-NEXT:    fisubl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
@@ -5143,8 +5143,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; SLM-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; SLM-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; SLM-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; SLM-NEXT:    fisubs (%ecx) # sched: [6:1.00]
 ; SLM-NEXT:    fisubl (%eax) # sched: [6:1.00]
 ; SLM-NEXT:    #NO_APP
@@ -5155,8 +5155,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; SANDY-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; SANDY-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; SANDY-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; SANDY-NEXT:    fisubs (%ecx) # sched: [13:2.00]
 ; SANDY-NEXT:    fisubl (%eax) # sched: [13:2.00]
 ; SANDY-NEXT:    #NO_APP
@@ -5167,8 +5167,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; HASWELL-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; HASWELL-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; HASWELL-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; HASWELL-NEXT:    fisubs (%ecx) # sched: [13:2.00]
 ; HASWELL-NEXT:    fisubl (%eax) # sched: [13:2.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -5179,8 +5179,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; BROADWELL-NEXT:    fisubs (%ecx) # sched: [12:2.00]
 ; BROADWELL-NEXT:    fisubl (%eax) # sched: [12:2.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -5191,8 +5191,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; SKYLAKE-NEXT:    fisubs (%ecx) # sched: [13:2.00]
 ; SKYLAKE-NEXT:    fisubl (%eax) # sched: [13:2.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -5203,8 +5203,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; SKX-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; SKX-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; SKX-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; SKX-NEXT:    fisubs (%ecx) # sched: [13:2.00]
 ; SKX-NEXT:    fisubl (%eax) # sched: [13:2.00]
 ; SKX-NEXT:    #NO_APP
@@ -5215,8 +5215,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fsubp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fsubp %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fsubp %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fsubp %st, %st(2) # sched: [5:1.00]
 ; BDVER2-NEXT:    fisubs (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fisubl (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -5227,8 +5227,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; BTVER2-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; BTVER2-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; BTVER2-NEXT:    fisubs (%ecx) # sched: [8:1.00]
 ; BTVER2-NEXT:    fisubl (%eax) # sched: [8:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -5239,8 +5239,8 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fsubp %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT:    fsubp %st(2) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsubp %st, %st(1) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsubp %st, %st(2) # sched: [3:1.00]
 ; ZNVER1-NEXT:    fisubs (%ecx) # sched: [10:1.00]
 ; ZNVER1-NEXT:    fisubl (%eax) # sched: [10:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -5255,8 +5255,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fsubr %st(0), %st(1)
-; GENERIC-NEXT:    fsubr %st(2)
+; GENERIC-NEXT:    fsubr %st, %st(1)
+; GENERIC-NEXT:    fsubr %st(2), %st
 ; GENERIC-NEXT:    fsubrs (%ecx)
 ; GENERIC-NEXT:    fsubrl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -5267,8 +5267,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubr %st(0), %st(1) # sched: [5:5.00]
-; ATOM-NEXT:    fsubr %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubr %st, %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubr %st(2), %st # sched: [5:5.00]
 ; ATOM-NEXT:    fsubrs (%ecx) # sched: [5:5.00]
 ; ATOM-NEXT:    fsubrl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
@@ -5279,8 +5279,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; SLM-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; SLM-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; SLM-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; SLM-NEXT:    fsubrs (%ecx) # sched: [6:1.00]
 ; SLM-NEXT:    fsubrl (%eax) # sched: [6:1.00]
 ; SLM-NEXT:    #NO_APP
@@ -5291,8 +5291,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; SANDY-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; SANDY-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; SANDY-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; SANDY-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; SANDY-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -5303,8 +5303,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; HASWELL-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; HASWELL-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; HASWELL-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; HASWELL-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; HASWELL-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -5315,8 +5315,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; BROADWELL-NEXT:    fsubrs (%ecx) # sched: [9:1.00]
 ; BROADWELL-NEXT:    fsubrl (%eax) # sched: [9:1.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -5327,8 +5327,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; SKYLAKE-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; SKYLAKE-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -5339,8 +5339,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; SKX-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; SKX-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; SKX-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; SKX-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; SKX-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; SKX-NEXT:    #NO_APP
@@ -5351,8 +5351,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fsubr %st(0), %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fsubr %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fsubr %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fsubr %st(2), %st # sched: [5:1.00]
 ; BDVER2-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -5363,8 +5363,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; BTVER2-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; BTVER2-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; BTVER2-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; BTVER2-NEXT:    fsubrs (%ecx) # sched: [8:1.00]
 ; BTVER2-NEXT:    fsubrl (%eax) # sched: [8:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -5375,8 +5375,8 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fsubr %st(0), %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT:    fsubr %st(2) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsubr %st, %st(1) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsubr %st(2), %st # sched: [3:1.00]
 ; ZNVER1-NEXT:    fsubrs (%ecx) # sched: [10:1.00]
 ; ZNVER1-NEXT:    fsubrl (%eax) # sched: [10:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -5391,8 +5391,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fsubrp %st(1)
-; GENERIC-NEXT:    fsubrp %st(2)
+; GENERIC-NEXT:    fsubrp %st, %st(1)
+; GENERIC-NEXT:    fsubrp %st, %st(2)
 ; GENERIC-NEXT:    fisubrs (%ecx)
 ; GENERIC-NEXT:    fisubrl (%eax)
 ; GENERIC-NEXT:    #NO_APP
@@ -5403,8 +5403,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubrp %st(1) # sched: [5:5.00]
-; ATOM-NEXT:    fsubrp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrp %st, %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrp %st, %st(2) # sched: [5:5.00]
 ; ATOM-NEXT:    fisubrs (%ecx) # sched: [5:5.00]
 ; ATOM-NEXT:    fisubrl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
@@ -5415,8 +5415,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; SLM-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; SLM-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; SLM-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; SLM-NEXT:    fisubrs (%ecx) # sched: [6:1.00]
 ; SLM-NEXT:    fisubrl (%eax) # sched: [6:1.00]
 ; SLM-NEXT:    #NO_APP
@@ -5427,8 +5427,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; SANDY-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; SANDY-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; SANDY-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; SANDY-NEXT:    fisubrs (%ecx) # sched: [13:2.00]
 ; SANDY-NEXT:    fisubrl (%eax) # sched: [13:2.00]
 ; SANDY-NEXT:    #NO_APP
@@ -5439,8 +5439,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; HASWELL-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; HASWELL-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; HASWELL-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; HASWELL-NEXT:    fisubrs (%ecx) # sched: [13:2.00]
 ; HASWELL-NEXT:    fisubrl (%eax) # sched: [13:2.00]
 ; HASWELL-NEXT:    #NO_APP
@@ -5451,8 +5451,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; BROADWELL-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; BROADWELL-NEXT:    fisubrs (%ecx) # sched: [12:2.00]
 ; BROADWELL-NEXT:    fisubrl (%eax) # sched: [12:2.00]
 ; BROADWELL-NEXT:    #NO_APP
@@ -5463,8 +5463,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; SKYLAKE-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; SKYLAKE-NEXT:    fisubrs (%ecx) # sched: [13:2.00]
 ; SKYLAKE-NEXT:    fisubrl (%eax) # sched: [13:2.00]
 ; SKYLAKE-NEXT:    #NO_APP
@@ -5475,8 +5475,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; SKX-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; SKX-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; SKX-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; SKX-NEXT:    fisubrs (%ecx) # sched: [13:2.00]
 ; SKX-NEXT:    fisubrl (%eax) # sched: [13:2.00]
 ; SKX-NEXT:    #NO_APP
@@ -5487,8 +5487,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; BDVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fsubrp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT:    fsubrp %st(2) # sched: [5:1.00]
+; BDVER2-NEXT:    fsubrp %st, %st(1) # sched: [5:1.00]
+; BDVER2-NEXT:    fsubrp %st, %st(2) # sched: [5:1.00]
 ; BDVER2-NEXT:    fisubrs (%ecx) # sched: [10:1.00]
 ; BDVER2-NEXT:    fisubrl (%eax) # sched: [10:1.00]
 ; BDVER2-NEXT:    #NO_APP
@@ -5499,8 +5499,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
 ; BTVER2-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; BTVER2-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; BTVER2-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; BTVER2-NEXT:    fisubrs (%ecx) # sched: [8:1.00]
 ; BTVER2-NEXT:    fisubrl (%eax) # sched: [8:1.00]
 ; BTVER2-NEXT:    #NO_APP
@@ -5511,8 +5511,8 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fsubrp %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT:    fsubrp %st(2) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsubrp %st, %st(1) # sched: [3:1.00]
+; ZNVER1-NEXT:    fsubrp %st, %st(2) # sched: [3:1.00]
 ; ZNVER1-NEXT:    fisubrs (%ecx) # sched: [10:1.00]
 ; ZNVER1-NEXT:    fisubrl (%eax) # sched: [10:1.00]
 ; ZNVER1-NEXT:    #NO_APP
@@ -5731,88 +5731,88 @@ define void @test_fucomi_fucomip() optsize {
 ; GENERIC-LABEL: test_fucomi_fucomip:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    fucomi %st(3)
-; GENERIC-NEXT:    fucompi %st(3)
+; GENERIC-NEXT:    fucomi %st(3), %st
+; GENERIC-NEXT:    fucompi %st(3), %st
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retl
 ;
 ; ATOM-LABEL: test_fucomi_fucomip:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fucomi %st(3) # sched: [9:4.50]
-; ATOM-NEXT:    fucompi %st(3) # sched: [9:4.50]
+; ATOM-NEXT:    fucomi %st(3), %st # sched: [9:4.50]
+; ATOM-NEXT:    fucompi %st(3), %st # sched: [9:4.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_fucomi_fucomip:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fucomi %st(3) # sched: [3:1.00]
-; SLM-NEXT:    fucompi %st(3) # sched: [3:1.00]
+; SLM-NEXT:    fucomi %st(3), %st # sched: [3:1.00]
+; SLM-NEXT:    fucompi %st(3), %st # sched: [3:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_fucomi_fucomip:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fucomi %st(3) # sched: [3:1.00]
-; SANDY-NEXT:    fucompi %st(3) # sched: [3:1.00]
+; SANDY-NEXT:    fucomi %st(3), %st # sched: [3:1.00]
+; SANDY-NEXT:    fucompi %st(3), %st # sched: [3:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_fucomi_fucomip:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    fucomi %st(3) # sched: [1:0.50]
-; HASWELL-NEXT:    fucompi %st(3) # sched: [1:0.50]
+; HASWELL-NEXT:    fucomi %st(3), %st # sched: [1:0.50]
+; HASWELL-NEXT:    fucompi %st(3), %st # sched: [1:0.50]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retl # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_fucomi_fucomip:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fucomi %st(3) # sched: [3:1.00]
-; BROADWELL-NEXT:    fucompi %st(3) # sched: [3:1.00]
+; BROADWELL-NEXT:    fucomi %st(3), %st # sched: [3:1.00]
+; BROADWELL-NEXT:    fucompi %st(3), %st # sched: [3:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKYLAKE-LABEL: test_fucomi_fucomip:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fucomi %st(3) # sched: [2:1.00]
-; SKYLAKE-NEXT:    fucompi %st(3) # sched: [2:1.00]
+; SKYLAKE-NEXT:    fucomi %st(3), %st # sched: [2:1.00]
+; SKYLAKE-NEXT:    fucompi %st(3), %st # sched: [2:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKX-LABEL: test_fucomi_fucomip:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fucomi %st(3) # sched: [2:1.00]
-; SKX-NEXT:    fucompi %st(3) # sched: [2:1.00]
+; SKX-NEXT:    fucomi %st(3), %st # sched: [2:1.00]
+; SKX-NEXT:    fucompi %st(3), %st # sched: [2:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retl # sched: [6:0.50]
 ;
 ; BDVER2-LABEL: test_fucomi_fucomip:
 ; BDVER2:       # %bb.0:
 ; BDVER2-NEXT:    #APP
-; BDVER2-NEXT:    fucomi %st(3) # sched: [1:1.00]
-; BDVER2-NEXT:    fucompi %st(3) # sched: [1:1.00]
+; BDVER2-NEXT:    fucomi %st(3), %st # sched: [1:1.00]
+; BDVER2-NEXT:    fucompi %st(3), %st # sched: [1:1.00]
 ; BDVER2-NEXT:    #NO_APP
 ; BDVER2-NEXT:    retl # sched: [5:1.00]
 ;
 ; BTVER2-LABEL: test_fucomi_fucomip:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fucomi %st(3) # sched: [3:1.00]
-; BTVER2-NEXT:    fucompi %st(3) # sched: [3:1.00]
+; BTVER2-NEXT:    fucomi %st(3), %st # sched: [3:1.00]
+; BTVER2-NEXT:    fucompi %st(3), %st # sched: [3:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retl # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_fucomi_fucomip:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    fucomi %st(3) # sched: [9:0.50]
-; ZNVER1-NEXT:    fucompi %st(3) # sched: [9:0.50]
+; ZNVER1-NEXT:    fucomi %st(3), %st # sched: [9:0.50]
+; ZNVER1-NEXT:    fucompi %st(3), %st # sched: [9:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retl # sched: [1:0.50]
   tail call void asm sideeffect "fucomi %st(3) \0A\09 fucomip %st(3)", ""() nounwind
diff --git a/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll b/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll
index 6aa429adb417b1..8d8af8c5124a9f 100644
--- a/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll
+++ b/llvm/test/DebugInfo/Mips/dwarfdump-tls.ll
@@ -1,12 +1,34 @@
-; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj -o=%t-32.o < %s
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj \
+; RUN:     -split-dwarf-file=foo.dwo -o=%t-32.o < %s
 ; RUN: llvm-dwarfdump %t-32.o 2>&1 | FileCheck %s
-; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj -o=%t-64.o < %s
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj \
+; RUN:     -split-dwarf-file=foo.dwo -o=%t-64.o < %s
 ; RUN: llvm-dwarfdump %t-64.o 2>&1 | FileCheck %s
 
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=asm \
+; RUN:     -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM32 %s
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=asm \
+; RUN:     -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM64 %s
+
 @x = thread_local global i32 5, align 4, !dbg !0
 
 ; CHECK-NOT: error: failed to compute relocation: R_MIPS_TLS_DTPREL
 
+; CHECK:      DW_AT_name      ("x")
+; CHECK-NEXT: DW_AT_type      (0x00000025 "int")
+; CHECK-NEXT: DW_AT_external  (true)
+; CHECK-NEXT: DW_AT_decl_file (0x01)
+; CHECK-NEXT: DW_AT_decl_line (1)
+; CHECK-NEXT: DW_AT_location  (DW_OP_GNU_const_index 0x0, {{DW_OP_GNU_push_tls_address|DW_OP_form_tls_address}})
+
+; ASM32:              .section        .debug_addr
+; ASM32-NEXT: $addr_table_base0:
+; ASM32-NEXT:         .4byte  x+32768
+
+; ASM64:              .section        .debug_addr
+; ASM64-NEXT: .Laddr_table_base0:
+; ASM64-NEXT:         .8byte  x+32768
+
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!7, !8}
 
diff --git a/llvm/test/DebugInfo/Mips/eh_frame.ll b/llvm/test/DebugInfo/Mips/eh_frame.ll
index 4687443cb1cff2..122d0a7f6ab2ab 100644
--- a/llvm/test/DebugInfo/Mips/eh_frame.ll
+++ b/llvm/test/DebugInfo/Mips/eh_frame.ll
@@ -1,9 +1,21 @@
-; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -O3 -filetype=obj -o - %s | llvm-readelf -r | FileCheck %s
-
-; CHECK: .rel.eh_frame
-; CHECK: DW.ref.__gxx_personality_v0
-; CHECK-NEXT: .text
-; CHECK-NEXT: .gcc_except_table
+; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=static -O3 -filetype=obj -o - %s | \
+; RUN:     llvm-readelf -r | FileCheck %s --check-prefix=CHECK-READELF
+; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=pic -O3 -filetype=obj -o - %s | \
+; RUN:     llvm-readelf -r | FileCheck %s --check-prefix=CHECK-READELF
+; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=static -O3 -filetype=obj -o - %s | \
+; RUN:     llvm-objdump -s -j .gcc_except_table - | FileCheck %s --check-prefix=CHECK-EXCEPT-TABLE-STATIC
+; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -relocation-model=pic -O3 -filetype=obj -o - %s | \
+; RUN:     llvm-objdump -s -j .gcc_except_table - | FileCheck %s --check-prefix=CHECK-EXCEPT-TABLE-PIC
+
+; CHECK-READELF: .rel.eh_frame
+; CHECK-READELF: DW.ref.__gxx_personality_v0
+; CHECK-READELF-NEXT: .text
+; CHECK-READELF-NEXT: .gcc_except_table
+
+; CHECK-EXCEPT-TABLE-STATIC: 0000 ff9b1501 0c011500 00150e23 01231e00  ...........#.#..
+; CHECK-EXCEPT-TABLE-STATIC: 0010 00010000 00000000
+; CHECK-EXCEPT-TABLE-PIC:    0000 ff9b1501 0c012d00 002d133f 013f2a00 ......-..-.?.?*.
+; CHECK-EXCEPT-TABLE-PIC:    0010 00010000 00000000                    ........
 
 @_ZTIi = external constant i8*
 
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test b/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test
index 669607fe557a36..32f2482b5117c0 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test
+++ b/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test
@@ -9,7 +9,7 @@
 # CHECK-NEXT:    [0x0000000000000700, 0x0000000000000710): DW_OP_breg5 RDI+0
 
 # CHECK:      .debug_loclists contents:
-# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002c, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 # CHECK-NEXT: 0x00000000:
 # CHECK-NEXT:   [0x0000000000000000, 0x0000000000000010): DW_OP_breg5 RDI+0
 # CHECK-NEXT:   [0x0000000000000530, 0x0000000000000540): DW_OP_breg6 RBP-8, DW_OP_deref
@@ -37,7 +37,7 @@
   .byte  4                       # DW_LLE_offset_pair
   .uleb128 0x0                   #   starting offset
   .uleb128 0x10                  #   ending offset
-  .short  2                      # Loc expr size
+  .byte  2                       # Loc expr size
   .byte  117                     # DW_OP_breg5
   .byte  0                       # 0
   
@@ -47,7 +47,7 @@
   .byte  4                       # DW_LLE_offset_pair
   .uleb128 0x30                  #   starting offset
   .uleb128 0x40                  #   ending offset
-  .short  3                      # Loc expr size
+  .byte  3                       # Loc expr size
   .byte  118                     # DW_OP_breg6
   .byte  120                     # -8
   .byte  6                       # DW_OP_deref
@@ -55,7 +55,7 @@
   .byte  8                       # DW_LLE_start_length
   .quad  0x700                   # Some address
   .uleb128 0x10                  #   length
-  .short  2                      # Loc expr size
+  .byte  2                       # Loc expr size
   .byte  117                     # DW_OP_breg5
   .byte  0                       # 0
   
diff --git a/llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll b/llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll
new file mode 100644
index 00000000000000..d841c6c05c9b76
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/global_ctors_2to3.ll
@@ -0,0 +1,18 @@
+; MSan converts 2-element global_ctors to 3-element when adding the new entry.
+; RUN: opt < %s -msan-with-comdat -S -passes=msan 2>&1 | FileCheck %s
+; RUN: opt < %s -msan -msan-with-comdat -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: $msan.module_ctor = comdat any
+; CHECK: @llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @msan.module_ctor, i8* bitcast (void ()* @msan.module_ctor to i8*) }]
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
+
+define internal void @f() {
+entry:
+  ret void
+}
+
+; CHECK: define internal void @msan.module_ctor() comdat {
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
index f4cbc637ef1aac..569c2320c5c914 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -9,7 +9,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @llvm.global_ctors {{.*}} { i32 0, void ()* @__msan_init, i8* null }
+; CHECK: @llvm.global_ctors {{.*}} { i32 0, void ()* @msan.module_ctor, i8* null }
 
 ; Check the presence and the linkage type of __msan_track_origins and
 ; other interface symbols.
@@ -991,4 +991,5 @@ define i8* @MismatchingCallMustTailCall(i32 %a) sanitize_memory {
 ; CHECK-NEXT: ret i8*
 
 
-; CHECK: declare void @__msan_init()
+; CHECK-LABEL: define internal void @msan.module_ctor() {
+; CHECK: call void @__msan_init()
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll
index b7847db06ac27c..4f316be2357987 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll
@@ -1,6 +1,9 @@
 ; Make sure MSan doesn't insert shadow checks for @llvm.is.constant.* arguments.
 
+; RUN: opt < %s -msan-kernel=1 -S -passes=msan 2>&1 | FileCheck                \
+; RUN: -check-prefixes=CHECK %s
 ; RUN: opt < %s -msan -msan-kernel=1 -S | FileCheck -check-prefixes=CHECK %s
+; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck -check-prefixes=CHECK %s
 ; RUN: opt < %s -msan -S | FileCheck -check-prefixes=CHECK %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/MC/Disassembler/X86/fp-stack.txt b/llvm/test/MC/Disassembler/X86/fp-stack.txt
index 8c4ad47eb8732a..1b1687b2a204b6 100644
--- a/llvm/test/MC/Disassembler/X86/fp-stack.txt
+++ b/llvm/test/MC/Disassembler/X86/fp-stack.txt
@@ -1,52 +1,52 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
 # RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
 
-# CHECK: fadd %st(0)
+# CHECK: fadd %st(0), %st
 0xd8,0xc0
 
-# CHECK: fadd %st(1)
+# CHECK: fadd %st(1), %st
 0xd8,0xc1
 
-# CHECK: fadd %st(2)
+# CHECK: fadd %st(2), %st
 0xd8,0xc2
 
-# CHECK: fadd %st(3)
+# CHECK: fadd %st(3), %st
 0xd8,0xc3
 
-# CHECK: fadd %st(4)
+# CHECK: fadd %st(4), %st
 0xd8,0xc4
 
-# CHECK: fadd %st(5)
+# CHECK: fadd %st(5), %st
 0xd8,0xc5
 
-# CHECK: fadd %st(6)
+# CHECK: fadd %st(6), %st
 0xd8,0xc6
 
-# CHECK: fadd %st(7)
+# CHECK: fadd %st(7), %st
 0xd8,0xc7
 
-# CHECK: fmul %st(0)
+# CHECK: fmul %st(0), %st
 0xd8,0xc8
 
-# CHECK: fmul %st(1)
+# CHECK: fmul %st(1), %st
 0xd8,0xc9
 
-# CHECK: fmul %st(2)
+# CHECK: fmul %st(2), %st
 0xd8,0xca
 
-# CHECK: fmul %st(3)
+# CHECK: fmul %st(3), %st
 0xd8,0xcb
 
-# CHECK: fmul %st(4)
+# CHECK: fmul %st(4), %st
 0xd8,0xcc
 
-# CHECK: fmul %st(5)
+# CHECK: fmul %st(5), %st
 0xd8,0xcd
 
-# CHECK: fmul %st(6)
+# CHECK: fmul %st(6), %st
 0xd8,0xce
 
-# CHECK: fmul %st(7)
+# CHECK: fmul %st(7), %st
 0xd8,0xcf
 
 # CHECK: fcom %st(0)
@@ -97,100 +97,100 @@
 # CHECK: fcomp %st(7)
 0xd8,0xdf
 
-# CHECK: fsub %st(0)
+# CHECK: fsub %st(0), %st
 0xd8,0xe0
 
-# CHECK: fsub %st(1)
+# CHECK: fsub %st(1), %st
 0xd8,0xe1
 
-# CHECK: fsub %st(2)
+# CHECK: fsub %st(2), %st
 0xd8,0xe2
 
-# CHECK: fsub %st(3)
+# CHECK: fsub %st(3), %st
 0xd8,0xe3
 
-# CHECK: fsub %st(4)
+# CHECK: fsub %st(4), %st
 0xd8,0xe4
 
-# CHECK: fsub %st(5)
+# CHECK: fsub %st(5), %st
 0xd8,0xe5
 
-# CHECK: fsub %st(6)
+# CHECK: fsub %st(6), %st
 0xd8,0xe6
 
-# CHECK: fsub %st(7)
+# CHECK: fsub %st(7), %st
 0xd8,0xe7
 
-# CHECK: fsubr %st(0)
+# CHECK: fsubr %st(0), %st
 0xd8,0xe8
 
-# CHECK: fsubr %st(1)
+# CHECK: fsubr %st(1), %st
 0xd8,0xe9
 
-# CHECK: fsubr %st(2)
+# CHECK: fsubr %st(2), %st
 0xd8,0xea
 
-# CHECK: fsubr %st(3)
+# CHECK: fsubr %st(3), %st
 0xd8,0xeb
 
-# CHECK: fsubr %st(4)
+# CHECK: fsubr %st(4), %st
 0xd8,0xec
 
-# CHECK: fsubr %st(5)
+# CHECK: fsubr %st(5), %st
 0xd8,0xed
 
-# CHECK: fsubr %st(6)
+# CHECK: fsubr %st(6), %st
 0xd8,0xee
 
-# CHECK: fsubr %st(7)
+# CHECK: fsubr %st(7), %st
 0xd8,0xef
 
-# CHECK: fdiv %st(0)
+# CHECK: fdiv %st(0), %st
 0xd8,0xf0
 
-# CHECK: fdiv %st(1)
+# CHECK: fdiv %st(1), %st
 0xd8,0xf1
 
-# CHECK: fdiv %st(2)
+# CHECK: fdiv %st(2), %st
 0xd8,0xf2
 
-# CHECK: fdiv %st(3)
+# CHECK: fdiv %st(3), %st
 0xd8,0xf3
 
-# CHECK: fdiv %st(4)
+# CHECK: fdiv %st(4), %st
 0xd8,0xf4
 
-# CHECK: fdiv %st(5)
+# CHECK: fdiv %st(5), %st
 0xd8,0xf5
 
-# CHECK: fdiv %st(6)
+# CHECK: fdiv %st(6), %st
 0xd8,0xf6
 
-# CHECK: fdiv %st(7)
+# CHECK: fdiv %st(7), %st
 0xd8,0xf7
 
-# CHECK: fdivr %st(0)
+# CHECK: fdivr %st(0), %st
 0xd8,0xf8
 
-# CHECK: fdivr %st(1)
+# CHECK: fdivr %st(1), %st
 0xd8,0xf9
 
-# CHECK: fdivr %st(2)
+# CHECK: fdivr %st(2), %st
 0xd8,0xfa
 
-# CHECK: fdivr %st(3)
+# CHECK: fdivr %st(3), %st
 0xd8,0xfb
 
-# CHECK: fdivr %st(4)
+# CHECK: fdivr %st(4), %st
 0xd8,0xfc
 
-# CHECK: fdivr %st(5)
+# CHECK: fdivr %st(5), %st
 0xd8,0xfd
 
-# CHECK: fdivr %st(6)
+# CHECK: fdivr %st(6), %st
 0xd8,0xfe
 
-# CHECK: fdivr %st(7)
+# CHECK: fdivr %st(7), %st
 0xd8,0xff
 
 # CHECK: fld %st(0)
@@ -325,199 +325,199 @@
 # CHECK: fcos
 0xd9,0xff
 
-# CHECK: fcmovb %st(0), %st(0)
+# CHECK: fcmovb %st(0), %st
 0xda,0xc0
 
-# CHECK: fcmovb %st(1), %st(0)
+# CHECK: fcmovb %st(1), %st
 0xda,0xc1
 
-# CHECK: fcmovb %st(2), %st(0)
+# CHECK: fcmovb %st(2), %st
 0xda,0xc2
 
-# CHECK: fcmovb %st(3), %st(0)
+# CHECK: fcmovb %st(3), %st
 0xda,0xc3
 
-# CHECK: fcmovb %st(4), %st(0)
+# CHECK: fcmovb %st(4), %st
 0xda,0xc4
 
-# CHECK: fcmovb %st(5), %st(0)
+# CHECK: fcmovb %st(5), %st
 0xda,0xc5
 
-# CHECK: fcmovb %st(6), %st(0)
+# CHECK: fcmovb %st(6), %st
 0xda,0xc6
 
-# CHECK: fcmovb %st(7), %st(0)
+# CHECK: fcmovb %st(7), %st
 0xda,0xc7
 
-# CHECK: fcmove %st(0), %st(0)
+# CHECK: fcmove %st(0), %st
 0xda,0xc8
 
-# CHECK: fcmove %st(1), %st(0)
+# CHECK: fcmove %st(1), %st
 0xda,0xc9
 
-# CHECK: fcmove %st(2), %st(0)
+# CHECK: fcmove %st(2), %st
 0xda,0xca
 
-# CHECK: fcmove %st(3), %st(0)
+# CHECK: fcmove %st(3), %st
 0xda,0xcb
 
-# CHECK: fcmove %st(4), %st(0)
+# CHECK: fcmove %st(4), %st
 0xda,0xcc
 
-# CHECK: fcmove %st(5), %st(0)
+# CHECK: fcmove %st(5), %st
 0xda,0xcd
 
-# CHECK: fcmove %st(6), %st(0)
+# CHECK: fcmove %st(6), %st
 0xda,0xce
 
-# CHECK: fcmove %st(7), %st(0)
+# CHECK: fcmove %st(7), %st
 0xda,0xcf
 
-# CHECK: fcmovbe %st(0), %st(0)
+# CHECK: fcmovbe %st(0), %st
 0xda,0xd0
 
-# CHECK: fcmovbe %st(1), %st(0)
+# CHECK: fcmovbe %st(1), %st
 0xda,0xd1
 
-# CHECK: fcmovbe %st(2), %st(0)
+# CHECK: fcmovbe %st(2), %st
 0xda,0xd2
 
-# CHECK: fcmovbe %st(3), %st(0)
+# CHECK: fcmovbe %st(3), %st
 0xda,0xd3
 
-# CHECK: fcmovbe %st(4), %st(0)
+# CHECK: fcmovbe %st(4), %st
 0xda,0xd4
 
-# CHECK: fcmovbe %st(5), %st(0)
+# CHECK: fcmovbe %st(5), %st
 0xda,0xd5
 
-# CHECK: fcmovbe %st(6), %st(0)
+# CHECK: fcmovbe %st(6), %st
 0xda,0xd6
 
-# CHECK: fcmovbe %st(7), %st(0)
+# CHECK: fcmovbe %st(7), %st
 0xda,0xd7
 
-# CHECK: fcmovu %st(0), %st(0)
+# CHECK: fcmovu %st(0), %st
 0xda,0xd8
 
-# CHECK: fcmovu %st(1), %st(0)
+# CHECK: fcmovu %st(1), %st
 0xda,0xd9
 
-# CHECK: fcmovu %st(2), %st(0)
+# CHECK: fcmovu %st(2), %st
 0xda,0xda
 
-# CHECK: fcmovu %st(3), %st(0)
+# CHECK: fcmovu %st(3), %st
 0xda,0xdb
 
-# CHECK: fcmovu %st(4), %st(0)
+# CHECK: fcmovu %st(4), %st
 0xda,0xdc
 
-# CHECK: fcmovu %st(5), %st(0)
+# CHECK: fcmovu %st(5), %st
 0xda,0xdd
 
-# CHECK: fcmovu %st(6), %st(0)
+# CHECK: fcmovu %st(6), %st
 0xda,0xde
 
-# CHECK: fcmovu %st(7), %st(0)
+# CHECK: fcmovu %st(7), %st
 0xda,0xdf
 
 # CHECK: fucompp
 0xda,0xe9
 
-# CHECK: fcmovnb %st(0), %st(0)
+# CHECK: fcmovnb %st(0), %st
 0xdb,0xc0
 
-# CHECK: fcmovnb %st(1), %st(0)
+# CHECK: fcmovnb %st(1), %st
 0xdb,0xc1
 
-# CHECK: fcmovnb %st(2), %st(0)
+# CHECK: fcmovnb %st(2), %st
 0xdb,0xc2
 
-# CHECK: fcmovnb %st(3), %st(0)
+# CHECK: fcmovnb %st(3), %st
 0xdb,0xc3
 
-# CHECK: fcmovnb %st(4), %st(0)
+# CHECK: fcmovnb %st(4), %st
 0xdb,0xc4
 
-# CHECK: fcmovnb %st(5), %st(0)
+# CHECK: fcmovnb %st(5), %st
 0xdb,0xc5
 
-# CHECK: fcmovnb %st(6), %st(0)
+# CHECK: fcmovnb %st(6), %st
 0xdb,0xc6
 
-# CHECK: fcmovnb %st(7), %st(0)
+# CHECK: fcmovnb %st(7), %st
 0xdb,0xc7
 
-# CHECK: fcmovne %st(0), %st(0)
+# CHECK: fcmovne %st(0), %st
 0xdb,0xc8
 
-# CHECK: fcmovne %st(1), %st(0)
+# CHECK: fcmovne %st(1), %st
 0xdb,0xc9
 
-# CHECK: fcmovne %st(2), %st(0)
+# CHECK: fcmovne %st(2), %st
 0xdb,0xca
 
-# CHECK: fcmovne %st(3), %st(0)
+# CHECK: fcmovne %st(3), %st
 0xdb,0xcb
 
-# CHECK: fcmovne %st(4), %st(0)
+# CHECK: fcmovne %st(4), %st
 0xdb,0xcc
 
-# CHECK: fcmovne %st(5), %st(0)
+# CHECK: fcmovne %st(5), %st
 0xdb,0xcd
 
-# CHECK: fcmovne %st(6), %st(0)
+# CHECK: fcmovne %st(6), %st
 0xdb,0xce
 
-# CHECK: fcmovne %st(7), %st(0)
+# CHECK: fcmovne %st(7), %st
 0xdb,0xcf
 
-# CHECK: fcmovnbe %st(0), %st(0)
+# CHECK: fcmovnbe %st(0), %st
 0xdb,0xd0
 
-# CHECK: fcmovnbe %st(1), %st(0)
+# CHECK: fcmovnbe %st(1), %st
 0xdb,0xd1
 
-# CHECK: fcmovnbe %st(2), %st(0)
+# CHECK: fcmovnbe %st(2), %st
 0xdb,0xd2
 
-# CHECK: fcmovnbe %st(3), %st(0)
+# CHECK: fcmovnbe %st(3), %st
 0xdb,0xd3
 
-# CHECK: fcmovnbe %st(4), %st(0)
+# CHECK: fcmovnbe %st(4), %st
 0xdb,0xd4
 
-# CHECK: fcmovnbe %st(5), %st(0)
+# CHECK: fcmovnbe %st(5), %st
 0xdb,0xd5
 
-# CHECK: fcmovnbe %st(6), %st(0)
+# CHECK: fcmovnbe %st(6), %st
 0xdb,0xd6
 
-# CHECK: fcmovnbe %st(7), %st(0)
+# CHECK: fcmovnbe %st(7), %st
 0xdb,0xd7
 
-# CHECK: fcmovnu %st(0), %st(0)
+# CHECK: fcmovnu %st(0), %st
 0xdb,0xd8
 
-# CHECK: fcmovnu %st(1), %st(0)
+# CHECK: fcmovnu %st(1), %st
 0xdb,0xd9
 
-# CHECK: fcmovnu %st(2), %st(0)
+# CHECK: fcmovnu %st(2), %st
 0xdb,0xda
 
-# CHECK: fcmovnu %st(3), %st(0)
+# CHECK: fcmovnu %st(3), %st
 0xdb,0xdb
 
-# CHECK: fcmovnu %st(4), %st(0)
+# CHECK: fcmovnu %st(4), %st
 0xdb,0xdc
 
-# CHECK: fcmovnu %st(5), %st(0)
+# CHECK: fcmovnu %st(5), %st
 0xdb,0xdd
 
-# CHECK: fcmovnu %st(6), %st(0)
+# CHECK: fcmovnu %st(6), %st
 0xdb,0xde
 
-# CHECK: fcmovnu %st(7), %st(0)
+# CHECK: fcmovnu %st(7), %st
 0xdb,0xdf
 
 # CHECK: fnclex
@@ -574,148 +574,148 @@
 # CHECK: fcomi %st(7)
 0xdb,0xf7
 
-# CHECK: fadd %st(0), %st(0)
+# CHECK: fadd %st, %st(0)
 0xdc,0xc0
 
-# CHECK: fadd %st(0), %st(1)
+# CHECK: fadd %st, %st(1)
 0xdc,0xc1
 
-# CHECK: fadd %st(0), %st(2)
+# CHECK: fadd %st, %st(2)
 0xdc,0xc2
 
-# CHECK: fadd %st(0), %st(3)
+# CHECK: fadd %st, %st(3)
 0xdc,0xc3
 
-# CHECK: fadd %st(0), %st(4)
+# CHECK: fadd %st, %st(4)
 0xdc,0xc4
 
-# CHECK: fadd %st(0), %st(5)
+# CHECK: fadd %st, %st(5)
 0xdc,0xc5
 
-# CHECK: fadd %st(0), %st(6)
+# CHECK: fadd %st, %st(6)
 0xdc,0xc6
 
-# CHECK: fadd %st(0), %st(7)
+# CHECK: fadd %st, %st(7)
 0xdc,0xc7
 
-# CHECK: fmul %st(0), %st(0)
+# CHECK: fmul %st, %st(0)
 0xdc,0xc8
 
-# CHECK: fmul %st(0), %st(1)
+# CHECK: fmul %st, %st(1)
 0xdc,0xc9
 
-# CHECK: fmul %st(0), %st(2)
+# CHECK: fmul %st, %st(2)
 0xdc,0xca
 
-# CHECK: fmul %st(0), %st(3)
+# CHECK: fmul %st, %st(3)
 0xdc,0xcb
 
-# CHECK: fmul %st(0), %st(4)
+# CHECK: fmul %st, %st(4)
 0xdc,0xcc
 
-# CHECK: fmul %st(0), %st(5)
+# CHECK: fmul %st, %st(5)
 0xdc,0xcd
 
-# CHECK: fmul %st(0), %st(6)
+# CHECK: fmul %st, %st(6)
 0xdc,0xce
 
-# CHECK: fmul %st(0), %st(7)
+# CHECK: fmul %st, %st(7)
 0xdc,0xcf
 
-# CHECK: fsub %st(0), %st(0)
+# CHECK: fsub %st, %st(0)
 0xdc,0xe0
 
-# CHECK: fsub %st(0), %st(1)
+# CHECK: fsub %st, %st(1)
 0xdc,0xe1
 
-# CHECK: fsub %st(0), %st(2)
+# CHECK: fsub %st, %st(2)
 0xdc,0xe2
 
-# CHECK: fsub %st(0), %st(3)
+# CHECK: fsub %st, %st(3)
 0xdc,0xe3
 
-# CHECK: fsub %st(0), %st(4)
+# CHECK: fsub %st, %st(4)
 0xdc,0xe4
 
-# CHECK: fsub %st(0), %st(5)
+# CHECK: fsub %st, %st(5)
 0xdc,0xe5
 
-# CHECK: fsub %st(0), %st(6)
+# CHECK: fsub %st, %st(6)
 0xdc,0xe6
 
-# CHECK: fsub %st(0), %st(7)
+# CHECK: fsub %st, %st(7)
 0xdc,0xe7
 
-# CHECK: fsubr %st(0), %st(0)
+# CHECK: fsubr %st, %st(0)
 0xdc,0xe8
 
-# CHECK: fsubr %st(0), %st(1)
+# CHECK: fsubr %st, %st(1)
 0xdc,0xe9
 
-# CHECK: fsubr %st(0), %st(2)
+# CHECK: fsubr %st, %st(2)
 0xdc,0xea
 
-# CHECK: fsubr %st(0), %st(3)
+# CHECK: fsubr %st, %st(3)
 0xdc,0xeb
 
-# CHECK: fsubr %st(0), %st(4)
+# CHECK: fsubr %st, %st(4)
 0xdc,0xec
 
-# CHECK: fsubr %st(0), %st(5)
+# CHECK: fsubr %st, %st(5)
 0xdc,0xed
 
-# CHECK: fsubr %st(0), %st(6)
+# CHECK: fsubr %st, %st(6)
 0xdc,0xee
 
-# CHECK: fsubr %st(0), %st(7)
+# CHECK: fsubr %st, %st(7)
 0xdc,0xef
 
-# CHECK: fdiv %st(0), %st(0)
+# CHECK: fdiv %st, %st(0)
 0xdc,0xf0
 
-# CHECK: fdiv %st(0), %st(1)
+# CHECK: fdiv %st, %st(1)
 0xdc,0xf1
 
-# CHECK: fdiv %st(0), %st(2)
+# CHECK: fdiv %st, %st(2)
 0xdc,0xf2
 
-# CHECK: fdiv %st(0), %st(3)
+# CHECK: fdiv %st, %st(3)
 0xdc,0xf3
 
-# CHECK: fdiv %st(0), %st(4)
+# CHECK: fdiv %st, %st(4)
 0xdc,0xf4
 
-# CHECK: fdiv %st(0), %st(5)
+# CHECK: fdiv %st, %st(5)
 0xdc,0xf5
 
-# CHECK: fdiv %st(0), %st(6)
+# CHECK: fdiv %st, %st(6)
 0xdc,0xf6
 
-# CHECK: fdiv %st(0), %st(7)
+# CHECK: fdiv %st, %st(7)
 0xdc,0xf7
 
-# CHECK: fdivr %st(0), %st(0)
+# CHECK: fdivr %st, %st(0)
 0xdc,0xf8
 
-# CHECK: fdivr %st(0), %st(1)
+# CHECK: fdivr %st, %st(1)
 0xdc,0xf9
 
-# CHECK: fdivr %st(0), %st(2)
+# CHECK: fdivr %st, %st(2)
 0xdc,0xfa
 
-# CHECK: fdivr %st(0), %st(3)
+# CHECK: fdivr %st, %st(3)
 0xdc,0xfb
 
-# CHECK: fdivr %st(0), %st(4)
+# CHECK: fdivr %st, %st(4)
 0xdc,0xfc
 
-# CHECK: fdivr %st(0), %st(5)
+# CHECK: fdivr %st, %st(5)
 0xdc,0xfd
 
-# CHECK: fdivr %st(0), %st(6)
+# CHECK: fdivr %st, %st(6)
 0xdc,0xfe
 
-# CHECK: fdivr %st(0), %st(7)
+# CHECK: fdivr %st, %st(7)
 0xdc,0xff
 
 # CHECK: ffree %st(0)
@@ -838,151 +838,151 @@
 # CHECK: fucomp %st(7)
 0xdd,0xef
 
-# CHECK: faddp %st(0)
+# CHECK: faddp %st, %st(0)
 0xde,0xc0
 
-# CHECK: faddp %st(1)
+# CHECK: faddp %st, %st(1)
 0xde,0xc1
 
-# CHECK: faddp %st(2)
+# CHECK: faddp %st, %st(2)
 0xde,0xc2
 
-# CHECK: faddp %st(3)
+# CHECK: faddp %st, %st(3)
 0xde,0xc3
 
-# CHECK: faddp %st(4)
+# CHECK: faddp %st, %st(4)
 0xde,0xc4
 
-# CHECK: faddp %st(5)
+# CHECK: faddp %st, %st(5)
 0xde,0xc5
 
-# CHECK: faddp %st(6)
+# CHECK: faddp %st, %st(6)
 0xde,0xc6
 
-# CHECK: faddp %st(7)
+# CHECK: faddp %st, %st(7)
 0xde,0xc7
 
-# CHECK: fmulp %st(0)
+# CHECK: fmulp %st, %st(0)
 0xde,0xc8
 
-# CHECK: fmulp %st(1)
+# CHECK: fmulp %st, %st(1)
 0xde,0xc9
 
-# CHECK: fmulp %st(2)
+# CHECK: fmulp %st, %st(2)
 0xde,0xca
 
-# CHECK: fmulp %st(3)
+# CHECK: fmulp %st, %st(3)
 0xde,0xcb
 
-# CHECK: fmulp %st(4)
+# CHECK: fmulp %st, %st(4)
 0xde,0xcc
 
-# CHECK: fmulp %st(5)
+# CHECK: fmulp %st, %st(5)
 0xde,0xcd
 
-# CHECK: fmulp %st(6)
+# CHECK: fmulp %st, %st(6)
 0xde,0xce
 
-# CHECK: fmulp %st(7)
+# CHECK: fmulp %st, %st(7)
 0xde,0xcf
 
 # CHECK: fcompp
 0xde,0xd9
 
-# CHECK: fsubp %st(0)
+# CHECK: fsubp %st, %st(0)
 0xde,0xe0
 
-# CHECK: fsubp %st(1)
+# CHECK: fsubp %st, %st(1)
 0xde,0xe1
 
-# CHECK: fsubp %st(2)
+# CHECK: fsubp %st, %st(2)
 0xde,0xe2
 
-# CHECK: fsubp %st(3)
+# CHECK: fsubp %st, %st(3)
 0xde,0xe3
 
-# CHECK: fsubp %st(4)
+# CHECK: fsubp %st, %st(4)
 0xde,0xe4
 
-# CHECK: fsubp %st(5)
+# CHECK: fsubp %st, %st(5)
 0xde,0xe5
 
-# CHECK: fsubp %st(6)
+# CHECK: fsubp %st, %st(6)
 0xde,0xe6
 
-# CHECK: fsubp %st(7)
+# CHECK: fsubp %st, %st(7)
 0xde,0xe7
 
-# CHECK: fsubrp %st(0)
+# CHECK: fsubrp %st, %st(0)
 0xde,0xe8
 
-# CHECK: fsubrp %st(1)
+# CHECK: fsubrp %st, %st(1)
 0xde,0xe9
 
-# CHECK: fsubrp %st(2)
+# CHECK: fsubrp %st, %st(2)
 0xde,0xea
 
-# CHECK: fsubrp %st(3)
+# CHECK: fsubrp %st, %st(3)
 0xde,0xeb
 
-# CHECK: fsubrp %st(4)
+# CHECK: fsubrp %st, %st(4)
 0xde,0xec
 
-# CHECK: fsubrp %st(5)
+# CHECK: fsubrp %st, %st(5)
 0xde,0xed
 
-# CHECK: fsubrp %st(6)
+# CHECK: fsubrp %st, %st(6)
 0xde,0xee
 
-# CHECK: fsubrp %st(7)
+# CHECK: fsubrp %st, %st(7)
 0xde,0xef
 
-# CHECK: fdivp %st(0)
+# CHECK: fdivp %st, %st(0)
 0xde,0xf0
 
-# CHECK: fdivp %st(1)
+# CHECK: fdivp %st, %st(1)
 0xde,0xf1
 
-# CHECK: fdivp %st(2)
+# CHECK: fdivp %st, %st(2)
 0xde,0xf2
 
-# CHECK: fdivp %st(3)
+# CHECK: fdivp %st, %st(3)
 0xde,0xf3
 
-# CHECK: fdivp %st(4)
+# CHECK: fdivp %st, %st(4)
 0xde,0xf4
 
-# CHECK: fdivp %st(5)
+# CHECK: fdivp %st, %st(5)
 0xde,0xf5
 
-# CHECK: fdivp %st(6)
+# CHECK: fdivp %st, %st(6)
 0xde,0xf6
 
-# CHECK: fdivp %st(7)
+# CHECK: fdivp %st, %st(7)
 0xde,0xf7
 
-# CHECK: fdivrp %st(0)
+# CHECK: fdivrp %st, %st(0)
 0xde,0xf8
 
-# CHECK: fdivrp %st(1)
+# CHECK: fdivrp %st, %st(1)
 0xde,0xf9
 
-# CHECK: fdivrp %st(2)
+# CHECK: fdivrp %st, %st(2)
 0xde,0xfa
 
-# CHECK: fdivrp %st(3)
+# CHECK: fdivrp %st, %st(3)
 0xde,0xfb
 
-# CHECK: fdivrp %st(4)
+# CHECK: fdivrp %st, %st(4)
 0xde,0xfc
 
-# CHECK: fdivrp %st(5)
+# CHECK: fdivrp %st, %st(5)
 0xde,0xfd
 
-# CHECK: fdivrp %st(6)
+# CHECK: fdivrp %st, %st(6)
 0xde,0xfe
 
-# CHECK: fdivrp %st(7)
+# CHECK: fdivrp %st, %st(7)
 0xde,0xff
 
 # CHECK: ffreep %st(0)
diff --git a/llvm/test/MC/Disassembler/X86/x86-16.txt b/llvm/test/MC/Disassembler/X86/x86-16.txt
index 43cd09516c3b16..286aa88489cb48 100644
--- a/llvm/test/MC/Disassembler/X86/x86-16.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-16.txt
@@ -759,10 +759,10 @@
 # CHECK: strl %eax
 0x66 0x0f 0x00 0xc8
 
-# CHECK: fsubp %st(1)
+# CHECK: fsubp %st, %st(1)
 0xde 0xe1
 
-# CHECK: fsubp %st(2)
+# CHECK: fsubp %st, %st(2)
 0xde 0xe2
 
 # CHECKX: nop
diff --git a/llvm/test/MC/MachO/file-single.s b/llvm/test/MC/MachO/file-single.s
deleted file mode 100644
index 747af22750af7b..00000000000000
--- a/llvm/test/MC/MachO/file-single.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// RUN: not llvm-mc -triple i386-apple-darwin9 %s -o /dev/null 2>&1 | FileCheck %s
-
-// Previously this crashed MC.
-
-// CHECK: error: target does not support '.file' without a number
-
-        .file "dir/foo"
-        nop
diff --git a/llvm/test/MC/MachO/file.s b/llvm/test/MC/MachO/file.s
index 3ddfb2efe22468..eddbb599d97a27 100644
--- a/llvm/test/MC/MachO/file.s
+++ b/llvm/test/MC/MachO/file.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -section-data | FileCheck %s
 
+// This number-less file directive is ignored on MachO.
+        .file "bar/baz.s"
+
         .file	1 "dir/foo"
         nop
 
diff --git a/llvm/test/MC/WebAssembly/external-func-address.ll b/llvm/test/MC/WebAssembly/external-func-address.ll
index 60ec23a1a8ed3d..8e36c76e84f3b9 100644
--- a/llvm/test/MC/WebAssembly/external-func-address.ll
+++ b/llvm/test/MC/WebAssembly/external-func-address.ll
@@ -8,7 +8,7 @@ target triple = "wasm32-unknown-unknown"
 declare void @f0(i32) #0
 @ptr_to_f0 = hidden global void (i32)* @f0, align 4
 
-attributes #0 = { "wasm-import-module"="somewhere" }
+attributes #0 = { "wasm-import-module"="somewhere" "wasm-import-name"="something" }
 
 declare void @f1(i32) #1
 @ptr_to_f1 = hidden global void (i32)* @f1, align 4
@@ -47,7 +47,7 @@ define void @call(i32) {
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         SigIndex:        1
 ; CHECK:            - Module:          somewhere
-; CHECK-NEXT:         Field:           f0
+; CHECK-NEXT:         Field:           something
 ; CHECK:            - Module:          env
 ; CHECK-NEXT:         Field:           f1
 ; CHECK-NEXT:         Kind:            FUNCTION
diff --git a/llvm/test/MC/WebAssembly/import-module.ll b/llvm/test/MC/WebAssembly/import-module.ll
new file mode 100644
index 00000000000000..461d5c20ae9b78
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/import-module.ll
@@ -0,0 +1,31 @@
+; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define void @test() {
+  call void @foo()
+  call void @plain()
+  ret void
+}
+
+declare void @foo() #0
+declare void @plain()
+
+attributes #0 = { "wasm-import-module"="bar" "wasm-import-name"="qux" }
+
+; CHECK:        - Type:            IMPORT
+; CHECK-NEXT:     Imports:         
+; CHECK:            - Module:          bar
+; CHECK-NEXT:         Field:           qux
+; CHECK-NEXT:         Kind:            FUNCTION
+
+; CHECK:            - Module:          env
+; CHECK-NEXT:         Field:           plain
+; CHECK-NEXT:         Kind:            FUNCTION
+
+; CHECK:        - Type:            CUSTOM
+; CHECK:              Name:            foo
+; CHECK-NEXT:         Flags:           [ UNDEFINED ]
+
+; CHECK:              Name:            plain
+; CHECK-NEXT:         Flags:           [ UNDEFINED ]
diff --git a/llvm/test/MC/X86/PPRO-32.s b/llvm/test/MC/X86/PPRO-32.s
index bbd933e58af093..6deea6de9c449b 100644
--- a/llvm/test/MC/X86/PPRO-32.s
+++ b/llvm/test/MC/X86/PPRO-32.s
@@ -64,37 +64,37 @@ cmovpl %eax, %eax
 // CHECK: encoding: [0x0f,0x48,0xc0]        
 cmovsl %eax, %eax 
 
-// CHECK: fcmovbe %st(4), %st(0) 
+// CHECK: fcmovbe %st(4), %st 
 // CHECK: encoding: [0xda,0xd4]        
-fcmovbe %st(4), %st(0) 
+fcmovbe %st(4), %st 
 
-// CHECK: fcmovb %st(4), %st(0) 
+// CHECK: fcmovb %st(4), %st 
 // CHECK: encoding: [0xda,0xc4]        
-fcmovb %st(4), %st(0) 
+fcmovb %st(4), %st 
 
-// CHECK: fcmove %st(4), %st(0) 
+// CHECK: fcmove %st(4), %st 
 // CHECK: encoding: [0xda,0xcc]        
-fcmove %st(4), %st(0) 
+fcmove %st(4), %st 
 
-// CHECK: fcmovnbe %st(4), %st(0) 
+// CHECK: fcmovnbe %st(4), %st 
 // CHECK: encoding: [0xdb,0xd4]        
-fcmovnbe %st(4), %st(0) 
+fcmovnbe %st(4), %st 
 
-// CHECK: fcmovnb %st(4), %st(0) 
+// CHECK: fcmovnb %st(4), %st 
 // CHECK: encoding: [0xdb,0xc4]        
-fcmovnb %st(4), %st(0) 
+fcmovnb %st(4), %st 
 
-// CHECK: fcmovne %st(4), %st(0) 
+// CHECK: fcmovne %st(4), %st 
 // CHECK: encoding: [0xdb,0xcc]        
-fcmovne %st(4), %st(0) 
+fcmovne %st(4), %st 
 
-// CHECK: fcmovnu %st(4), %st(0) 
+// CHECK: fcmovnu %st(4), %st 
 // CHECK: encoding: [0xdb,0xdc]        
-fcmovnu %st(4), %st(0) 
+fcmovnu %st(4), %st 
 
-// CHECK: fcmovu %st(4), %st(0) 
+// CHECK: fcmovu %st(4), %st 
 // CHECK: encoding: [0xda,0xdc]        
-fcmovu %st(4), %st(0) 
+fcmovu %st(4), %st 
 
 // CHECK: fcomi %st(4) 
 // CHECK: encoding: [0xdb,0xf4]         
diff --git a/llvm/test/MC/X86/PPRO-64.s b/llvm/test/MC/X86/PPRO-64.s
index a41d4a0f93cc98..8004772e8f3def 100644
--- a/llvm/test/MC/X86/PPRO-64.s
+++ b/llvm/test/MC/X86/PPRO-64.s
@@ -64,37 +64,37 @@ cmovpl %r13d, %r13d
 // CHECK: encoding: [0x45,0x0f,0x48,0xed]        
 cmovsl %r13d, %r13d 
 
-// CHECK: fcmovbe %st(4), %st(0) 
+// CHECK: fcmovbe %st(4), %st 
 // CHECK: encoding: [0xda,0xd4]        
-fcmovbe %st(4), %st(0) 
+fcmovbe %st(4), %st 
 
-// CHECK: fcmovb %st(4), %st(0) 
+// CHECK: fcmovb %st(4), %st 
 // CHECK: encoding: [0xda,0xc4]        
-fcmovb %st(4), %st(0) 
+fcmovb %st(4), %st 
 
-// CHECK: fcmove %st(4), %st(0) 
+// CHECK: fcmove %st(4), %st 
 // CHECK: encoding: [0xda,0xcc]        
-fcmove %st(4), %st(0) 
+fcmove %st(4), %st 
 
-// CHECK: fcmovnbe %st(4), %st(0) 
+// CHECK: fcmovnbe %st(4), %st 
 // CHECK: encoding: [0xdb,0xd4]        
-fcmovnbe %st(4), %st(0) 
+fcmovnbe %st(4), %st 
 
-// CHECK: fcmovnb %st(4), %st(0) 
+// CHECK: fcmovnb %st(4), %st 
 // CHECK: encoding: [0xdb,0xc4]        
-fcmovnb %st(4), %st(0) 
+fcmovnb %st(4), %st 
 
-// CHECK: fcmovne %st(4), %st(0) 
+// CHECK: fcmovne %st(4), %st 
 // CHECK: encoding: [0xdb,0xcc]        
-fcmovne %st(4), %st(0) 
+fcmovne %st(4), %st 
 
-// CHECK: fcmovnu %st(4), %st(0) 
+// CHECK: fcmovnu %st(4), %st 
 // CHECK: encoding: [0xdb,0xdc]        
-fcmovnu %st(4), %st(0) 
+fcmovnu %st(4), %st 
 
-// CHECK: fcmovu %st(4), %st(0) 
+// CHECK: fcmovu %st(4), %st 
 // CHECK: encoding: [0xda,0xdc]        
-fcmovu %st(4), %st(0) 
+fcmovu %st(4), %st 
 
 // CHECK: fcomi %st(4) 
 // CHECK: encoding: [0xdb,0xf4]         
diff --git a/llvm/test/MC/X86/X87-32.s b/llvm/test/MC/X86/X87-32.s
index 967763de93e057..d414ab65b7dc45 100755
--- a/llvm/test/MC/X86/X87-32.s
+++ b/llvm/test/MC/X86/X87-32.s
@@ -31,7 +31,7 @@ faddl 64(%edx,%eax)
 // CHECK: encoding: [0xdc,0x02]        
 faddl (%edx) 
 
-// CHECK: faddp %st(4) 
+// CHECK: faddp %st, %st(4) 
 // CHECK: encoding: [0xde,0xc4]        
 faddp %st(4) 
 
@@ -59,11 +59,11 @@ fadds 64(%edx,%eax)
 // CHECK: encoding: [0xd8,0x02]        
 fadds (%edx) 
 
-// CHECK: fadd %st(0), %st(4) 
+// CHECK: fadd %st, %st(4) 
 // CHECK: encoding: [0xdc,0xc4]       
-fadd %st(0), %st(4) 
+fadd %st, %st(4) 
 
-// CHECK: fadd %st(4) 
+// CHECK: fadd %st(4), %st
 // CHECK: encoding: [0xd8,0xc4]        
 fadd %st(4) 
 
@@ -259,7 +259,7 @@ fdivl 64(%edx,%eax)
 // CHECK: encoding: [0xdc,0x32]        
 fdivl (%edx) 
 
-// CHECK: fdivp %st(4) 
+// CHECK: fdivp %st, %st(4) 
 // CHECK: encoding: [0xde,0xf4]        
 fdivp %st(4) 
 
@@ -287,7 +287,7 @@ fdivrl 64(%edx,%eax)
 // CHECK: encoding: [0xdc,0x3a]        
 fdivrl (%edx) 
 
-// CHECK: fdivrp %st(4) 
+// CHECK: fdivrp %st, %st(4) 
 // CHECK: encoding: [0xde,0xfc]        
 fdivrp %st(4) 
 
@@ -315,11 +315,11 @@ fdivrs 64(%edx,%eax)
 // CHECK: encoding: [0xd8,0x3a]        
 fdivrs (%edx) 
 
-// CHECK: fdivr %st(0), %st(4) 
+// CHECK: fdivr %st, %st(4) 
 // CHECK: encoding: [0xdc,0xfc]       
-fdivr %st(0), %st(4) 
+fdivr %st, %st(4) 
 
-// CHECK: fdivr %st(4) 
+// CHECK: fdivr %st(4), %st
 // CHECK: encoding: [0xd8,0xfc]        
 fdivr %st(4) 
 
@@ -347,11 +347,11 @@ fdivs 64(%edx,%eax)
 // CHECK: encoding: [0xd8,0x32]        
 fdivs (%edx) 
 
-// CHECK: fdiv %st(0), %st(4) 
+// CHECK: fdiv %st, %st(4) 
 // CHECK: encoding: [0xdc,0xf4]       
-fdiv %st(0), %st(4) 
+fdiv %st, %st(4) 
 
-// CHECK: fdiv %st(4) 
+// CHECK: fdiv %st(4), %st
 // CHECK: encoding: [0xd8,0xf4]        
 fdiv %st(4) 
 
@@ -1119,7 +1119,7 @@ fmull 64(%edx,%eax)
 // CHECK: encoding: [0xdc,0x0a]        
 fmull (%edx) 
 
-// CHECK: fmulp %st(4) 
+// CHECK: fmulp %st, %st(4) 
 // CHECK: encoding: [0xde,0xcc]        
 fmulp %st(4) 
 
@@ -1147,11 +1147,11 @@ fmuls 64(%edx,%eax)
 // CHECK: encoding: [0xd8,0x0a]        
 fmuls (%edx) 
 
-// CHECK: fmul %st(0), %st(4) 
+// CHECK: fmul %st, %st(4) 
 // CHECK: encoding: [0xdc,0xcc]       
-fmul %st(0), %st(4) 
+fmul %st, %st(4) 
 
-// CHECK: fmul %st(4) 
+// CHECK: fmul %st(4), %st
 // CHECK: encoding: [0xd8,0xcc]        
 fmul %st(4) 
 
@@ -1479,7 +1479,7 @@ fsubl 64(%edx,%eax)
 // CHECK: encoding: [0xdc,0x22]        
 fsubl (%edx) 
 
-// CHECK: fsubp %st(4) 
+// CHECK: fsubp %st, %st(4) 
 // CHECK: encoding: [0xde,0xe4]        
 fsubp %st(4) 
 
@@ -1507,7 +1507,7 @@ fsubrl 64(%edx,%eax)
 // CHECK: encoding: [0xdc,0x2a]        
 fsubrl (%edx) 
 
-// CHECK: fsubrp %st(4) 
+// CHECK: fsubrp %st, %st(4) 
 // CHECK: encoding: [0xde,0xec]        
 fsubrp %st(4) 
 
@@ -1535,11 +1535,11 @@ fsubrs 64(%edx,%eax)
 // CHECK: encoding: [0xd8,0x2a]        
 fsubrs (%edx) 
 
-// CHECK: fsubr %st(0), %st(4) 
+// CHECK: fsubr %st, %st(4) 
 // CHECK: encoding: [0xdc,0xec]       
-fsubr %st(0), %st(4) 
+fsubr %st, %st(4) 
 
-// CHECK: fsubr %st(4) 
+// CHECK: fsubr %st(4), %st
 // CHECK: encoding: [0xd8,0xec]        
 fsubr %st(4) 
 
@@ -1567,11 +1567,11 @@ fsubs 64(%edx,%eax)
 // CHECK: encoding: [0xd8,0x22]        
 fsubs (%edx) 
 
-// CHECK: fsub %st(0), %st(4) 
+// CHECK: fsub %st, %st(4) 
 // CHECK: encoding: [0xdc,0xe4]       
-fsub %st(0), %st(4) 
+fsub %st, %st(4) 
 
-// CHECK: fsub %st(4) 
+// CHECK: fsub %st(4), %st
 // CHECK: encoding: [0xd8,0xe4]        
 fsub %st(4) 
 
diff --git a/llvm/test/MC/X86/X87-64.s b/llvm/test/MC/X86/X87-64.s
index a3b76b4e4b0ad4..1103f326ea047a 100755
--- a/llvm/test/MC/X86/X87-64.s
+++ b/llvm/test/MC/X86/X87-64.s
@@ -31,7 +31,7 @@ faddl 64(%rdx,%rax)
 // CHECK: encoding: [0xdc,0x02]        
 faddl (%rdx) 
 
-// CHECK: faddp %st(4) 
+// CHECK: faddp %st, %st(4) 
 // CHECK: encoding: [0xde,0xc4]        
 faddp %st(4) 
 
@@ -59,11 +59,11 @@ fadds 64(%rdx,%rax)
 // CHECK: encoding: [0xd8,0x02]        
 fadds (%rdx) 
 
-// CHECK: fadd %st(0), %st(4) 
+// CHECK: fadd %st, %st(4) 
 // CHECK: encoding: [0xdc,0xc4]       
-fadd %st(0), %st(4) 
+fadd %st, %st(4) 
 
-// CHECK: fadd %st(4) 
+// CHECK: fadd %st(4), %st
 // CHECK: encoding: [0xd8,0xc4]        
 fadd %st(4) 
 
@@ -259,7 +259,7 @@ fdivl 64(%rdx,%rax)
 // CHECK: encoding: [0xdc,0x32]        
 fdivl (%rdx) 
 
-// CHECK: fdivp %st(4) 
+// CHECK: fdivp %st, %st(4) 
 // CHECK: encoding: [0xde,0xf4]        
 fdivp %st(4) 
 
@@ -287,7 +287,7 @@ fdivrl 64(%rdx,%rax)
 // CHECK: encoding: [0xdc,0x3a]        
 fdivrl (%rdx) 
 
-// CHECK: fdivrp %st(4) 
+// CHECK: fdivrp %st, %st(4) 
 // CHECK: encoding: [0xde,0xfc]        
 fdivrp %st(4) 
 
@@ -315,11 +315,11 @@ fdivrs 64(%rdx,%rax)
 // CHECK: encoding: [0xd8,0x3a]        
 fdivrs (%rdx) 
 
-// CHECK: fdivr %st(0), %st(4) 
+// CHECK: fdivr %st, %st(4) 
 // CHECK: encoding: [0xdc,0xfc]       
-fdivr %st(0), %st(4) 
+fdivr %st, %st(4) 
 
-// CHECK: fdivr %st(4) 
+// CHECK: fdivr %st(4), %st
 // CHECK: encoding: [0xd8,0xfc]        
 fdivr %st(4) 
 
@@ -347,11 +347,11 @@ fdivs 64(%rdx,%rax)
 // CHECK: encoding: [0xd8,0x32]        
 fdivs (%rdx) 
 
-// CHECK: fdiv %st(0), %st(4) 
+// CHECK: fdiv %st, %st(4) 
 // CHECK: encoding: [0xdc,0xf4]       
-fdiv %st(0), %st(4) 
+fdiv %st, %st(4) 
 
-// CHECK: fdiv %st(4) 
+// CHECK: fdiv %st(4), %st
 // CHECK: encoding: [0xd8,0xf4]        
 fdiv %st(4) 
 
@@ -1119,7 +1119,7 @@ fmull 64(%rdx,%rax)
 // CHECK: encoding: [0xdc,0x0a]        
 fmull (%rdx) 
 
-// CHECK: fmulp %st(4) 
+// CHECK: fmulp %st, %st(4) 
 // CHECK: encoding: [0xde,0xcc]        
 fmulp %st(4) 
 
@@ -1147,9 +1147,9 @@ fmuls 64(%rdx,%rax)
 // CHECK: encoding: [0xd8,0x0a]        
 fmuls (%rdx) 
 
-// CHECK: fmul %st(0), %st(4) 
+// CHECK: fmul %st, %st(4) 
 // CHECK: encoding: [0xdc,0xcc]       
-fmul %st(0), %st(4) 
+fmul %st, %st(4) 
 
 // CHECK: fmul %st(4) 
 // CHECK: encoding: [0xd8,0xcc]        
@@ -1479,7 +1479,7 @@ fsubl 64(%rdx,%rax)
 // CHECK: encoding: [0xdc,0x22]        
 fsubl (%rdx) 
 
-// CHECK: fsubp %st(4) 
+// CHECK: fsubp %st, %st(4) 
 // CHECK: encoding: [0xde,0xe4]        
 fsubp %st(4) 
 
@@ -1507,7 +1507,7 @@ fsubrl 64(%rdx,%rax)
 // CHECK: encoding: [0xdc,0x2a]        
 fsubrl (%rdx) 
 
-// CHECK: fsubrp %st(4) 
+// CHECK: fsubrp %st, %st(4) 
 // CHECK: encoding: [0xde,0xec]        
 fsubrp %st(4) 
 
@@ -1535,11 +1535,11 @@ fsubrs 64(%rdx,%rax)
 // CHECK: encoding: [0xd8,0x2a]        
 fsubrs (%rdx) 
 
-// CHECK: fsubr %st(0), %st(4) 
+// CHECK: fsubr %st, %st(4) 
 // CHECK: encoding: [0xdc,0xec]       
-fsubr %st(0), %st(4) 
+fsubr %st, %st(4) 
 
-// CHECK: fsubr %st(4) 
+// CHECK: fsubr %st(4), %st
 // CHECK: encoding: [0xd8,0xec]        
 fsubr %st(4) 
 
@@ -1567,11 +1567,11 @@ fsubs 64(%rdx,%rax)
 // CHECK: encoding: [0xd8,0x22]        
 fsubs (%rdx) 
 
-// CHECK: fsub %st(0), %st(4) 
+// CHECK: fsub %st, %st(4) 
 // CHECK: encoding: [0xdc,0xe4]       
-fsub %st(0), %st(4) 
+fsub %st, %st(4) 
 
-// CHECK: fsub %st(4) 
+// CHECK: fsub %st(4), %st
 // CHECK: encoding: [0xd8,0xe4]        
 fsub %st(4) 
 
diff --git a/llvm/test/MC/X86/intel-syntax-2.s b/llvm/test/MC/X86/intel-syntax-2.s
index aead5766db4d52..b23965ae52e88e 100644
--- a/llvm/test/MC/X86/intel-syntax-2.s
+++ b/llvm/test/MC/X86/intel-syntax-2.s
@@ -18,14 +18,14 @@ _test2:
 
 _test3:
 fadd 
-// CHECK: faddp %st(1)
+// CHECK: faddp %st, %st(1)
 fmul
-// CHECK: fmulp %st(1)
+// CHECK: fmulp %st, %st(1)
 fsub
-// CHECK: fsubp %st(1)
+// CHECK: fsubp %st, %st(1)
 fsubr
-// CHECK: fsubrp %st(1)
+// CHECK: fsubrp %st, %st(1)
 fdiv
-// CHECK: fdivp %st(1)
+// CHECK: fdivp %st, %st(1)
 fdivr
-// CHECK: fdivrp %st(1)
+// CHECK: fdivrp %st, %st(1)
diff --git a/llvm/test/MC/X86/intel-syntax.s b/llvm/test/MC/X86/intel-syntax.s
index 428a7e4ec41f1f..171357e1f5212e 100644
--- a/llvm/test/MC/X86/intel-syntax.s
+++ b/llvm/test/MC/X86/intel-syntax.s
@@ -556,12 +556,12 @@ fnstsw
 fnstsw AX
 fnstsw WORD PTR [EAX]
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fdivrp %st(1)
-// CHECK: fdivp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
 faddp ST(1), ST(0)
 fmulp ST(1), ST(0)
 fsubp ST(1), ST(0)
@@ -569,12 +569,12 @@ fsubrp ST(1), ST(0)
 fdivp ST(1), ST(0)
 fdivrp ST(1), ST(0)
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fdivrp %st(1)
-// CHECK: fdivp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
 faddp ST(0), ST(1)
 fmulp ST(0), ST(1)
 fsubp ST(0), ST(1)
@@ -582,12 +582,12 @@ fsubrp ST(0), ST(1)
 fdivp ST(0), ST(1)
 fdivrp ST(0), ST(1)
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fdivrp %st(1)
-// CHECK: fdivp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
 faddp ST(1)
 fmulp ST(1)
 fsubp ST(1)
@@ -596,12 +596,12 @@ fdivp ST(1)
 fdivrp ST(1)
 
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fdivrp %st(1)
-// CHECK: fdivp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
 fadd 
 fmul
 fsub
@@ -609,12 +609,12 @@ fsubr
 fdiv
 fdivr
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fdivrp %st(1)
-// CHECK: fdivp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
 faddp
 fmulp
 fsubp
@@ -622,12 +622,12 @@ fsubrp
 fdivp
 fdivrp
 
-// CHECK: fadd %st(1)
-// CHECK: fmul %st(1)
-// CHECK: fsub %st(1)
-// CHECK: fsubr %st(1)
-// CHECK: fdiv %st(1)
-// CHECK: fdivr %st(1)
+// CHECK: fadd %st(1), %st
+// CHECK: fmul %st(1), %st
+// CHECK: fsub %st(1), %st
+// CHECK: fsubr %st(1), %st
+// CHECK: fdiv %st(1), %st
+// CHECK: fdivr %st(1), %st
 fadd ST(0), ST(1)
 fmul ST(0), ST(1)
 fsub ST(0), ST(1)
@@ -635,12 +635,12 @@ fsubr ST(0), ST(1)
 fdiv ST(0), ST(1)
 fdivr ST(0), ST(1)
 
-// CHECK: fadd %st(0), %st(1)
-// CHECK: fmul %st(0), %st(1)
-// CHECK: fsubr %st(0), %st(1)
-// CHECK: fsub %st(0), %st(1)
-// CHECK: fdivr %st(0), %st(1)
-// CHECK: fdiv %st(0), %st(1)
+// CHECK: fadd %st, %st(1)
+// CHECK: fmul %st, %st(1)
+// CHECK: fsubr %st, %st(1)
+// CHECK: fsub %st, %st(1)
+// CHECK: fdivr %st, %st(1)
+// CHECK: fdiv %st, %st(1)
 fadd ST(1), ST(0)
 fmul ST(1), ST(0)
 fsub ST(1), ST(0)
@@ -648,12 +648,12 @@ fsubr ST(1), ST(0)
 fdiv ST(1), ST(0)
 fdivr ST(1), ST(0)
 
-// CHECK: fadd %st(1)
-// CHECK: fmul %st(1)
-// CHECK: fsub %st(1)
-// CHECK: fsubr %st(1)
-// CHECK: fdiv %st(1)
-// CHECK: fdivr %st(1)
+// CHECK: fadd %st(1), %st
+// CHECK: fmul %st(1), %st
+// CHECK: fsub %st(1), %st
+// CHECK: fsubr %st(1), %st
+// CHECK: fdiv %st(1), %st
+// CHECK: fdivr %st(1), %st
 fadd ST(1)
 fmul ST(1)
 fsub ST(1)
diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s
index 50263beaa67567..88ee77a6d4143a 100644
--- a/llvm/test/MC/X86/x86-16.s
+++ b/llvm/test/MC/X86/x86-16.s
@@ -920,11 +920,11 @@ pshufw $90, %mm4, %mm0
 	str %eax
 
 
-// CHECK: fsubp
+// CHECK: fsubp %st, %st(1)
 // CHECK: encoding: [0xde,0xe1]
 fsubp %st,%st(1)
 
-// CHECK: fsubp	%st(2)
+// CHECK: fsubp %st, %st(2)
 // CHECK: encoding: [0xde,0xe2]
 fsubp   %st, %st(2)
 
diff --git a/llvm/test/MC/X86/x86-32-coverage.s b/llvm/test/MC/X86/x86-32-coverage.s
index 32281f41216445..ba59fb357b9741 100644
--- a/llvm/test/MC/X86/x86-32-coverage.s
+++ b/llvm/test/MC/X86/x86-32-coverage.s
@@ -2472,11 +2472,11 @@
 // CHECK:  encoding: [0xda,0x05,0x78,0x56,0x34,0x12]
         	fiaddl	0x12345678
 
-// CHECK: faddp	%st(2)
+// CHECK: faddp	%st, %st(2)
 // CHECK:  encoding: [0xde,0xc2]
         	faddp	%st(2)
 
-// CHECK: fsub	%st(2)
+// CHECK: fsub	%st(2), %st
 // CHECK:  encoding: [0xd8,0xe2]
         	fsub	%st(2)
 
@@ -2504,11 +2504,11 @@
 // CHECK:  encoding: [0xda,0x25,0x78,0x56,0x34,0x12]
         	fisubl	0x12345678
 
-// CHECK: fsubp	%st(2)
+// CHECK: fsubp	%st, %st(2)
 // CHECK:  encoding: [0xde,0xe2]
         	fsubp	%st(2)
 
-// CHECK: fsubr	%st(2)
+// CHECK: fsubr	%st(2), %st
 // CHECK:  encoding: [0xd8,0xea]
         	fsubr	%st(2)
 
@@ -2536,11 +2536,11 @@
 // CHECK:  encoding: [0xda,0x2d,0x78,0x56,0x34,0x12]
         	fisubrl	0x12345678
 
-// CHECK: fsubrp	%st(2)
+// CHECK: fsubrp	%st, %st(2)
 // CHECK:  encoding: [0xde,0xea]
         	fsubrp	%st(2)
 
-// CHECK: fmul	%st(2)
+// CHECK: fmul	%st(2), %st
 // CHECK:  encoding: [0xd8,0xca]
         	fmul	%st(2)
 
@@ -2568,11 +2568,11 @@
 // CHECK:  encoding: [0xda,0x0d,0x78,0x56,0x34,0x12]
         	fimull	0x12345678
 
-// CHECK: fmulp	%st(2)
+// CHECK: fmulp	%st, %st(2)
 // CHECK:  encoding: [0xde,0xca]
         	fmulp	%st(2)
 
-// CHECK: fdiv	%st(2)
+// CHECK: fdiv	%st(2), %st
 // CHECK:  encoding: [0xd8,0xf2]
         	fdiv	%st(2)
 
@@ -2600,11 +2600,11 @@
 // CHECK:  encoding: [0xda,0x35,0x78,0x56,0x34,0x12]
         	fidivl	0x12345678
 
-// CHECK: fdivp	%st(2)
+// CHECK: fdivp	%st, %st(2)
 // CHECK:  encoding: [0xde,0xf2]
         	fdivp	%st(2)
 
-// CHECK: fdivr	%st(2)
+// CHECK: fdivr	%st(2), %st
 // CHECK:  encoding: [0xd8,0xfa]
         	fdivr	%st(2)
 
@@ -2632,7 +2632,7 @@
 // CHECK:  encoding: [0xda,0x3d,0x78,0x56,0x34,0x12]
         	fidivrl	0x12345678
 
-// CHECK: fdivrp	%st(2)
+// CHECK: fdivrp	%st, %st(2)
 // CHECK:  encoding: [0xde,0xfa]
         	fdivrp	%st(2)
 
@@ -2876,35 +2876,35 @@
 // CHECK:  encoding: [0x0f,0x0b]
         	ud2
 
-// CHECK: fcmovb	%st(2), %st(0)
+// CHECK: fcmovb	%st(2), %st
 // CHECK:  encoding: [0xda,0xc2]
         	fcmovb	%st(2),%st
 
-// CHECK: fcmove	%st(2), %st(0)
+// CHECK: fcmove	%st(2), %st
 // CHECK:  encoding: [0xda,0xca]
         	fcmove	%st(2),%st
 
-// CHECK: fcmovbe	%st(2), %st(0)
+// CHECK: fcmovbe	%st(2), %st
 // CHECK:  encoding: [0xda,0xd2]
         	fcmovbe	%st(2),%st
 
-// CHECK: fcmovu	 %st(2), %st(0)
+// CHECK: fcmovu	 %st(2), %st
 // CHECK:  encoding: [0xda,0xda]
         	fcmovu	%st(2),%st
 
-// CHECK: fcmovnb	%st(2), %st(0)
+// CHECK: fcmovnb	%st(2), %st
 // CHECK:  encoding: [0xdb,0xc2]
         	fcmovnb	%st(2),%st
 
-// CHECK: fcmovne	%st(2), %st(0)
+// CHECK: fcmovne	%st(2), %st
 // CHECK:  encoding: [0xdb,0xca]
         	fcmovne	%st(2),%st
 
-// CHECK: fcmovnbe	%st(2), %st(0)
+// CHECK: fcmovnbe	%st(2), %st
 // CHECK:  encoding: [0xdb,0xd2]
         	fcmovnbe	%st(2),%st
 
-// CHECK: fcmovnu	%st(2), %st(0)
+// CHECK: fcmovnu	%st(2), %st
 // CHECK:  encoding: [0xdb,0xda]
         	fcmovnu	%st(2),%st
 
diff --git a/llvm/test/MC/X86/x86-32.s b/llvm/test/MC/X86/x86-32.s
index 69d36032593453..1593c26b9d0297 100644
--- a/llvm/test/MC/X86/x86-32.s
+++ b/llvm/test/MC/X86/x86-32.s
@@ -1055,7 +1055,7 @@ pshufw $90, %mm4, %mm0
 fsubp %st,%st(1)
 
 // PR9164
-// CHECK: fsubp	%st(2)
+// CHECK: fsubp %st, %st(2)
 // CHECK: encoding: [0xde,0xe2]
 fsubp   %st, %st(2)
 
diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s
index df811a6c4cf2ee..e35ba19fc4f2ec 100644
--- a/llvm/test/MC/X86/x86-64.s
+++ b/llvm/test/MC/X86/x86-64.s
@@ -307,13 +307,13 @@ insl	(%dx), %es:(%rdi)
 // CHECK: fxch %st(1)
 // CHECK: fucom %st(1)
 // CHECK: fucomp %st(1)
-// CHECK: faddp %st(1)
-// CHECK: faddp	%st(0)
-// CHECK: fsubp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fdivp %st(1)
-// CHECK: fdivrp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: faddp %st, %st(0)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
 
 fxch
 fucom
@@ -416,21 +416,21 @@ enter $0x7ace,$0x7f
 mov %cs, %ax
 
 // rdar://8456391
-fcmovb %st(1), %st(0)   // CHECK: fcmovb	%st(1), %st(0)
-fcmove %st(1), %st(0)   // CHECK: fcmove	%st(1), %st(0)
-fcmovbe %st(1), %st(0)  // CHECK: fcmovbe	%st(1), %st(0)
-fcmovu %st(1), %st(0)   // CHECK: fcmovu	 %st(1), %st(0)
+fcmovb %st(1), %st   // CHECK: fcmovb	%st(1), %st
+fcmove %st(1), %st   // CHECK: fcmove	%st(1), %st
+fcmovbe %st(1), %st  // CHECK: fcmovbe	%st(1), %st
+fcmovu %st(1), %st   // CHECK: fcmovu	 %st(1), %st
 
-fcmovnb %st(1), %st(0)  // CHECK: fcmovnb	%st(1), %st(0)
-fcmovne %st(1), %st(0)  // CHECK: fcmovne	%st(1), %st(0)
-fcmovnbe %st(1), %st(0) // CHECK: fcmovnbe	%st(1), %st(0)
-fcmovnu %st(1), %st(0)  // CHECK: fcmovnu	%st(1), %st(0)
+fcmovnb %st(1), %st  // CHECK: fcmovnb	%st(1), %st
+fcmovne %st(1), %st  // CHECK: fcmovne	%st(1), %st
+fcmovnbe %st(1), %st // CHECK: fcmovnbe	%st(1), %st
+fcmovnu %st(1), %st  // CHECK: fcmovnu	%st(1), %st
 
-fcmovnae %st(1), %st(0) // CHECK: fcmovb	%st(1), %st(0)
-fcmovna %st(1), %st(0)  // CHECK: fcmovbe	%st(1), %st(0)
+fcmovnae %st(1), %st // CHECK: fcmovb	%st(1), %st
+fcmovna %st(1), %st  // CHECK: fcmovbe	%st(1), %st
 
-fcmovae %st(1), %st(0)  // CHECK: fcmovnb	%st(1), %st(0)
-fcmova %st(1), %st(0)   // CHECK: fcmovnbe	%st(1), %st(0)
+fcmovae %st(1), %st  // CHECK: fcmovnb	%st(1), %st
+fcmova %st(1), %st   // CHECK: fcmovnbe	%st(1), %st
 
 // rdar://8456417
 .byte (88 + 1) & 15  // CHECK: .byte	9
@@ -456,20 +456,20 @@ mov %rdx, %db15
 // CHECK: encoding: [0x44,0x0f,0x23,0xfa]
 
 // rdar://8456371 - Handle commutable instructions written backward.
-// CHECK: 	faddp	%st(1)
-// CHECK:	fmulp	%st(2)
+// CHECK: 	faddp	%st, %st(1)
+// CHECK:	fmulp	%st, %st(2)
 faddp %st, %st(1)
 fmulp %st, %st(2)
 
 // rdar://8468087 - Encode these accurately, they are not synonyms.
-// CHECK: fmul	%st(0), %st(1)
+// CHECK: fmul	%st, %st(1)
 // CHECK: encoding: [0xdc,0xc9]
 // CHECK: fmul	%st(1)
 // CHECK: encoding: [0xd8,0xc9]
 fmul %st, %st(1)
 fmul %st(1), %st
 
-// CHECK: fadd	%st(0), %st(1)
+// CHECK: fadd	%st, %st(1)
 // CHECK: encoding: [0xdc,0xc1]
 // CHECK: fadd	%st(1)
 // CHECK: encoding: [0xd8,0xc1]
@@ -582,15 +582,15 @@ movmskpd	%xmm6, %eax
 // CHECK: encoding: [0x66,0x0f,0x50,0xc6]
 
 // rdar://8491845 - Gas supports commuted forms of non-commutable instructions.
-fdivrp %st(0), %st(1) // CHECK: encoding: [0xde,0xf9]
-fdivrp %st(1), %st(0) // CHECK: encoding: [0xde,0xf9]
+fdivrp %st, %st(1) // CHECK: encoding: [0xde,0xf9]
+fdivrp %st(1), %st // CHECK: encoding: [0xde,0xf9]
 
-fsubrp %st(0), %st(1) // CHECK: encoding: [0xde,0xe9]
-fsubrp %st(1), %st(0) // CHECK: encoding: [0xde,0xe9]
+fsubrp %st, %st(1) // CHECK: encoding: [0xde,0xe9]
+fsubrp %st(1), %st // CHECK: encoding: [0xde,0xe9]
 
 // also PR8861
-fdivp %st(0), %st(1) // CHECK: encoding: [0xde,0xf1]
-fdivp %st(1), %st(0) // CHECK: encoding: [0xde,0xf1]
+fdivp %st, %st(1) // CHECK: encoding: [0xde,0xf1]
+fdivp %st(1), %st // CHECK: encoding: [0xde,0xf1]
 
 
 movl	foo(%rip), %eax
@@ -1391,38 +1391,38 @@ clac
 // CHECK: encoding: [0x0f,0x01,0xcb]
 stac
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fdivp %st(1)
-// CHECK: fdivrp %st(1)
-faddp %st(0), %st(1)
-fmulp %st(0), %st(1)
-fsubp %st(0), %st(1)
-fsubrp %st(0), %st(1)
-fdivp %st(0), %st(1)
-fdivrp %st(0), %st(1)
-
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fdivp %st(1)
-// CHECK: fdivrp %st(1)
-faddp %st(1), %st(0)
-fmulp %st(1), %st(0)
-fsubp %st(1), %st(0)
-fsubrp %st(1), %st(0)
-fdivp %st(1), %st(0)
-fdivrp %st(1), %st(0)
-
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fdivp %st(1)
-// CHECK: fdivrp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+faddp %st, %st(1)
+fmulp %st, %st(1)
+fsubp %st, %st(1)
+fsubrp %st, %st(1)
+fdivp %st, %st(1)
+fdivrp %st, %st(1)
+
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
+faddp %st(1), %st
+fmulp %st(1), %st
+fsubp %st(1), %st
+fsubrp %st(1), %st
+fdivp %st(1), %st
+fdivrp %st(1), %st
+
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
 faddp %st(1)
 fmulp %st(1)
 fsubp %st(1)
@@ -1430,12 +1430,12 @@ fsubrp %st(1)
 fdivp %st(1)
 fdivrp %st(1)
 
-// CHECK: faddp %st(1)
-// CHECK: fmulp %st(1)
-// CHECK: fsubp %st(1)
-// CHECK: fsubrp %st(1)
-// CHECK: fdivp %st(1)
-// CHECK: fdivrp %st(1)
+// CHECK: faddp %st, %st(1)
+// CHECK: fmulp %st, %st(1)
+// CHECK: fsubp %st, %st(1)
+// CHECK: fsubrp %st, %st(1)
+// CHECK: fdivp %st, %st(1)
+// CHECK: fdivrp %st, %st(1)
 faddp
 fmulp
 fsubp
@@ -1449,25 +1449,25 @@ fdivrp
 // CHECK: fsubr %st(1)
 // CHECK: fdiv %st(1)
 // CHECK: fdivr %st(1)
-fadd %st(1), %st(0)
-fmul %st(1), %st(0)
-fsub %st(1), %st(0)
-fsubr %st(1), %st(0)
-fdiv %st(1), %st(0)
-fdivr %st(1), %st(0)
-
-// CHECK: fadd %st(0), %st(1)
-// CHECK: fmul %st(0), %st(1)
-// CHECK: fsub %st(0), %st(1)
-// CHECK: fsubr %st(0), %st(1)
-// CHECK: fdiv %st(0), %st(1)
-// CHECK: fdivr %st(0), %st(1)
-fadd %st(0), %st(1)
-fmul %st(0), %st(1)
-fsub %st(0), %st(1)
-fsubr %st(0), %st(1)
-fdiv %st(0), %st(1)
-fdivr %st(0), %st(1)
+fadd %st(1), %st
+fmul %st(1), %st
+fsub %st(1), %st
+fsubr %st(1), %st
+fdiv %st(1), %st
+fdivr %st(1), %st
+
+// CHECK: fadd %st, %st(1)
+// CHECK: fmul %st, %st(1)
+// CHECK: fsub %st, %st(1)
+// CHECK: fsubr %st, %st(1)
+// CHECK: fdiv %st, %st(1)
+// CHECK: fdivr %st, %st(1)
+fadd %st, %st(1)
+fmul %st, %st(1)
+fsub %st, %st(1)
+fsubr %st, %st(1)
+fdiv %st, %st(1)
+fdivr %st, %st(1)
 
 // CHECK: fadd %st(1)
 // CHECK: fmul %st(1)
diff --git a/llvm/test/Transforms/FunctionImport/Inputs/comdat.ll b/llvm/test/Transforms/FunctionImport/Inputs/comdat.ll
new file mode 100644
index 00000000000000..1df6f25351e587
--- /dev/null
+++ b/llvm/test/Transforms/FunctionImport/Inputs/comdat.ll
@@ -0,0 +1,10 @@
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+define void @main() {
+entry:
+  call i8* @lwt_fun()
+  ret void
+}
+
+declare i8* @lwt_fun()
diff --git a/llvm/test/Transforms/FunctionImport/comdat.ll b/llvm/test/Transforms/FunctionImport/comdat.ll
new file mode 100644
index 00000000000000..29e8cb538ab665
--- /dev/null
+++ b/llvm/test/Transforms/FunctionImport/comdat.ll
@@ -0,0 +1,32 @@
+; Test to ensure that comdat is renamed consistently when comdat leader is
+; promoted and renamed due to an import. Required by COFF.
+
+; REQUIRES: x86-registered-target
+
+; RUN: opt -thinlto-bc -o %t1.bc %s
+; RUN: opt -thinlto-bc -o %t2.bc %S/Inputs/comdat.ll
+; RUN: llvm-lto2 run -save-temps -o %t3 %t1.bc %t2.bc \
+; RUN:          -r %t1.bc,lwt_fun,plx \
+; RUN:          -r %t2.bc,main,plx \
+; RUN:          -r %t2.bc,lwt_fun,
+; RUN: llvm-dis -o - %t3.1.3.import.bc | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+; CHECK: $lwt.llvm.[[HASH:[0-9]+]] = comdat any
+$lwt = comdat any
+
+; CHECK: @lwt_aliasee = private unnamed_addr global {{.*}}, comdat($lwt.llvm.[[HASH]])
+@lwt_aliasee = private unnamed_addr global [1 x i8*] [i8* null], comdat($lwt)
+
+; CHECK: @lwt.llvm.[[HASH]] = hidden unnamed_addr alias
+@lwt = internal unnamed_addr alias [1 x i8*], [1 x i8*]* @lwt_aliasee
+
+; Below function should get imported into other module, resulting in @lwt being
+; promoted and renamed.
+define i8* @lwt_fun() {
+  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* @lwt, i32 0, i32 0
+  %2 = load i8*, i8** %1
+  ret i8* %2
+}
diff --git a/llvm/test/Transforms/InstCombine/sink-alloca.ll b/llvm/test/Transforms/InstCombine/sink-alloca.ll
new file mode 100644
index 00000000000000..f2de74ff533ba2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-alloca.ll
@@ -0,0 +1,52 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i686-unknown-linux-gnu"
+
+; Check that instcombine doesn't sink dynamic allocas across llvm.stacksave.
+
+; Helper to generate branch conditions.
+declare i1 @cond()
+
+declare i32* @use_and_return(i32*)
+
+declare i8* @llvm.stacksave() #0
+
+declare void @llvm.stackrestore(i8*) #0
+
+define void @foo(i32 %x) {
+entry:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %ret, label %nonentry
+
+nonentry:                                         ; preds = %entry
+  %argmem = alloca i32, i32 %x, align 4
+  %sp = call i8* @llvm.stacksave()
+  %c2 = call i1 @cond()
+  br i1 %c2, label %ret, label %sinktarget
+
+sinktarget:                                       ; preds = %nonentry
+  ; Arrange for there to be a single use of %argmem by returning it.
+  %p = call i32* @use_and_return(i32* nonnull %argmem)
+  store i32 13, i32* %p, align 4
+  call void @llvm.stackrestore(i8* %sp)
+  %0 = call i32* @use_and_return(i32* %p)
+  br label %ret
+
+ret:                                              ; preds = %sinktarget, %nonentry, %entry
+  ret void
+}
+
+; CHECK-LABEL: define void @foo(i32 %x)
+; CHECK: nonentry:
+; CHECK:   %argmem = alloca i32, i32 %x
+; CHECK:   %sp = call i8* @llvm.stacksave()
+; CHECK:   %c2 = call i1 @cond()
+; CHECK:   br i1 %c2, label %ret, label %sinktarget
+; CHECK: sinktarget:
+; CHECK:   %p = call i32* @use_and_return(i32* nonnull %argmem)
+; CHECK:   store i32 13, i32* %p
+; CHECK:   call void @llvm.stackrestore(i8* %sp)
+; CHECK:   %0 = call i32* @use_and_return(i32* %p)
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll b/llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
new file mode 100644
index 00000000000000..77d09ad53f7887
--- /dev/null
+++ b/llvm/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
@@ -0,0 +1,33 @@
+; RUN: opt -transform-warning -disable-output < %s 2>&1 | FileCheck -allow-empty %s
+;
+; llvm.org/PR40546
+; Do not warn about leftover llvm.loop.vectorize.enable for already
+; vectorized loops.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i32 %n) {
+entry:
+  %cmp = icmp eq i32 %n, 0
+  br i1 %cmp, label %simd.if.end, label %omp.inner.for.body.preheader
+
+omp.inner.for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64
+  br label %omp.inner.for.body
+
+omp.inner.for.body:
+  %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !0
+
+simd.if.end:
+  ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.isvectorized"}
+
+
+; CHECK-NOT: loop not vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll b/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
new file mode 100644
index 00000000000000..424ef3846224a3
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -transform-warning -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
+
+; This test is a copy of no_switch.ll, with the "llvm.loop.vectorize.enable" metadata set to false.
+; It tests that vectorization is explicitly disabled and no warnings are emitted.
+
+; CHECK-NOT: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; CHECK-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; NOANALYSIS-NOT: remark: {{.*}}
+; NOANALYSIS-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly disabled
+; MOREINFO-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; CHECK: _Z11test_switchPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+entry:
+  %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
+  br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !14
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  switch i32 %0, label %for.inc [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb3
+  ], !dbg !14
+
+sw.bb:                                            ; preds = %for.body
+  %1 = trunc i64 %indvars.iv to i32, !dbg !20
+  %mul = shl nsw i32 %1, 1, !dbg !20
+  br label %for.inc, !dbg !22
+
+sw.bb3:                                           ; preds = %for.body
+  %2 = trunc i64 %indvars.iv to i32, !dbg !23
+  store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+  br label %for.inc, !dbg !23
+
+for.inc:                                          ; preds = %sw.bb3, %for.body, %sw.bb
+  %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
+  store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12
+
+for.end.loopexit:                                 ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !24
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0"}
+!10 = !DILocation(line: 3, column: 8, scope: !11)
+!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!12 = !{!12, !13, !13}
+!13 = !{!"llvm.loop.vectorize.enable", i1 false}
+!14 = !DILocation(line: 4, column: 5, scope: !15)
+!15 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !11)
+!16 = !{!17, !17, i64 0}
+!17 = !{!"int", !18, i64 0}
+!18 = !{!"omnipotent char", !19, i64 0}
+!19 = !{!"Simple C/C++ TBAA"}
+!20 = !DILocation(line: 6, column: 7, scope: !21)
+!21 = distinct !DILexicalBlock(line: 4, column: 18, file: !1, scope: !15)
+!22 = !DILocation(line: 7, column: 5, scope: !21)
+!23 = !DILocation(line: 9, column: 7, scope: !21)
+!24 = !DILocation(line: 14, column: 1, scope: !4)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index 8405117090b46f..67717a54659c36 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -3,93 +3,185 @@
 ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefixes=ALL,FORCE_REDUCTION
 
 define void @Test(i32) {
-; ALL-LABEL: @Test(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    br label [[LOOP:%.*]]
-; ALL:       loop:
-; ALL-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; ALL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; ALL-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
-; ALL-NEXT:    [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
-; ALL-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
-; ALL-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
-; ALL-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
-; ALL-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
-; ALL-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
-; ALL-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
-; ALL-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
-; ALL-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
-; ALL-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
-; ALL-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
-; ALL-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
-; ALL-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
-; ALL-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
-; ALL-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
-; ALL-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
-; ALL-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
-; ALL-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
-; ALL-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
-; ALL-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
-; ALL-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
-; ALL-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
-; ALL-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
-; ALL-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
-; ALL-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
-; ALL-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
-; ALL-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
-; ALL-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
-; ALL-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
-; ALL-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
-; ALL-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
-; ALL-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
-; ALL-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
-; ALL-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
-; ALL-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
-; ALL-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
-; ALL-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1
-; ALL-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
-; ALL-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
-; ALL-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
-; ALL-NEXT:    [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]]
-; ALL-NEXT:    [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]]
-; ALL-NEXT:    [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
-; ALL-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
-; ALL-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT:    [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; ALL-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
-; ALL-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; ALL-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
-; ALL-NEXT:    [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]]
-; ALL-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
-; ALL-NEXT:    br label [[LOOP]]
+; CHECK-LABEL: @Test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
+; CHECK-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; CHECK-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; CHECK-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; CHECK-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; CHECK-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
+; CHECK-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
+; CHECK-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
+; CHECK-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1
+; CHECK-NEXT:    br label [[LOOP]]
+;
+; FORCE_REDUCTION-LABEL: @Test(
+; FORCE_REDUCTION-NEXT:  entry:
+; FORCE_REDUCTION-NEXT:    br label [[LOOP:%.*]]
+; FORCE_REDUCTION:       loop:
+; FORCE_REDUCTION-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <4 x i32> <i32 0, i32 55, i32 285, i32 1240>, [[SHUFFLE]]
+; FORCE_REDUCTION-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; FORCE_REDUCTION-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
+; FORCE_REDUCTION-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
+; FORCE_REDUCTION-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT:    [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]]
+; FORCE_REDUCTION-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT:    [[BIN_RDX2:%.*]] = and <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]]
+; FORCE_REDUCTION-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
+; FORCE_REDUCTION-NEXT:    [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]]
+; FORCE_REDUCTION-NEXT:    [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
+; FORCE_REDUCTION-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]]
+; FORCE_REDUCTION-NEXT:    [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]]
+; FORCE_REDUCTION-NEXT:    [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 3>
+; FORCE_REDUCTION-NEXT:    br label [[LOOP]]
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
index 74e62e0e4ba2da..ad1434146a5b36 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -7,7 +7,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 undef>, i32 [[PARAM:%.*]], i32 1
 ; CHECK-NEXT:    br label [[BCI_15:%.*]]
 ; CHECK:       bci_15:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
@@ -28,13 +28,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
 ; CHECK-NEXT:    [[V38:%.*]] = and i32 undef, [[V36]]
 ; CHECK-NEXT:    [[V40:%.*]] = and i32 undef, [[V38]]
 ; CHECK-NEXT:    [[V42:%.*]] = and i32 undef, [[V40]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x i32> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP6]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> <i32 16, i32 undef>, i32 [[V42]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = and <2 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -43,9 +36,12 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP2]]
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[V43:%.*]] = and i32 undef, [[V42]]
+; CHECK-NEXT:    [[V44:%.*]] = add i32 [[TMP2]], 16
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
+; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
 ; CHECK-NEXT:    br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
 ; CHECK:       loopexit:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index a72da6399f22c1..2c5829d6fcecbe 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -1745,6 +1745,55 @@ entry:
   ret void
 }
 
+declare void @llvm.lifetime.start.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
+declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
+@array = dso_local global [10 x float] undef, align 4
+
+define void @test29(i32 %num, i32 %tid) {
+; CHECK-LABEL: @test29(
+; CHECK-NOT: alloca [10 x float]
+; CHECK: ret void
+; Input: a [10 x float] alloca that is only read (one i32 load, no stores) between lifetime markers; the directives above require that no such alloca is left in the output.
+entry:
+  %ra = alloca [10 x float], align 4 ; 10 x 4-byte float = 40 bytes, the size passed to the lifetime calls
+  call void @llvm.lifetime.start.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra)
+; Enter the loop only when %num > 0; otherwise branch straight to the exit block bb7.
+  %cmp1 = icmp sgt i32 %num, 0
+  br i1 %cmp1, label %bb1, label %bb7
+
+bb1: ; loop preheader: everything computed here is loop-invariant
+  %tobool = icmp eq i32 %tid, 0 ; true when %tid == 0, which selects the no-store path in bb2
+  %conv.i = zext i32 %tid to i64
+  %0 = bitcast [10 x float]* %ra to i32*
+  %1 = load i32, i32* %0, align 4 ; reads the first 4 bytes of %ra; no store to %ra appears in this function
+  %arrayidx5 = getelementptr inbounds [10 x float], [10 x float]* @array, i64 0, i64 %conv.i ; address of @array[%tid]
+  %2 = bitcast float* %arrayidx5 to i32*
+  br label %bb2
+
+bb2: ; loop header
+  %i.02 = phi i32 [ %num, %bb1 ], [ %sub, %bb5 ] ; counter: starts at %num, decremented by one in bb5
+  br i1 %tobool, label %bb3, label %bb4
+
+bb3: ; %tid == 0: nothing is stored this iteration
+  br label %bb5
+
+bb4: ; %tid != 0: write the value loaded from %ra into @array[%tid]
+  store i32 %1, i32* %2, align 4
+  br label %bb5
+
+bb5: ; loop latch
+  %sub = add i32 %i.02, -1
+  %cmp = icmp sgt i32 %sub, 0 ; keep looping while the decremented counter is still > 0
+  br i1 %cmp, label %bb2, label %bb6
+
+bb6: ; loop exit
+  br label %bb7
+
+bb7: ; common exit for both the looped and the skipped path
+  call void @llvm.lifetime.end.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra)
+  ret void
+}
+
 !0 = !{!1, !1, i64 0, i64 1}
 !1 = !{!2, i64 1, !"type_0"}
 !2 = !{!"root"}
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
index 07c68ab2618f15..0b2ae5f8e7a4fb 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
@@ -6,7 +6,7 @@
 # the final version which uses ULEB128 and not the U32.
 
 # CHECK:         .debug_loclists contents:
-# CHECK-NEXT:    0x00000000: locations list header: length = 0x0000000f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+# CHECK-NEXT:    0x00000000: locations list header: length = 0x0000000e, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 # CHECK-NEXT:    0x00000000:
 # CHECK-NEXT:    Addr idx 1 (w/ length 16): DW_OP_reg5 RDI
 
@@ -21,7 +21,7 @@
  .byte 3          # DW_LLE_startx_length
  .byte 0x01       # Index
  .uleb128 0x10    # Length
- .short 1         # Loc expr size
+ .byte 1          # Loc expr size
  .byte 85         # DW_OP_reg5
  .byte 0          # DW_LLE_end_of_list
 .Ldebug_loclist_table_end0:
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s
index c7a990a892eb67..eda0fdebb7d3c2 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      99    49.50                 U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      5     5.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      5     5.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      5     5.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      5     5.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  1      5     5.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  1      5     5.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      5     5.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      5     5.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      5     5.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      5     5.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  1      5     5.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  1      5     5.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.50                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   0.50                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  1      25    12.50                 U     fnclex
-# CHECK-NEXT:  1      9     4.50                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      9     4.50                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      9     4.50                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      5     5.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      5     5.00                  U     fcom	%st(3)
 # CHECK-NEXT:  1      5     5.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  1      5     5.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  1      5     5.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fcompp
-# CHECK-NEXT:  1      9     4.50                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      9     4.50                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      9     4.50                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      9     4.50                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      174   87.00                 U     fcos
 # CHECK-NEXT:  1      1     0.50                  U     fdecstp
-# CHECK-NEXT:  1      34    17.00                 U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      34    17.00                 U     fdiv	%st(2)
+# CHECK-NEXT:  1      34    17.00                 U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      34    17.00                 U     fdiv	%st(2), %st
 # CHECK-NEXT:  1      34    17.00   *             U     fdivs	(%ecx)
 # CHECK-NEXT:  1      34    17.00   *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      34    17.00                 U     fdivp	%st(1)
-# CHECK-NEXT:  1      34    17.00                 U     fdivp	%st(2)
+# CHECK-NEXT:  1      34    17.00                 U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      34    17.00                 U     fdivp	%st, %st(2)
 # CHECK-NEXT:  1      34    17.00   *             U     fidivs	(%ecx)
 # CHECK-NEXT:  1      34    17.00   *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      34    17.00                 U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      34    17.00                 U     fdivr	%st(2)
+# CHECK-NEXT:  1      34    17.00                 U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      34    17.00                 U     fdivr	%st(2), %st
 # CHECK-NEXT:  1      34    17.00   *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  1      34    17.00   *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      34    17.00                 U     fdivrp	%st(1)
-# CHECK-NEXT:  1      34    17.00                 U     fdivrp	%st(2)
+# CHECK-NEXT:  1      34    17.00                 U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      34    17.00                 U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  1      34    17.00   *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  1      34    17.00   *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  1      10    5.00                  U     fldln2
 # CHECK-NEXT:  1      10    5.00                  U     fldpi
 # CHECK-NEXT:  1      1     0.50                  U     fldz
-# CHECK-NEXT:  1      4     4.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      4     4.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      4     4.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      4     4.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  1      4     4.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  1      4     4.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      4     4.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      4     4.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      4     4.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      4     4.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  1      4     4.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  1      4     4.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.50                  U     frstor	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.50                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      5     5.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      5     5.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      5     5.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      5     5.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  1      5     5.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  1      5     5.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      5     5.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      5     5.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      5     5.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      5     5.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  1      5     5.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  1      5     5.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      5     5.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      5     5.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      5     5.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      5     5.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  1      5     5.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  1      5     5.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      5     5.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      5     5.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      5     5.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      5     5.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  1      5     5.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  1      5     5.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      9     4.50                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      1     1.00                  U     fucompp
-# CHECK-NEXT:  1      9     4.50                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      9     4.50                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      9     4.50                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      9     4.50                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      1     0.50                  U     wait
 # CHECK-NEXT:  1      1     1.00                  U     fxam
 # CHECK-NEXT:  1      1     1.00                  U     fxch	%st(1)
@@ -367,26 +367,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    Instructions:
 # CHECK-NEXT: 49.50  49.50  f2xm1
 # CHECK-NEXT:  -     1.00   fabs
-# CHECK-NEXT: 5.00    -     fadd	%st(0), %st(1)
-# CHECK-NEXT: 5.00    -     fadd	%st(2)
+# CHECK-NEXT: 5.00    -     fadd	%st, %st(1)
+# CHECK-NEXT: 5.00    -     fadd	%st(2), %st
 # CHECK-NEXT: 5.00    -     fadds	(%ecx)
 # CHECK-NEXT: 5.00    -     faddl	(%ecx)
-# CHECK-NEXT: 5.00    -     faddp	%st(1)
-# CHECK-NEXT: 5.00    -     faddp	%st(2)
+# CHECK-NEXT: 5.00    -     faddp	%st, %st(1)
+# CHECK-NEXT: 5.00    -     faddp	%st, %st(2)
 # CHECK-NEXT: 5.00    -     fiadds	(%ecx)
 # CHECK-NEXT: 5.00    -     fiaddl	(%ecx)
 # CHECK-NEXT: 0.50   0.50   fbld	(%ecx)
 # CHECK-NEXT: 0.50   0.50   fbstp	(%eax)
 # CHECK-NEXT:  -     1.00   fchs
 # CHECK-NEXT: 12.50  12.50  fnclex
-# CHECK-NEXT: 4.50   4.50   fcmovb	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmovbe	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmove	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmovnb	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmovne	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmovnu	%st(1), %st(0)
-# CHECK-NEXT: 4.50   4.50   fcmovu	%st(1), %st(0)
+# CHECK-NEXT: 4.50   4.50   fcmovb	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmovbe	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmove	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmovnb	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmovnbe	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmovne	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmovnu	%st(1), %st
+# CHECK-NEXT: 4.50   4.50   fcmovu	%st(1), %st
 # CHECK-NEXT: 5.00    -     fcom	%st(1)
 # CHECK-NEXT: 5.00    -     fcom	%st(3)
 # CHECK-NEXT: 5.00    -     fcoms	(%ecx)
@@ -396,24 +396,24 @@ fyl2xp1
 # CHECK-NEXT: 5.00    -     fcomps	(%ecx)
 # CHECK-NEXT: 5.00    -     fcompl	(%eax)
 # CHECK-NEXT:  -     1.00   fcompp
-# CHECK-NEXT: 4.50   4.50   fcomi	%st(3)
-# CHECK-NEXT: 4.50   4.50   fcompi	%st(3)
+# CHECK-NEXT: 4.50   4.50   fcomi	%st(3), %st
+# CHECK-NEXT: 4.50   4.50   fcompi	%st(3), %st
 # CHECK-NEXT: 87.00  87.00  fcos
 # CHECK-NEXT: 0.50   0.50   fdecstp
-# CHECK-NEXT: 17.00  17.00  fdiv	%st(0), %st(1)
-# CHECK-NEXT: 17.00  17.00  fdiv	%st(2)
+# CHECK-NEXT: 17.00  17.00  fdiv	%st, %st(1)
+# CHECK-NEXT: 17.00  17.00  fdiv	%st(2), %st
 # CHECK-NEXT: 17.00  17.00  fdivs	(%ecx)
 # CHECK-NEXT: 17.00  17.00  fdivl	(%eax)
-# CHECK-NEXT: 17.00  17.00  fdivp	%st(1)
-# CHECK-NEXT: 17.00  17.00  fdivp	%st(2)
+# CHECK-NEXT: 17.00  17.00  fdivp	%st, %st(1)
+# CHECK-NEXT: 17.00  17.00  fdivp	%st, %st(2)
 # CHECK-NEXT: 17.00  17.00  fidivs	(%ecx)
 # CHECK-NEXT: 17.00  17.00  fidivl	(%eax)
-# CHECK-NEXT: 17.00  17.00  fdivr	%st(0), %st(1)
-# CHECK-NEXT: 17.00  17.00  fdivr	%st(2)
+# CHECK-NEXT: 17.00  17.00  fdivr	%st, %st(1)
+# CHECK-NEXT: 17.00  17.00  fdivr	%st(2), %st
 # CHECK-NEXT: 17.00  17.00  fdivrs	(%ecx)
 # CHECK-NEXT: 17.00  17.00  fdivrl	(%eax)
-# CHECK-NEXT: 17.00  17.00  fdivrp	%st(1)
-# CHECK-NEXT: 17.00  17.00  fdivrp	%st(2)
+# CHECK-NEXT: 17.00  17.00  fdivrp	%st, %st(1)
+# CHECK-NEXT: 17.00  17.00  fdivrp	%st, %st(2)
 # CHECK-NEXT: 17.00  17.00  fidivrs	(%ecx)
 # CHECK-NEXT: 17.00  17.00  fidivrl	(%eax)
 # CHECK-NEXT: 0.50   0.50   ffree	%st(0)
@@ -447,12 +447,12 @@ fyl2xp1
 # CHECK-NEXT: 5.00   5.00   fldln2
 # CHECK-NEXT: 5.00   5.00   fldpi
 # CHECK-NEXT: 0.50   0.50   fldz
-# CHECK-NEXT: 4.00    -     fmul	%st(0), %st(1)
-# CHECK-NEXT: 4.00    -     fmul	%st(2)
+# CHECK-NEXT: 4.00    -     fmul	%st, %st(1)
+# CHECK-NEXT: 4.00    -     fmul	%st(2), %st
 # CHECK-NEXT: 4.00    -     fmuls	(%ecx)
 # CHECK-NEXT: 4.00    -     fmull	(%eax)
-# CHECK-NEXT: 4.00    -     fmulp	%st(1)
-# CHECK-NEXT: 4.00    -     fmulp	%st(2)
+# CHECK-NEXT: 4.00    -     fmulp	%st, %st(1)
+# CHECK-NEXT: 4.00    -     fmulp	%st, %st(2)
 # CHECK-NEXT: 4.00    -     fimuls	(%ecx)
 # CHECK-NEXT: 4.00    -     fimull	(%eax)
 # CHECK-NEXT: 0.50   0.50   fnop
@@ -480,20 +480,20 @@ fyl2xp1
 # CHECK-NEXT: 0.50   0.50   frstor	(%eax)
 # CHECK-NEXT: 0.50   0.50   wait
 # CHECK-NEXT: 0.50   0.50   fnsave	(%eax)
-# CHECK-NEXT: 5.00    -     fsub	%st(0), %st(1)
-# CHECK-NEXT: 5.00    -     fsub	%st(2)
+# CHECK-NEXT: 5.00    -     fsub	%st, %st(1)
+# CHECK-NEXT: 5.00    -     fsub	%st(2), %st
 # CHECK-NEXT: 5.00    -     fsubs	(%ecx)
 # CHECK-NEXT: 5.00    -     fsubl	(%eax)
-# CHECK-NEXT: 5.00    -     fsubp	%st(1)
-# CHECK-NEXT: 5.00    -     fsubp	%st(2)
+# CHECK-NEXT: 5.00    -     fsubp	%st, %st(1)
+# CHECK-NEXT: 5.00    -     fsubp	%st, %st(2)
 # CHECK-NEXT: 5.00    -     fisubs	(%ecx)
 # CHECK-NEXT: 5.00    -     fisubl	(%eax)
-# CHECK-NEXT: 5.00    -     fsubr	%st(0), %st(1)
-# CHECK-NEXT: 5.00    -     fsubr	%st(2)
+# CHECK-NEXT: 5.00    -     fsubr	%st, %st(1)
+# CHECK-NEXT: 5.00    -     fsubr	%st(2), %st
 # CHECK-NEXT: 5.00    -     fsubrs	(%ecx)
 # CHECK-NEXT: 5.00    -     fsubrl	(%eax)
-# CHECK-NEXT: 5.00    -     fsubrp	%st(1)
-# CHECK-NEXT: 5.00    -     fsubrp	%st(2)
+# CHECK-NEXT: 5.00    -     fsubrp	%st, %st(1)
+# CHECK-NEXT: 5.00    -     fsubrp	%st, %st(2)
 # CHECK-NEXT: 5.00    -     fisubrs	(%ecx)
 # CHECK-NEXT: 5.00    -     fisubrl	(%eax)
 # CHECK-NEXT: 4.50   4.50   ftst
@@ -502,8 +502,8 @@ fyl2xp1
 # CHECK-NEXT:  -     1.00   fucomp	%st(1)
 # CHECK-NEXT:  -     1.00   fucomp	%st(3)
 # CHECK-NEXT:  -     1.00   fucompp
-# CHECK-NEXT: 4.50   4.50   fucomi	%st(3)
-# CHECK-NEXT: 4.50   4.50   fucompi	%st(3)
+# CHECK-NEXT: 4.50   4.50   fucomi	%st(3), %st
+# CHECK-NEXT: 4.50   4.50   fucompi	%st(3), %st
 # CHECK-NEXT: 0.50   0.50   wait
 # CHECK-NEXT: 1.00    -     fxam
 # CHECK-NEXT: 1.00   1.00   fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s
index ad72714c74c1bb..4cdddf01104b04 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.50                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      5     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      5     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.50                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   0.50                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  1      100   0.50                  U     fnclex
-# CHECK-NEXT:  1      1     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      1     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      1     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      1     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  2      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  2      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  1      6     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  1      6     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  1      6     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.50                  U     fcompp
-# CHECK-NEXT:  2      1     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  2      1     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  2      1     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  2      1     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.50                  U     fcos
 # CHECK-NEXT:  1      100   0.50                  U     fdecstp
-# CHECK-NEXT:  1      9     9.50                  U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      9     9.50                  U     fdiv	%st(2)
+# CHECK-NEXT:  1      9     9.50                  U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      9     9.50                  U     fdiv	%st(2), %st
 # CHECK-NEXT:  1      14    9.50    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  1      14    9.50    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      9     9.50                  U     fdivp	%st(1)
-# CHECK-NEXT:  1      9     9.50                  U     fdivp	%st(2)
+# CHECK-NEXT:  1      9     9.50                  U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      9     9.50                  U     fdivp	%st, %st(2)
 # CHECK-NEXT:  1      14    9.50    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  1      14    9.50    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      9     9.50                  U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      9     9.50                  U     fdivr	%st(2)
+# CHECK-NEXT:  1      9     9.50                  U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      9     9.50                  U     fdivr	%st(2), %st
 # CHECK-NEXT:  1      14    9.50    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  1      14    9.50    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      9     9.50                  U     fdivrp	%st(1)
-# CHECK-NEXT:  1      9     9.50                  U     fdivrp	%st(2)
+# CHECK-NEXT:  1      9     9.50                  U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      9     9.50                  U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  1      14    9.50    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  1      14    9.50    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      100   0.50                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  1      3     1.00                  U     fldln2
 # CHECK-NEXT:  1      3     1.00                  U     fldpi
 # CHECK-NEXT:  1      3     1.00                  U     fldz
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.50                  U     frstor	(%eax)
 # CHECK-NEXT:  1      100   0.50                  U     wait
 # CHECK-NEXT:  1      100   0.50                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  2      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      1     1.00                  U     fucompp
-# CHECK-NEXT:  2      1     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  2      1     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  2      1     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  2      1     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.50                  U     wait
 # CHECK-NEXT:  1      100   0.50                  U     fxam
 # CHECK-NEXT:  1      1     0.50                  U     fxch	%st(1)
@@ -388,26 +388,26 @@ fyl2xp1
 # CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16.0] [16.1] [17]   [18]   Instructions:
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     f2xm1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fabs
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fadds	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fiadds	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fiaddl	(%ecx)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fbstp	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fchs
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fnclex
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fcoms	(%ecx)
@@ -417,24 +417,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fcompl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fcompp
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fcos
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fdecstp
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fdivs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fdivl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fidivs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fidivl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fdivrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fidivrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     9.50   9.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fidivrl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     ffree	%st(0)
@@ -468,12 +468,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fldln2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fldpi
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fldz
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fmuls	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fimuls	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -     0.50   0.50    -      -     fimull	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fnop
@@ -501,20 +501,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     frstor	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     wait
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fsubs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fisubs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fsubrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fisubrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -     0.50   0.50    -      -     fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     ftst
@@ -523,8 +523,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -     1.00    -      -      -      -      -      -      -     fucompp
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     0.50   0.50    -      -      -      -     1.00    -      -      -      -      -      -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     wait
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fxam
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s
index 5cb92be47eabb0..2f3a69da99d074 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.25                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  2      9     1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  2      9     1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  3      12    2.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  3      12    2.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.25                  U     fbld	(%ecx)
 # CHECK-NEXT:  2      1     1.00                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  4      4     1.00                  U     fnclex
-# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  2      7     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  2      7     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  2      7     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.25                  U     fcompp
-# CHECK-NEXT:  1      3     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      3     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      3     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.25                  U     fcos
 # CHECK-NEXT:  2      2     1.00                  U     fdecstp
-# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2), %st
 # CHECK-NEXT:  2      21    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  2      21    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(2)
 # CHECK-NEXT:  3      24    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  3      24    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2), %st
 # CHECK-NEXT:  2      26    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  2      26    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  3      29    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  3      29    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      100   0.25                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     0.50                  U     fldz
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      11    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      11    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  3      14    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  3      14    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.25                  U     frstor	(%eax)
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  2      9     1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  2      9     1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  3      12    2.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  3      12    2.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  2      9     1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  2      9     1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  3      12    2.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  3      12    2.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      3     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      3     1.00                  U     fucompp
-# CHECK-NEXT:  1      3     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      3     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      3     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      3     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fxam
 # CHECK-NEXT:  12     14    4.00                  U     fxch	%st(1)
@@ -375,26 +375,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     f2xm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fabs
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fadds	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fiaddl	(%ecx)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   fbstp	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fchs
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00   1.00    -     fnclex
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fcoms	(%ecx)
@@ -404,24 +404,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fcompl	(%eax)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcompp
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcos
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     fdecstp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivrl	(%eax)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     ffree	%st(0)
@@ -455,12 +455,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     fldln2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     fldpi
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fldz
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fimuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fimull	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fnop
@@ -488,20 +488,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     frstor	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     ftst
@@ -510,8 +510,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucompp
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fxam
 # CHECK-NEXT:  -      -     3.25   2.25    -      -      -     1.25   5.25    -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s
index a0e431f6dfe45d..2b6b2c49727315 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.50                  U     f2xm1
 # CHECK-NEXT:  1      2     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  1      8     1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  1      8     1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  1      8     1.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  1      8     1.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.50                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   0.50                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      2     1.00                  U     fchs
 # CHECK-NEXT:  1      100   0.50                  U     fnclex
-# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      3     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      3     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  1      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  1      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  1      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.50                  U     fcompp
-# CHECK-NEXT:  1      3     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      3     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      3     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.50                  U     fcos
 # CHECK-NEXT:  1      100   0.50                  U     fdecstp
-# CHECK-NEXT:  1      19    19.00                 U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      19    19.00                 U     fdiv	%st(2)
+# CHECK-NEXT:  1      19    19.00                 U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      19    19.00                 U     fdiv	%st(2), %st
 # CHECK-NEXT:  1      24    19.00   *             U     fdivs	(%ecx)
 # CHECK-NEXT:  1      24    19.00   *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      19    19.00                 U     fdivp	%st(1)
-# CHECK-NEXT:  1      19    19.00                 U     fdivp	%st(2)
+# CHECK-NEXT:  1      19    19.00                 U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      19    19.00                 U     fdivp	%st, %st(2)
 # CHECK-NEXT:  1      24    19.00   *             U     fidivs	(%ecx)
 # CHECK-NEXT:  1      24    19.00   *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      19    19.00                 U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      19    19.00                 U     fdivr	%st(2)
+# CHECK-NEXT:  1      19    19.00                 U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      19    19.00                 U     fdivr	%st(2), %st
 # CHECK-NEXT:  1      24    19.00   *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  1      24    19.00   *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      19    19.00                 U     fdivrp	%st(1)
-# CHECK-NEXT:  1      19    19.00                 U     fdivrp	%st(2)
+# CHECK-NEXT:  1      19    19.00                 U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      19    19.00                 U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  1      24    19.00   *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  1      24    19.00   *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      100   0.50                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  1      3     1.00                  U     fldln2
 # CHECK-NEXT:  1      3     1.00                  U     fldpi
 # CHECK-NEXT:  1      3     1.00                  U     fldz
-# CHECK-NEXT:  1      2     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      2     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      2     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      2     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  1      7     1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  1      7     1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      2     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      2     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      2     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      2     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  1      7     1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  1      7     1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.50                  U     frstor	(%eax)
 # CHECK-NEXT:  1      100   0.50                  U     wait
 # CHECK-NEXT:  1      100   0.50                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  1      8     1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  1      8     1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  1      8     1.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  1      8     1.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  1      8     1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  1      8     1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  1      8     1.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  1      8     1.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      3     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      3     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      3     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      3     1.00                  U     fucompp
-# CHECK-NEXT:  1      3     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      3     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      3     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      3     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.50                  U     wait
 # CHECK-NEXT:  1      100   0.50                  U     fxam
 # CHECK-NEXT:  1      1     0.50                  U     fxch	%st(1)
@@ -379,26 +379,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     f2xm1
 # CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fabs
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fadds	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fiaddl	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fbld	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fbstp	(%eax)
 # CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fchs
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fnclex
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcom	%st(1)
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcom	%st(3)
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fcoms	(%ecx)
@@ -408,24 +408,24 @@ fyl2xp1
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fcomps	(%ecx)
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fcompl	(%eax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fcompp
-# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcomi	%st(3)
-# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcompi	%st(3)
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fcos
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fdecstp
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fdivs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fdivl	(%eax)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fidivs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fidivl	(%eax)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fdivrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     19.00   -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fidivrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     19.00   -     1.00   1.00    -      -      -      -      -      -     fidivrl	(%eax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     ffree	%st(0)
@@ -459,12 +459,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -     fldln2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -     fldpi
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -     fldz
-# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     1.00    -     1.00   1.00    -      -      -      -      -      -     fmuls	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -     1.00   1.00    -      -      -      -      -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     1.00    -     1.00   1.00    -      -      -      -      -      -     fimuls	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -     1.00   1.00    -      -      -      -      -      -     fimull	(%eax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fnop
@@ -492,20 +492,20 @@ fyl2xp1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     frstor	(%eax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     wait
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     fisubrl	(%eax)
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     ftst
@@ -514,8 +514,8 @@ fyl2xp1
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucompp
-# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucomi	%st(3)
-# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucompi	%st(3)
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     wait
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fxam
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s
index 1cba9a7d77fc28..1f3e51e58b33c0 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.33                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.33                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   0.33                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  1      100   0.33                  U     fnclex
-# CHECK-NEXT:  3      3     2.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  3      3     2.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  2      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  2      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  2      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.33                  U     fcompp
-# CHECK-NEXT:  3      3     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  3      3     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  3      3     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  3      3     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.33                  U     fcos
 # CHECK-NEXT:  1      1     1.00                  U     fdecstp
-# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st(2), %st
 # CHECK-NEXT:  2      31    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  2      31    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st, %st(2)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st(2), %st
 # CHECK-NEXT:  2      31    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  2      31    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     1.00                  U     fldz
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      12    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      12    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  3      15    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  3      15    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.33                  U     frstor	(%eax)
 # CHECK-NEXT:  1      100   0.33                  U     wait
 # CHECK-NEXT:  1      100   0.33                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      3     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      3     1.00                  U     fucompp
-# CHECK-NEXT:  3      3     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  3      3     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  3      3     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  3      3     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.33                  U     wait
 # CHECK-NEXT:  1      100   0.33                  U     fxam
 # CHECK-NEXT:  1      1     0.33                  U     fxch	%st(1)
@@ -373,26 +373,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     f2xm1
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fabs
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fadds	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   faddl	(%ecx)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fiaddl	(%ecx)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fbld	(%ecx)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fbstp	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fchs
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fnclex
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fcoms	(%ecx)
@@ -402,24 +402,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fcompl	(%eax)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fcompp
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fcos
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fdecstp
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivl	(%eax)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivl	(%eax)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivrl	(%eax)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivrl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     ffree	%st(0)
@@ -453,12 +453,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -     fldln2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -     fldpi
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fldz
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fmuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fmull	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fimuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fimull	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fnop
@@ -486,20 +486,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     frstor	(%eax)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     wait
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     ftst
@@ -508,8 +508,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fucompp
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     wait
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fxam
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s
index 53006bbc3296e1..7da8b2802a9a16 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.25                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  43     47    10.75                 U     fbld	(%ecx)
 # CHECK-NEXT:  2      1     1.00                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  4      4     1.00                  U     fnclex
-# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  2      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  2      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  2      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  2      1     0.50                  U     fcompp
-# CHECK-NEXT:  3      1     0.50                  U     fcomi	%st(3)
-# CHECK-NEXT:  3      1     0.50                  U     fcompi	%st(3)
+# CHECK-NEXT:  3      1     0.50                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  3      1     0.50                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.25                  U     fcos
 # CHECK-NEXT:  2      2     1.00                  U     fdecstp
-# CHECK-NEXT:  1      24    1.00                  U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2)
+# CHECK-NEXT:  1      24    1.00                  U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2), %st
 # CHECK-NEXT:  2      31    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  2      31    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      24    1.00                  U     fdivp	%st(1)
-# CHECK-NEXT:  1      24    1.00                  U     fdivp	%st(2)
+# CHECK-NEXT:  1      24    1.00                  U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      24    1.00                  U     fdivp	%st, %st(2)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      24    1.00                  U     fdivr	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      24    1.00                  U     fdivr	%st(2), %st
 # CHECK-NEXT:  2      27    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  2      27    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  3      30    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  3      30    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     0.50                  U     fldz
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      12    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      12    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  3      15    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  3      15    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  90     1     22.50                 U     frstor	(%eax)
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  147    1     36.75                 U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  2      1     0.50                  U     fucompp
-# CHECK-NEXT:  3      1     0.50                  U     fucomi	%st(3)
-# CHECK-NEXT:  3      1     0.50                  U     fucompi	%st(3)
+# CHECK-NEXT:  3      1     0.50                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  3      1     0.50                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  2      1     2.00                  U     fxam
 # CHECK-NEXT:  15     17    4.00                  U     fxch	%st(1)
@@ -375,26 +375,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     f2xm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fabs
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fadds	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fiaddl	(%ecx)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   fbstp	(%eax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fchs
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00   1.00    -     fnclex
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fcoms	(%ecx)
@@ -404,24 +404,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fcompl	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fcompp
-# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcos
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     fdecstp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fidivrl	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     ffree	%st(0)
@@ -455,12 +455,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     fldln2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     fldpi
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fldz
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fimuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     fimull	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fnop
@@ -488,20 +488,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     frstor	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00   0.50   0.50    -      -      -      -     fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     ftst
@@ -510,8 +510,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fucompp
-# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -      -     2.00    -      -      -      -      -      -     fxam
 # CHECK-NEXT:  -      -     4.00   3.00    -      -      -     3.00   5.00    -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s
index fe5de61296f569..d6d42e957bfe34 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   1.00                  U     f2xm1
 # CHECK-NEXT:  1      1     0.50                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  1      6     1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  1      6     1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  1      6     1.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  1      6     1.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   1.00                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   1.00                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fchs
 # CHECK-NEXT:  1      100   1.00                  U     fnclex
-# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      3     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      3     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  1      6     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  1      6     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  1      6     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   1.00                  U     fcompp
-# CHECK-NEXT:  1      3     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      3     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      3     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   1.00                  U     fcos
 # CHECK-NEXT:  1      100   1.00                  U     fdecstp
-# CHECK-NEXT:  1      19    17.00                 U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      19    17.00                 U     fdiv	%st(2)
+# CHECK-NEXT:  1      19    17.00                 U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      19    17.00                 U     fdiv	%st(2), %st
 # CHECK-NEXT:  1      22    17.00   *             U     fdivs	(%ecx)
 # CHECK-NEXT:  1      22    17.00   *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      19    17.00                 U     fdivp	%st(1)
-# CHECK-NEXT:  1      19    17.00                 U     fdivp	%st(2)
+# CHECK-NEXT:  1      19    17.00                 U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      19    17.00                 U     fdivp	%st, %st(2)
 # CHECK-NEXT:  1      22    17.00   *             U     fidivs	(%ecx)
 # CHECK-NEXT:  1      22    17.00   *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      19    17.00                 U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      19    17.00                 U     fdivr	%st(2)
+# CHECK-NEXT:  1      19    17.00                 U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      19    17.00                 U     fdivr	%st(2), %st
 # CHECK-NEXT:  1      22    17.00   *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  1      22    17.00   *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      19    17.00                 U     fdivrp	%st(1)
-# CHECK-NEXT:  1      19    17.00                 U     fdivrp	%st(2)
+# CHECK-NEXT:  1      19    17.00                 U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      19    17.00                 U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  1      22    17.00   *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  1      22    17.00   *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      100   1.00                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     0.50                  U     fldz
-# CHECK-NEXT:  1      5     2.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      5     2.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      5     2.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      5     2.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  1      8     2.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  1      8     2.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      5     2.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      5     2.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      5     2.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      5     2.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  1      8     2.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  1      8     2.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   1.00                  U     frstor	(%eax)
 # CHECK-NEXT:  1      100   1.00                  U     wait
 # CHECK-NEXT:  1      100   1.00                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  1      6     1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  1      6     1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  1      6     1.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  1      6     1.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  1      6     1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  1      6     1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  1      6     1.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  1      6     1.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      3     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      3     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      3     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      3     1.00                  U     fucompp
-# CHECK-NEXT:  1      3     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      3     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      3     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      3     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      100   1.00                  U     wait
 # CHECK-NEXT:  1      100   1.00                  U     fxam
 # CHECK-NEXT:  1      1     0.50                  U     fxch	%st(1)
@@ -373,26 +373,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     f2xm1
 # CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     fabs
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fadds	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   faddl	(%ecx)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fiaddl	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fbstp	(%eax)
 # CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     fchs
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fnclex
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fcoms	(%ecx)
@@ -402,24 +402,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fcompl	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fcompp
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fcos
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fdecstp
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fdivs	(%ecx)
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fdivl	(%eax)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fidivs	(%ecx)
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fidivl	(%eax)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fdivrs	(%ecx)
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fdivrl	(%eax)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fidivrs	(%ecx)
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00   fidivrl	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     ffree	%st(0)
@@ -453,12 +453,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00   1.00    -      -      -     fldln2
 # CHECK-NEXT:  -      -      -     1.00   1.00    -      -      -     fldpi
 # CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     fldz
-# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00   fmuls	(%ecx)
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00   fmull	(%eax)
-# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00   fimuls	(%ecx)
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00   fimull	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     fnop
@@ -486,20 +486,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     frstor	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     wait
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fsubl	(%eax)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fisubl	(%eax)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     ftst
@@ -508,8 +508,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucompp
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     wait
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fxam
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s
index 332f365f1fdd3a..1bed53326ced68 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.33                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.33                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   0.33                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  1      100   0.33                  U     fnclex
-# CHECK-NEXT:  3      3     2.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  3      3     2.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  3      3     2.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  3      3     2.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  2      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  2      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  2      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.33                  U     fcompp
-# CHECK-NEXT:  3      3     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  3      3     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  3      3     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  3      3     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.33                  U     fcos
 # CHECK-NEXT:  1      1     1.00                  U     fdecstp
-# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdiv	%st(2), %st
 # CHECK-NEXT:  2      31    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  2      31    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdivp	%st, %st(2)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdivr	%st(2), %st
 # CHECK-NEXT:  2      31    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  2      31    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st(1)
-# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st(2)
+# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      14    14.00                 U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  3      34    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     1.00                  U     fldz
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      12    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      12    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      5     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  3      15    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  3      15    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.33                  U     frstor	(%eax)
 # CHECK-NEXT:  1      100   0.33                  U     wait
 # CHECK-NEXT:  1      100   0.33                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      3     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      3     1.00                  U     fucompp
-# CHECK-NEXT:  3      3     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  3      3     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  3      3     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  3      3     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.33                  U     wait
 # CHECK-NEXT:  1      100   0.33                  U     fxam
 # CHECK-NEXT:  1      1     0.33                  U     fxch	%st(1)
@@ -373,26 +373,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     f2xm1
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fabs
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fadds	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   faddl	(%ecx)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fiaddl	(%ecx)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fbld	(%ecx)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fbstp	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fchs
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fnclex
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -     0.50    -      -     2.50    -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fcoms	(%ecx)
@@ -402,24 +402,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fcompl	(%eax)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fcompp
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fcos
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fdecstp
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivl	(%eax)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivl	(%eax)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fdivrl	(%eax)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fidivrl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     ffree	%st(0)
@@ -453,12 +453,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -     fldln2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -     fldpi
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fldz
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fmuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   fmull	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fimuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50   fimull	(%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     fnop
@@ -486,20 +486,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     frstor	(%eax)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     wait
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -     2.00    -      -     0.50   0.50   fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     ftst
@@ -508,8 +508,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     fucompp
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     wait
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fxam
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s
index 7be9d699573ac6..6cd4439a25c1f3 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.25                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.25                  U     fbld	(%ecx)
 # CHECK-NEXT:  2      1     1.00                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  4      4     1.00                  U     fnclex
-# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  2      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  2      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  2      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.25                  U     fcompp
-# CHECK-NEXT:  1      2     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      2     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      2     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      2     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.25                  U     fcos
 # CHECK-NEXT:  2      2     1.00                  U     fdecstp
-# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2), %st
 # CHECK-NEXT:  2      22    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  2      22    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(2)
 # CHECK-NEXT:  3      25    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  3      25    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2), %st
 # CHECK-NEXT:  2      27    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  2      27    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  3      30    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  3      30    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      100   0.25                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     0.50                  U     fldz
-# CHECK-NEXT:  1      4     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      4     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      4     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      4     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      11    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      11    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  3      14    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  3      14    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.25                  U     frstor	(%eax)
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      2     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      2     1.00                  U     fucompp
-# CHECK-NEXT:  1      2     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      2     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      2     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      2     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fxam
 # CHECK-NEXT:  15     17    4.00                  U     fxch	%st(1)
@@ -375,26 +375,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     f2xm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fabs
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fadds	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fiaddl	(%ecx)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   fbstp	(%eax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fchs
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00   1.00    -     fnclex
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fcoms	(%ecx)
@@ -404,24 +404,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fcompl	(%eax)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcompp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcos
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     fdecstp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivrl	(%eax)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     ffree	%st(0)
@@ -455,12 +455,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     fldln2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     fldpi
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     fldz
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fimuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fimull	(%eax)
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     fnop
@@ -488,20 +488,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     frstor	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubrl	(%eax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     ftst
@@ -510,8 +510,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucompp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fxam
 # CHECK-NEXT:  -      -     4.00   2.00    -      -      -     4.00   5.00    -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s
index aecb4a7ab9211b..75cca5297704b2 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.25                  U     f2xm1
 # CHECK-NEXT:  1      1     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.25                  U     fbld	(%ecx)
 # CHECK-NEXT:  2      1     1.00                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  4      4     1.00                  U     fnclex
-# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      3     1.00                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      3     1.00                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  2      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  2      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  2      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      100   0.25                  U     fcompp
-# CHECK-NEXT:  1      2     1.00                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      2     1.00                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      2     1.00                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      2     1.00                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.25                  U     fcos
 # CHECK-NEXT:  2      2     1.00                  U     fdecstp
-# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdiv	%st(2), %st
 # CHECK-NEXT:  2      22    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  2      22    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(2)
 # CHECK-NEXT:  3      25    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  3      25    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2), %st
 # CHECK-NEXT:  2      27    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  2      27    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(1)
-# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st(2)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      20    1.00                  U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  3      30    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  3      30    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      100   0.25                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  2      1     1.00                  U     fldln2
 # CHECK-NEXT:  2      1     1.00                  U     fldpi
 # CHECK-NEXT:  1      1     0.50                  U     fldz
-# CHECK-NEXT:  1      4     1.00                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      4     1.00                  U     fmul	%st(2)
+# CHECK-NEXT:  1      4     1.00                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      4     1.00                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      11    1.00    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      11    1.00    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      4     1.00                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  3      14    1.00    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  3      14    1.00    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     0.50                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.25                  U     frstor	(%eax)
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  2      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  3      13    2.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      2     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      2     1.00                  U     fucompp
-# CHECK-NEXT:  1      2     1.00                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      2     1.00                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      2     1.00                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      2     1.00                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  2      2     0.50                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fxam
 # CHECK-NEXT:  15     17    4.00                  U     fxch	%st(1)
@@ -375,26 +375,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     f2xm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fabs
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fadd	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fadds	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     faddp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fiadds	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fiaddl	(%ecx)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   fbstp	(%eax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fchs
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00   1.00    -     fnclex
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fcom	%st(3)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fcoms	(%ecx)
@@ -404,24 +404,24 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fcomps	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fcompl	(%eax)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcompp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcomi	%st(3)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcompi	%st(3)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fcos
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     fdecstp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdiv	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivr	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fdivrp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivrs	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fidivrl	(%eax)
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     ffree	%st(0)
@@ -455,12 +455,12 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     fldln2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     fldpi
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     fldz
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmul	%st(2), %st
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fimuls	(%ecx)
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     fimull	(%eax)
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     fnop
@@ -488,20 +488,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     frstor	(%eax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsub	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubr	%st(2), %st
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubrs	(%ecx)
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     fisubrl	(%eax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     ftst
@@ -510,8 +510,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucompp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucomi	%st(3)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucompi	%st(3)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucomi	%st(3), %st
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     0.50   0.50    -     wait
 # CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     fxam
 # CHECK-NEXT:  -      -     4.00   2.00    -      -      -     4.00   5.00    -     fxch	%st(1)
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s
index 2f5f6ef08f1c72..030b71fb7b63f7 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s
@@ -5,7 +5,7 @@ f2xm1
 
 fabs
 
-fadd %st(0), %st(1)
+fadd %st, %st(1)
 fadd %st(2)
 fadds (%ecx)
 faddl (%ecx)
@@ -21,14 +21,14 @@ fchs
 
 fnclex
 
-fcmovb %st(1), %st(0)
-fcmovbe %st(1), %st(0)
-fcmove %st(1), %st(0)
-fcmovnb %st(1), %st(0)
-fcmovnbe %st(1), %st(0)
-fcmovne %st(1), %st(0)
-fcmovnu %st(1), %st(0)
-fcmovu %st(1), %st(0)
+fcmovb %st(1), %st
+fcmovbe %st(1), %st
+fcmove %st(1), %st
+fcmovnb %st(1), %st
+fcmovnbe %st(1), %st
+fcmovne %st(1), %st
+fcmovnu %st(1), %st
+fcmovu %st(1), %st
 
 fcom %st(1)
 fcom %st(3)
@@ -47,7 +47,7 @@ fcos
 
 fdecstp
 
-fdiv %st(0), %st(1)
+fdiv %st, %st(1)
 fdiv %st(2)
 fdivs (%ecx)
 fdivl (%eax)
@@ -56,7 +56,7 @@ fdivp %st(2)
 fidivs (%ecx)
 fidivl (%eax)
 
-fdivr %st(0), %st(1)
+fdivr %st, %st(1)
 fdivr %st(2)
 fdivrs (%ecx)
 fdivrl (%eax)
@@ -106,7 +106,7 @@ fldln2
 fldpi
 fldz
 
-fmul %st(0), %st(1)
+fmul %st, %st(1)
 fmul %st(2)
 fmuls (%ecx)
 fmull (%eax)
@@ -153,7 +153,7 @@ fnstsw (%eax)
 frstor (%eax)
 fsave (%eax)
 
-fsub %st(0), %st(1)
+fsub %st, %st(1)
 fsub %st(2)
 fsubs (%ecx)
 fsubl (%eax)
@@ -162,7 +162,7 @@ fsubp %st(2)
 fisubs (%ecx)
 fisubl (%eax)
 
-fsubr %st(0), %st(1)
+fsubr %st, %st(1)
 fsubr %st(2)
 fsubrs (%ecx)
 fsubrl (%eax)
@@ -208,26 +208,26 @@ fyl2xp1
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      100   0.25                  U     f2xm1
 # CHECK-NEXT:  1      2     1.00                  U     fabs
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fadd	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fadds	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     faddl	(%ecx)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     faddp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     faddp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fiadds	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fiaddl	(%ecx)
 # CHECK-NEXT:  1      100   0.25                  U     fbld	(%ecx)
 # CHECK-NEXT:  1      100   0.25                  U     fbstp	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fchs
 # CHECK-NEXT:  1      100   0.25                  U     fnclex
-# CHECK-NEXT:  1      100   0.25                  U     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  1      100   0.25                  U     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  1      100   0.25                  U     fcmovb	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmovbe	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmove	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmovnb	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmovne	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmovnu	%st(1), %st
+# CHECK-NEXT:  1      100   0.25                  U     fcmovu	%st(1), %st
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fcom	%st(3)
 # CHECK-NEXT:  1      8     1.00                  U     fcoms	(%ecx)
@@ -237,24 +237,24 @@ fyl2xp1
 # CHECK-NEXT:  1      8     1.00                  U     fcomps	(%ecx)
 # CHECK-NEXT:  1      8     1.00                  U     fcompl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fcompp
-# CHECK-NEXT:  1      9     0.50                  U     fcomi	%st(3)
-# CHECK-NEXT:  1      9     0.50                  U     fcompi	%st(3)
+# CHECK-NEXT:  1      9     0.50                  U     fcomi	%st(3), %st
+# CHECK-NEXT:  1      9     0.50                  U     fcompi	%st(3), %st
 # CHECK-NEXT:  1      100   0.25                  U     fcos
 # CHECK-NEXT:  1      11    1.00                  U     fdecstp
-# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdiv	%st(2), %st
 # CHECK-NEXT:  1      22    1.00    *             U     fdivs	(%ecx)
 # CHECK-NEXT:  1      22    1.00    *             U     fdivl	(%eax)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivp	%st, %st(2)
 # CHECK-NEXT:  1      22    1.00    *             U     fidivs	(%ecx)
 # CHECK-NEXT:  1      22    1.00    *             U     fidivl	(%eax)
-# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivr	%st(2), %st
 # CHECK-NEXT:  1      22    1.00    *             U     fdivrs	(%ecx)
 # CHECK-NEXT:  1      22    1.00    *             U     fdivrl	(%eax)
-# CHECK-NEXT:  1      15    1.00                  U     fdivrp	%st(1)
-# CHECK-NEXT:  1      15    1.00                  U     fdivrp	%st(2)
+# CHECK-NEXT:  1      15    1.00                  U     fdivrp	%st, %st(1)
+# CHECK-NEXT:  1      15    1.00                  U     fdivrp	%st, %st(2)
 # CHECK-NEXT:  1      22    1.00    *             U     fidivrs	(%ecx)
 # CHECK-NEXT:  1      22    1.00    *             U     fidivrl	(%eax)
 # CHECK-NEXT:  1      11    1.00                  U     ffree	%st(0)
@@ -288,12 +288,12 @@ fyl2xp1
 # CHECK-NEXT:  1      11    1.00                  U     fldln2
 # CHECK-NEXT:  1      11    1.00                  U     fldpi
 # CHECK-NEXT:  1      8     0.50                  U     fldz
-# CHECK-NEXT:  1      3     0.50                  U     fmul	%st(0), %st(1)
-# CHECK-NEXT:  1      3     0.50                  U     fmul	%st(2)
+# CHECK-NEXT:  1      3     0.50                  U     fmul	%st, %st(1)
+# CHECK-NEXT:  1      3     0.50                  U     fmul	%st(2), %st
 # CHECK-NEXT:  2      10    0.50    *             U     fmuls	(%ecx)
 # CHECK-NEXT:  2      10    0.50    *             U     fmull	(%eax)
-# CHECK-NEXT:  1      3     0.50                  U     fmulp	%st(1)
-# CHECK-NEXT:  1      3     0.50                  U     fmulp	%st(2)
+# CHECK-NEXT:  1      3     0.50                  U     fmulp	%st, %st(1)
+# CHECK-NEXT:  1      3     0.50                  U     fmulp	%st, %st(2)
 # CHECK-NEXT:  2      10    0.50    *             U     fimuls	(%ecx)
 # CHECK-NEXT:  2      10    0.50    *             U     fimull	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     fnop
@@ -321,20 +321,20 @@ fyl2xp1
 # CHECK-NEXT:  1      100   0.25                  U     frstor	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     wait
 # CHECK-NEXT:  1      100   0.25                  U     fnsave	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsub	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fsubs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fsubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubr	%st(2), %st
 # CHECK-NEXT:  1      10    1.00    *             U     fsubrs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fsubrl	(%eax)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(1)
-# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st(2)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(1)
+# CHECK-NEXT:  1      3     1.00                  U     fsubrp	%st, %st(2)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubrs	(%ecx)
 # CHECK-NEXT:  1      10    1.00    *             U     fisubrl	(%eax)
 # CHECK-NEXT:  1      1     1.00                  U     ftst
@@ -343,8 +343,8 @@ fyl2xp1
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(1)
 # CHECK-NEXT:  1      1     1.00                  U     fucomp	%st(3)
 # CHECK-NEXT:  1      1     1.00                  U     fucompp
-# CHECK-NEXT:  1      9     0.50                  U     fucomi	%st(3)
-# CHECK-NEXT:  1      9     0.50                  U     fucompi	%st(3)
+# CHECK-NEXT:  1      9     0.50                  U     fucomi	%st(3), %st
+# CHECK-NEXT:  1      9     0.50                  U     fucompi	%st(3), %st
 # CHECK-NEXT:  1      1     1.00                  U     wait
 # CHECK-NEXT:  1      1     1.00                  U     fxam
 # CHECK-NEXT:  1      1     0.25                  U     fxch	%st(1)
@@ -377,26 +377,26 @@ fyl2xp1
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     f2xm1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fabs
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fadd	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fadd	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fadd	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fadd	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fadds	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     faddl	(%ecx)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     faddp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     faddp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     faddp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     faddp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fiadds	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fiaddl	(%ecx)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fbld	(%ecx)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fbstp	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fchs
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fnclex
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmove	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovnb	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovnbe	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovne	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovnu	%st(1), %st(0)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovu	%st(1), %st(0)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovb	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovbe	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmove	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovnb	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovnbe	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovne	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovnu	%st(1), %st
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcmovu	%st(1), %st
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fcom	%st(1)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fcom	%st(3)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fcoms	(%ecx)
@@ -406,24 +406,24 @@ fyl2xp1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fcomps	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fcompl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fcompp
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fcomi	%st(3)
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fcompi	%st(3)
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fcomi	%st(3), %st
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fcompi	%st(3), %st
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fcos
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fdecstp
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdiv	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdiv	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdiv	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdiv	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fdivs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fdivl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fidivs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fidivl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivr	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivr	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fdivrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fdivrl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivrp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivrp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fdivrp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fidivrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fidivrl	(%eax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     ffree	%st(0)
@@ -457,12 +457,12 @@ fyl2xp1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fldln2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     fldpi
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50    -     0.50    -     fldz
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmul	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmul	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmul	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmul	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50    -      -      -     fmuls	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50    -      -      -     fmull	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmulp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmulp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmulp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -     fmulp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50    -      -      -     fimuls	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50    -      -      -     fimull	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fnop
@@ -490,20 +490,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     frstor	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     wait
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     fnsave	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsub	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsub	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsub	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsub	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fsubs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fsubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fisubs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fisubl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubr	%st(0), %st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubr	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubr	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubr	%st(2), %st
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fsubrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fsubrl	(%eax)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubrp	%st(1)
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubrp	%st(2)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubrp	%st, %st(1)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fsubrp	%st, %st(2)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fisubrs	(%ecx)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     fisubrl	(%eax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     ftst
@@ -512,8 +512,8 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fucomp	%st(1)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fucomp	%st(3)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     fucompp
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fucomi	%st(3)
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fucompi	%st(3)
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fucomi	%st(3), %st
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50    -     0.50    -      -     fucompi	%st(3), %st
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     wait
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     fxam
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     fxch	%st(1)
diff --git a/llvm/tools/yaml2obj/yaml2wasm.cpp b/llvm/tools/yaml2obj/yaml2wasm.cpp
index 2d3e3b71f0868e..7d08e62bcedd81 100644
--- a/llvm/tools/yaml2obj/yaml2wasm.cpp
+++ b/llvm/tools/yaml2obj/yaml2wasm.cpp
@@ -172,7 +172,8 @@ int WasmWriter::writeSectionContent(raw_ostream &OS,
       case wasm::WASM_SYMBOL_TYPE_GLOBAL:
       case wasm::WASM_SYMBOL_TYPE_EVENT:
         encodeULEB128(Info.ElementIndex, SubSection.GetStream());
-        if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0)
+        if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
+            (Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
           writeStringRef(Info.Name, SubSection.GetStream());
         break;
       case wasm::WASM_SYMBOL_TYPE_DATA:
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index 2f9b428b8cfe8b..463609edb73b33 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -842,6 +842,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("f32mem",              TYPE_M)
   TYPE("ssmem",               TYPE_M)
   TYPE("RST",                 TYPE_ST)
+  TYPE("RSTi",                TYPE_ST)
   TYPE("i128mem",             TYPE_M)
   TYPE("i256mem",             TYPE_M)
   TYPE("i512mem",             TYPE_M)
@@ -964,6 +965,7 @@ OperandEncoding
 RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
                                                 uint8_t OpSize) {
   ENCODING("RST",             ENCODING_FP)
+  ENCODING("RSTi",            ENCODING_FP)
   ENCODING("GR16",            ENCODING_RM)
   ENCODING("GR32",            ENCODING_RM)
   ENCODING("GR32orGR64",      ENCODING_RM)
diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_package.bat
index 51f425633a3d69..6a0b19b719605c 100755
--- a/llvm/utils/release/build_llvm_package.bat
+++ b/llvm/utils/release/build_llvm_package.bat
@@ -54,7 +54,7 @@ svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lldb/%branch% llvm
 REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226.
 set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DCMAKE_INSTALL_UCRT_LIBRARIES=ON -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DLLDB_RELOCATABLE_PYTHON=1 -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: "
 
-REM TODO: Run all tests, including lld and compiler-rt.
+REM TODO: Run the "check-all" tests.
 
 set "VSCMD_START_DIR=%CD%"
 call "%vsdevcmd%" -arch=x86
@@ -66,7 +66,9 @@ REM Work around VS2017 bug by using MinSizeRel.
 cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b
 ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
-ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
+ninja check-clang || ninja check-clang || ninja check-clang || exit /b
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
 cd..
 
 mkdir build32
@@ -76,7 +78,9 @@ set CXX=..\build32_stage0\bin\clang-cl
 cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
 ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
-ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
+ninja check-clang || ninja check-clang || ninja check-clang || exit /b
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
 ninja package || exit /b
 cd ..
 
@@ -101,7 +105,9 @@ REM Work around VS2017 bug by using MinSizeRel.
 cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b
 ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
-ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
+ninja check-clang || ninja check-clang || ninja check-clang || exit /b
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
 cd..
 
 mkdir build64
@@ -111,6 +117,8 @@ set CXX=..\build64_stage0\bin\clang-cl
 cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% ..\llvm || exit /b
 ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
-ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
+ninja check-clang || ninja check-clang || ninja check-clang || exit /b
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
 ninja package || exit /b
 cd ..
diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
index cea00fff07accd..80a859196e68df 100644
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -450,9 +450,6 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which,
 
 OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
                                        ompt_callback_t *callback) {
-  if (!ompt_enabled.enabled)
-    return ompt_get_callback_failure;
-
   switch (which) {
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
@@ -460,7 +457,7 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
     if (ompt_event_implementation_status(event_name)) {                        \
       ompt_callback_t mycb =                                                   \
           (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);           \
-      if (ompt_enabled.event_name && mycb) {                                   \
+      if (mycb) {                                                              \
         *callback = mycb;                                                      \
         return ompt_get_callback_success;                                      \
       }                                                                        \
@@ -483,15 +480,11 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
 OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level,
                                             ompt_data_t **parallel_data,
                                             int *team_size) {
-  if (!ompt_enabled.enabled)
-    return 0;
   return __ompt_get_parallel_info_internal(ancestor_level, parallel_data,
                                            team_size);
 }
 
 OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) {
-  if (!ompt_enabled.enabled)
-    return ompt_state_work_serial;
   int thread_state = __ompt_get_state_internal(wait_id);
 
   if (thread_state == ompt_state_undefined) {
@@ -506,8 +499,6 @@ OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) {
  ****************************************************************************/
 
 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) {
-  if (!ompt_enabled.enabled)
-    return NULL;
   return __ompt_get_thread_data_internal();
 }
 
@@ -516,8 +507,6 @@ OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
                                         ompt_frame_t **task_frame,
                                         ompt_data_t **parallel_data,
                                         int *thread_num) {
-  if (!ompt_enabled.enabled)
-    return 0;
   return __ompt_get_task_info_internal(ancestor_level, type, task_data,
                                        task_frame, parallel_data, thread_num);
 }
@@ -592,7 +581,7 @@ OMPT_API_ROUTINE int ompt_get_place_num(void) {
 #if !KMP_AFFINITY_SUPPORTED
   return -1;
 #else
-  if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
+  if (__kmp_get_gtid() < 0)
     return -1;
 
   int gtid;
@@ -613,7 +602,7 @@ OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
 #if !KMP_AFFINITY_SUPPORTED
   return 0;
 #else
-  if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
+  if (__kmp_get_gtid() < 0)
     return 0;
 
   int i, gtid, place_num, first_place, last_place, start, end;
@@ -648,7 +637,7 @@ OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
  ****************************************************************************/
 
 OMPT_API_ROUTINE int ompt_get_proc_id(void) {
-  if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
+  if (__kmp_get_gtid() < 0)
     return -1;
 #if KMP_OS_LINUX
   return sched_getcpu();
diff --git a/openmp/runtime/test/ompt/misc/api_calls_without_ompt.c b/openmp/runtime/test/ompt/misc/api_calls_without_ompt.c
deleted file mode 100644
index 976c20f47f1e22..00000000000000
--- a/openmp/runtime/test/ompt/misc/api_calls_without_ompt.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// RUN: %libomp-compile-and-run | FileCheck %s
-// REQUIRES: ompt
-
-#define _BSD_SOURCE
-#define _DEFAULT_SOURCE
-
-#include <stdio.h>
-#include <inttypes.h>
-#include <omp.h>
-#include <ompt.h>
-
-static ompt_set_callback_t ompt_set_callback;
-static ompt_get_callback_t ompt_get_callback;
-static ompt_get_state_t ompt_get_state;
-static ompt_get_task_info_t ompt_get_task_info;
-static ompt_get_thread_data_t ompt_get_thread_data;
-static ompt_get_parallel_info_t ompt_get_parallel_info;
-static ompt_get_unique_id_t ompt_get_unique_id;
-static ompt_get_num_procs_t ompt_get_num_procs;
-static ompt_get_num_places_t ompt_get_num_places;
-static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
-static ompt_get_place_num_t ompt_get_place_num;
-static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
-static ompt_get_proc_id_t ompt_get_proc_id;
-static ompt_enumerate_states_t ompt_enumerate_states;
-static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
-
-int main() {
-  // Call OpenMP API function to force initialization of OMPT.
-  // (omp_get_thread_num() does not work because it just returns 0 if the
-  // runtime isn't initialized yet...)
-  omp_get_num_threads();
-
-  ompt_data_t *tdata = ompt_get_thread_data();
-  uint64_t tvalue = tdata ? tdata->value : 0;
-
-  printf("%" PRIu64 ": ompt_get_num_places()=%d\n", tvalue,
-         ompt_get_num_places());
-
-  printf("%" PRIu64 ": ompt_get_place_proc_ids()=%d\n", tvalue,
-         ompt_get_place_proc_ids(0, 0, NULL));
-
-  printf("%" PRIu64 ": ompt_get_place_num()=%d\n", tvalue,
-         ompt_get_place_num());
-
-  printf("%" PRIu64 ": ompt_get_partition_place_nums()=%d\n", tvalue,
-         ompt_get_partition_place_nums(0, NULL));
-
-  printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", tvalue, ompt_get_proc_id());
-
-  printf("%" PRIu64 ": ompt_get_num_procs()=%d\n", tvalue,
-         ompt_get_num_procs());
-
-  ompt_callback_t callback;
-  printf("%" PRIu64 ": ompt_get_callback()=%d\n", tvalue,
-         ompt_get_callback(ompt_callback_thread_begin, &callback));
-
-  printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL));
-
-  int state = omp_state_undefined;
-  const char *state_name;
-  printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue,
-         ompt_enumerate_states(state, &state, &state_name));
-
-  int impl = ompt_mutex_impl_unknown;
-  const char *impl_name;
-  printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue,
-         ompt_enumerate_mutex_impls(impl, &impl, &impl_name));
-
-  printf("%" PRIu64 ": ompt_get_thread_data()=%p\n", tvalue,
-         ompt_get_thread_data());
-
-  printf("%" PRIu64 ": ompt_get_parallel_info()=%d\n", tvalue,
-         ompt_get_parallel_info(0, NULL, NULL));
-
-  printf("%" PRIu64 ": ompt_get_task_info()=%d\n", tvalue,
-         ompt_get_task_info(0, NULL, NULL, NULL, NULL, NULL));
-
-  // Check if libomp supports the callbacks for this test.
-
-  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
-
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_get_num_places()={{[0-9]+}}
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids()={{[0-9]+}}
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=-1
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_partition_place_nums()=0
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=-1
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_procs()={{[0-9]+}}
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_callback()=0
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_state()=0
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_states()=1
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_enumerate_mutex_impls()=1
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_thread_data()=[[NULL]]
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_parallel_info()=0
-
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_task_info()=0
-
-  return 0;
-}
-
-int ompt_initialize(ompt_function_lookup_t lookup, ompt_data_t *tool_data) {
-  ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
-  ompt_get_callback = (ompt_get_callback_t)lookup("ompt_get_callback");
-  ompt_get_state = (ompt_get_state_t)lookup("ompt_get_state");
-  ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info");
-  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
-  ompt_get_parallel_info =
-      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
-  ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
-
-  ompt_get_num_procs = (ompt_get_num_procs_t)lookup("ompt_get_num_procs");
-  ompt_get_num_places = (ompt_get_num_places_t)lookup("ompt_get_num_places");
-  ompt_get_place_proc_ids =
-      (ompt_get_place_proc_ids_t)lookup("ompt_get_place_proc_ids");
-  ompt_get_place_num = (ompt_get_place_num_t)lookup("ompt_get_place_num");
-  ompt_get_partition_place_nums =
-      (ompt_get_partition_place_nums_t)lookup("ompt_get_partition_place_nums");
-  ompt_get_proc_id = (ompt_get_proc_id_t)lookup("ompt_get_proc_id");
-  ompt_enumerate_states =
-      (ompt_enumerate_states_t)lookup("ompt_enumerate_states");
-  ompt_enumerate_mutex_impls =
-      (ompt_enumerate_mutex_impls_t)lookup("ompt_enumerate_mutex_impls");
-
-  printf("0: NULL_POINTER=%p\n", (void *)NULL);
-  return 0; // no success -> OMPT not enabled
-}
-
-void ompt_finalize(ompt_data_t *tool_data) {
-  printf("0: ompt_event_runtime_shutdown\n");
-}
-
-ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
-                                          const char *runtime_version) {
-  static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,
-                                                            &ompt_finalize, 0};
-  return &ompt_start_tool_result;
-}